学习一篇NC的单细胞文章（六）：Gene expression signatures

学习一篇NC的单细胞文章（一）：质控
 学习一篇NC的单细胞文章（二）：标准化
 学习一篇NC的单细胞文章（三）：复杂热图
 学习一篇NC的单细胞文章（四）：Clustering
学习一篇NC的单细胞文章（五）：tSNE

在第五节，由于大部分细胞（868 个）都被归为上皮细胞群中（Fig2 c），这868个细胞可被分成5个cluster，接着对这5个cluster细胞进行探索。我们使用一组来自对乳腺肿块的非监督分析的基因表达特征对5个cluster进行了研究。这些基因表达特征通过比较三阴性乳腺癌（TNBC）的四个亚型(ERBB2 amplicon，Luminal Subtype 、Basal epithelial-cell enriched 和Luminal epithelial gene cluster containing ER)而建立。先看看这5个clusters的基底细胞来源的细胞群有多少。

## 读取数据
basal_PNAS_all <- read.table("data/genes_for_basal_vs_non_basal_tnbc_PNAS.txt", header = TRUE, sep = "\t")
#提取Basal.epithelial.cell.enriched.cluster的基因
basal_PNAS_long <- basal_PNAS_all$Basal.epithelial.cell.enriched.cluster
#合并剩下17个基因
basal_PNAS <- intersect(basal_PNAS_long, rownames(mat_ct))
> basal_PNAS
 [1] "SOX9"   "GALNT3" "CDH3"   "LAMC2"  "CX3CL1" "TRIM29" "KRT17"  "KRT5"   "CHI3L2"
[10] "SLPI"   "NFIB"   "MRAS"   "TGFB2"  "CAPN6"  "DMD"    "FABP7"  "CXCL1" 
#算出17个basal_PNAS基因在1112个细胞的表达平均值
basal_PNAS_avg_exprs <- apply(mat_ct[match(basal_PNAS, rownames(mat_ct)),], 2, mean)
#检查一下数据
all.equal(names(basal_PNAS_avg_exprs), colnames(mat_ct))
#提取868个上皮细胞群体的17个basal_PNAS基因表达平均值
basal_PNAS_avg_exprs <- basal_PNAS_avg_exprs[which(pd_ct$cell_types_cl_all == "epithelial")]

#检查一下数据
all.equal(colnames(HSMM_allepith_clustering), names(basal_PNAS_avg_exprs))
#把17个basal_PNAS基因表达平均值赋给HSMM_allepith_clustering，以便于后续分析
pData(HSMM_allepith_clustering)$basal_PNAS_avg_exprs <- basal_PNAS_avg_exprs
#画figS9b
plot_cell_clusters(HSMM_allepith_clustering, 1, 2, color = "basal_PNAS_avg_exprs", cell_size = 2) + facet_wrap(~patient) +
  scale_color_continuous(low = "yellow", high = "blue")

figS9b

#画figS9a
  plot_cell_clusters(HSMM_allepith_clustering, 1, 2, color = "basal_PNAS_avg_exprs", cell_size = 2) + facet_wrap(~Cluster) +
  scale_color_continuous(low = "yellow", high = "blue")

figS9a

figS9a:大多数TNBC样本都有basal gene signature的表达。figS9b:在868个上皮细胞群中，cluster2的basal gene signature表达量最丰富。

接着，使用另外一个基因表达特征数据集TNBCtype4 signatures，这个signatures根据基因将细胞分为6个类：basal_like_1、basal_like_2、immunomodulatory、mesenchymal、mesenchymal_stem_like和luminal_ar。作者将基因表达特征中上调基因的平均表达值减去下调基因的平均表达值，将差值作为每个细胞在TNBCtype4 signatures （basal_like_1、basal_like_2、mesenchymal和luminal_ar）中的每个基因表达值，挑选最高基因表达值对应的signature，将其分配给对应的细胞。

#读取数据
lehman_long <- read.table("data/Lehman_signature.txt", sep = "\t", header = TRUE, stringsAsFactors = FALSE)
#这个for循环提取了lehman_long里面的四列gene、regulation、no_samples和signature，建立一个data.rrame
for (i in 0:5) {
  
  gene <- "gene"
  regulation <- "regulation"
  no_samples <- "no_samples"
  signature <- "signature"
  
  if (i == 0) {
    lehman <- lehman_long[, 1:4]
    lehman <- lehman[-which(lehman$signature == ""),]
  }
  
  
  if (i > 0) {
    gene <- paste("gene", i, sep = ".")
    regulation <- paste("regulation", i, sep = ".")
    no_samples <- paste("no_samples", i, sep = ".")
    signature <- paste("signature", i, sep = ".")
    
    mat_to_bind <- lehman_long[, c(gene, regulation, no_samples, signature)]
    colnames(mat_to_bind) <- c("gene", "regulation", "no_samples", "signature")
    if (length(which(is.na(mat_to_bind$no_samples))) > 0 )
      mat_to_bind <- mat_to_bind[-which(mat_to_bind$signature == ""),]
    lehman <- rbind(lehman, mat_to_bind)
  }
}
#删掉一些mat_ct没有检测到的基因
lehman <- lehman[which(!is.na(match(lehman$gene, rownames(mat_ct)))),]
lehman_signatures <- unique(lehman$signature)
lehman_avg_exps <- apply(mat_ct, 2, function(x){
  
  mns <- matrix(NA, nrow = length(lehman_signatures), ncol = 2)
  rownames(mns) <- lehman_signatures
  for (s in 1:length(lehman_signatures)) {
    sign <- lehman_signatures[s] # current signature
    lehman_here <- lehman %>%
      dplyr::filter(signature == sign)
    lehman_here_up <- lehman_here %>%
      dplyr::filter(regulation == "UP")
    lehman_here_down <- lehman_here %>%
      dplyr::filter(regulation == "DOWN")
    
  
    idx_genes_up <- match(lehman_here_up$gene, rownames(mat_ct)) 
    idx_genes_down <- match(lehman_here_down$gene, rownames(mat_ct))
    
    mns[s,] <- c(mean(x[idx_genes_up]), mean(x[idx_genes_down])) #算上调、下调的基因在样本中的平均表达值
  }
  return(mns)
})
#检查数据
all.equal(colnames(lehman_avg_exps), rownames(pd_ct))
#只看868个上皮细胞的情况
lehman_avg_exprs_epithelial <- lehman_avg_exps[,which(pd_ct$cell_types_cl_all == "epithelial")]
#提取lehman_avg_exps前面6行，对应的是up
lehman_avg_ups <- lehman_avg_exps[c(1:6), ]
rownames(lehman_avg_ups) <- lehman_signatures
all.equal(colnames(lehman_avg_ups), rownames(pd_ct))
lehman_avg_ups_epithelial <- lehman_avg_ups[,which(pd_ct$cell_types_cl_all == "epithelial")]
#提取lehman_avg_exps后面6行，对应的是down
lehman_avg_downs <- lehman_avg_exps[c(7:12),]
rownames(lehman_avg_downs) <- lehman_signatures
all.equal(colnames(lehman_avg_downs), rownames(pd_ct))
lehman_avg_downs_epithelial <- lehman_avg_downs[,which(pd_ct$cell_types_cl_all == "epithelial")]
#上调基因的平均表达值减去下调基因的平均表达值
lehman_avg_both <- lehman_avg_ups - lehman_avg_downs
all.equal(colnames(lehman_avg_both), rownames(pd_ct))
#挑选最高基因表达值对应的signature，将其分配给对应的细胞。
assignments_lehman_both <- apply(lehman_avg_both, 2, function(x){rownames(lehman_avg_both)[which.max(x)]})
assignments_lehman_both_epithelial <- assignments_lehman_both[which(pd_ct$cell_types_cl_all == "epithelial")]
#删除immunomodulatory和mesenchymal_stem_like signature
lehman_avg_both_epithelial_new <- lehman_avg_both_epithelial[-which(rownames(lehman_avg_both_epithelial) %in% c("immunomodulatory", "mesenchymal_stem_like")),]
assignments_lehman_both_epithelial_new <- apply(lehman_avg_both_epithelial_new, 2, function(x){rownames(lehman_avg_both_epithelial_new)[which.max(x)]})

接下来画图，同样地，需要对heatmap函数代码进行修改。

ha_lehman_epith_pat <- list()
for (i in 1:length(patients_now)) {
  
  if (i == 1)
    ha_lehman_epith_pat[[i]] <- HeatmapAnnotation(df=data.frame(cluster_all = clusterings_sep_allepith[[i]]), 
                                                  col = list(cluster_all = c("1" = "#ee204d", "2" = "#17806d", "3" = "#b2ec5d", "4" = "#cda4de", "5" = "#1974d2")),
                                                  annotation_name_side = "left", annotation_name_gp = gpar(fontsize = 12),
                                                  annotation_legend_param = list(list(title_position = "topcenter", title = "cluster")),
                                                  show_annotation_name = FALSE,
                                                
                                                  gap = unit(c(2), "mm"),
                                                  show_legend = FALSE)
  
  if (i > 1 && i != 5 )
    ha_lehman_epith_pat[[i]] <- HeatmapAnnotation(df=data.frame(cluster_all = clusterings_sep_allepith[[i]]), 
                                                  col = list(cluster_all = c("1" = "#ee204d", "2" = "#17806d", "3" = "#b2ec5d", "4" = "#cda4de", "5" = "#1974d2")),
                                                  annotation_name_side = "left", annotation_name_gp = gpar(fontsize = 12),
                                                  annotation_legend_param = list(list(title_position = "topcenter", title = "cluster")),
                                                  show_annotation_name = FALSE,
                                                  gap = unit(c(2), "mm"),
                                                  show_legend = FALSE)
  
  if (i == 5)
    ha_lehman_epith_pat[[i]] <- HeatmapAnnotation(df=data.frame(cluster_all = clusterings_sep_allepith[[i]]), 
                                                  col = list(cluster_all = c("1" = "#ee204d", "2" = "#17806d", "3" = "#b2ec5d", "4" = "#cda4de", "5" = "#1974d2")),
                                                  annotation_name_side = "right", annotation_name_gp = gpar(fontsize = 12),
                                                  annotation_legend_param = list(list(title_position = "topcenter",title = "cluster")),
                                                  show_annotation_name = FALSE,
                                                  gap = unit(c(2), "mm"),
                                                  show_legend = TRUE)
}
#检查数据
all.equal(names(lehmans_epith_pat_both), patients_now)
#将basal signature添加进去，以便后续作图
lehmans_epith_pat_both_wbasal_new <- lehmans_epith_pat_both_new
for (i in 1:length(patients_now)) {
  lehmans_epith_pat_both_wbasal_new[[i]] <- rbind(lehmans_epith_pat_both_new[[i]], pData(HSMM_allepith_clustering)$basal_PNAS_avg_exprs[which(HSMM_allepith_clustering$patient == patients_now[i])])
  rownames(lehmans_epith_pat_both_wbasal_new[[i]])[5] <- "intrinsic_basal"
}

# 画图
ht_sep_lehmans_both_wbasal_new <-
  Heatmap(lehmans_epith_pat_both_wbasal_new[[1]],
          col = colorRamp2(c(-0.7, 0, 1), c("blue","white", "red")),
          cluster_rows = FALSE,
          show_column_names = FALSE,
          column_title = patients_now[1],
          column_title_gp = gpar(fontsize = 12),
          top_annotation = ha_lehman_epith_pat[[1]],
          name = patients_now[1], 
          show_row_names = FALSE,
         
          heatmap_legend_param = list(title_gp = gpar(fontsize = 9), labels_gp = gpar(fontsize = 9))) +
  Heatmap(lehmans_epith_pat_both_wbasal_new[[2]],
          col = colorRamp2(c(-0.7, 0, 1), c("blue","white", "red")),
          cluster_rows = FALSE,
          show_column_names = FALSE,
          column_title = patients_now[2],
          column_title_gp = gpar(fontsize = 12),
          top_annotation = ha_lehman_epith_pat[[2]],
          name = patients_now[2], 
          show_row_names = FALSE,
          
          heatmap_legend_param = list(title_gp = gpar(fontsize = 9), labels_gp = gpar(fontsize = 9))) +
  Heatmap(lehmans_epith_pat_both_wbasal_new[[3]],
          col = colorRamp2(c(-0.7, 0, 1), c("blue","white", "red")),
          cluster_rows = FALSE,
          show_column_names = FALSE,
          column_title = patients_now[3],
          column_title_gp = gpar(fontsize = 12),
          top_annotation = ha_lehman_epith_pat[[3]],
          name = patients_now[3], 
          show_row_names = FALSE,
         
          heatmap_legend_param = list(title_gp = gpar(fontsize = 9), labels_gp = gpar(fontsize = 9))) +
  Heatmap(lehmans_epith_pat_both_wbasal_new[[4]],
          col = colorRamp2(c(-0.7, 0, 1), c("blue","white", "red")),
          cluster_rows = FALSE,
          show_column_names = FALSE,
          column_title = patients_now[4],
          column_title_gp = gpar(fontsize = 12),
          top_annotation = ha_lehman_epith_pat[[4]],
          name = patients_now[4], 
          show_row_names = FALSE,
         
          heatmap_legend_param = list(title_gp = gpar(fontsize = 9), labels_gp = gpar(fontsize = 9))) +
  Heatmap(lehmans_epith_pat_both_wbasal_new[[5]],
          col = colorRamp2(c(-0.7, 0, 1), c("blue","white", "red")),
          cluster_rows = FALSE,
          show_column_names = FALSE,
          column_title = patients_now[5],
          column_title_gp = gpar(fontsize = 12),
          top_annotation = ha_lehman_epith_pat[[5]],
          name = patients_now[5], 
          show_row_names = FALSE,
         
          heatmap_legend_param = list(title_gp = gpar(fontsize = 9), labels_gp = gpar(fontsize = 9))) +
  Heatmap(lehmans_epith_pat_both_wbasal_new[[6]],
          col = colorRamp2(c(-0.7, 0, 1), c("blue","white", "red")),
          cluster_rows = FALSE,
          row_names_side = "right",
          column_title = patients_now[6],
          column_title_gp = gpar(fontsize = 12),
          top_annotation = ha_lehman_epith_pat[[6]],
          name = patients_now[6], 
          show_column_names = FALSE,
          top_annotation_height = unit(c(2), "cm"),
         
          heatmap_legend_param = list(title_gp = gpar(fontsize = 9), labels_gp = gpar(fontsize = 9)))
#画fig3d
print(draw(ht_sep_lehmans_both_wbasal_new, annotation_legend_side = "right"))

fig3d

我们只需要把右边注释条PS一下，就可以达到跟文献的图片一模一样了。

#检查数据
all.equal(colnames(HSMM_allepith_clustering), names(assignments_lehman_both_epithelial_new))
pData(HSMM_allepith_clustering)$assignments_lehman_both_new <- assignments_lehman_both_epithelial_new
画fig3g
plot_cell_clusters(HSMM_allepith_clustering, 1, 2, color = "assignments_lehman_both_new", cell_size = 2) + facet_wrap(~patient)

fig3g

#画figS8
plot_cell_clusters(HSMM_allepith_clustering, 1, 2, color = "assignments_lehman_both_new", cell_size = 2) + facet_wrap(~Cluster)

figS8

cluster4也富集了 Basal-Like 1 signature，而cluster3高度富集了TNBCtype “Luminal Androgen Receptor” signature。为了更清楚的看到上皮细胞群的5个cluster对应的TNBCtype signatures的平均表达量，接着继续探索下去...

clust_avg_lehman_both_new <- matrix(NA, nrow = length(unique(HSMM_allepith_clustering$Cluster)), ncol = nrow(lehman_avg_both_epithelial_new))
#列名：cluster1,cluster2,cluster3,cluster4,cluster5
rownames(clust_avg_lehman_both_new) <- paste("clust", c(1:length(unique(HSMM_allepith_clustering$Cluster))), sep = "")
#行名：basal_like_1、basal_like_2、mesenchymal、luminal_ar"  
colnames(clust_avg_lehman_both_new) <- rownames(lehman_avg_both_epithelial_new)
#算出每个cluster的signatures 平均值
for (c in 1:length(unique(HSMM_allepith_clustering$Cluster))) {
  clust_avg_lehman_both_new[c,] <- apply(lehman_avg_both_epithelial_new[,which(HSMM_allepith_clustering$Cluster == c)], 1, mean)
}

clust_avg_lehman_both_new <- as.data.frame(clust_avg_lehman_both_new)
#增加一列cluster
clust_avg_lehman_both_new$Cluster <- rownames(clust_avg_lehman_both_new)
#拆分数据
clust_avg_lehman_melt_new <- melt(clust_avg_lehman_both_new, "Cluster")

#画fig3e
ggplot(clust_avg_lehman_melt_new, aes(Cluster, value, fill = factor(variable), color = factor(variable), 
                                      shape = factor(variable))) + 
  geom_point(size = 3, stroke = 1) +
  scale_shape_discrete(solid = T) + 
  #guides(colour = guide_legend(override.aes = list(size=3))) + 
  ylab("average expression of signature in cluster") +
  xlab("cluster") +
  ylim(c(-0.35, 0.5))

fig3e

可以看到：cluster2和4强烈地表达Basal-Like 1 signature，而cluster3显著表达Basal-Like 2 signature和 luminal AR signature

接着，读取另外一个gene signatures，作者通过从上调基因的平均表达量中减去下调基因的平均表达量，计算出three normal breast signatures下每个细胞的表达量(Lim et al., 2009a)，然后将每个细胞分配给其具有最高表达量的signatures。这三个normal breast signatures 是：mature luminal (ML)，basal和luminal progenitor (LP)，在每个signatures中，都有对应的上调基因和下调基因。

## normal signatures
ml_signature_long <- read.table("data/ML_signature.txt", sep = "\t", header = TRUE)
if (length(which(ml_signature_long$Symbol == "")) > 0)
#将空白行去掉
  ml_signature_long <- ml_signature_long[-which(ml_signature_long$Symbol == ""),]
 #按照基因字母进行排序，如果基因字母有一样的，则按照Average.log.fold.change绝对值的负数进行从小到大排序
ml_signature_long <- ml_signature_long[order(ml_signature_long$Symbol, -abs(ml_signature_long$Average.log.fold.change) ), ]
#对基因取唯一值，去重复
ml_signature_long <- ml_signature_long[ !duplicated(ml_signature_long$Symbol), ]
#总共有818个基因
ml_signature <- ml_signature_long[which(!is.na(match(ml_signature_long$Symbol, rownames(mat_ct)))), ]
#上调基因有384个
ml_up <- ml_signature[which(ml_signature$Average.log.fold.change > 0), ]
#下调基因有197个
ml_down <- ml_signature[which(ml_signature$Average.log.fold.change < 0), ]
#匹配一下
idx_ml_up <- match(ml_up$Symbol, rownames(mat_ct))
idx_ml_down <- match(ml_down$Symbol, rownames(mat_ct))
#读取basal signature，处理过程跟上面的一样的。
basal_signature_long <- read.table("data/basal_signature.txt", sep = "\t", header = TRUE)
if (length(which(basal_signature_long$Symbol == "")) > 0)
  basal_signature_long <- basal_signature_long[-which(basal_signature_long$Symbol == ""),]
basal_signature_long <- basal_signature_long[order(basal_signature_long$Symbol, -abs(basal_signature_long$Average.log.fold.change) ), ]
basal_signature_long <- basal_signature_long[ !duplicated(basal_signature_long$Symbol), ]
#总共有1335个基因
basal_signature <- basal_signature_long[which(!is.na(match(basal_signature_long$Symbol, rownames(mat_ct)))), ]
#上调基因有588个
basal_up <- basal_signature[which(basal_signature$Average.log.fold.change > 0), ]
#下调基因有757个
basal_down <- basal_signature[which(basal_signature$Average.log.fold.change < 0), ]
idx_basal_up <- match(basal_up$Symbol, rownames(mat_ct))
idx_basal_down <- match(basal_down$Symbol, rownames(mat_ct))

#读取LP signature,还是同样的操作
lp_signature_long <- read.table("data/Lp_signature.txt", sep = "\t", header = TRUE)
if (length(which(lp_signature_long$Symbol == "")) > 0)
  lp_signature_long <- lp_signature_long[-which(lp_signature_long$Symbol == ""),]
lp_signature_long <- lp_signature_long[order(lp_signature_long$Symbol, -abs(lp_signature_long$Average.log.fold.change) ), ]
lp_signature_long <- lp_signature_long[ !duplicated(lp_signature_long$Symbol), ]
lp_signature <- lp_signature_long[which(!is.na(match(lp_signature_long$Symbol, rownames(mat_ct)))), ]
lp_up <- lp_signature[which(lp_signature$Average.log.fold.change > 0), ]
lp_down <- lp_signature[which(lp_signature$Average.log.fold.change < 0), ]
idx_lp_up <- match(lp_up$Symbol, rownames(mat_ct))
idx_lp_down <- match(lp_down$Symbol, rownames(mat_ct))
#对ML、basal和LP 3个signatures基因，将上调基因的表达值减去下调基因表达值，并分别返回结果。
normsig_avg_exprs <- apply(mat_ct, 2, function(x){
  
  avg_ml_up <- mean(x[idx_ml_up])
  avg_ml_down <- mean(x[idx_ml_down])
  avg_ml_both <- avg_ml_up - avg_ml_down
  
  avg_basal_up <- mean(x[idx_basal_up])
  avg_basal_down <- mean(x[idx_basal_down])
  avg_basal_both <- avg_basal_up - avg_basal_down
  
  avg_lp_up <- mean(x[idx_lp_up])
  avg_lp_down <- mean(x[idx_lp_down])
  avg_lp_both <- avg_lp_up - avg_lp_down
  
  return(c(avg_ml_up, avg_basal_up, avg_lp_up, avg_ml_both, avg_basal_both, avg_lp_both))
})
rownames(normsig_avg_exprs) <- c("avg_ml_up", "avg_basal_up", "avg_lp_up", "avg_ml_both", "avg_basal_both", "avg_lp_both")
#检查数据
all.equal(colnames(normsig_avg_exprs), rownames(pd_ct))
#只看上皮细胞群
normsig_avg_exprs_epithelial <- normsig_avg_exprs[,which(pd_ct$cell_types_cl_all == "epithelial")]

normsig_avg_ups <- normsig_avg_exprs[c(1:3), ]
all.equal(colnames(normsig_avg_ups), rownames(pd_ct))
normsig_avg_ups_epithelial <- normsig_avg_ups[,which(pd_ct$cell_types_cl_all == "epithelial")]

normsig_avg_both <- normsig_avg_exprs[c(4:6),]
all.equal(colnames(normsig_avg_both), rownames(pd_ct))
normsig_avg_both_epithelial <- normsig_avg_both[,which(pd_ct$cell_types_cl_all == "epithelial")]
#挑选最大值=上调基因的平均表达值最大数值分配给对应的细胞类型
assignments_normsig_ups <- apply(normsig_avg_ups, 2, function(x){rownames(normsig_avg_ups)[which.max(x)]})
assignments_normsig_ups_epithelial <- assignments_normsig_ups[which(pd_ct$cell_types_cl_all == "epithelial")]
#上调基因的平均表达值-下调基因的平均表达值的最大数值分配给对应的细胞类型
assignments_normsig_both <- apply(normsig_avg_both, 2, function(x){rownames(normsig_avg_both)[which.max(x)]})
assignments_normsig_both_epithelial <- assignments_normsig_both[which(pd_ct$cell_types_cl_all == "epithelial")]
pd_ct_epith <- pd_ct[which(pd_ct$cell_types_cl_all == "epithelial"),]
normsig_epith_pat_both <- list()
normsig_epith_pat_ups <- list()
pds_epith_ct <- list()
for (i in 1:length(patients_now)) {
  normsig_epith_pat_both[[i]] <- normsig_avg_both_epithelial[,which(pd_ct_epith$patient == patients_now[i])]
  normsig_epith_pat_ups[[i]] <- normsig_avg_ups_epithelial[,which(pd_ct_epith$patient == patients_now[i])]
  pds_epith_ct[[i]] <- pds_ct[[i]][which(pds_ct[[i]]$cell_types_cl_all == "epithelial"),]
}
names(normsig_epith_pat_both) <- patients_now
names(normsig_epith_pat_ups) <- patients_now
names(pds_epith_ct) <- patients_now

ht_sep_normsig_both <-
  Heatmap(normsig_epith_pat_both[[1]],
          col = colorRamp2(c(-0.7, -0.2, 0.7), c("blue","white", "red")),
          cluster_rows = FALSE,
          show_column_names = FALSE,
          column_title = patients_now[1],
          top_annotation = ha_lehman_epith_pat[[1]],
          column_title_gp = gpar(fontsize = 12),
          show_row_names = FALSE,
          name = patients_now[1], 
        
          heatmap_legend_param = list(title_gp = gpar(fontsize = 9), labels_gp = gpar(fontsize = 9))) +
  Heatmap(normsig_epith_pat_both[[2]],
          col = colorRamp2(c(-0.7, -0.2, 0.7), c("blue","white", "red")),
          cluster_rows = FALSE,
          show_column_names = FALSE,
          column_title = patients_now[2],
          column_title_gp = gpar(fontsize = 12),
          top_annotation = ha_lehman_epith_pat[[2]],
          name = patients_now[2], 
          show_row_names = FALSE,
       
          heatmap_legend_param = list(title_gp = gpar(fontsize = 9), labels_gp = gpar(fontsize = 9))) +
  Heatmap(normsig_epith_pat_both[[3]],
          col = colorRamp2(c(-0.7, -0.2, 0.7), c("blue","white", "red")),
          cluster_rows = FALSE,
          show_column_names = FALSE,
          column_title = patients_now[3],
          column_title_gp = gpar(fontsize = 12),
          top_annotation = ha_lehman_epith_pat[[3]],
          name = patients_now[3], 
          show_row_names = FALSE,
          top_annotation_height = unit(c(2), "cm"),
          heatmap_legend_param = list(title_gp = gpar(fontsize = 9), labels_gp = gpar(fontsize = 9))) +
  Heatmap(normsig_epith_pat_both[[4]],
          col = colorRamp2(c(-0.7, -0.2, 0.7), c("blue","white", "red")),
          cluster_rows = FALSE,
          show_column_names = FALSE,
          column_title = patients_now[4],
          column_title_gp = gpar(fontsize = 12),
          top_annotation = ha_lehman_epith_pat[[4]],
          name = patients_now[4], 
          show_row_names = FALSE,
          top_annotation_height = unit(c(2), "cm"),
          heatmap_legend_param = list(title_gp = gpar(fontsize = 9), labels_gp = gpar(fontsize = 9))) +
  Heatmap(normsig_epith_pat_both[[5]],
          col = colorRamp2(c(-0.7, -0.2, 0.7), c("blue","white", "red")),
          cluster_rows = FALSE,
          show_column_names = FALSE,
          column_title = patients_now[5],
          column_title_gp = gpar(fontsize = 12),
          top_annotation = ha_lehman_epith_pat[[5]],
          name = patients_now[5], 
          show_row_names = FALSE,
          top_annotation_height = unit(c(2), "cm"),
          heatmap_legend_param = list(title_gp = gpar(fontsize = 9), labels_gp = gpar(fontsize = 9))) +
  Heatmap(normsig_epith_pat_both[[6]],
          col = colorRamp2(c(-0.7, -0.2, 0.7), c("blue","white", "red")),
          cluster_rows = FALSE,
          row_names_side = "right",
          column_title = patients_now[6],
          column_title_gp = gpar(fontsize = 12),
          top_annotation = ha_lehman_epith_pat[[6]],
          name = patients_now[6], 
          show_column_names = FALSE,
        
          top_annotation_height = unit(c(2), "cm"),
          heatmap_legend_param = list(title_gp = gpar(fontsize = 9), labels_gp = gpar(fontsize = 9)))
#画fig3b.pdf
print(draw(ht_sep_normsig_both, annotation_legend_side = "bottom"))

fig3b

# 每个样本的normal signatures 数目
all.equal(colnames(HSMM_allepith_clustering), names(assignments_normsig_both_epithelial))
pData(HSMM_allepith_clustering)$assignments_normsig_both <- assignments_normsig_both_epithelial
pData(HSMM_allepith_clustering)$assignments_normsig_ups <- assignments_normsig_ups_epithelial
#画fig3f
plot_cell_clusters(HSMM_allepith_clustering, 1, 2, color = "assignments_normsig_both", cell_size = 2) + facet_wrap(~patient)

fig3f

plot_cell_clusters(HSMM_allepith_clustering, 1, 2, color = "assignments_normsig_both", cell_size = 2) + facet_wrap(~Cluster)

#检查数据
all.equal(HSMM_allepith_clustering$Cluster, clustering_allepith)
all.equal(colnames(normsig_avg_both_epithelial), colnames(HSMM_allepith_clustering))
clust_avg_normsig_both <- matrix(NA, nrow = length(unique(HSMM_allepith_clustering$Cluster)), ncol = nrow(normsig_avg_both_epithelial))
rownames(clust_avg_normsig_both) <- paste("clust", c(1:length(unique(HSMM_allepith_clustering$Cluster))), sep = "")
colnames(clust_avg_normsig_both) <- rownames(normsig_avg_both_epithelial)
#算上皮细胞群的avg_both 平均表达值，接下来还是同样的操作
for (c in 1:length(unique(HSMM_allepith_clustering$Cluster))) {
  clust_avg_normsig_both[c,] <- apply(normsig_avg_both_epithelial[,which(HSMM_allepith_clustering$Cluster == c)], 1, mean)
}

clust_avg_normsig_both <- as.data.frame(clust_avg_normsig_both)
clust_avg_normsig_both$Cluster <- rownames(clust_avg_normsig_both)
clust_avg_normsig_melt <- melt(clust_avg_normsig_both, "Cluster")
#画fig3e
ggplot(clust_avg_normsig_melt, aes(Cluster, value, fill = factor(variable), color = factor(variable), 
                                   shape = factor(variable))) + 
  geom_point(size = 3, stroke = 1) +
  scale_shape_discrete(solid = T) + 
  ylab("average expression of signature in cluster") +
  xlab("cluster") +
  ylim(c(-0.35, 0.5))

fig3e

fig3e：Clusters 2和Clusters4 强烈表达the LP signature, 而cluster 3则高表达 ML signature.
接着为了进一步探究临床相关性，作者使用了三个gene signatures，进一步探究这868个上皮细胞的特征，这868个上皮细胞真的被研究到很彻底，真的佩服，这工作量真的好大....
第一个gene signatures:70-gene prognostic signature ，该signatures最初是从对有无转移复发患者的原发肿瘤之间差异表达基因的分析中得出的，总共70个基因。

mammaprint_long <- read.table("data/mammaprint_sig_new.txt", header = TRUE, sep = "\t")
mammaprint <- apply(mammaprint_long, 2, function(x){return(intersect(x, rownames(mat_ct)))})[,1]
mammaprint_avg_exprs <- apply(mat_ct[match(mammaprint, rownames(mat_ct)),], 2, mean)
all.equal(names(mammaprint_avg_exprs), colnames(mat_ct))
mammaprint_avg_exprs <- mammaprint_avg_exprs[which(pd_ct$cell_types_cl_all == "epithelial")]

all.equal(colnames(HSMM_allepith_clustering), names(mammaprint_avg_exprs))
pData(HSMM_allepith_clustering)$mammaprint_avg_exprs <- mammaprint_avg_exprs

# 画figS13b
plot_cell_clusters(HSMM_allepith_clustering, 1, 2, color = "mammaprint_avg_exprs", cell_size = 2) + facet_wrap(~patient) +
  scale_color_continuous(low = "yellow", high = "blue")

figS13b

# 画figS13a
plot_cell_clusters(HSMM_allepith_clustering, 1, 2, color = "mammaprint_avg_exprs", cell_size = 2) + facet_wrap(~Cluster) +
  scale_color_continuous(low = "yellow", high = "blue")

figS13a

第二个gene signatures:49-gene metastatic burden signature.该signatures可以区分了患者来源的小鼠TNBC异种移植模型中单个循环转移细胞所产生的高转移负荷和低转移负荷，总共包括49个基因。

zenawerb_long <- read.table("data/werb_49_metastasis_sig.txt", header = TRUE, sep = "\t")
zenawerb <- apply(zenawerb_long, 2, function(x){return(intersect(x, rownames(mat_ct)))})[,1]
zenawerb_avg_exprs <- apply(mat_ct[match(zenawerb, rownames(mat_ct)),], 2, mean)
all.equal(names(zenawerb_avg_exprs), colnames(mat_ct))
zenawerb_avg_exprs <- zenawerb_avg_exprs[which(pd_ct$cell_types_cl_all == "epithelial")]

all.equal(colnames(HSMM_allepith_clustering), names(zenawerb_avg_exprs))
pData(HSMM_allepith_clustering)$zenawerb_avg_exprs <- zenawerb_avg_exprs

#画figS14b
plot_cell_clusters(HSMM_allepith_clustering, 1, 2, color = "zenawerb_avg_exprs", cell_size = 2) + facet_wrap(~patient) +
  scale_color_continuous(low = "yellow", high = "blue")

figS14b

#画figS14a
plot_cell_clusters(HSMM_allepith_clustering, 1, 2, color = "zenawerb_avg_exprs", cell_size = 2) + facet_wrap(~Cluster) +
  scale_color_continuous(low = "yellow", high = "blue")

figS14a

第三个gene siganatures:从接受手术前化疗治疗的原发性乳腺癌患者的残存肿瘤群体中富集的基因中获得的，包括354个基因。

artega_long <- read.table("data/artega_sig.txt", header = TRUE, sep = "\t")
artega <- apply(artega_long, 2, function(x){return(intersect(x, rownames(mat_ct)))})[,1]
artega_avg_exprs <- apply(mat_ct[match(artega, rownames(mat_ct)),], 2, mean)
all.equal(names(artega_avg_exprs), colnames(mat_ct))
artega_avg_exprs <- artega_avg_exprs[which(pd_ct$cell_types_cl_all == "epithelial")]
all.equal(colnames(HSMM_allepith_clustering), names(artega_avg_exprs))
pData(HSMM_allepith_clustering)$artega_avg_exprs <- artega_avg_exprs
画figS15a
plot_cell_clusters(HSMM_allepith_clustering, 1, 2, color = "artega_avg_exprs", cell_size = 2) + facet_wrap(~patient) +
  scale_color_continuous(low = "yellow", high = "blue")

figS15a

#画figS15b
plot_cell_clusters(HSMM_allepith_clustering, 1, 2, color = "artega_avg_exprs", cell_size = 2) + facet_wrap(~Cluster) +
  scale_color_continuous(low = "yellow", high = "blue")

figS15b

#将3个gene signatures的表达值合成一个数据框
prognosis_sig <- cbind(mammaprint_avg_exprs, zenawerb_avg_exprs, artega_avg_exprs)
#取行名
colnames(prognosis_sig) <- c("mammaprint", "zenawerb", "artega")

prognosis_epith_pat <- list()
for (i in 1:length(patients_now)) {
  prognosis_epith_pat[[i]] <- t(prognosis_sig)[,which(pd_ct_epith$patient == patients_now[i])]
}
names(prognosis_epith_pat) <- patients_now
for (i in 1:length(patients_now)) {
  print(all.equal(colnames(prognosis_epith_pat[[1]]), rownames(pds_epith_ct[[1]])))
  print(all.equal(names(clusterings_sep_allepith[[1]]), colnames(prognosis_epith_pat[[1]])))
}
ht_sep_prognosis <-
  Heatmap(prognosis_epith_pat[[1]],
          cluster_rows = FALSE,
          col = colorRamp2(c(-0.2, 0.2, 1), c("blue","white", "red")),
          show_column_names = FALSE,
          column_title = patients_now[1],
          top_annotation = ha_lehman_epith_pat[[1]],
          column_title_gp = gpar(fontsize = 12),
          show_row_names = FALSE,
          name = patients_now[1], 
          
          heatmap_legend_param = list(title_gp = gpar(fontsize = 9), labels_gp = gpar(fontsize = 9))) +
  Heatmap(prognosis_epith_pat[[2]],
          col = colorRamp2(c(-0.2, 0.2, 1), c("blue","white", "red")),
          cluster_rows = FALSE,
          show_column_names = FALSE,
          column_title = patients_now[2],
          column_title_gp = gpar(fontsize = 12),
          top_annotation = ha_lehman_epith_pat[[2]],
          name = patients_now[2], 
          show_row_names = FALSE,
         
          heatmap_legend_param = list(title_gp = gpar(fontsize = 9), labels_gp = gpar(fontsize = 9))) +
  Heatmap(prognosis_epith_pat[[3]],
          col = colorRamp2(c(-0.2, 0.2, 1), c("blue","white", "red")),
          cluster_rows = FALSE,
          show_column_names = FALSE,
          column_title = patients_now[3],
          column_title_gp = gpar(fontsize = 12),
          top_annotation = ha_lehman_epith_pat[[3]],
          name = patients_now[3], 
          show_row_names = FALSE,
          
          heatmap_legend_param = list(title_gp = gpar(fontsize = 9), labels_gp = gpar(fontsize = 9))) +
  Heatmap(prognosis_epith_pat[[4]],
          col = colorRamp2(c(-0.2, 0.2, 1), c("blue","white", "red")),
          cluster_rows = FALSE,
          show_column_names = FALSE,
          column_title = patients_now[4],
          column_title_gp = gpar(fontsize = 12),
          top_annotation = ha_lehman_epith_pat[[4]],
          name = patients_now[4], 
          show_row_names = FALSE,
         
          heatmap_legend_param = list(title_gp = gpar(fontsize = 9), labels_gp = gpar(fontsize = 9))) +
  Heatmap(prognosis_epith_pat[[5]],
          col = colorRamp2(c(-0.2, 0.2, 1), c("blue","white", "red")),
          cluster_rows = FALSE,
          show_column_names = FALSE,
          column_title = patients_now[5],
          column_title_gp = gpar(fontsize = 12),
          top_annotation = ha_lehman_epith_pat[[5]],
          name = patients_now[5], 
          show_row_names = FALSE,
         
          heatmap_legend_param = list(title_gp = gpar(fontsize = 9), labels_gp = gpar(fontsize = 9))) +
  Heatmap(prognosis_epith_pat[[6]],
          col = colorRamp2(c(-0.2, 0.2, 1), c("blue","white", "red")),
          cluster_rows = FALSE,
          row_names_side = "right",
          column_title = patients_now[6],
          column_title_gp = gpar(fontsize = 12),
          top_annotation = ha_lehman_epith_pat[[6]],
          name = patients_now[6], 
          show_column_names = FALSE,
          
          heatmap_legend_param = list(title_gp = gpar(fontsize = 9), labels_gp = gpar(fontsize = 9)))
#画fig4a
print(draw(ht_sep_prognosis, annotation_legend_side = "right"))

fig4a

接着看这3个gene signatures在不同clusters的细胞之间的表达分布。

clust_avg_prognosis <- matrix(NA, nrow = length(unique(HSMM_allepith_clustering$Cluster)), ncol = ncol(prognosis_sig))
#列明是clusters1-5
rownames(clust_avg_prognosis) <- paste("clust", c(1:length(unique(HSMM_allepith_clustering$Cluster))), sep = "")
#行名是mammaprint、zenawerb、artega和Cluster 
colnames(clust_avg_prognosis) <- colnames(prognosis_sig)
#每个clusters的三个gene signatures的平均值
for (c in 1:length(unique(HSMM_allepith_clustering$Cluster))) {
  clust_avg_prognosis[c,] <- apply(prognosis_sig[which(HSMM_allepith_clustering$Cluster == c),], 2, mean)}

prognosis_sig <- as.data.frame(prognosis_sig)
all.equal(rownames(prognosis_sig), colnames(HSMM_allepith_clustering))
prognosis_sig$Cluster <- as.numeric(HSMM_allepith_clustering$Cluster)

prognosis_melt <- melt(prognosis_sig, id.vars = c("Cluster"))
prognosis_melt$value <- as.numeric(prognosis_melt$value)
prognosis_melt <- prognosis_melt %>%
  filter(Cluster %in% c(2,3,4))

#画fig4b
p <- ggplot(prognosis_melt, aes(factor(Cluster), value, fill = factor(Cluster))) +
  scale_fill_manual(values = c("1" = "#ee204d", "2" = "#17806d", "3" = "#b2ec5d", "4" = "#cda4de", "5" = "#1974d2"))
p + geom_violin(adjust = .5) + facet_wrap(~variable) + stat_summary(fun.y = mean, geom = "point", shape = 22, size = 3)

fig4b

#画figS16
venn(list("PS" = mammaprint, "MBS" = zenawerb, "RTS" = artega))

figS16

可以看到这3个gene signatures没有重叠的基因，并且它们来源不同，但这3个 gene signatures均在clusters 2亚群中都高表达。接着，作者想跟临床预后联系到一起，于是选取了3个有代表性的数据集gene signature，对868个上皮细胞的5个clusters进行探索，结果提示clusters2具有与其他clusters不同的特征，接着对clusters2进行进一步探索。在之前的第五篇笔记tSNE中，已经用differentialGeneTest对每一个上皮细胞cluster与全部的上皮细胞进行差异分析后，以FDR=0.1为阈值，选取前10个有显著差异变化的基因在METABRIC 数据库中进行生存分析，作者在补充材料里已经描述得很详细。

image-20210209084756920.png

结果显示高表达clusters2的肿瘤与OS显著负相关相关性。相反，但是在三个gene signatures中的预后则无统计学意义。此外，clusters1、3、4和5的基因无法预测临床结果。这些发现揭示了单细胞测序分析能揭示与临床预后相关细胞状态的能力，但这些细胞状态还没有在肿瘤真实世界样本中没被发现。

image-20210124111022195.png

我们继续探索。在fig3a中，我们已经对868个上皮细胞进行tSNE,接着我们将这个tSNE结果，进行进一步注释可并可视化。

image-20210209091113221.png

## 可视化normal signatures，figS10b
plot_cell_clusters(HSMM_allepith_clustering, 1, 2, color = "assignments_normsig_both", cell_size = 3)

figS10b

#可视化TNBCtype4 Lehman signatures
#去掉immunomodulatory和mesenchymal_stem_like上皮细胞
lehman_avg_both_epithelial_new <- lehman_avg_both_epithelial[-which(rownames(lehman_avg_both_epithelial) %in% c("immunomodulatory", "mesenchymal_stem_like")),]
#给868个上皮细胞进行分类（basal_like_1,basal_like_2,mesenchymal,luminal_ar）
assignments_lehman_both_epithelial_new <- apply(lehman_avg_both_epithelial_new, 2, function(x){rownames(lehman_avg_both_epithelial_new)[which.max(x)]})
all.equal(colnames(HSMM_allepith_clustering), names(assignments_lehman_both_epithelial_new))
pData(HSMM_allepith_clustering)$assignments_lehman_both_new <- assignments_lehman_both_epithelial_new
#画figS10c
plot_cell_clusters(HSMM_allepith_clustering, 1, 2, color = "assignments_lehman_both_new", cell_size = 3)

figS10c

#可视化mammaprint signature (PS)
plot_cell_clusters(HSMM_allepith_clustering, 1, 2, color = "mammaprint_avg_exprs", cell_size = 3) +
  scale_color_continuous(low = "yellow", high = "blue")

figS10d

然后，通过研究在cluster2中与其他上皮细胞群的差异表达的基因中富集的通路变化丰，来确定cluster2亚群背后的功能机制。我们发现，最富集的通路与鞘糖脂的生物合成和溶酶体的周转有关，这两种途径影响了同样富集到的先天免疫系统的细胞因子途径。接着可视化每个样本的鞘糖脂的生物合成和先天免疫系统的细胞因子两条途径的热图。

糖鞘糖脂最近被认为是乳腺癌中许多促进肿瘤特性的介质，包括改变的生长因子信号、EMT和干样行为。该途径中的多个关键基因在cluster2亚群中高表达，包括糖脂转移蛋白基因GLTP和鞘脂生物合成关键亚单位基因SPTLC1。鞘磷脂也被认为是天然免疫系统和获得性免疫系统的重要调节剂，并与多种上皮组织炎症相关癌变有关。

#path1是糖鞘糖脂的关键基因
path1 <- c("ST3GAL4", "ST3GAL6", "ST8SIA1", "FUT1", "FUT2", "FUT3", "FUT4", "FUT6", "FUT9", "GLTP", "SPTLC1", "KDSR", "SPTLC2", "CERK", "ARSA")
idx_path1 <- match(path1, rownames(HSMM_allepith_clustering))
#path1是先天免疫系统的细胞因子途径的关键基因
path2 <- c("CCL20", "CCL22", "CCL4", "CCR6", "IL11", "IL12RB1", "IL6R", "CSF2RA", "BMP7", "GLMN", "GPI", "INHBA", "TNF", "TNFSF15", "GHR", "LEPR", "TLR1", "TLR2", "TLR5", "TLR7", "TLR10", "F11R")
idx_path2 <- match(path2, rownames(HSMM_allepith_clustering))

path3 <- c("ERBB2", "AKT1", "AKT3", "PIK3R3", "PIK3R4", "RPS6KB2", "TRIB3", "BTK", "GRB10", "GRB2", "ILK", "PAK1", "PRKCZ", "CSNK2A1",
           "IRS1", "IRAK1", "MYD88", "MAP2K1", "MAPK8", "MAPK1", "PTPN11", "EIF4E", "EIF4EBP1", "EIF4G1", "FKBP1A", "PDK1", "RHEB", "RPS6KB1")
idx_path3 <- match(path3, rownames(HSMM_allepith_clustering))

all_idx_paths <- c(idx_path1, idx_path2)
#命名
names_path <- c(rep("glycosphigolipid metabolism", length(idx_path1)), rep("innate immunity", length(idx_path2)))
#上色
anno_cols_path <- c("glycosphigolipid metabolism" = "#ff9baa", "innate immunity" = "#17806d")
ha_path_rows <- HeatmapAnnotation(df = data.frame(pathway = names_path),
                                  annotation_legend_param = list(pathway = list(ncol = 1, title = "pathway", title_position = "topcenter")),
                                  which = "row", col = list("pathway" = anno_cols_path), annotation_width = unit(3, "mm"))
#分离每个cluster的矩阵，并提取关键基因。
mat_epith_allpats <- exprs(HSMM_allepith_clustering)
mats_epith_patient <- list()
pds_epith_patient <- list()
mats_epith_patient_clusters <- list()
for (i in 1:length(patients_now)) {
  pds_epith_patient[[i]] <- pData(HSMM_allepith_clustering)[which(pData(HSMM_allepith_clustering)$patient == patients_now[i]), ]
  mats_epith_patient[[i]] <- mat_epith_allpats[all_idx_paths, which(pData(HSMM_allepith_clustering)$patient == patients_now[i])]
  mats_epith_patient_clusters[[i]] <- list()
  for (c in 1:length(unique(HSMM_allepith_clustering$Cluster))) {
    mats_epith_patient_clusters[[i]][[c]] <- mats_epith_patient[[i]][, which(pds_epith_patient[[i]]$Cluster == c)]
  }
  names(mats_epith_patient_clusters[[i]]) <- paste0("clust", c(1:5))
}
names(mats_epith_patient) <- patients_now
names(pds_epith_patient) <- patients_now

# 接下来就是画图了。
ht_paths <-
  ha_path_rows + 
  Heatmap(mats_epith_patient_clusters[[1]][[1]], 
          col = colorRamp2(c(-0.5, 1, 3), c("blue", "white", "red")),
          cluster_rows = TRUE, show_row_dend = FALSE, 
          show_column_dend = TRUE, show_column_names = FALSE,
          name = "cluster1", column_title = "cluster1",
          row_names_gp = gpar(fontsize = 9, col = anno_cols_path),
          split = names_path) +
  Heatmap(mats_epith_patient_clusters[[1]][[2]],
          col = colorRamp2(c(-0.5, 1, 3), c("blue", "white", "red")),
          cluster_rows = TRUE, show_row_dend = FALSE,
          name = "cluster2", column_title = "cluster2",
          show_row_names = FALSE,
          show_column_dend = TRUE, show_column_names = FALSE) +
  Heatmap(mats_epith_patient_clusters[[1]][[3]],
          col = colorRamp2(c(-0.5, 1, 3), c("blue", "white", "red")),
          cluster_rows = TRUE, show_row_dend = FALSE, 
          name = "cluster3", column_title = "cluster3",
          show_row_names = FALSE,
          show_column_dend = TRUE, show_column_names = FALSE) +
  Heatmap(mats_epith_patient_clusters[[1]][[4]],
          col = colorRamp2(c(-0.5, 1, 3), c("blue", "white", "red")),
          cluster_rows = TRUE, show_row_dend = FALSE, 
          name = "cluster4", column_title = "cluster4",
          show_row_names = FALSE,
          show_column_dend = TRUE, show_column_names = FALSE) +
  Heatmap(mats_epith_patient_clusters[[1]][[5]],
          col = colorRamp2(c(-0.5, 1, 3), c("blue", "white", "red")),
          cluster_rows = TRUE, show_row_dend = FALSE, 
          name = "cluster5", column_title = "cluster5",
          show_row_names = FALSE,
          show_column_dend = TRUE, show_column_names = FALSE) +
  Heatmap(mats_epith_patient_clusters[[2]][[1]], 
          col = colorRamp2(c(-0.5, 1, 3), c("blue", "white", "red")),
          cluster_rows = TRUE, show_row_dend = FALSE, 
          show_column_dend = TRUE, show_column_names = FALSE,
          name = "cluster1_2", column_title = "cluster1",
          row_names_gp = gpar(fontsize = 9, col = anno_cols_path),
          split = names_path) +
  Heatmap(mats_epith_patient_clusters[[2]][[2]],
          col = colorRamp2(c(-0.5, 1, 3), c("blue", "white", "red")),
          cluster_rows = TRUE, show_row_dend = FALSE,
          name = "cluster2_2", column_title = "cluster2",
          show_row_names = FALSE,
          show_column_dend = TRUE, show_column_names = FALSE) +
  Heatmap(mats_epith_patient_clusters[[2]][[3]],
          col = colorRamp2(c(-0.5, 1, 3), c("blue", "white", "red")),
          cluster_rows = TRUE, show_row_dend = FALSE, 
          name = "cluster3_2", column_title = "cluster3",
          show_row_names = FALSE,
          show_column_dend = TRUE, show_column_names = FALSE) +
  Heatmap(mats_epith_patient_clusters[[2]][[4]],
          col = colorRamp2(c(-0.5, 1, 3), c("blue", "white", "red")),
          cluster_rows = TRUE, show_row_dend = FALSE, 
          name = "cluster4_2", column_title = "cluster4",
          show_row_names = FALSE,
          show_column_dend = TRUE, show_column_names = FALSE) +
  Heatmap(mats_epith_patient_clusters[[2]][[5]],
          col = colorRamp2(c(-0.5, 1, 3), c("blue", "white", "red")),
          cluster_rows = TRUE, show_row_dend = FALSE, 
          name = "cluster5_2", column_title = "cluster5",
          show_row_names = FALSE,
          show_column_dend = TRUE, show_column_names = FALSE) + 
  Heatmap(mats_epith_patient_clusters[[3]][[1]],
          col = colorRamp2(c(-0.5, 1, 3), c("blue", "white", "red")),
          cluster_rows = TRUE, show_row_dend = FALSE,
          name = "cluster1_3", column_title = "cluster1",
          show_row_names = FALSE,
          show_column_dend = TRUE, show_column_names = FALSE) +
  Heatmap(mats_epith_patient_clusters[[3]][[2]],
          col = colorRamp2(c(-0.5, 1, 3), c("blue", "white", "red")),
          cluster_rows = TRUE, show_row_dend = FALSE,
          name = "cluster2_3", column_title = "cluster2",
          show_row_names = FALSE,
          show_column_dend = TRUE, show_column_names = FALSE) +
  Heatmap(mats_epith_patient_clusters[[3]][[3]],
          col = colorRamp2(c(-0.5, 1, 3), c("blue", "white", "red")),
          cluster_rows = TRUE, show_row_dend = FALSE, 
          name = "cluster3_3", column_title = "cluster3",
          show_row_names = FALSE,
          show_column_dend = TRUE, show_column_names = FALSE) +
  Heatmap(mats_epith_patient_clusters[[3]][[4]],
          col = colorRamp2(c(-0.5, 1, 3), c("blue", "white", "red")),
          cluster_rows = TRUE, show_row_dend = FALSE, 
          name = "cluster4_3", column_title = "cluster4",
          show_row_names = FALSE,
          show_column_dend = TRUE, show_column_names = FALSE) +
  Heatmap(mats_epith_patient_clusters[[3]][[5]],
          col = colorRamp2(c(-0.5, 1, 3), c("blue", "white", "red")),
          cluster_rows = TRUE, show_row_dend = FALSE, 
          name = "cluster5_3", column_title = "cluster5",
          show_row_names = FALSE,
          show_column_dend = TRUE, show_column_names = FALSE) +
  Heatmap(mats_epith_patient_clusters[[4]][[1]],
          col = colorRamp2(c(-0.5, 1, 3), c("blue", "white", "red")),
          cluster_rows = TRUE, show_row_dend = FALSE,
          name = "cluster1_4", column_title = "cluster1",
          show_row_names = FALSE,
          show_column_dend = TRUE, show_column_names = FALSE) +
  Heatmap(mats_epith_patient_clusters[[4]][[2]],
          col = colorRamp2(c(-0.5, 1, 3), c("blue", "white", "red")),
          cluster_rows = TRUE, show_row_dend = FALSE,
          name = "cluster2_4", column_title = "cluster2",
          show_row_names = FALSE,
          show_column_dend = TRUE, show_column_names = FALSE) +
  Heatmap(mats_epith_patient_clusters[[4]][[3]],
          col = colorRamp2(c(-0.5, 1, 3), c("blue", "white", "red")),
          cluster_rows = TRUE, show_row_dend = FALSE, 
          name = "cluster3_4", column_title = "cluster3",
          show_row_names = FALSE,
          show_column_dend = TRUE, show_column_names = FALSE) +
  Heatmap(mats_epith_patient_clusters[[4]][[4]],
          col = colorRamp2(c(-0.5, 1, 3), c("blue", "white", "red")),
          cluster_rows = TRUE, show_row_dend = FALSE, 
          name = "cluster4_4", column_title = "cluster4",
          show_row_names = FALSE,
          show_column_dend = TRUE, show_column_names = FALSE) +
  Heatmap(mats_epith_patient_clusters[[4]][[5]],
          col = colorRamp2(c(-0.5, 1, 3), c("blue", "white", "red")),
          cluster_rows = TRUE, show_row_dend = FALSE, 
          name = "cluster5_4", column_title = "cluster5",
          show_row_names = FALSE,
          show_column_dend = TRUE, show_column_names = FALSE) +
  Heatmap(mats_epith_patient_clusters[[5]][[1]],
          col = colorRamp2(c(-0.5, 1, 3), c("blue", "white", "red")),
          cluster_rows = TRUE, show_row_dend = FALSE,
          name = "cluster1_5", column_title = "cluster1",
          show_row_names = FALSE,
          show_column_dend = TRUE, show_column_names = FALSE) +
  Heatmap(mats_epith_patient_clusters[[5]][[2]],
          col = colorRamp2(c(-0.5, 1, 3), c("blue", "white", "red")),
          cluster_rows = TRUE, show_row_dend = FALSE,
          name = "cluster2_5", column_title = "cluster2",
          show_row_names = FALSE,
          show_column_dend = TRUE, show_column_names = FALSE) +
  Heatmap(mats_epith_patient_clusters[[5]][[3]],
          col = colorRamp2(c(-0.5, 1, 3), c("blue", "white", "red")),
          cluster_rows = TRUE, show_row_dend = FALSE, 
          name = "cluster3_5", column_title = "cluster3",
          show_row_names = FALSE,
          show_column_dend = TRUE, show_column_names = FALSE) +
  Heatmap(mats_epith_patient_clusters[[5]][[4]],
          col = colorRamp2(c(-0.5, 1, 3), c("blue", "white", "red")),
          cluster_rows = TRUE, show_row_dend = FALSE, 
          name = "cluster4_5", column_title = "cluster4",
          show_row_names = FALSE,
          show_column_dend = TRUE, show_column_names = FALSE) +
  Heatmap(mats_epith_patient_clusters[[5]][[5]],
          col = colorRamp2(c(-0.5, 1, 3), c("blue", "white", "red")),
          cluster_rows = TRUE, show_row_dend = FALSE, 
          name = "cluster5_5", column_title = "cluster5",
          show_row_names = FALSE,
          show_column_dend = TRUE, show_column_names = FALSE) +
  Heatmap(mats_epith_patient_clusters[[6]][[1]],
          col = colorRamp2(c(-0.5, 1, 3), c("blue", "white", "red")),
          cluster_rows = TRUE, show_row_dend = FALSE,
          name = "cluster1_6", column_title = "cluster1",
          show_row_names = FALSE,
          show_column_dend = TRUE, show_column_names = FALSE) +
  Heatmap(mats_epith_patient_clusters[[6]][[2]],
          col = colorRamp2(c(-0.5, 1, 3), c("blue", "white", "red")),
          cluster_rows = TRUE, show_row_dend = FALSE,
          name = "cluster2_6", column_title = "cluster2",
          show_row_names = FALSE,
          show_column_dend = TRUE, show_column_names = FALSE) +
  Heatmap(mats_epith_patient_clusters[[6]][[3]],
          col = colorRamp2(c(-0.5, 1, 3), c("blue", "white", "red")),
          cluster_rows = TRUE, show_row_dend = FALSE, 
          name = "cluster3_6", column_title = "cluster3",
          show_row_names = FALSE,
          show_column_dend = TRUE, show_column_names = FALSE) +
  Heatmap(mats_epith_patient_clusters[[6]][[4]],
          col = colorRamp2(c(-0.5, 1, 3), c("blue", "white", "red")),
          cluster_rows = TRUE, show_row_dend = FALSE, 
          name = "cluster4_6", column_title = "cluster4",
          show_row_names = FALSE,
          show_column_dend = TRUE, show_column_names = FALSE) +
  Heatmap(mats_epith_patient_clusters[[6]][[5]],
          col = colorRamp2(c(-0.5, 1, 3), c("blue", "white", "red")),
          cluster_rows = TRUE, show_row_dend = FALSE, 
          name = "cluster5_6", column_title = "cluster5",
          show_row_names = FALSE,
          show_column_dend = TRUE, show_column_names = FALSE)

fig4d

画得有点丑，画图参数得调整需要参考ComplexHeatmap包的说明书，继续美化一下。
到了这里，这篇文献的单细胞部分就学习完了，花了一个月的时间断断续续把这个系列共6篇笔记写完。作者把这1000多个细胞不断分群，不断探索，最后集中在了上皮细胞群，进一步细分，然后结合临床预后情况，继续探索。这是很标准的单细胞分析流程。当然，文章中也加了一些免疫组化的实验方法，加以验证。还有一部分外显子测序，这又是另外一个需要学习的领域了，在单细胞学习的路上，感恩遇到得到了“单细胞天地”公众号团队的答疑解惑。2021年，希望能学到更多的东西，写更多的笔记，相信一切的努力都不会白白付出。

最后编辑于：2021.02.09 11:49:33

人面猴
序言：七十年代末，一起剥皮案震惊了整个滨河市，随后出现的几起案子，更是在滨河造成了极大的恐慌，老刑警刘岩，带你破解...
沈念sama阅读 158,425评论 4赞 361
死咒
序言：滨河连续发生了三起死亡事件，死亡现场离奇诡异，居然都是意外死亡，警方通过查阅死者的电脑和手机，发现死者居然都...
沈念sama阅读 67,058评论 1赞 291
救了他两次的神仙让他今天三更去死
文/潘晓璐我一进店门，熙熙楼的掌柜王于贵愁眉苦脸地迎上来，“玉大人，你说我怎么就摊上这事。” “怎么了？”我有些...
开封第一讲书人阅读 108,186评论 0赞 243
道士缉凶录：失踪的卖姜人
文/不坏的土叔我叫张陵，是天一观的道长。经常有香客问我，道长，这世上最难降的妖魔是什么？我笑而不...
开封第一讲书人阅读 43,848评论 0赞 204
港岛之恋（遗憾婚礼）
正文为了忘掉前任，我火速办了婚礼，结果婚礼上，老公的妹妹穿的比我还像新娘。我一直安慰自己，他们只是感情好，可当我...
茶点故事阅读 52,249评论 3赞 286
恶毒庶女顶嫁案：这布局不是一般人想出来的
文/花漫我一把揭开白布。她就那样静静地躺着，像睡着了一般。火红的嫁衣衬着肌肤如雪。梳的纹丝不乱的头发上，一...
开封第一讲书人阅读 40,554评论 1赞 216
城市分裂传说
那天，我揣着相机与录音，去河边找鬼。笑死，一个胖子当着我的面吹牛，可吹牛的内容都是我干的。我是一名探鬼主播，决...
沈念sama阅读 31,830评论 2赞 312
双鸳鸯连环套：你想象不到人心有多黑
文/苍兰香墨我猛地睁开眼，长吁一口气：“原来是场噩梦啊……” “哼！你这毒妇竟也来了？” 一声冷哼从身侧响起，我...
开封第一讲书人阅读 30,536评论 0赞 197
万荣杀人案实录
序言：老挝万荣一对情侣失踪，失踪者是张志新（化名）和其女友刘颖，没想到半个月后，有当地人在树林里发现了一具尸体，经...
沈念sama阅读 34,239评论 1赞 241
护林员之死
正文独居荒郊野岭守林人离奇死亡，尸身上长有42处带血的脓包…… 初始之章·张勋以下内容为张勋视角年9月15日...
茶点故事阅读 30,505评论 2赞 244
白月光启示录
正文我和宋清朗相恋三年，在试婚纱的时候发现自己被绿了。大学时的朋友给我发了我未婚夫和他白月光在一起吃饭的照片。...
茶点故事阅读 32,004评论 1赞 258
活死人
序言：一个原本活蹦乱跳的男人离奇死亡，死状恐怖，灵堂内的尸体忽然破棺而出，到底是诈尸还是另有隐情，我是刑警宁泽，带...
沈念sama阅读 28,346评论 2赞 253
日本核电站爆炸内幕
正文年R本政府宣布，位于F岛的核电站，受9级特大地震影响，放射性物质发生泄漏。R本人自食恶果不足惜，却给世界环境...
茶点故事阅读 32,999评论 3赞 235
男人毒药：我在死后第九天来索命
文/蒙蒙一、第九天我趴在偏房一处隐蔽的房顶上张望。院中可真热闹，春花似锦、人声如沸。这庄子的主人今日做“春日...
开封第一讲书人阅读 26,060评论 0赞 8
一桩弑父案，背后竟有这般阴谋
文/苍兰香墨我抬头看了看天上的太阳。三九已至，却和暖如春，着一层夹袄步出监牢的瞬间，已是汗流浃背。一阵脚步声响...
开封第一讲书人阅读 26,821评论 0赞 194
情欲美人皮
我被黑心中介骗来泰国打工，没想到刚下飞机就差点儿被人妖公主榨干…… 1. 我叫王不留，地道东北人。一个月前我还...
沈念sama阅读 35,574评论 2赞 271
代替公主和亲
正文我出身青楼，却偏偏与公主长得像，于是被迫代替她去往敌国和亲。传闻我的和亲对象是个残疾皇子，可洞房花烛夜当晚...
茶点故事阅读 35,480评论 2赞 267

学习一篇NC的单细胞文章（六）：Gene expression signatures

推荐阅读更多精彩内容