# Aim: load data from GEO, use Harmony instead PC to do follow steps
setwd("/home/wangjl/data/project/outsource/shenwh/")
getwd()
outputRoot=""
library(Seurat)
library(ggplot2)
library(dplyr)
library(patchwork)
obj=read.table("GSE181919_UMI_counts.txt")
dim(obj) #20000 54239
obj[1:4, 1:4]
colnames(obj)=sub("\\.", "-", colnames(obj) )
obj[1:4, 1:4]
meta =read.table("GSE181919_Barcode_metadata.txt")
dim(meta) #54239 8
meta[1:4, ]
scObj <- CreateSeuratObject(counts = obj, project = "HNSCC",
min.cells = 3, min.features = 200,
meta.data = meta)
scObj
#rm(obj)
gc()
head( rownames( scObj@meta.data) )
head( colnames(scObj@assays$RNA@counts) )
head( scObj@meta.data)
dim(scObj@assays$RNA@data) #20000 54239
dim(scObj@assays$RNA@data)
scObj[["percent.mt"]] <- PercentageFeatureSet(scObj, pattern = "^MT-")
VlnPlot(scObj, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3, pt.size=0)
plot1 <- FeatureScatter(scObj, feature1 = "nCount_RNA", feature2 = "percent.mt")
plot2 <- FeatureScatter(scObj, feature1 = "nCount_RNA", feature2 = "nFeature_RNA")
plot1 + plot2
#DimPlot(scObj)
scObj <- NormalizeData(scObj, normalization.method = "LogNormalize", scale.factor = 10000)
scObj <- FindVariableFeatures(scObj, selection.method = "vst", nfeatures = 2000)
all.genes <- rownames(scObj)
scObj <- ScaleData(scObj, features = all.genes)
scObj=RunPCA(scObj)
# Harmony ----
# install.packages("harmony")
library( harmony )
# 代替PC,用于后续
scObj = RunHarmony(scObj, "sample.id", plot_convergence = TRUE)
scObj@reductions$harmony[1:10, 1:5]
#细胞的嵌入
harmony_embeddings <- Embeddings(scObj, 'harmony')
harmony_embeddings[1:5, 1:5]
# PCA(use harmony instead) ----
scObj <- RunPCA(scObj, features = VariableFeatures(object = scObj))
DimPlot(scObj, reduction = "pca")
ElbowPlot(scObj, ndims = 50)
# umap ----
scObj <- RunUMAP(scObj, dims = 1:50, reduction = "harmony")
DimPlot(scObj, reduction = "umap", label = T, group.by = "patient.id")
# Cell cluster ----
scObj <- FindNeighbors(scObj, dims = 1:50, reduction = "harmony")
scObj <- FindClusters(scObj, resolution = 0.2)
DimPlot(scObj, reduction = "umap", label = T)
DimPlot(scObj, reduction = "umap", label = T, group.by = "cell.type")
table(scObj$sample.id)
table(scObj$patient.id)
table(scObj$subsite)
table(scObj$seurat_clusters, scObj$cell.type)
head(colnames(scObj))
head(rownames(meta))
table( colnames(scObj) == rownames(meta) )
table(meta$cell.type)
# marker ----
## manual ----
markers=list(
malignant=c('KRT14', 'KRT17', 'KRT6A', 'KRT5', 'KRT19', 'KRT8', 'KRT16', 'KRT18', 'KRT6B', 'KRT15', 'KRT6C', 'KRTCAP3', 'EPCAM', 'SFN'),
fibroblasts=c('FAP', 'PDPN', 'COL1A2', 'DCN', 'COL3A1', 'COL6A1'),
myocytes=c('ACTA1', 'ACTN2', 'MYL2', 'MYH2'),
T =c('CD2', 'CD3D', 'CD3E', 'CD3G', 'CD4', 'CD8A', 'CD8B', 'FOXP3'),
NK=c('NKG7', 'GNLY', 'GZMA', 'GZMB'),
B_Plasma = c( 'CD79A', 'CD79B', 'CD19', 'MS4A1', 'SDC1',
'SLAMF7','BLNK', 'FCRL5'),
mono=c('CD14', "FCGR3A"),
macrophages=c('CD163', 'CD68', 'FCGR2A', 'CSF1R'),
dendritic=c('ITGAX','CD40', 'CD80', 'CD83', 'CCR7'),
mast = c('CMA1', 'MS4A2', 'TPSAB1', 'TPSB2'),
endothelial =c('PECAM1', 'VWF', 'ENG'),
cycle=c('MKI67', 'CCND1', 'CCND3', 'CCNE1', #'CCNE2',
'CCNB1', 'CCNB2'),
'APA' =c("SCCPDH", 'NUDT21', "CPSF6"),
stem=c('CD34','SOCS2', 'SOCS1'),
meta=c("GAPDH", 'ACTB')
)
DotPlot(scObj, features = markers, cluster.idents = T) + RotatedAxis()
DotPlot(scObj, features = markers, group.by = "cell.type") + RotatedAxis()
DotPlot(scObj, features = unique(c(
'NUDT21', 'SCCPDH', 'GRB10', "UBFD1", "IFT20", "NUTF2", "CD59", "SFT2D1",
"BLOC1S5", "MAP2", "CDKN2B", "KRAS",
'CD34', 'SOCS2',
c('CD2', 'CD3D', 'CD3E', 'CD3G', 'CD4', 'CD8A', 'CD8B'), #T
'KRT19', 'KRT8', 'KRT16', 'KRT18', #epithelial
grep("^CPSF", rownames(scObj), value=T),
grep("^CSTF", rownames(scObj), value=T),
grep("^GZM", rownames(scObj), value=T),
grep("^CCN", rownames(scObj), value=T),
#grep("^SOCS", rownames(scObj), value=T),
#grep("^IGH", rownames(scObj), value=T),
"GAPDH")), cluster.idents = T) + RotatedAxis()
FeaturePlot(scObj, features = c("NUDT21", 'CPSF6', 'SCCPDH', 'MKI67', 'CD34', 'SOCS2'),
cols = c('lightgrey', 'red'), pt.size = 0.1, ncol=3) & NoLegend()
table(scObj$cell.type)
tmp=FindMarkers(scObj, ident.1 = "Malignant.cells", ident.2 = "Epithelial.cells",
group.by = "cell.type")
DotPlot(scObj, features = unique(
tmp %>% filter(p_val_adj<0.05) %>% top_n(n = 20, wt = avg_log2FC) %>%
rownames()
), cluster.idents = T) + RotatedAxis()
VlnPlot(scObj, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"),
ncol = 1, pt.size=0)
DotPlot(scObj, features = c("CD79A",
grep("^IG", rownames(scObj), value=T)))+RotatedAxis()
FeaturePlot(scObj, features = c("CD79A", "IGHG4", "IGLC7" ))+RotatedAxis()
DotPlot(scObj, features =c("CD34", 'NUDT21', 'SCCPDH', 'CCNB1'),
split.by = "tissue.type") + RotatedAxis()
scObj$tissue.type=factor(scObj$tissue.type,
levels = c("NL", "LP", 'CA', 'LN'))
DotPlot(scObj, features = unique(c(
"CD34", 'NUDT21', 'SCCPDH', 'CCNB1',
"MKI67", "CCNB2", "CCNE1",
unlist(markers)
)), cluster.idents = F,
split.by = "tissue.type", cols = c("red", 'orange', "green", 'blue')
)+RotatedAxis() + ggtitle("among 4 tissue types") + ggplot2::coord_flip()
ggsave("a1.scObj-by-tissue.sample.Seurat.dotplot.pdf", width=16, height=15)
## auto marker (30min) ----
### (2c) T cell markers ----
scObj.markers.T <- FindMarkers(scObj, ident.1 = "T.cells",group.by = "cell.type",
only.pos = TRUE, min.pct = 0.25, logfc.threshold = 0.25)
write.table(scObj.markers.T, "a1.scObj-by-cluster.T-marker.txt")
DotPlot(scObj, features = unique(c(
c("PTPRC", "CD3D", "CD3E", "CD3G", "CD4", "CD8A", "CD8B", 'FOXP3', "CD274", "GAPDH"),
scObj.markers.T |> filter(p_val_adj<0.05) %>% top_n(n = 800, wt = avg_log2FC) |>
rownames() |> head(100)
)), cols = c("lightgrey", "red"), cluster.idents = T ) + RotatedAxis()+
ggtitle("T markers among all cells") + coord_flip()
ggsave("a1.scObj-by-cluster.T-marker.Seurat.dotplot.pdf", width=7.5, height=20)
DoHeatmap(scObj, features = scObj.markers.T |> filter(p_val_adj<0.05) |> rownames() )
# find markers for every cluster compared to all remaining cells, report only the positive ones
scObj.markers <- FindAllMarkers(scObj, only.pos = TRUE, min.pct = 0.25, logfc.threshold = 0.25)
scObj.markers %>% group_by(cluster) %>% top_n(n = 2, wt = avg_log2FC)
c("SCCPDH", "MSL1",
"CD34","KIT", "CD38", "SOCS2","GATA1",
"MKI67","CCNE1", "CCNE2",
"NUDT21", "CPSF6",
"GAPDH",
"PTPRC", "CD3D", "CD3E", "CD4", 'CD8A', 'CD8B',
'CD79A', 'CD79B', 'IGHG4',
'NKG7', 'GNLY',
"CD14", "FCGR3A",
'C1QB')
DotPlot(scObj, features = unique(c(
#sapply(c("Fcer1a", "Itgb7", "Fcer1g", "Cpa3","Mcpt8", "Csf1"),toupper),
#"EPCAM", 'KRT8', 'KRT18', 'KRT19', 'KRT5', 'KRT15',
#"CD3D", "CD3E", "CD4", "CD8A", "CD8B", "SELL",
#"NOS2","IRF5","PTGS2", "CD163", "VSIG4", "MS4A4A", "ARG2",
scObj.markers %>% group_by(cluster) %>%
filter(cluster %in% c(15) ) %>% #by cluster
top_n(n = 70, wt = avg_log2FC) %>% #arrange( desc(pct.1) ) %>%
pull("gene") %>% unique() %>% jsonlite::toJSON()
)), cluster.idents = T, cols=c('lightgrey', "red")) + RotatedAxis()+
ggtitle("Myocyte")
#ggsave("a1.scObj.Seurat.dotplot.pdf", width=30, height=5.5)
grep("^PABP", rownames(scObj), value=T)
grep("^PAPOL", rownames(scObj), value=T)
DotPlot(scObj, features = unique(c(
#"EPB41", "EPB41L2", "EPB41L3",
#"CD274", 'LAG3',
"SCCPDH",
"NUDT21", 'CPSF6','CSTF2', 'FIP1L1', 'PAPOLA',
'XBP1', 'GPX1', 'FLT3', 'PIM1', 'MYC', 'HOXA9', 'BMI1',
#"CD79A", "CD79B", "MS4A1", "CD19", "VPREB3",
#"NOS2","IRF5","PTGS2", "CD163", "VSIG4", "MS4A4A",
"FLT3", "CD34",'RUNX1', 'RUNX1T1'
#sapply(
# #c("Bcl2","Rgs1","Xcl1","Dusp2","Sh2d2a","S100a10","Cd2","Ugcg","Lck","Txk","Vps37b","Gimap4","Gimap5","Klrb1a","Klrb1c","Klre1","Klrd1","Klrk1","Klrc2","Klri2","Klra4","Klra8","Klra9","Klra7","Klra3","Ncr1","Nkg7","Ifngr1","Prf1","Ifng","Gzmb","Eomes","Ccl5","Ccl3","Ccl4","Cd7","Serpinb6b","Serpinb9","Ctla2a","Gzma","Id2","Il2rb","Lgals1","Sytl3","H2-K1","H2-Q7","Ctsw","Ahnak","AW112010","Ms4a4b"),
# c("Fn1","Dbi","Vim","Zeb2","Cst3","Mafb","Ctsz","Psma7","Anxa5","S100a4","Ctss","Klf4","Prdx1","Sdc3","Plac8","Rassf4","Clec4a1","Clec4a3","Apoe","Ctsc","Ifitm3","Smpdl3a","Psap","Lyz2","Lrp1","Lamp1","Hpgd","Lpl","Hmox1","Lgals3","Ctsb","Trf","Ccr2","Cd68","Ccl9","Gngt2","Grn","F13a1","Npc2","Lgmn","Pld4","Crip1","Selenop","Lgals1","Aif1","Csf1r","Ahnak","Ms4a4c","Ms4a6c","Mpeg1"),
# toupper)
#intersect(t1,t2),
#setdiff(t1, t2),
#setdiff(t2, t1)
)), cluster.idents = T, cols=c('lightgrey', "red")) + RotatedAxis()#+
ggtitle("mono")
FeaturePlot(scObj, features = c(
"NOS2","IRF5","PTGS2", "CD163", "VSIG4", "MS4A4A"
), ncol=3, cols=c('lightgrey', "red")) & NoLegend()
### (100a)apa factors ----
apa_factors=list(
CPSF=c("CPSF1","CPSF2","CPSF3", "CPSF3L", "CPSF4","CPSF4L","CPSF6","CPSF7"),
CSTF=c("CSTF1","CSTF2","CSTF2T","CSTF3"),
PABP=c("PABPC1","PABPC1L","PABPC3","PABPC4","PABPC4L","PABPC5","PABPN1"),
PAPOL=c("PAPOLG", "PAPOLA"),
other=c("SYMPK", "RBBP6", "WDR33", "FIP1L1", "NUDT21","CLP1", "PCF11", "SCCPDH") #无法归类
)
### gene mean exp of core factors in each cluster ----
apa_exp_data = AverageExpression(scObj, features = unlist(apa_factors), slot="data",
group.by = "seurat_clusters")
apa_exp_data=apa_exp_data$RNA
apa_exp_data[1:4,1:4]
# rm all 0 rows
apa_exp_data=apa_exp_data[rowSums(apa_exp_data)>0,]
dim(apa_exp_data)
library(pheatmap)
pic1 = pheatmap(log1p(apa_exp_data),
border_color = NA,
scale = "row",
clustering_method = 'ward.D2',
#filename = "a10_apa_factors_HNSCC.heatmap.pdf", width = 5, height = 4.8,
main="APA core factors in HNSCC")
#
## save Or load ----
# saveRDS(scObj, file = "data/scObj.Seurat.rds")
# scObj =readRDS(file = "data/scObj.Seurat.rds")
write.table(scObj.markers, "data/scObj.markers.txt")
# scObj.markers=read.table("data/scObj.markers.txt")
table(scObj$seurat_clusters)
scObj_down=subset(scObj, downsample=2000)
table(scObj_down$seurat_clusters)
DoHeatmap(scObj_down,
#features = unlist(apa_factors),
features = pic1$tree_row$labels[pic1$tree_row$order],
group.by = "seurat_clusters" )+
ggtitle("version:1146")
#
#1. Macrophage(cluster4): normal -> leukoplakia -> primary-> metastasis ----
## (1) get macrophage cell
sc_sub = subset(scObj, idents=4)
sc_sub = subset(sc_sub, UMAP_1<5 & UMAP_2<5 & UMAP_2>-10)
sc_sub
DimPlot(sc_sub, label = T)
table(sc_sub$tissue.type)
# CA LN LP NL
# 3361 659 514 694
sc_sub$tissue.type=factor(sc_sub$tissue.type, levels = c("NL", "LP", 'CA', 'LN'))
DimPlot(sc_sub, label = F, split.by = "tissue.type")
## (2) compare between group
Idents(sc_sub)="tissue.type"
markers.tissue = FindAllMarkers(sc_sub, only.pos = TRUE, min.pct = 0.25, logfc.threshold = 0.25)
top10 <- markers.tissue %>% group_by(cluster) %>% top_n(n = 30, wt = avg_log2FC)
#DoHeatmap(sc_sub, features = unique(top10$gene), label = FALSE) + #+ NoLegend()
# scale_fill_gradientn(colors = c("blue", "white", "red"))+ggtitle("Macrophage")
DotPlot(sc_sub, features = unique(top10$gene), cols = c('lightgrey', 'red')) +
RotatedAxis() + ggtitle("Macrophage cells")
write.table(markers.tissue, "a1.scObj-by-tissue.Marcrophage-DEG.txt")
#2. Epithelial(cluster3): normal -> leukoplakia -> primary-> metastasis ----
## (1) get macrophage cell
sc_sub3 = subset(scObj, idents=3)
sc_sub3 = subset(sc_sub3, UMAP_1<5 & UMAP_2< (-7) )
sc_sub3
DimPlot(sc_sub3, label = T)
#table(sc_sub$tissue.type)
# CA LN LP NL
# 3366 659 514 694
# 3361 659 514 694 after trim
sc_sub3$tissue.type=factor(sc_sub3$tissue.type, levels = c("NL", "LP", 'CA', 'LN'))
DimPlot(sc_sub3, label = F, split.by = "tissue.type")
## (2) compare between group
Idents(sc_sub3)="tissue.type"
markers.tissue3 = FindAllMarkers(sc_sub3, only.pos = TRUE, min.pct = 0.25, logfc.threshold = 0.25)
top10_3 <- markers.tissue3 %>% group_by(cluster) %>% top_n(n = 20, wt = avg_log2FC)
DoHeatmap(sc_sub3, features = unique(top10_3$gene), label = FALSE) + #+ NoLegend()
scale_fill_gradientn(colors = c("blue", "white", "red"))+ggtitle("Epithelial")
#3. fibro (cluster0) ----
## (1) get macrophage cell
sc_sub3 = subset(scObj, idents=3)
sc_sub3 = subset(sc_sub3, UMAP_1<5 & UMAP_2< (-7) )
sc_sub3
DimPlot(sc_sub3, label = T)
#table(sc_sub3$tissue.type)
# CA LN LP NL
# 100 162 5204 245
sc_sub3$tissue.type=factor(sc_sub3$tissue.type, levels = c("NL", "LP", 'CA', 'LN'))
DimPlot(sc_sub3, label = F, split.by = "tissue.type")
## (2) compare between group
Idents(sc_sub3)="tissue.type"
markers.tissue3 = FindAllMarkers(sc_sub3, only.pos = TRUE, min.pct = 0.25, logfc.threshold = 0.25)
top10_3 <- markers.tissue3 %>% group_by(cluster) %>% top_n(n = 20, wt = avg_log2FC)
DoHeatmap(sc_sub3, features = unique(top10_3$gene), label = FALSE) + #+ NoLegend()
scale_fill_gradientn(colors = c("blue", "white", "red"))+ggtitle("Epithelial")
DotPlot(sc_sub3, features = unique(top10_3$gene)) + RotatedAxis()+
ggtitle("Epithelial cells")
#4. T cells(1,2,8) ----
FeaturePlot(scObj, features = c('PTPRC',"CD3D", "CD3E", 'CD3G', 'CD4', 'CD8A', 'CD8B',
'GZMA', 'GZMB', 'GZMK', 'GZMH', "CD274"),
cols = c('lightgrey', 'red'))&NoLegend()
## (1) get the cell
sc_sub4 = subset(scObj, idents=c(1,2,8) )
sc_sub4 = subset(sc_sub4, UMAP_1< (-2) & UMAP_2< (6) & UMAP_2> (-8) )
sc_sub4
DimPlot(sc_sub4, label = T)
table(sc_sub4$tissue.type)
# CA LN LP NL
#4138 1398 7601 4661
sc_sub4$tissue.type=factor(sc_sub4$tissue.type, levels = c("NL", "LP", 'CA', 'LN'))
DimPlot(sc_sub4, label = F, split.by = "tissue.type")
## (2) compare between group
Idents(sc_sub4)="tissue.type"
markers.tissue4 = FindAllMarkers(sc_sub4, only.pos = TRUE, min.pct = 0.25, logfc.threshold = 0.25)
top10_4 <- markers.tissue4 %>% group_by(cluster) %>% top_n(n = 20, wt = avg_log2FC)
DoHeatmap(sc_sub4, features = unique(top10_4$gene), label = FALSE) + #+ NoLegend()
scale_fill_gradientn(colors = c("blue", "white", "red"))+ggtitle("T cells")
ggsave( paste0(outputRoot, "04_heatmap.tissule.T.pdf"), width=4, height=9.5, useDingbats = F)
DotPlot(sc_sub4, features = unique(top10_4$gene)) + RotatedAxis()+
ggtitle("T cells")
# barplot
tbl_dat=table(sc_sub4$tissue.type, sc_sub4$seurat_clusters);tbl_dat
tbl_dat=tbl_dat[, colSums(tbl_dat)>0]
colors = c("blue", "orange", "red", "pink")
barplot(tbl_dat, col =colors , main="Cell number in each cluster")
legend("topright", legend = rownames(tbl_dat), fill =colors, border = NA,bty="n" )
#
barplot(apply(tbl_dat, 2, function(x){x/sum(x)}), col =colors , main="Cell number in each cluster")
#legend("topright", legend = rownames(tbl_dat), fill =colors, border = NA,bty="n" )
## (1c). DEG along cell.type ----
markers.tissue4 %>% filter(p_val_adj<0.05) %>% dim() #591 7
tmp1=markers.tissue4 %>% filter(p_val_adj<0.05) %>%
group_by(cluster) %>% top_n(n = 200, wt = avg_log2FC)
tmp1 |> dim()
DotPlot(sc_sub4, features = unique(tmp1$gene)[1:30] ) + RotatedAxis()+
ggtitle("T cells")
# mean expression level
scObj_T.marksers = as.data.frame(AverageExpression(sc_sub4, group.by = "tissue.type" )$RNA)
scObj_T.marksers=scObj_T.marksers[order(-scObj_T.marksers$LN),]
head(scObj_T.marksers)
scObj_T.increase = (function(){
result=c()
len = nrow(scObj_T.marksers)
for(i in 1:len){
line = scObj_T.marksers[i, , drop=F]
if(line[1,1]<=line[1,2] & line[1,2]<=line[1,3] & line[1,3]<=line[1,4]){
result =c(result, rownames(line))
}
}
print(length(result))
result
})()
#
#scObj_T.increase |> jsonlite::toJSON() #2734
# ["HES4","ISG15","FAM132A","UBE2J2","PUSL1","CPTP","AURKAIP1","MRPL20","SSU72"]
# overlap
scObj.markers_ol = intersect(scObj_T.increase, tmp1$gene)
length(scObj.markers_ol) #118
length(unique(scObj.markers_ol)) #118
scObj.markers_ol |> jsonlite::toJSON()
#
DotPlot(sc_sub4, features = scObj.markers_ol[1:60], cols = c("lightgrey", "red") ) + RotatedAxis()+
ggtitle("T cells: 1-60 increase genes")
DotPlot(sc_sub4, features = scObj.markers_ol[61:118], cols = c("lightgrey", "red") ) + RotatedAxis()+
ggtitle("T cells: 61-118 increase genes")
#5. CD8+ T cells(2,8) ----
FeaturePlot(scObj, features = c('PTPRC',"CD3D", "CD3E", 'CD3G', 'CD4', 'CD8A', 'CD8B',
'GZMA', 'GZMB', 'GZMK', 'GZMH', "CD274"),
cols = c('lightgrey', 'red'))&NoLegend()
## (1) get the cell
sc_sub5 = subset(scObj, idents=c(2,8) )
sc_sub5 = subset(sc_sub5, UMAP_1< (-2) & UMAP_2< (6) & UMAP_2> (-7) )
sc_sub5
DimPlot(sc_sub5, label = T)
#table(sc_sub5$tissue.type)
# CA LN LP NL
#2153 616 4201 2707
sc_sub5$tissue.type=factor(sc_sub5$tissue.type, levels = c("NL", "LP", 'CA', 'LN'))
DimPlot(sc_sub5, label = F, split.by = "tissue.type")
## (2) compare between group
Idents(sc_sub5)="tissue.type"
markers.tissue5 = FindAllMarkers(sc_sub5, only.pos = TRUE, min.pct = 0.25, logfc.threshold = 0.25)
top10_5 <- markers.tissue5 %>% group_by(cluster) %>% top_n(n = 20, wt = avg_log2FC)
DoHeatmap(sc_sub5, features = unique(top10_4$gene), label = FALSE) + #+ NoLegend()
scale_fill_gradientn(colors = c("blue", "white", "red"))+ggtitle("CD8+ T cells")
#ggsave( paste0(outputRoot, "04_heatmap.tissule.T.pdf"), width=4, height=9.5, useDingbats = F)
DotPlot(sc_sub5, features = unique(top10_5$gene)) + RotatedAxis()+
ggtitle("CD8+T cells")
# barplot
tbl_dat=table(sc_sub5$tissue.type, sc_sub5$seurat_clusters);tbl_dat
tbl_dat=tbl_dat[, colSums(tbl_dat)>0]
colors = c("blue", "orange", "red", "pink")
barplot(tbl_dat, col =colors , main="Cell number in each cluster")
legend("topright", legend = rownames(tbl_dat), fill =colors, border = NA,bty="n" )
#
barplot(apply(tbl_dat, 2, function(x){x/sum(x)}), col =colors , main="Cell number in each cluster")
#legend("topright", legend = rownames(tbl_dat), fill =colors, border = NA,bty="n" )
# 6. markers between T cells ----
## (1) get the cell
sc_sub6 = subset(scObj, idents=c(1, 2, 8) )
sc_sub6 = subset(sc_sub6, UMAP_1< (-2) & UMAP_2< (6) & UMAP_2> (-7) )
sc_sub6
DimPlot(sc_sub6, label = T)
#table(sc_sub6$tissue.type)
# CA LN LP NL
#4138 1398 7601 4661
sc_sub6$tissue.type=factor(sc_sub6$tissue.type, levels = c("NL", "LP", 'CA', 'LN'))
DimPlot(sc_sub6, label = F, split.by = "tissue.type")
## (2) compare between group
#Idents(sc_sub6)="tissue.type"
markers.tissue6 = FindAllMarkers(sc_sub6, only.pos = TRUE, min.pct = 0.25, logfc.threshold = 0.25)
top10_6 <- markers.tissue6 %>% group_by(cluster) %>% top_n(n = 20, wt = avg_log2FC)
# all T cells
DotPlot(sc_sub6, features = unique(c(
"CD3D", "CD3E", "CD3G", "CD4", "CD8A", "CD8B", "FOXP3",
top10_6$gene)
)) + RotatedAxis()+ ggtitle("T cells")
DotPlot(sc_sub6, features = unique(c(
"CD3D", "CD3E", "CD3G", "CD4", "CD8A", "CD8B", "FOXP3",
top10_6$gene)
), split.by = "tissue.type",
cols = c("red", "pink", "blue", "orange" )) +
RotatedAxis()+ ggtitle("T cells, split.by=tissue.type")
# all cells
DotPlot(scObj, features = unique(c(
"CD3D", "CD3E", "CD3G", "CD4", "CD8A", "CD8B", "FOXP3",
top10_6$gene)
), cluster.idents = T) + RotatedAxis()+ ggtitle("All cells")
## all cell barplot ----
tbl_dat=table(scObj$cell.type, scObj$seurat_clusters);tbl_dat
tbl_dat=tbl_dat[, colSums(tbl_dat)>0]
colors =1:10 #c("blue", "orange", "red", "pink")
barplot(tbl_dat, col =colors , main="Cell number in each cluster")
legend("topright", legend = rownames(tbl_dat), fill =colors, border = NA,bty="n" )
#
barplot(apply(tbl_dat, 2, function(x){x/sum(x)}), col =colors , main="Cell number in each cluster")
#legend("topright", legend = rownames(tbl_dat), fill =colors, border = NA,bty="n" )
# 7. correlation & APA ----
dat = FetchData(scObj, vars = c(
#"FOXP3", 'CD274'
"SCCPDH", "NUDT21"
) )
dat=dat[rowSums(dat)>0,]
head(dat)
sum(dat)
dat[1:10, ]
cor.test(dat[,1], dat[,2])
grep("CD300", rownames(scObj), value=T)
DotPlot(scObj, features = c("SCCPDH",
"CD3D", "CD3E", "CD4","CD8A","CD8B", "NKG7", "GZMA", "GZMB", "CD79A",
"CD34", "SOCS2",
"CD274", "LAG3", 'ICOS', "CTLA4", "TIGIT",
#"FIP1L1", "CSTF2", "CPSF3", 'CPSF2', 'CSTF3',
#'NUDT21', 'CPSF6',
"CSF1R","CD68", grep("CD300", rownames(scObj), value=T),
#"MRPL45",
"CCNE1",'CCNE2', 'CCNB1', "MKI67",
"PCF11"
#'GAPDH'
),
#split.by = "tissue.type", cols = c("red", 'orange', "green", 'blue'),
#group.by = "cell.type",
#group.by = "tissue.type",
group.by = "seurat_clusters",
cluster.idents = F)+RotatedAxis() + coord_flip()
### (2)Re plot DotPlot ----
# named color
color.head=c('#617381', "#76AB9B", '#CE5A5B', "#EA7C5B");
names(color.head) =c("NL", "LP", "CA", "LN")
color.head
# draw with Seurat
g1=DotPlot(scObj, features = c("CD300LF"), group.by = "tissue.type",
cluster.idents = F)+labs(x="", y="") + NoLegend(); g1 #+ #+RotatedAxis() #+ coord_flip()
# >> get data from ggplot obj
head(g1$data)
# avg.exp pct.exp features.plot id avg.exp.scaled
#CD300LF 0.04571820 4.370236 CD300LF CA 1.0291080
#CD300LF1 0.03949094 4.144320 CD300LF LN 0.6632240
# re-plot with ggplot2
ggplot(g1$data, aes(x=features.plot, y=id, size=pct.exp, color=id))+
geom_point(show.legend = F)+
scale_color_manual(values=color.head)+
theme_classic(base_size = 14)+
labs(x="", y="")
#
VlnPlot(scObj, features = c("CD34", "SCCPDH", "LAG3",
'NUDT21', 'CPSF6',"MKI67",
#"CCNE1",'CCNE2',
'CCNB1', 'SOCS2'),
ncol = 2, pt.size = 0)
FeaturePlot(scObj, features = c("CD34", "SCCPDH", "NUDT21", "CPSF6", "MKI67", "CCNB1"),
cols = c("lightgrey", 'red'), pt.size = 0.1) & NoLegend()
sc_cluster8=subset(scObj, idents=8)
sc_cluster8=subset(sc_cluster8, UMAP_1<(-2) & UMAP_2 <5 & UMAP_2> (-7))
DimPlot(sc_cluster8)
VlnPlot(sc_cluster8, features = c("SCCPDH", 'LAG3', 'NUDT21', 'MKI67'),
group.by = "tissue.type", pt.size = 0, ncol = 1)
### (3)Re plot half VlnPlot ----
gene.symbol="PCF11"
g2=VlnPlot(scObj,
features = c(gene.symbol),
#features = c("CD68"),
#group.by="tissue.type",
group.by="cell.type",
#cols = color.head,
pt.size=0)+RotatedAxis(); g2 #fig1
# scale_color_manual(color.head)
head(g2$data)
# PCF11 ident
#AAACGGGCATGACGGA-1 1.370958e-05 T.cells
#AAAGATGAGCAGACTG-1 -5.646982e-06 T.cells
# BiocManager::install("gghalves")
# options(BioC_mirror="https://mirrors.tuna.tsinghua.edu.cn/bioconductor")
# options("repos" = c(CRAN="https://mirrors.tuna.tsinghua.edu.cn/CRAN/"))
library(gghalves)
dat=data.frame(
ident=g2$data$ident,
value=g2$data[,1]
)
# rm 0 values?
dim(dat)
dat=dat[which(dat$value>1e-4),]
dim(dat)
ggplot(dat, aes(x = ident, y = value, fill = ident) )+
geom_jitter(size=0.1, color="grey80") +
geom_violin(trim=F, #末尾是否截断
aes(fill=ident), #填充颜色
scale="width", #默认是按面积 area,推荐改为width
bw=0.1)+ #取样宽度,越小越精细模拟原始值
geom_boxplot(width=0.1, fill="white", show.legend = F,
outlier.colour = "#00000000") + #离群值 透明
theme_classic(base_size = 14)+
RotatedAxis()+
ggtitle(gene.symbol) #fig2
#
ggplot(dat, aes(x = ident, y = value, fill = ident) ) +
geom_half_point(size=0.1, side="l", aes(color=ident) )+
geom_half_violin(
aes(fill = ident),
alpha=0.7,
#position = position_nudge(x = .15, y = 0),
#adjust=0.5,
trim=F,
colour=NA, #描边颜色
bw=0.1,
side = 'r',
show.legend = F
)+
geom_boxplot(width=0.1, fill="white", show.legend = F,
outlier.colour = "#00000000") + #离群值 透明
#scale_fill_manual(values = color.head) +
#scale_color_manual(values = color.head) +
theme_classic(base_size = 14)+
RotatedAxis()+
labs(x="", y="log(Normalized counts +1)", title = bquote( italic(.(gene.symbol)) ) )+
guides(colour = guide_legend(override.aes = list(alpha = 1, size=3))) #fig3
#
#
FeaturePlot(scObj, features = c("PCF11", "CD300LF", "CD274", "NUDT21", "CPSF6", "CPSF3"),
ncol = 2,
cols =c("lightgrey", "red"), order=T)