liangyuli12138 · liangyuli12138 · May 19, 2023
diff --git a/GSE131907_Treg_test.r b/GSE131907_Treg_test.r
@@ -0,0 +1,93 @@
+
+#======================================================================================================
+# 2020-11-30
+# use samgsung data to run monocle 
+#======================================================================================================
+library(Biobase)
+library(knitr)
+library(reshape2)
+library(ggplot2)
+library(Seurat)
+library(monocle)
+
+dat <- readRDS("/public/workspace/lily/Lung2Brain/GSE131907/all_cell.RDS")
+
+#=======================================================================================================
+tmp.dat <- subset(dat,cells=which(dat$Cell_subtype%in%c("Naive CD4+ T","Treg","CD4+ Th")))
+tmp.dat.f <- subset(tmp.dat,cells=which(tmp.dat$type%in%c("Brian_metastasis","tumor_Lung_early")))
+
+
+tmp_dat <- FindVariableFeatures(object = tmp.dat.f,nfeatures= 3000)
+# scaling
+all.genes <- rownames(x = tmp_dat)
+tmp_dat <- ScaleData(object = tmp_dat, features = all.genes)
+
+
+#======================================================================================================
+data <- as(as.matrix(tmp_dat@assays$RNA@data), 'sparseMatrix')
+pd <- new('AnnotatedDataFrame', data = [email protected])
+fData <- data.frame(gene_short_name = row.names(data), row.names = row.names(data))
+fd <- new('AnnotatedDataFrame', data = fData)
+# make a object
+cds <- newCellDataSet(data,
+                         phenoData = pd,
+                         featureData = fd,
+                         lowerDetectionLimit = 0.5,
+                         expressionFamily = negbinomial.size())
+
+cds <- estimateSizeFactors(cds)
+cds <- estimateDispersions(cds)
+cds <- detectGenes(cds, min_expr = 0.1)
+
+ordering_genes <- VariableFeatures(tmp_dat)
+cds <- setOrderingFilter(cds, ordering_genes)
+cds <- reduceDimension(cds,method = 'DDRTree')
+cds <- orderCells(cds)
+
+
+plot_cell_trajectory(cds,color_by="Cell_subtype")+scale_colour_manual(values=c("#bff199","#bff199","#70b29c"))+facet_wrap(~type, nrow = 3)
+
+plot_cell_trajectory(cds,color_by="Cell_subtype")+scale_colour_manual(values=c("#bff199","#bff199","#70b29c"))+facet_wrap(~State, nrow = 3)
+
+
+treg <- pData(cds)[which(cds$State%in%c(4,5)&cds$Cell_subtype=="Treg"),c("Cell_subtype","State")]
+
+# treg.dat <- subset(tmp_dat,cells=which(colnames(tmp_dat)%in%rownames(treg)&tmp_dat$type%in%c("Brian_metastasis","tumor_Lung_early")))
+treg.dat <- subset(tmp_dat,cells=which(colnames(tmp_dat)%in%rownames(treg)&tmp_dat$type%in%c("Brian_metastasis")))
+treg.dat <- subset(tmp_dat,cells=which(colnames(tmp_dat)%in%rownames(treg)&tmp_dat$type%in%c("tumor_Lung_early")))
+treg.dat$State <- treg$State[which(rownames(treg)%in%colnames(treg.dat))]
+
+
+[email protected] <- as.factor(treg.dat$State)
+DefaultAssay(treg.dat) <- "RNA"
+geneset_tmp <- FindMarkers(treg.dat,ident.1=4,ident.2=5,assay="RNA",logfc.threshold = 0.1)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/Multiple_LB.R b/Multiple_LB.R
@@ -0,0 +1,237 @@
+
+
+# 2021-4-6
+# run multiple Lung cancer sample in Lung2Brain 
+# run in 202.195.187.3
+# D0927 and E0927
+# 季桂枝 省人医
+#====================================================================================================
+library(Seurat)
+# D0927
+tmp <- Read10X(data.dir = "/public/workspace/lily/Mutiple_LB/D0927/D0927/outs/filtered_feature_bc_matrix/")
+tmp.dat <- CreateSeuratObject(counts = tmp,  project = "D0927",min.cells = 3, min.features = 200)
+
+prepare <- function(tmp_dat){
+    tmp_dat[["percent.mt"]] <- PercentageFeatureSet(object = tmp_dat, pattern = "^MT-")
+    tmp_dat = subset(x=tmp_dat,subset=nFeature_RNA > 200 & nFeature_RNA < 7500 & percent.mt < 20)
+# seurat object
+	tmp_dat = NormalizeData(object = tmp_dat)
+	tmp_dat <- FindVariableFeatures(object = tmp_dat)
+	# scaling
+	all.genes <- rownames(x = tmp_dat)
+	tmp_dat <- ScaleData(object = tmp_dat, features = all.genes)
+	# PCA
+	tmp_dat <- RunPCA(object = tmp_dat, features = VariableFeatures(object = tmp_dat))
+	# clustering
+	tmp_dat <- FindNeighbors(object = tmp_dat,dims=1:10)
+	# select proper resolution
+	tmp_dat <- FindClusters(object = tmp_dat,resolution=0.8)
+	# T-SNE
+	tmp_dat <- RunTSNE(object = tmp_dat,dims=1:10,check_duplicates = FALSE)
+	tmp_dat <- RunUMAP(tmp_dat,dims=1:10)
+	return(tmp_dat)
+}
+tmp.dat <- prepare(tmp.dat)
+dat <- tmp.dat
+# use genemarker 
+    pdf(paste0("/public/workspace/lily/Mutiple_LB/D0927_violin.pdf"),width=12,height=9)
+    DefaultAssay(dat) <- "RNA"
+    VlnPlot(dat,features=c("CD3D","CD3E","CD2","PTPRC"),assay="RNA",group.by="seurat_clusters",pt.size=0) # T cells
+    VlnPlot(dat,features=c("CD19","MS4A1","CD79A","PTPRC"),assay="RNA",group.by="seurat_clusters",pt.size=0) # B cells
+    VlnPlot(dat,features=c("CD68","FCGR3A","LYZ","PTPRC"),assay="RNA",group.by="seurat_clusters",pt.size=0) # Myeloid 
+    VlnPlot(dat,features=c("NCAM1","NKG7","CD3D","KLRD1"),assay="RNA",group.by="seurat_clusters",pt.size=0) # NK cells
+    VlnPlot(dat,features=c("COL1A1","COL1A2","THY1","PTPRC"),assay="RNA",group.by="seurat_clusters",pt.size=0) # Fibroblast
+    VlnPlot(dat,features=c("PTPRC","CLDN5","FLT1","RAMP2"),assay="RNA",group.by="seurat_clusters",pt.size=0) #Endothelials
+    VlnPlot(dat,features=c("EPCAM","KRT19","CDH1","EGFR"),assay="RNA",group.by="seurat_clusters",pt.size=0) # Epithelials
+    VlnPlot(dat,features=c("TPSAB1","TPSB2","MS4A2","KIT"),assay="RNA",group.by="seurat_clusters",pt.size=0) # Mast cells
+    VlnPlot(dat,features=c("CLEC10A","CD1C", "CLEC4C", "IL3RA"),assay="RNA",group.by="seurat_clusters",pt.size=0) # pDC 
+    dev.off()
+
+    pdf(paste0("/public/workspace/lily/Mutiple_LB/D0927_feature.pdf"),width=10,height=10)
+    DimPlot(dat,label=T)
+    FeaturePlot(dat,features=c("CD3D","CD3E","CD2","PTPRC"),label=T) # T cells
+    FeaturePlot(dat,features=c("CD19","MS4A1","CD79A","PTPRC"),label=T) # B cells
+    FeaturePlot(dat,features=c("CD68","FCGR3A","LYZ","PTPRC"),label=T) # Myeloid 
+    FeaturePlot(dat,features=c("NCAM1","NKG7","CD3D","KLRD1"),label=T) # NK cells
+    FeaturePlot(dat,features=c("COL1A1","COL1A2","THY1","PTPRC"),label=T) # Fibroblast
+    FeaturePlot(dat,features=c("PTPRC","CLDN5","FLT1","RAMP2"),label=T) #Endothelials
+    FeaturePlot(dat,features=c("EPCAM","KRT19","CDH1","EGFR"),label=T) # Epithelials
+    FeaturePlot(dat,features=c("TPSAB1","TPSB2","MS4A2","KIT"),label=T) # Mast cells
+    FeaturePlot(dat,features=c("CLEC10A","CD1C","CLEC4C", "IL3RA"),label=T) # pDC 
+    dev.off()
+
+dat$infercnv.type <- "tmp"
+dat$infercnv.type[which(dat$seurat_clusters%in%c(2,15))] <- "Tcell"
+dat$infercnv.type[which(dat$seurat_clusters%in%c(0,1,4,8,11))] <- "Tumor"
+saveRDS(dat,file="/public/workspace/lily/Mutiple_LB/D0927.RDS")
+
+
+###############################################################################################################################
+# run infercnv 
+library(infercnv)
+library(Seurat)
+D0927 <- readRDS("/public/workspace/lily/Mutiple_LB/D0927.RDS")
+sub.dat <- subset(D0927,cells=which(D0927$infercnv.type%in%c("Tcell","Tumor")))
+
+
+setwd('/public/workspace/lily/Mutiple_LB/infercnv/')
+dir.create('./D0927')
+write.table(sub.dat$infercnv.type,file="D0927_cell_info.txt",sep="\t",col.names=F,quote=F)
+count<-as.matrix(sub.dat@assays$RNA@counts)
+write.table(count,file="D0927_count_exp.txt",sep="\t",quote=F)
+
+infercnv_obj = CreateInfercnvObject(raw_counts_matrix="/public/workspace/lily/Mutiple_LB/infercnv/D0927_count_exp.txt",
+         annotations_file="/public/workspace/lily/Mutiple_LB/infercnv/D0927_cell_info.txt",
+         delim="\t",
+         gene_order_file="/public/workspace/lily/REF/INDEX-hg19/anno/gencode_hg19_pos.txt",
+         ref_group_names="Tcell")
+
+
+setwd('/public/workspace/lily/Mutiple_LB/infercnv/D0927')
+infercnv_obj = infercnv::run(infercnv_obj,
+                             cutoff=0.1, # cutoff=1 works well for Smart-seq2, and cutoff=0.1 works well for 10x Genomics
+                             out_dir="./", 
+                             cluster_by_groups=T, 
+                             plot_steps=F,
+                             no_prelim_plot = TRUE,
+                             num_threads=10, #big
+                             no_plot=F ,
+                             output_format = "pdf" # maybe can more quick 
+                             # used for final scaling to fit range (0,2) centered at 1.
+                             )
+
+
+
+
+
+
+#===========================================================================================================================
+# E0927
+tmp <- Read10X(data.dir = "/public/workspace/lily/Mutiple_LB/E0927/E0927/outs/filtered_feature_bc_matrix/")
+tmp.dat <- CreateSeuratObject(counts = tmp,  project = "E0927",min.cells = 3, min.features = 200)
+
+prepare <- function(tmp_dat){
+    tmp_dat[["percent.mt"]] <- PercentageFeatureSet(object = tmp_dat, pattern = "^MT-")
+    tmp_dat = subset(x=tmp_dat,subset=nFeature_RNA > 200 & nFeature_RNA < 7500 & percent.mt < 20)
+# seurat object
+	tmp_dat = NormalizeData(object = tmp_dat)
+	tmp_dat <- FindVariableFeatures(object = tmp_dat)
+	# scaling
+	all.genes <- rownames(x = tmp_dat)
+	tmp_dat <- ScaleData(object = tmp_dat, features = all.genes)
+	# PCA
+	tmp_dat <- RunPCA(object = tmp_dat, features = VariableFeatures(object = tmp_dat))
+	# clustering
+	tmp_dat <- FindNeighbors(object = tmp_dat,dims=1:10)
+	# select proper resolution
+	tmp_dat <- FindClusters(object = tmp_dat,resolution=0.8)
+	# T-SNE
+	tmp_dat <- RunTSNE(object = tmp_dat,dims=1:10,check_duplicates = FALSE)
+	tmp_dat <- RunUMAP(tmp_dat,dims=1:10)
+	return(tmp_dat)
+}
+tmp.dat <- prepare(tmp.dat)
+dat <- tmp.dat
+# use genemarker 
+    pdf(paste0("/public/workspace/lily/Mutiple_LB/E0927_violin.pdf"),width=12,height=9)
+    DefaultAssay(dat) <- "RNA"
+    VlnPlot(dat,features=c("CD3D","CD3E","CD2","PTPRC"),assay="RNA",group.by="seurat_clusters",pt.size=0) # T cells
+    VlnPlot(dat,features=c("CD19","MS4A1","CD79A","PTPRC"),assay="RNA",group.by="seurat_clusters",pt.size=0) # B cells
+    VlnPlot(dat,features=c("CD68","FCGR3A","LYZ","PTPRC"),assay="RNA",group.by="seurat_clusters",pt.size=0) # Myeloid 
+    VlnPlot(dat,features=c("NCAM1","NKG7","CD3D","KLRD1"),assay="RNA",group.by="seurat_clusters",pt.size=0) # NK cells
+    VlnPlot(dat,features=c("COL1A1","COL1A2","THY1","PTPRC"),assay="RNA",group.by="seurat_clusters",pt.size=0) # Fibroblast
+    VlnPlot(dat,features=c("PTPRC","CLDN5","FLT1","RAMP2"),assay="RNA",group.by="seurat_clusters",pt.size=0) #Endothelials
+    VlnPlot(dat,features=c("EPCAM","KRT19","CDH1","EGFR"),assay="RNA",group.by="seurat_clusters",pt.size=0) # Epithelials
+    VlnPlot(dat,features=c("TPSAB1","TPSB2","MS4A2","KIT"),assay="RNA",group.by="seurat_clusters",pt.size=0) # Mast cells
+    VlnPlot(dat,features=c("CLEC10A","CD1C", "CLEC4C", "IL3RA"),assay="RNA",group.by="seurat_clusters",pt.size=0) # pDC 
+    dev.off()
+
+    pdf(paste0("/public/workspace/lily/Mutiple_LB/E0927_feature.pdf"),width=10,height=10)
+    DimPlot(dat,label=T)
+    FeaturePlot(dat,features=c("CD3D","CD3E","CD2","PTPRC"),label=T) # T cells
+    FeaturePlot(dat,features=c("CD19","MS4A1","CD79A","PTPRC"),label=T) # B cells
+    FeaturePlot(dat,features=c("CD68","FCGR3A","LYZ","PTPRC"),label=T) # Myeloid 
+    FeaturePlot(dat,features=c("NCAM1","NKG7","CD3D","KLRD1"),label=T) # NK cells
+    FeaturePlot(dat,features=c("COL1A1","COL1A2","THY1","PTPRC"),label=T) # Fibroblast
+    FeaturePlot(dat,features=c("PTPRC","CLDN5","FLT1","RAMP2"),label=T) #Endothelials
+    FeaturePlot(dat,features=c("EPCAM","KRT19","CDH1","EGFR"),label=T) # Epithelials
+    FeaturePlot(dat,features=c("TPSAB1","TPSB2","MS4A2","KIT"),label=T) # Mast cells
+    FeaturePlot(dat,features=c("CLEC10A","CD1C","CLEC4C", "IL3RA"),label=T) # pDC 
+    dev.off()
+
+dat$infercnv.type <- "tmp"
+dat$infercnv.type[which(dat$seurat_clusters%in%c(0,13,14))] <- "Tcell"
+dat$infercnv.type[which(dat$seurat_clusters%in%c(7,9,11,17))] <- "Tumor"
+saveRDS(dat,file="/public/workspace/lily/Mutiple_LB/E0927.RDS")
+
+
+
+
+############################################################################################################################
+# run infercnv 
+library(infercnv)
+library(Seurat)
+E0927 <- readRDS("/public/workspace/lily/Mutiple_LB/E0927.RDS")
+sub.dat <- subset(E0927,cells=which(E0927$infercnv.type%in%c("Tcell","Tumor")))
+
+
+setwd('/public/workspace/lily/Mutiple_LB/infercnv/')
+dir.create('./E0927')
+write.table(sub.dat$infercnv.type,file="E0927_cell_info.txt",sep="\t",col.names=F,quote=F)
+count<-as.matrix(sub.dat@assays$RNA@counts)
+write.table(count,file="E0927_count_exp.txt",sep="\t",quote=F)
+
+infercnv_obj = CreateInfercnvObject(raw_counts_matrix="/public/workspace/lily/Mutiple_LB/infercnv/E0927_count_exp.txt",
+         annotations_file="/public/workspace/lily/Mutiple_LB/infercnv/E0927_cell_info.txt",
+         delim="\t",
+         gene_order_file="/public/workspace/lily/REF/INDEX-hg19/anno/gencode_hg19_pos.txt",
+         ref_group_names="Tcell")
+
+
+setwd('/public/workspace/lily/Mutiple_LB/infercnv/E0927')
+infercnv_obj = infercnv::run(infercnv_obj,
+                             cutoff=0.1, # cutoff=1 works well for Smart-seq2, and cutoff=0.1 works well for 10x Genomics
+                             out_dir="./", 
+                             cluster_by_groups=T, 
+                             plot_steps=F,
+                             no_prelim_plot = TRUE,
+                             num_threads=10, #big
+                             no_plot=F ,
+                             output_format = "pdf" # maybe can more quick 
+                             # used for final scaling to fit range (0,2) centered at 1.
+                             )
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+