Add final analyses

icbi-lab · Oct 7, 2023 · 3d8d473 · 3d8d473
1 parent be4f8f0
commit 3d8d473
Show file tree

Hide file tree

Showing 6 changed files with 235 additions and 63 deletions.
diff --git a/README.md b/README.md
@@ -1,8 +1,9 @@
-# kirchmair_2023
+## Kirchmair et al., Frontiers in Immunology 2023
 
-Data analyses for Kirchmair et al., 2023
+Data analyses for '<sup>13</sup>C tracer analysis reveals the landscape of metabolic checkpoints in human CD8<sup>+</sup> T cell differentiation and exhaustion' (Kirchmair et al., Frontiers in Immunology 2023)
 
 
+
 ## Environment setup
 ```bash
 # source lib/make_env.sh # initial code to make conda envs
@@ -17,7 +18,7 @@ Rscript -e 'devtools::install_github("AlexanderKirchmair/DeLuciatoR")' # version
 
 mkdir logs
 ```
-
+ 
 Set up [NGSCheckMate-1.0.0](https://github.com/parklab/NGSCheckMate)
 ```bash
 cd lib
@@ -36,8 +37,8 @@ cd ..
 ```bash
 conda activate cd8
 ```
-
-Memory differentiation samples: [GSE234099](https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE234099)
+ 
+Memory differentiation samples ([GSE234099](https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE234099)):
 ```bash
 mkdir -p data/rnaseq/MEM/00_RAW
 accs=$(awk 'NR>1 {print $2 "-" $1}' "tables/GSE234099.txt")
@@ -47,8 +48,8 @@ do
   while [ $(qstat -s pr | grep -w -c "DOWNLOAD") -gt 3 ]; do sleep 3; done
 done
 ```
-
-Exhaustion samples: [GSE234100](https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE234100)
+ 
+Exhaustion samples ([GSE234100](https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE234100)):
 ```bash
 mkdir -p data/rnaseq/EXH/00_RAW
 accs=$(awk 'NR>1 {print $2 "-" $1}' "tables/GSE234100.txt")
@@ -58,7 +59,7 @@ do
   while [ $(qstat -s pr | grep -w -c "DOWNLOAD") -gt 3 ]; do sleep 3; done
 done
 ```
-
+ 
 ### Preprocessing
 
 Trimming:
@@ -76,7 +77,7 @@ do
 done
 ```
 
-Read alignment and quantification using the [nf-core/rnaseq-3.4](https://nf-co.re/rnaseq/3.4) pipeline (set genome paths in 'lib/run_rnaseq.sh'):
+Read alignment and quantification using the [nf-core/rnaseq-3.4](https://nf-co.re/rnaseq/3.4) pipeline (set genome paths in `lib/run_rnaseq.sh`):
 ```bash
 bash -i lib/run_rnaseq.sh 'tables/samplesheet_mem.csv' 'data/rnaseq/MEM/02_NF_results'
 mv .nextflow.log logs/mem.nextflow.log
@@ -103,17 +104,21 @@ mv r_script.r.Rout data/rnaseq/EXH/samplecheck/
 Rscript lib/plot_NGSCheckMate.R
 ```
 
+Gene sets were prepared by running `Rscript lib/prepare_genesets.R`.
 
-## Metabolomics data
-13C metabolomics data: 'data/metabolomics'
 
+## Metabolomics data
+<sup>13</sup>C metabolomics data: `data/metabolomics`
+
 
 ## Seahorse data
-Seahorse data: 'data/seahorse'
-
+Seahorse data: `data/seahorse`
+ 
 
 ## Analysis
-Gene sets were prepared by running 'Rscript lib/prepare_genesets.R'.
+
+The main analyses can be reproduced by rendering the .Rmd files:
+
 ```bash
 conda activate cd8
 Rscript -e "rmarkdown::render('analyses/01-RNA-Differentiation.Rmd')"
@@ -122,12 +127,16 @@ Rscript -e "rmarkdown::render('analyses/03-RNA-Exhaustion.Rmd')"
 Rscript -e "rmarkdown::render('analyses/04-13C-Exhaustion.Rmd')"
 Rscript -e "rmarkdown::render('analyses/05-RNA-Exhaustion-Public.Rmd')"
 Rscript -e "rmarkdown::render('analyses/06-RNA-Mitochondria.Rmd')"
+Rscript -e "rmarkdown::render('analyses/07-Public-Dataset-Comparison.Rmd')"
 ```
 
 
-## Results (figures and tables)
+## Results
+
+To reproduce the final figures and tables, run:
+
 ```bash
 conda activate cd8
-Rscript -e "rmarkdown::render('analyses/07-Results.Rmd')"
+Rscript -e "rmarkdown::render('analyses/08-Results.Rmd')"
 ```
 
diff --git a/analyses/07-Public-Dataset-Comparison.Rmd b/analyses/07-Public-Dataset-Comparison.Rmd
@@ -0,0 +1,64 @@
+---
+title: "Public-Dataset-Comparison"
+author: "Alexander Kirchmair"
+params:
+  data:    ../data/public
+---
+
+```{r setup, include=FALSE}
+library(Seurat)
+library(DESeq2)
+library(limma)
+library(datamisc)
+library(dplyr)
+```
+
+
+Subset markers
+```{r}
+# Combine the memory-differentiation and exhaustion RNA-seq objects and derive
+# per-cell-type marker gene sets from the merged counts and design.
+RNA <- list()
+RNAmem <- readRDS(fp("../data", "RNAmem.rds"))
+RNAexh <- readRDS(fp("../data", "RNAexh.rds"))
+
+RNA$counts <- cjoin(RNAmem$counts, RNAexh$counts)
+# Celltype and Donor are cast to character in both designs before joining; the
+# exhaustion donors are offset by 3 (presumably to keep them distinct from the
+# memory donors - TODO confirm) and the exhaustion-only Exp column is dropped.
+RNA$design <- full_join(mutate(RNAmem$design, Celltype = as.character(Celltype), Donor = as.character(Donor)),
+                        mutate(RNAexh$design, Exp = NULL, Celltype = as.character(Celltype), Donor = as.character(as.numeric(Donor)+3)))
+# NOTE(review): assumes full_join() yields exactly one row per input sample,
+# memory samples first - confirm, otherwise these row names are misaligned.
+rownames(RNA$design) <- c(rownames(RNAmem$design), rownames(RNAexh$design))
+
+RNA$markers <- getmarkers(as.matrix(RNA$counts)[,rownames(RNA$design)], RNA$design, group = "Celltype", formula = ~ group, log2FC > 1 & padj < 0.05)
+# Keep at most the first 200 markers per group. seq_len() is used instead of
+# 1:n so that an empty marker set stays empty (1:0 would index c(1, 0) and
+# turn it into a single NA).
+RNA$markers <- lapply(RNA$markers, function(x){ x[seq_len(min(length(x), 200))] })
+```
+
+
+Comparison to public bulk RNA sequencing data
+```{r}
+# Score the marker gene sets (RNA$markers) in the public bulk expression
+# profiles with runGSVA().
+public <- list()
+# Resolve the input through params$data, as the single-cell chunk in this
+# document does, so that overriding the parameter relocates every public input.
+public$bulk <- read.csv(fp(params$data, "cd8_subset_profiles.csv"), row.names = 1)
+public$gsva_bulk <- runGSVA(public$bulk, genesets = RNA$markers)
+# Append " (public)" to every column name of the bulk GSVA result.
+colnames(public$gsva_bulk) <- paste0(colnames(public$gsva_bulk), " (public)")
+```
+
+
+Comparison to public single-cell RNA sequencing data
+```{r}
+# Fetch the CD8 T cell map object once (skipped when the file already exists
+# under params$data), then load it.
+if (!file.exists(fp(params$data, "CD8_Tcellmap.rds"))){
+  download.file("https://singlecell.mdanderson.org/TCM/download/CD8",
+                fp(params$data, "CD8_Tcellmap.rds"), method = "wget")
+}
+tcellmap <- readRDS(fp(params$data, "CD8_Tcellmap.rds"))
+
+# Average the normalised RNA assay per annotated cell type.
+Idents(tcellmap) <- tcellmap$cell.type
+tcellmap <- NormalizeData(tcellmap)
+public$sc <- AverageExpression(tcellmap, group.by = "cell.type", assays = "RNA")[["RNA"]]
+colnames(public$sc) <- make.names(colnames(public$sc))
+# Order genes by mean expression (highest first) and score the marker sets on
+# the top 15000. The row index is capped at nrow() so that an object with fewer
+# than 15000 genes does not fail with "subscript out of bounds".
+public$sc <- public$sc[order(rowMeans(public$sc), decreasing = TRUE),]
+public$gsva_sc <- runGSVA(public$sc[seq_len(min(nrow(public$sc), 15000)),], genesets = RNA$markers)
+
+```
+
+
+```{r}
+saveRDS(public, "../data/public.rds")  # write the `public` list built in the chunks above to ../data/public.rds
+```
+
+