From 3d8d473070ff5985c2a0c0f95475546a9a932be2 Mon Sep 17 00:00:00 2001
From: kirchmair <alexander.kirchmair@i-med.ac.at>
Date: Sat, 7 Oct 2023 11:32:43 +0200
Subject: [PATCH] Add final analyses

---
 README.md                                   |  41 +++---
 analyses/07-Public-Dataset-Comparison.Rmd   |  64 ++++++++++
 analyses/{07-Results.Rmd => 08-Results.Rmd} | 133 ++++++++++++++------
 env/cd8.yml                                 |  34 +++--
 lib/R-functions.R                           |  24 ++++
 lib/make_env.sh                             |   2 +-
 6 files changed, 235 insertions(+), 63 deletions(-)
 create mode 100644 analyses/07-Public-Dataset-Comparison.Rmd
 rename analyses/{07-Results.Rmd => 08-Results.Rmd} (86%)
diff --git a/README.md b/README.md
index 9be7b78..1376d27 100644
--- a/README.md
+++ b/README.md
@@ -1,8 +1,9 @@
-# kirchmair_2023
+## Kirchmair et al., Frontiers in Immunology 2023
 
-Data analyses for Kirchmair et al., 2023
+Data analyses for '<sup>13</sup>C tracer analysis reveals the landscape of metabolic checkpoints in human CD8<sup>+</sup> T cell differentiation and exhaustion' (Kirchmair et al., Frontiers in Immunology 2023)
 
 
+ 
 ## Environment setup
 ```bash
 # source lib/make_env.sh # initial code to make conda envs
@@ -17,7 +18,7 @@ Rscript -e 'devtools::install_github("AlexanderKirchmair/DeLuciatoR")' # version
 
 mkdir logs
 ```
-
+ 
 Set up [NGSCheckMate-1.0.0](https://github.com/parklab/NGSCheckMate)
 ```bash
 cd lib
@@ -36,8 +37,8 @@ cd ..
 ```bash
 conda activate cd8
 ```
-
-Memory differentiation samples: [GSE234099](https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE234099)
+ 
+Memory differentiation samples ([GSE234099](https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE234099)):
 ```bash
 mkdir -p data/rnaseq/MEM/00_RAW
 accs=$(awk 'NR>1 {print $2 "-" $1}' "tables/GSE234099.txt")
@@ -47,8 +48,8 @@ do
   while [ $(qstat -s pr | grep -w -c "DOWNLOAD") -gt 3 ]; do sleep 3; done
 done
 ```
-
-Exhaustion samples: [GSE234100](https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE234100)
+ 
+Exhaustion samples ([GSE234100](https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE234100)):
 ```bash
 mkdir -p data/rnaseq/EXH/00_RAW
 accs=$(awk 'NR>1 {print $2 "-" $1}' "tables/GSE234100.txt")
@@ -58,7 +59,7 @@ do
   while [ $(qstat -s pr | grep -w -c "DOWNLOAD") -gt 3 ]; do sleep 3; done
 done
 ```
-
+ 
 ### Preprocessing
 
 Trimming:
@@ -76,7 +77,7 @@ do
 done
 ```
 
-Read alignment and quantification using the [nf-core/rnaseq-3.4](https://nf-co.re/rnaseq/3.4) pipeline (set genome paths in 'lib/run_rnaseq.sh'):
+Read alignment and quantification using the [nf-core/rnaseq-3.4](https://nf-co.re/rnaseq/3.4) pipeline (set genome paths in `lib/run_rnaseq.sh`):
 ```bash
 bash -i lib/run_rnaseq.sh 'tables/samplesheet_mem.csv' 'data/rnaseq/MEM/02_NF_results'
 mv .nextflow.log logs/mem.nextflow.log
@@ -103,17 +104,21 @@ mv r_script.r.Rout data/rnaseq/EXH/samplecheck/
 Rscript lib/plot_NGSCheckMate.R
 ```
 
+Gene sets were prepared by running `Rscript lib/prepare_genesets.R`.
 
-## Metabolomics data
-13C metabolomics data: 'data/metabolomics'
 
+## Metabolomics data
+<sup>13</sup>C metabolomics data: `data/metabolomics`
+ 
 
 ## Seahorse data
-Seahorse data: 'data/seahorse'
-
+Seahorse data: `data/seahorse`
+ 
 
 ## Analysis
-Gene sets were prepared by running 'Rscript lib/prepare_genesets.R'.
+
+The main analyses can be reproduced by rendering the the .Rmd files:
+
 ```bash
 conda activate cd8
 Rscript -e "rmarkdown::render('analyses/01-RNA-Differentiation.Rmd')"
@@ -122,12 +127,16 @@ Rscript -e "rmarkdown::render('analyses/03-RNA-Exhaustion.Rmd')"
 Rscript -e "rmarkdown::render('analyses/04-13C-Exhaustion.Rmd')"
 Rscript -e "rmarkdown::render('analyses/05-RNA-Exhaustion-Public.Rmd')"
 Rscript -e "rmarkdown::render('analyses/06-RNA-Mitochondria.Rmd')"
+Rscript -e "rmarkdown::render('analyses/07-Public-Dataset-Comparison.Rmd')"
 ```
 
 
-## Results (figures and tables)
+## Results
+
+To reproduce the final figures and tables, run:
+
 ```bash
 conda activate cd8
-Rscript -e "rmarkdown::render('analyses/07-Results.Rmd')"
+Rscript -e "rmarkdown::render('analyses/08-Results.Rmd')"
 ```
 
diff --git a/analyses/07-Public-Dataset-Comparison.Rmd b/analyses/07-Public-Dataset-Comparison.Rmd
new file mode 100644
index 0000000..e6ba332
--- /dev/null
+++ b/analyses/07-Public-Dataset-Comparison.Rmd
@@ -0,0 +1,64 @@
+---
+title: "Public-Dataset-Comparison"
+author: "Alexander Kirchmair"
+params:
+  data:    ../data/public
+---
+
+```{r setup, include=FALSE}
+library(Seurat)
+library(DESeq2)
+library(limma)
+library(datamisc)
+library(dplyr)
+```
+
+
+Subset markers
+```{r}
+RNA <- list()
+RNAmem <- readRDS(fp("../data", "RNAmem.rds"))
+RNAexh <- readRDS(fp("../data", "RNAexh.rds"))
+
+RNA$counts <- cjoin(RNAmem$counts, RNAexh$counts)
+RNA$design <- full_join(mutate(RNAmem$design, Celltype = as.character(Celltype), Donor = as.character(Donor)),
+                        mutate(RNAexh$design, Exp = NULL, Celltype = as.character(Celltype), Donor = as.character(as.numeric(Donor)+3)))
+rownames(RNA$design) <- c(rownames(RNAmem$design), rownames(RNAexh$design))
+
+RNA$markers <- getmarkers(as.matrix(RNA$counts)[,rownames(RNA$design)], RNA$design, group = "Celltype", formula = ~ group, log2FC > 1 & padj < 0.05)
+RNA$markers <- lapply(RNA$markers, function(x){ x[1:min(length(x),200)] })
+```
+
+
+Comparison to public bulk RNA sequencing data
+```{r}
+public <- list()
+public$bulk <- read.csv("../data/public/cd8_subset_profiles.csv", row.names = 1)
+public$gsva_bulk <- runGSVA(public$bulk, genesets = RNA$markers)
+colnames(public$gsva_bulk) <- paste0(colnames(public$gsva_bulk), " (public)")
+```
+
+
+Comparison to public single-cell RNA sequencing data
+```{r}
+if (!file.exists(fp(params$data, "CD8_Tcellmap.rds"))){
+  download.file("https://singlecell.mdanderson.org/TCM/download/CD8",
+                fp(params$data, "CD8_Tcellmap.rds"), method = "wget")
+}
+tcellmap <- readRDS(fp(params$data, "CD8_Tcellmap.rds"))
+
+Idents(tcellmap) <- tcellmap$cell.type
+tcellmap <- NormalizeData(tcellmap)
+public$sc <- AverageExpression(tcellmap, group.by = "cell.type", assays = "RNA")[["RNA"]]
+colnames(public$sc) <- make.names(colnames(public$sc))
+public$sc <- public$sc[order(rowMeans(public$sc), decreasing = TRUE),]
+public$gsva_sc <- runGSVA(public$sc[1:15000,], genesets = RNA$markers)
+
+```
+
+
+```{r}
+saveRDS(public, "../data/public.rds")
+```
+
+
diff --git a/analyses/07-Results.Rmd b/analyses/08-Results.Rmd
similarity index 86%
rename from analyses/07-Results.Rmd
rename to analyses/08-Results.Rmd
index 131719b..782726e 100644
--- a/analyses/07-Results.Rmd
+++ b/analyses/08-Results.Rmd
@@ -6,6 +6,7 @@ params:
   figures:  ../results
   tables:  ../results
   format:   pdf
+  label:    FALSE
 ---
 
 ```{r setup, include=FALSE}
@@ -173,7 +174,7 @@ hm <- RNAmem$heatmap_data |>
                           name = "z-expr.",
                           right_annotation = ha,
                           row_split = rsplit,
-                          row_gap = grid::unit(7, "pt"),
+                          row_gap = grid::unit(5, "pt"),
                           row_title_rot = 0,
                           column_title_gp = grid::gpar(fontsize = titlesize),
                           row_title_gp = grid::gpar(fontsize = titlesize),
@@ -185,18 +186,6 @@ hm <- RNAmem$heatmap_data |>
 
 hm_anno_colors <- pals::polychrome(length(hm_annotation)) |> colorspace::darken(amount = 0.7) |> setNames(names(hm_annotation))
 
-hmdraw <- function(){
-  draw(hm, heatmap_legend_side = "left")
-  for(i in seq(hm_annotation)) {
-      decorate_annotation(annotation = "pathways",
-                          slice = i,
-                          code = {
-                            grid.rect(x = 0, width = unit(1, "mm"), gp = gpar(fill = hm_anno_colors[i], col = NA), just = "left")
-                            grid.text(paste(hm_annotation[[i]], collapse = "\n"), gp = gpar(col = hm_anno_colors[i], fontsize = fontsize), x = unit(5, "mm"), just = "left")
-                          })
-  }
-}
-
 E <- grid.grabExpr(hmdraw())
 
 ```
@@ -218,6 +207,9 @@ DEE
 
 Fig1 <-  wrap_plots(p, guides = "collect", tag_level = "new", design = layout)
 Fig1 <- Fig1 + plot_annotation(tag_levels = 'A',  theme = theme(plot.tag = element_text(size = 8))) & theme_fig
+if (params$label == TRUE){
+  Fig1 <- plot_grid(Fig1, textGrob("Figure 1", x = unit(0.5, "npc"), y = unit(0.5, "npc")), ncol = 1, rel_heights = c(0.95,0.05))
+}
 Fig1 %>% saveplot(file.path(params$figures, "Figure1"), dev = params$format, width =  full_width, height =  full_height*0.8)
 
 ```
@@ -236,22 +228,22 @@ C13cells <- subset(C13mem, Sampletype == "cells")
 Plots
 ```{r}
 
-dir.create(fp(params$figures, "isoplots"), showWarnings = FALSE)
-
-C13cells |>
-  isoplot(summarise_by = Celltype, assay = "mid_clean",
-          col_cex = 0.5, linewidth = 2, nacolor = "white", colorscale = c("white", "#1702a1"),
-          dir = fp(params$figures, "isoplots"), title = "ID", height = 10, title_size = 120, fontsize = 100,
-          dev = "pdf", label = FALSE, cumulative = TRUE, legend = FALSE)
-
-# plot legend
-leg_plot <- C13cells |>
-  isoplot(summarise_by = Celltype, assay = "mid_clean", mets = "lac", fontsize = 120, colorscale = c("white", "#1702a1"))
-leg_plot <- leg_plot + theme_basic() + theme(plot.background = element_blank(), panel.background = element_blank(), legend.background = element_blank()) +
-  guides(fill = guide_colourbar(barheight = grid::unit(200, "mm"), barwidth = grid::unit(20, "mm"), raster = FALSE, nbin = 20,
-                                ticks.colour = "black", frame.colour = "black", ticks.linewidth = 0.5, frame.linewidth = 0.5))
-cowplot::get_legend(leg_plot) |>
-  cowplot::ggsave2(filename = fp(params$figures, "isoplots", "legend.pdf"), dev = "pdf", width = 10, height = 20)
+# dir.create(fp(params$figures, "isoplots"), showWarnings = FALSE)
+# 
+# C13cells |>
+#   isoplot(summarise_by = Celltype, assay = "mid_clean",
+#           col_cex = 0.5, linewidth = 2, nacolor = "white", colorscale = c("white", "#1702a1"),
+#           dir = fp(params$figures, "isoplots"), title = "ID", height = 10, title_size = 120, fontsize = 100,
+#           dev = "pdf", label = FALSE, cumulative = TRUE, legend = FALSE)
+# 
+# # plot legend
+# leg_plot <- C13cells |>
+#   isoplot(summarise_by = Celltype, assay = "mid_clean", mets = "lac", fontsize = 120, colorscale = c("white", "#1702a1"))
+# leg_plot <- leg_plot + theme_basic() + theme(plot.background = element_blank(), panel.background = element_blank(), legend.background = element_blank()) +
+#   guides(fill = guide_colourbar(barheight = grid::unit(200, "mm"), barwidth = grid::unit(20, "mm"), raster = FALSE, nbin = 20,
+#                                 ticks.colour = "black", frame.colour = "black", ticks.linewidth = 0.5, frame.linewidth = 0.5))
+# cowplot::get_legend(leg_plot) |>
+#   cowplot::ggsave2(filename = fp(params$figures, "isoplots", "legend.pdf"), dev = "pdf", width = 10, height = 20)
 
 ```
 
@@ -376,6 +368,9 @@ p <- list(A, B, C, D, E, F, G, H, I)
 
 Fig3 <-  wrap_plots(p, guides = "collect", tag_level = "new", ncol = 3)
 Fig3 <- Fig3 + plot_annotation(tag_levels = 'A',  theme = theme(plot.tag = element_text(size = 8))) & theme_fig
+if (params$label == TRUE){
+  Fig3 <- plot_grid(Fig3, textGrob("Figure 3", x = unit(0.5, "npc"), y = unit(0.5, "npc")), ncol = 1, rel_heights = c(0.95,0.05))
+}
 Fig3 %>% saveplot(file.path(params$figures, "Figure3"), dev = params$format, width =  full_width, height =  full_height*0.7)
 ```
 
@@ -471,10 +466,13 @@ CE
 
 Fig4 <-  wrap_plots(p, guides = "keep", tag_level = "new", design = layout, widths = c(1,1))
 Fig4 <- Fig4 + plot_annotation(tag_levels = 'A',  theme = theme(plot.tag = element_text(size = 8))) & theme_fig
+if (params$format == "jpg"){
+  Fig4 <- plot_grid(Fig4, textGrob("Figure 4", x = unit(0.5, "npc"), y = unit(0.5, "npc")), ncol = 1, rel_heights = c(0.95,0.05))
+}
 Fig4 %>% saveplot(file.path(params$figures, "Figure4"), dev = params$format, width =  full_width, height =  full_height*0.6)
 
 # plot legend with color gradient separately as png:
-# cowplot::get_legend(C) |> cowplot::plot_grid() |> saveplot(file.path(params$figures, "Figure4legend"), dev = "png", width =  1000, height =  1500)
+cowplot::get_legend(C) |> cowplot::plot_grid() |> saveplot(file.path(params$figures, "Figure4legend"), dev = "png", width =  1000, height =  1500)
 ```
 
 
@@ -489,6 +487,8 @@ RNAexh <- readRDS(fp(params$data, "RNAexh.rds"))
 C13mem_cells <- readRDS(fp(params$data, "C13mem.rds")) |> subset(Sampletype == "cells" & Tracer == "13C")
 C13exh_cells <- readRDS(fp(params$data, "C13exh.rds")) |> subset(Sampletype == "cells" & Tracer == "13C")
 
+public <- readRDS(fp(params$data, "public.rds"))
+
 genesets <- read.delim("../tables/genesets.tsv.gz")
 markers <- readLines("../tables/markers.txt")
 
@@ -508,10 +508,39 @@ expr_mean <- log2(expr + 1) |> summarise_cols(coldata = df, by = Celltype, FUN =
 
 ### Supplementary Figure 2: Differentiation markers.
 ```{r}
-FigS2 <- expr_mean[intersect(markers, rownames(expr)), cols[1:4]] |>
+A <- public$gsva_bulk |> cxheatmap(scale = "row") |> draw() |> grid.grabExpr()
+B <- public$gsva_sc |> cxheatmap(scale = "row") |> draw() |> grid.grabExpr()
+C <- expr_mean[intersect(markers, rownames(expr)), cols[1:4]] |>
   cxheatmap(scale = "", title = "z-expr.", cluster_cols = FALSE,
-            fontsize = 18, rowcex = 0.8, colcex = 1.2, column_names_rot = 0, column_names_centered = TRUE)
-FigS2 %>% saveplot(file.path(params$figures, "FigureS2"), dev = params$format, width = full_width, height =  full_height*0.8)
+            fontsize = 18, rowcex = 0.7, colcex = 1.2, column_names_rot = 0, column_names_centered = TRUE) |> draw() |> grid.grabExpr()
+
+df <- (RNAmem$ora_cluster[c("UDDD","DUDD","DDUD","DDDU")] %L>% mutate(., cluster=.name)) |> Reduce(f=rbind)
+top <- RNAmem$ora_cluster[c("UDDD","DUDD","DDUD","DDDU")] %L>% subset(., padj <= 0.1) %L>% rownames()
+pw <- (top %L>% function(x) x[1:min(length(x),3)]) |> unlist() |> unique()
+df <- subset(df, ID %in% pw)
+df$cluster[df$cluster == "UDDD"] <- "TN"
+df$cluster[df$cluster == "DUDD"] <- "TSCM"
+df$cluster[df$cluster == "DDUD"] <- "TCM"
+df$cluster[df$cluster == "DDDU"] <- "TEM"
+df$cluster <- factor(df$cluster, ordered = TRUE, levels = c("TN","TSCM","TCM","TEM"))
+D <- ggplot(df, aes(cluster, ID, size=Count, color=-log10(padj))) +
+  theme_basic(base_size = 15) +
+  theme(panel.grid.major = element_line(color = "#dddddd")) +
+  geom_point() + xlab("") + ylab("")
+
+
+layout <- "
+AC
+BC
+DD
+"
+
+FigS2 <-  wrap_plots(list(A,B,C,D), guides = "keep", tag_level = "new", design = layout, heights = c(1,1,0.7))
+FigS2 <- FigS2 + plot_annotation(tag_levels = 'A',  theme = theme(plot.tag = element_text(size = 8))) & theme_fig
+if (params$label == TRUE){
+  FigS2 <- plot_grid(FigS2, textGrob("Figure S2", x = unit(0.5, "npc"), y = unit(0.55, "npc")), ncol = 1, rel_heights = c(0.95,0.05))
+}
+FigS2 %>% saveplot(file.path(params$figures, "FigureS2"), dev = params$format, width =  full_width, height =  full_height*0.9)
 
 ```
 
@@ -521,6 +550,11 @@ FigS2 %>% saveplot(file.path(params$figures, "FigureS2"), dev = params$format, w
 genes <- subset(genesets, term == "MetabolicAtlas glycolysis_gluconeogenesis")$gene
 FigS3 <- expr_mean[genes, cols[1:4]] |> cxheatmap(column_title = "MetabolicAtlas Glycolysis/Gluconeogenesis", title = "z-expr.", cluster_cols = FALSE,
                                                   fontsize = 18, rowcex = 0.8, colcex = 1.2, column_names_rot = 0, column_names_centered = TRUE)
+
+if (params$label == TRUE){
+  FigS3 <- FigS3 |> draw() |> grid.grabExpr()
+  FigS3 <- plot_grid(FigS3, textGrob("Figure S3", x = unit(0.5, "npc"), y = unit(0.5, "npc")), ncol = 1, rel_heights = c(0.95,0.05))
+}
 FigS3 %>% saveplot(file.path(params$figures, "FigureS3"), dev = params$format, width = full_width, height =  full_height*0.8)
 
 ```
@@ -586,6 +620,9 @@ EF
 
 FigS4 <-  wrap_plots(p, guides = "keep", tag_level = "new", design = layout, widths = c(1,1))
 FigS4 <- FigS4 + plot_annotation(tag_levels = 'A') & th
+if (params$format == "jpg"){
+  FigS4 <- plot_grid(FigS4, textGrob("Figure S4", x = unit(0.5, "npc"), y = unit(0.5, "npc")), ncol = 1, rel_heights = c(0.95,0.05))
+}
 FigS4 %>% saveplot(file.path(params$figures, "FigureS4"), dev = params$format, width =  full_width, height =  full_height)
 
 ```
@@ -595,7 +632,12 @@ FigS4 %>% saveplot(file.path(params$figures, "FigureS4"), dev = params$format, w
 ```{r}
 genes <- genesets |> subset(term %in% c("MitoCarta tca cycle")) |> pull(gene)
 FigS5 <- expr_mean[genes, cols[1:4]] |> cxheatmap(column_title = "MitoCarta TCA cycle", title = "z-expr.", cluster_cols = FALSE,
-                                                  fontsize = 18, rowcex = 0.8, colcex = 1.2, column_names_rot = 0, column_names_centered = TRUE)
+                                                  fontsize = 18, rowcex = 0.8, colcex = 1.2, column_names_rot = 0, column_names_centered = TRUE) 
+
+if (params$label == TRUE){
+  FigS5 <- FigS5 |> draw() |> grid.grabExpr()
+  FigS5 <- plot_grid(FigS5, textGrob("Figure S5", x = unit(0.5, "npc"), y = unit(0.5, "npc")), ncol = 1, rel_heights = c(0.95,0.05))
+}
 FigS5 %>% saveplot(file.path(params$figures, "FigureS5"), dev = params$format, width = full_width, height =  full_height*0.8)
 
 ```
@@ -614,10 +656,15 @@ genes <- list(
 df <- stack(genes) |> dplyr::rename(genes = values, shuttle = ind)
 FigS6 <- expr_mean[unlist(genes), cols[1:4]] |> cxheatmap(column_title = "NADH shuttles", title = "z-expr.", row_split = df$shuttle,
                                                           cluster_rows = FALSE, cluster_cols = FALSE,
-                                                          row_title_gp = grid::gpar(fontsize = 18),
+                                                          row_title_gp = grid::gpar(fontsize = 15),
                                                           row_gap = grid::unit(7, "pt"),
                                                           fontsize = 18, rowcex = 0.8, colcex = 1.2, column_names_rot = 0, column_names_centered = TRUE)
-FigS6 %>% saveplot(fp(params$figures, "FigureS6"), dev = params$format, width = full_width, height =  full_height*0.8)
+
+if (params$label == TRUE){
+  FigS6 <- FigS6 |> draw() |> grid.grabExpr()
+  FigS6 <- plot_grid(FigS6, textGrob("Figure S6", x = unit(0.5, "npc"), y = unit(0.5, "npc")), ncol = 1, rel_heights = c(0.95,0.05))
+}
+FigS6 %>% saveplot(fp(params$figures, "FigureS6"), dev = params$format, width = full_width, height =  full_height*0.9)
 
 ```
 
@@ -629,6 +676,11 @@ exp <- c(rep("differentiation", 4), rep("exhaustion", 2))
 FigS7 <- expr_mean[intersect(genes, rownames(expr_mean)), cols] |>
   cxheatmap(column_title = "GO L-amino acid transmembrane transporter activity", column_split = exp, title = "z-expr.", cluster_cols = FALSE,
             fontsize = 18, rowcex = 0.8, colcex = 1.2, column_names_rot = 0, column_names_centered = TRUE)
+
+if (params$label == TRUE){
+  FigS7 <- FigS7 |> draw() |> grid.grabExpr()
+  FigS7 <- plot_grid(FigS7, textGrob("Figure S7", x = unit(0.5, "npc"), y = unit(0.5, "npc")), ncol = 1, rel_heights = c(0.95,0.05))
+}
 FigS7 %>% saveplot(fp(params$figures, "FigureS7"), dev = params$format, width = full_width, height =  full_height*0.8)
 
 ```
@@ -640,6 +692,11 @@ genes <- genesets |> subset(term %in% c("MitoCarta oxphos subunits")) |> pull(ge
 FigS8 <- expr_mean[intersect(genes, rownames(expr_mean)), cols[1:4]] |>
   cxheatmap(column_title = "OXPHOS subunits", title = "z-expr.", cluster_cols = FALSE,
             fontsize = 18, rowcex = 0.6, colcex = 1.2, column_names_rot = 0, column_names_centered = TRUE)
+
+if (params$label == TRUE){
+  FigS8 <- FigS8 |> draw() |> grid.grabExpr()
+  FigS8 <- plot_grid(FigS8, textGrob("Figure S8", x = unit(0.5, "npc"), y = unit(0.5, "npc")), ncol = 1, rel_heights = c(0.95,0.05))
+}
 FigS8 %>% saveplot(fp(params$figures, "FigureS8"), dev = params$format, width = full_width, height =  full_height*0.8)
 
 ```
@@ -683,11 +740,15 @@ th <- theme(axis.text = txt,
 
 FigS9 <-  wrap_plots(A, wrap_plots(B, plot_spacer(), nrow = 1, widths = c(1, 0.3)), guides = "keep", tag_level = "new", ncol = 1)
 FigS9 <- FigS9 + plot_annotation(tag_levels = 'A') & th
+if (params$label == TRUE){
+  FigS9 <- plot_grid(FigS9, textGrob("Figure S9", x = unit(0.5, "npc"), y = unit(0.5, "npc")), ncol = 1, rel_heights = c(0.95,0.05))
+}
 FigS9 %>% saveplot(fp(params$figures, "FigureS9"), dev = params$format, width =  full_width, height =  full_height*0.6)
 ```
 
 
 
+
 # TABLES ----------------------------------------------------------------------------------------------------------------------------------------------------
 
 Data
diff --git a/env/cd8.yml b/env/cd8.yml
index 22d6392..6c9e3ef 100644
--- a/env/cd8.yml
+++ b/env/cd8.yml
@@ -24,15 +24,18 @@ dependencies:
   - bioconductor-annotate=1.72.0=r41hdfd78af_0
   - bioconductor-annotationdbi=1.56.2=r41hdfd78af_0
   - bioconductor-apeglm=1.16.0=r41hc247a5b_2
+  - bioconductor-beachmat=2.10.0=r41hc247a5b_2
   - bioconductor-biobase=2.54.0=r41hc0cfd56_2
   - bioconductor-biocfilecache=2.2.0=r41hdfd78af_0
   - bioconductor-biocgenerics=0.40.0=r41hdfd78af_0
   - bioconductor-biocparallel=1.28.3=r41hc247a5b_1
+  - bioconductor-biocsingular=1.10.0=r41hc247a5b_2
   - bioconductor-biomart=2.50.0=r41hdfd78af_0
   - bioconductor-biostrings=2.62.0=r41hc0cfd56_2
   - bioconductor-clusterprofiler=4.2.0=r41hdfd78af_0
   - bioconductor-complexheatmap=2.10.0=r41hdfd78af_0
   - bioconductor-delayedarray=0.20.0=r41hc0cfd56_2
+  - bioconductor-delayedmatrixstats=1.16.0=r41hdfd78af_0
   - bioconductor-deseq2=1.34.0=r41hc247a5b_3
   - bioconductor-do.db=2.9=r41hdfd78af_13
   - bioconductor-dose=3.20.0=r41hdfd78af_0
@@ -47,6 +50,10 @@ dependencies:
   - bioconductor-ggtree=3.2.0=r41hdfd78af_0
   - bioconductor-go.db=3.14.0=r41hdfd78af_1
   - bioconductor-gosemsim=2.20.0=r41hc247a5b_2
+  - bioconductor-graph=1.72.0=r41hc0cfd56_2
+  - bioconductor-gseabase=1.56.0=r41hdfd78af_0
+  - bioconductor-gsva=1.42.0=r41hc0cfd56_2
+  - bioconductor-hdf5array=1.22.1=r41hc0cfd56_1
   - bioconductor-ihw=1.22.0=r41hdfd78af_0
   - bioconductor-iranges=2.28.0=r41hc0cfd56_2
   - bioconductor-isocorrector=1.12.0=r41hdfd78af_0
@@ -56,7 +63,13 @@ dependencies:
   - bioconductor-matrixgenerics=1.6.0=r41hdfd78af_0
   - bioconductor-org.hs.eg.db=3.14.0=r41hdfd78af_1
   - bioconductor-qvalue=2.26.0=r41hdfd78af_0
+  - bioconductor-rhdf5=2.38.1=r41hbe1951d_0
+  - bioconductor-rhdf5filters=1.6.0=r41hc247a5b_2
+  - bioconductor-rhdf5lib=1.16.0=r41hc0cfd56_2
   - bioconductor-s4vectors=0.32.4=r41hc0cfd56_0
+  - bioconductor-scaledmatrix=1.2.0=r41hdfd78af_0
+  - bioconductor-singlecellexperiment=1.16.0=r41hdfd78af_0
+  - bioconductor-sparsematrixstats=1.6.0=r41hc247a5b_2
   - bioconductor-summarizedexperiment=1.24.0=r41hdfd78af_0
   - bioconductor-tcgabiolinks=2.22.1=r41hdfd78af_0
   - bioconductor-tcgabiolinksgui.data=1.14.1=r41hdfd78af_0
@@ -78,7 +91,7 @@ dependencies:
   - charset-normalizer=3.2.0=pyhd8ed1ab_0
   - comm=0.1.3=pyhd8ed1ab_0
   - coreutils=9.3=h0b41bf4_0
-  - curl=7.86.0=h2283fc2_1
+  - curl=7.86.0=h7bff187_1
   - cutadapt=4.0=py39hbf8eff0_0
   - debugpy=1.6.7=py39h227be39_0
   - decorator=5.1.1=pyhd8ed1ab_0
@@ -136,12 +149,12 @@ dependencies:
   - jupyterlab_server=2.24.0=pyhd8ed1ab_0
   - kernel-headers_linux-64=2.6.32=he073ed8_16
   - keyutils=1.6.1=h166bdaf_0
-  - krb5=1.19.3=h08a2579_0
+  - krb5=1.19.3=h3790be6_0
   - ld_impl_linux-64=2.40=h41732ed_0
   - lerc=4.0.0=h27087fc_0
   - libblas=3.9.0=17_linux64_openblas
   - libcblas=3.9.0=17_linux64_openblas
-  - libcurl=7.86.0=h2283fc2_1
+  - libcurl=7.86.0=h7bff187_1
   - libdeflate=1.17=h0b41bf4_0
   - libedit=3.1.20191231=he28a2e2_2
   - libev=4.33=h516909a_1
@@ -151,20 +164,20 @@ dependencies:
   - libgcc-ng=13.1.0=he5830b7_0
   - libgfortran-ng=13.1.0=h69a702a_0
   - libgfortran5=13.1.0=h15d22d2_0
-  - libgit2=1.6.4=h747ad27_0
+  - libgit2=1.5.1=ha98c156_0
   - libglib=2.76.4=hebfc3b9_0
   - libglu=9.0.0=he1b5a44_1001
   - libgomp=13.1.0=he5830b7_0
   - libiconv=1.17=h166bdaf_0
   - liblapack=3.9.0=17_linux64_openblas
-  - libnghttp2=1.52.0=h61bc06f_0
+  - libnghttp2=1.51.0=hdcd2b5c_0
   - libnsl=2.0.0=h7f98852_0
   - libopenblas=0.3.23=pthreads_h80387f5_0
   - libpng=1.6.39=h753d276_0
   - libsanitizer=13.1.0=hfd8a6a1_0
   - libsodium=1.0.18=h36c2ea0_1
   - libsqlite=3.42.0=h2797004_0
-  - libssh2=1.11.0=h0841786_0
+  - libssh2=1.10.0=haa6b8db_3
   - libstdcxx-devel_linux-64=13.1.0=he3cc6c4_0
   - libstdcxx-ng=13.1.0=hfd8a6a1_0
   - libtiff=4.5.0=h6adf6a1_2
@@ -188,7 +201,7 @@ dependencies:
   - notebook-shim=0.2.3=pyhd8ed1ab_0
   - numpy=1.25.1=py39h6183b62_0
   - openjdk=11.0.1=h516909a_1016
-  - openssl=3.1.2=hd590300_0
+  - openssl=1.1.1v=hd590300_0
   - overrides=7.3.1=pyhd8ed1ab_0
   - packaging=23.1=pyhd8ed1ab_0
   - pandoc=2.19.2=h32600fe_2
@@ -214,7 +227,7 @@ dependencies:
   - pycparser=2.21=pyhd8ed1ab_0
   - pygments=2.15.1=pyhd8ed1ab_0
   - pysocks=1.7.1=pyha2e5f31_6
-  - python=3.9.16=h2782a2a_0_cpython
+  - python=3.9.15=h47a2c10_0_cpython
   - python-dateutil=2.8.2=pyhd8ed1ab_0
   - python-fastjsonschema=2.18.0=pyhd8ed1ab_0
   - python-isal=1.2.0=py39hd1e30aa_0
@@ -318,7 +331,7 @@ dependencies:
   - r-future.apply=1.11.0=r41hc72bb7e_0
   - r-gargle=1.4.0=r41h785f33e_0
   - r-generics=0.1.3=r41hc72bb7e_1
-  - r-gert=1.9.2=r41h492061e_1
+  - r-gert=1.9.2=r41hf3f2ec2_0
   - r-getoptlong=1.0.5=r41hc72bb7e_1
   - r-ggforce=0.4.1=r41h7525677_1
   - r-ggfun=0.0.9=r41hc72bb7e_0
@@ -414,7 +427,7 @@ dependencies:
   - r-nloptr=2.0.3=r41hb13c81a_1
   - r-nnet=7.3_19=r41h57805ef_0
   - r-numderiv=2016.8_1.1=r41hc72bb7e_4
-  - r-openssl=2.0.6=r41habfbb5e_0
+  - r-openssl=2.0.5=r41hb1dc35e_0
   - r-openxlsx=4.2.5.2=r41h38f115c_0
   - r-pals=1.7=r41hc72bb7e_1
   - r-parallelly=1.36.0=r41hc72bb7e_0
@@ -486,6 +499,7 @@ dependencies:
   - r-rsqlite=2.3.1=r41h38f115c_0
   - r-rstatix=0.7.2=r41hc72bb7e_0
   - r-rstudioapi=0.14=r41hc72bb7e_1
+  - r-rsvd=1.0.5=r41hc72bb7e_0
   - r-rvcheck=0.1.8=r41hc72bb7e_1
   - r-rversions=2.1.2=r41hc72bb7e_1
   - r-rvest=1.0.3=r41hc72bb7e_1
diff --git a/lib/R-functions.R b/lib/R-functions.R
index eac3852..5703428 100644
--- a/lib/R-functions.R
+++ b/lib/R-functions.R
@@ -258,3 +258,27 @@ getLimits <- function (x, clip = TRUE, expand = 1, negative = TRUE){
 }
 
 
+getmarkers <- function(data, design, group = "Celltype", formula = ~ group, ...){
+  ct <- unique(design[[group]])
+  markers <- lapply(setNames(ct, ct), function(ctx){
+    design$group <- ifelse(design[[group]] == ctx, ctx, "other")
+    res <- runDESeq2(data, design, formula = formula, contrasts = list(de = c("group", ctx, "other")))
+    subset(res$results[[1]], ...)$gene
+  })
+  markers
+}
+
+
+hmdraw <- function(){
+  draw(hm, heatmap_legend_side = "left")
+  for(i in seq(hm_annotation)) {
+    decorate_annotation(annotation = "pathways",
+                        slice = i,
+                        code = {
+                          grid.rect(x = 0, width = unit(1, "mm"), gp = gpar(fill = hm_anno_colors[i], col = NA), just = "left")
+                          grid.text(paste(hm_annotation[[i]], collapse = "\n"), gp = gpar(col = hm_anno_colors[i], fontsize = fontsize), x = unit(5, "mm"), just = "left")
+                        })
+  }
+}
+
+
diff --git a/lib/make_env.sh b/lib/make_env.sh
index c3be58b..e0032e9 100644
--- a/lib/make_env.sh
+++ b/lib/make_env.sh
@@ -2,7 +2,7 @@
 # Main conda env
 envname="cd8"
 installconda="nextflow=21.10.0 cutadapt=4.0 r-base=4.1.3 r-essentials \
-              bioconductor-isocorrector bioconductor-org.hs.eg.db bioconductor-clusterprofiler bioconductor-deseq2 bioconductor-ihw \
+              bioconductor-isocorrector bioconductor-org.hs.eg.db bioconductor-clusterprofiler bioconductor-deseq2 bioconductor-ihw bioconductor-gsva \
               bioconductor-geoquery bioconductor-complexheatmap bioconductor-biomart bioconductor-limma bioconductor-tximport bioconductor-tcgabiolinks \
               r-nlme r-glmmtmb r-contrast r-multcomp r-betareg r-msigdbr r-r.utils r-dendsort r-pals \
               r-ggpubr r-dplyr r-tibble r-openxlsx r-ggplot2 r-ggrepel r-devtools r-crayon"