update BPCells vignettes; drop old sketch and atomic integration references

alikhuseynov · Feb 25, 2023 · 983edac · 983edac
1 parent 9afe20b
commit 983edac
Show file tree

Hide file tree

Showing 5 changed files with 165 additions and 79 deletions.
diff --git a/vignettes/COVID_SCTMapping.Rmd b/vignettes/COVID_SCTMapping.Rmd
@@ -1,8 +1,37 @@
 ---
-title: "R Notebook"
-output: html_notebook
+title: "Map COVID PBMC datasets to a healthy reference"
+output:
+  html_document:
+    theme: united
+    df_print: kable
+  pdf_document: default
+date: 'Compiled: `r Sys.Date()`'
 ---
 
+```{r setup, include=TRUE}
+all_times <- list()  # elapsed time (seconds) of each chunk, keyed by chunk label
+knitr::knit_hooks$set(time_it = local({
+  now <- NULL  # chunk start time, kept in the local() environment between hook calls
+  function(before, options) {
+    if (before) {
+      now <<- Sys.time()  # hook call made before the chunk: remember the start time
+    } else {
+      res <- difftime(Sys.time(), now, units = "secs")  # hook call made after the chunk: elapsed time
+      all_times[[options$label]] <<- res  # file the timing under the chunk's label in all_times
+    }
+  }
+}))
+knitr::opts_chunk$set(
+  tidy = TRUE,
+  tidy.opts = list(width.cutoff = 95),
+  message = FALSE,
+  warning = FALSE,
+  fig.width = 10,
+  time_it = TRUE,  # switch on the time_it hook registered above for all chunks
+  error = TRUE  # knitr option: show chunk errors in the output instead of stopping the render
+)
+```
+
 ## load package
 
 ```{r, warning=F, message=F}
@@ -13,13 +42,12 @@ library(dplyr)
 
 ## load matrix
 ```{r, warning=F, message=F}
-
 time0_loadMatrix <- system.time({ 
   
-file.dir <- "/brahms/haoy/vignette_data/PBMCVignette/"
+file.dir <- "../data/PBMCVignette/"
 files.set <- c("arunachalam_2020_processed.BPCells", "combes_2021_processed.BPCells", "lee_2020_processed.BPCells",
                "wilk_2020_processed.BPCells", "yao_2021_processed.BPCells")
-meta.list <- readRDS('/brahms/haoy/vignette_data/PBMCVignette/PBMC_meta.list')
+meta.list <- readRDS('../data/PBMCVignette/PBMC_meta.list')
 names(meta.list) <- gsub('_processed.BPCells','',files.set)
 
 input.list <- list()
@@ -54,9 +82,7 @@ time1_normalize <- system.time({
 
 ## load reference
 ```{r}
- 
-obj.ref <- readRDS("/brahms/haoy/seurat4_pbmc/pbmc_multimodal_2023.rds")
-
+obj.ref <- readRDS("../data/pbmc_multimodal_2023.rds")
 obj.ref 
 ```
 ## mapping
@@ -173,8 +199,24 @@ marker.list.filter <- lapply(marker.list, function(x) {
 bulk$disease <- factor(bulk$disease, levels = c('healthy', 'COVID-19'))
 
 ```
+
 ```{r, fig.width=10, fig.height=5}
 VlnPlot(bulk, features = 'MX1', group.by = 'celltype', split.by = 'disease', cols = c("#377eb8", "#e41a1c"))
+```
 
+```{r save.img, include=TRUE}
+library(ggplot2)  # provides ggsave()
+ggsave(filename = "../output/images/COVID_SCTMapping.jpg", height = 10, width = 7, plot = p3, quality = 50)  # NOTE(review): p3 is not defined in the visible hunks; confirm it is created earlier in this vignette
+```
+
+```{r save.times, include=TRUE}
+print(as.data.frame(all_times))  # show the per-chunk timings stored in all_times
+write.csv(x = t(as.data.frame(all_times)), file = "../output/timings/COVID_SCTMapping.csv")  # transposed so each chunk becomes one row of the CSV
+```
+
+<details>
+  <summary>**Session Info**</summary>
+```{r}
+sessionInfo()  # print R version and attached/loaded packages used for this render
 ```
- 
+</details>
diff --git a/vignettes/MouseBrain_sketch_clustering.Rmd b/vignettes/MouseBrain_sketch_clustering.Rmd
@@ -1,7 +1,36 @@
 ---
-title: "R Notebook"
-output: html_notebook
+title: "Mouse brain sketch clustering"
+output:
+  html_document:
+    theme: united
+    df_print: kable
+  pdf_document: default
+date: 'Compiled: `r Sys.Date()`'
 ---
+
+```{r setup, include=TRUE}
+all_times <- list()  # elapsed time (seconds) of each chunk, keyed by chunk label
+knitr::knit_hooks$set(time_it = local({
+  now <- NULL  # chunk start time, kept in the local() environment between hook calls
+  function(before, options) {
+    if (before) {
+      now <<- Sys.time()  # hook call made before the chunk: remember the start time
+    } else {
+      res <- difftime(Sys.time(), now, units = "secs")  # hook call made after the chunk: elapsed time
+      all_times[[options$label]] <<- res  # file the timing under the chunk's label in all_times
+    }
+  }
+}))
+knitr::opts_chunk$set(
+  tidy = TRUE,
+  tidy.opts = list(width.cutoff = 95),
+  message = FALSE,
+  warning = FALSE,
+  fig.width = 10,
+  time_it = TRUE,  # switch on the time_it hook registered above for all chunks
+  error = TRUE  # knitr option: show chunk errors in the output instead of stopping the render
+)
+```
 
 ## load library
 ```{r, warning=FALSE, message=FALSE}
@@ -13,10 +42,9 @@ library(BPCells)
 ```{r, warning=FALSE, message=FALSE}
 t0_CreateObject <- system.time({
 
-mat <- open_matrix_dir("/brahms/haoy/test/mouse_1M_neurons_counts")[,1:1e5]
+mat <- open_matrix_dir("../data/mouse_1M_neurons_counts")[,1:1e5]
  
-devtools::load_all("/brahms/haoy//package/MetricPatch/")
-mat <- ConvertEnsembleToSymbol(mat = mat, species = 'mouse')
+mat <- Azimuth::ConvertEnsembleToSymbol(mat = mat, species = 'mouse')
  
 options(Seurat.object.assay.version = "v5",  Seurat.object.assay.calcn = T)
 obj <- CreateSeuratObject(counts = mat )
@@ -152,6 +180,23 @@ obj.sub <- FindClusters(obj.sub)
 ```
 
 ```{r}
-DimPlot(obj.sub, label = T) + NoLegend()
+p <- DimPlot(obj.sub, label = T) + NoLegend()  # keep the plot in p so the save.img chunk can pass it to ggsave()
+p  # print the plot in the rendered document
+```
 
-```
+```{r save.img, include=TRUE}
+library(ggplot2)  # provides ggsave()
+ggsave(filename = "../output/images/MouseBrain_sketch_clustering.jpg", height = 10, width = 7, plot = p, quality = 50)  # write the DimPlot stored in p by the previous chunk
+```
+
+```{r save.times, include=TRUE}
+print(as.data.frame(all_times))  # show the per-chunk timings stored in all_times
+write.csv(x = t(as.data.frame(all_times)), file = "../output/timings/MouseBrain_sketch_clustering.csv")  # transposed so each chunk becomes one row of the CSV
+```
+
+<details>
+  <summary>**Session Info**</summary>
+```{r}
+sessionInfo()  # print R version and attached/loaded packages used for this render
+```
+</details>
diff --git a/vignettes/ParseBio_sketch_integration.Rmd b/vignettes/ParseBio_sketch_integration.Rmd
@@ -1,8 +1,37 @@
 ---
-title: "R Notebook"
-output: html_notebook
+title: "Sketch integration using a 1 million cell dataset from Parse Biosciences"
+output:
+  html_document:
+    theme: united
+    df_print: kable
+  pdf_document: default
+date: 'Compiled: `r Sys.Date()`'
 ---
 
+```{r setup, include=TRUE}
+all_times <- list()  # elapsed time (seconds) of each chunk, keyed by chunk label
+knitr::knit_hooks$set(time_it = local({
+  now <- NULL  # chunk start time, kept in the local() environment between hook calls
+  function(before, options) {
+    if (before) {
+      now <<- Sys.time()  # hook call made before the chunk: remember the start time
+    } else {
+      res <- difftime(Sys.time(), now, units = "secs")  # hook call made after the chunk: elapsed time
+      all_times[[options$label]] <<- res  # file the timing under the chunk's label in all_times
+    }
+  }
+}))
+knitr::opts_chunk$set(
+  tidy = TRUE,
+  tidy.opts = list(width.cutoff = 95),
+  message = FALSE,
+  warning = FALSE,
+  fig.width = 10,
+  time_it = TRUE,  # switch on the time_it hook registered above for all chunks
+  error = TRUE  # knitr option: show chunk errors in the output instead of stopping the render
+)
+```
+
 ## load package
 
 ```{r, warning=F, message=F}
@@ -12,15 +41,12 @@ library(dplyr)
 ```
 
 ## load matrix
+
 ```{r, warning=F, message=F}
- 
 time0_loadMatrix <- system.time({ 
-  mat <- open_matrix_dir('/brahms/haoy/test/pbmc_150k_sparse/')
-  meta <- readRDS('/brahms/haoy/seurat5/S5_object/ParseBio_PBMC_meta_100K.rds')
+  mat <- open_matrix_dir('../data/pbmc_150k_sparse/')
+  meta <- readRDS('../data/ParseBio_PBMC_meta_100K.rds')
   meta$disease <- sample(c('H','D'), nrow(meta), replace = T)
-  #mat <- open_matrix_dir('/brahms/haoy/test/pbmc_ParseBio_sparse//')
-  #meta <- readRDS('/brahms/haoy/seurat5/S5_object/ParseBio_PBMC_meta.rds')
-  
 })
 ```
 
@@ -31,12 +57,6 @@ options(Seurat.object.assay.version = "v5",   Seurat.object.assay.calcn = T)
 time1_normalize <- system.time({
   object <- CreateSeuratObject(counts = mat, meta.data = meta)
   object <- NormalizeData(object, verbose = FALSE)
-
-  # object[['RNA']]$data <- write_matrix_dir(
-  #   mat = object[['RNA']]$data,
-  #   dir = '/brahms/haoy/test/pbmc_ParseBio_sparse_data'
-  #   )
-  #object[['RNA']]$data <- open_matrix_dir(dir = '/brahms/haoy/test/pbmc_ParseBio_sparse_data')
 })
  
 
@@ -111,10 +131,6 @@ p1<- DimPlot(object, reduction = 'umap.orig', group.by = 'sample',alpha = 0.5) +
 p2<- DimPlot(object, reduction = 'umap.orig', group.by = 'celltype.weight', label = T, alpha = 0.5) + NoLegend()
 p1+p2
 
-```
-## save object
-```{r}
-#time7_saveRDS <- system.time(saveRDS(object, "/brahms/haoy/test/pbmc_ParseBio_seurat.rds"))
 ```
 
 ## pseudo-bulk
@@ -179,17 +195,30 @@ for (i in 1:length(all_T)) {
 print(paste('Total time ', round(overall, digits = 1), 'mins' ))
 ```
 
-
-
-
 ```{r,fig.height = 20, fig.width = 15}
-
 Idents(bulk) <- 'celltype'
 marker <- FindAllMarkers(object = bulk, only.pos = TRUE, verbose = FALSE)
 marker %>%
     group_by(cluster) %>%
     top_n(n = -5, wt = p_val) -> top5
 bulk <- ScaleData(bulk, features = top5$gene)
 DoHeatmap(bulk, features = top5$gene) + NoLegend()
+```
 
+```{r save.img, include=TRUE}
+library(ggplot2)  # provides ggsave()
+plot <- DoHeatmap(bulk, features = top5$gene) + NoLegend()  # rebuild the heatmap from the previous chunk so it can be handed to ggsave()
+ggsave(filename = "../output/images/ParseBio_sketch_integration.jpg", height = 10, width = 7, plot = plot, quality = 50)  # write the heatmap built on the line above
+```
+
+```{r save.times, include=TRUE}
+print(as.data.frame(all_times))  # show the per-chunk timings stored in all_times
+write.csv(x = t(as.data.frame(all_times)), file = "../output/timings/ParseBio_sketch_integration.csv")  # transposed so each chunk becomes one row of the CSV
+```
+
+<details>
+  <summary>**Session Info**</summary>
+```{r}
+sessionInfo()  # print R version and attached/loaded packages used for this render
 ```
+</details>
diff --git a/vignettes/vignettes.yaml b/vignettes/vignettes.yaml
@@ -88,18 +88,6 @@
         Examples of how to perform normalization, feature selection, integration, and differential expression with an updated version of sctransform.
       image: assets/sctransform_v2.png
 
-    - title: Cross-modality Bridge Integration
-      name: bridge_integration_vignette
-      summary: |
-        Map scATAC-seq onto an scRNA-seq reference using a multi-omic bridge dataset.
-      image: bridge_integration.png
-
-    - title: Atomic sketch integration for scRNA-seq
-      name: atomic_integration
-      summary: |
-        Perform community-scale integration of scRNA-seq datasets by atomic sketch integration.
-      image: atomic_integration.png
-
 - category: Other
   vignettes:
     - title: Visualization

diff --git a/vignettes/vignettes_v5.yaml b/vignettes/vignettes_v5.yaml
@@ -42,38 +42,20 @@
         Mitigate the effects of cell cycle heterogeneity by computing cell cycle phase scores based on marker genes.
       image: cell_cycle_vignette.jpg
 
-    - title: BPCells Sketch Clustering (Log)
-      name: BPCells_sketch_clustering_mouse_brain
+    - title: Sketch Clustering (BPCells)
+      name: MouseBrain_sketch_clustering
       summary: |
-        Analyze a large mouse brain dataset using the on-disk capabilities introduced in Seurat5.
-      image: BPCells_sketch_clustering_mouse_brain.png
+        Analyze a large mouse brain dataset using the on-disk capabilities introduced in Seurat v5.
+      image: MouseBrain_sketch_clustering.jpg
 
-    - title: BPCells Sketch Clustering (SCTransform)
-      name: BPCells_sketch_clustering_mouse_brain_SCT
+    - title: COVID Mapping (BPCells)
+      name: COVID_SCTMapping
       summary: |
-        Analyze a large mouse brain dataset using the on-disk capabilities introduced in Seurat5.
-      image: BPCells_sketch_clustering_mouse_brain.png
+        Map PBMC datasets from COVID-19 patients to a healthy PBMC reference.
+      image: COVID_SCTMapping.jpg
 
-    - title: BPCells Sketch integration (Log)
-      name: BPCells_sketch_integration_1M
+    - title: Sketch Integration (BPCells)
+      name: ParseBio_sketch_integration
       summary: |
         Perform sketch integration on a large dataset from Parse Biosciences.
-      image: BPCells_sketch_inte_1M.png
-
-    - title: BPCells Sketch integration (SCTransform)
-      name: BPCells_sketch_integration_1M_SCT
-      summary: |
-        Perform sketch integration on a large dataset from Parse Biosciences.
-      image: BPCells_sketch_inte_1M.png
-
-    - title: Chunked mapping (Log)
-      name: BPCells_COVID_logMapping
-      summary: |
-        Iteratively map large COVID datasets onto a reference for cell type annotation.
-      image: BPCells_COVID.png
-
-    - title: Chunked mapping (SCTransform)
-      name: BPCells_COVID_SCTMapping
-      summary: |
-        Iteratively map large COVID datasets onto a reference for cell type annotation.
-      image: BPCells_COVID.png
+      image: ParseBio_sketch_integration.jpg