Skip to content

Commit

Permalink
update BPCells vignettes; drop old sketch and atomic inte refs
Browse files Browse the repository at this point in the history
  • Loading branch information
AustinHartman committed Feb 25, 2023
1 parent 9afe20b commit 983edac
Show file tree
Hide file tree
Showing 5 changed files with 165 additions and 79 deletions.
60 changes: 51 additions & 9 deletions vignettes/COVID_SCTMapping.Rmd
Original file line number Diff line number Diff line change
@@ -1,8 +1,37 @@
---
title: "R Notebook"
output: html_notebook
title: "Map COVID PBMC datasets to a healthy reference"
output:
html_document:
theme: united
df_print: kable
pdf_document: default
date: 'Compiled: `r Sys.Date()`'
---

```{r setup, include=TRUE}
# Per-chunk timings, keyed by chunk label; filled in by the `time_it` hook below.
all_times <- list() # store the time for each chunk
# Register a knitr chunk hook named `time_it`. Wrapping the hook in local()
# gives it a private `now` variable that persists between calls.
knitr::knit_hooks$set(time_it = local({
now <- NULL
function(before, options) {
if (before) {
# before = TRUE: record the start time in the enclosing local() environment.
now <<- Sys.time()
} else {
# before = FALSE: elapsed seconds since the recorded start time.
res <- difftime(Sys.time(), now, units = "secs")
# `<<-` makes the assignment reach the `all_times` list defined outside the
# hook; the entry is stored under the current chunk's label.
all_times[[options$label]] <<- res
}
}
}))
# Document-wide default chunk options. `time_it = TRUE` sets the option that
# shares its name with the hook registered above.
knitr::opts_chunk$set(
tidy = TRUE,
tidy.opts = list(width.cutoff = 95),
message = FALSE,
warning = FALSE,
fig.width = 10,
time_it = TRUE,
error = TRUE
)
```

## load package

```{r, warning=F, message=F}
Expand All @@ -13,13 +42,12 @@ library(dplyr)

## load matrix
```{r, warning=F, message=F}
time0_loadMatrix <- system.time({
file.dir <- "/brahms/haoy/vignette_data/PBMCVignette/"
file.dir <- "../data/PBMCVignette/"
files.set <- c("arunachalam_2020_processed.BPCells", "combes_2021_processed.BPCells", "lee_2020_processed.BPCells",
"wilk_2020_processed.BPCells", "yao_2021_processed.BPCells")
meta.list <- readRDS('/brahms/haoy/vignette_data/PBMCVignette/PBMC_meta.list')
meta.list <- readRDS('../data/PBMCVignette/PBMC_meta.list')
names(meta.list) <- gsub('_processed.BPCells','',files.set)
input.list <- list()
Expand Down Expand Up @@ -54,9 +82,7 @@ time1_normalize <- system.time({

## load reference
```{r}
obj.ref <- readRDS("/brahms/haoy/seurat4_pbmc/pbmc_multimodal_2023.rds")
obj.ref <- readRDS("../data/pbmc_multimodal_2023.rds")
obj.ref
```
## mapping
Expand Down Expand Up @@ -173,8 +199,24 @@ marker.list.filter <- lapply(marker.list, function(x) {
bulk$disease <- factor(bulk$disease, levels = c('healthy', 'COVID-19'))
```

```{r, fig.width=10, fig.height=5}
VlnPlot(bulk, features = 'MX1', group.by = 'celltype', split.by = 'disease', cols = c("#377eb8", "#e41a1c"))
```

```{r save.img, include=TRUE}
library(ggplot2)
ggsave(filename = "../output/images/COVID_SCTMapping.jpg", height = 10, width = 7, plot = p3, quality = 50)
```

```{r save.times, include=TRUE}
print(as.data.frame(all_times))
write.csv(x = t(as.data.frame(all_times)), file = "../output/timings/COVID_SCTMapping.csv")
```

<details>
<summary>**Session Info**</summary>
```{r}
sessionInfo()
```
</details>
59 changes: 52 additions & 7 deletions vignettes/MouseBrain_sketch_clustering.Rmd
Original file line number Diff line number Diff line change
@@ -1,7 +1,36 @@
---
title: "R Notebook"
output: html_notebook
title: "Mouse brain sketch clustering"
output:
html_document:
theme: united
df_print: kable
pdf_document: default
date: 'Compiled: `r Sys.Date()`'
---

```{r setup, include=TRUE}
# Per-chunk timings, keyed by chunk label; filled in by the `time_it` hook below.
all_times <- list() # store the time for each chunk
# Register a knitr chunk hook named `time_it`. Wrapping the hook in local()
# gives it a private `now` variable that persists between calls.
knitr::knit_hooks$set(time_it = local({
now <- NULL
function(before, options) {
if (before) {
# before = TRUE: record the start time in the enclosing local() environment.
now <<- Sys.time()
} else {
# before = FALSE: elapsed seconds since the recorded start time.
res <- difftime(Sys.time(), now, units = "secs")
# `<<-` makes the assignment reach the `all_times` list defined outside the
# hook; the entry is stored under the current chunk's label.
all_times[[options$label]] <<- res
}
}
}))
# Document-wide default chunk options. `time_it = TRUE` sets the option that
# shares its name with the hook registered above.
knitr::opts_chunk$set(
tidy = TRUE,
tidy.opts = list(width.cutoff = 95),
message = FALSE,
warning = FALSE,
fig.width = 10,
time_it = TRUE,
error = TRUE
)
```

## load library
```{r, warning=FALSE, message=FALSE}
Expand All @@ -13,10 +42,9 @@ library(BPCells)
```{r, warning=FALSE, message=FALSE}
t0_CreateObject <- system.time({
mat <- open_matrix_dir("/brahms/haoy/test/mouse_1M_neurons_counts")[,1:1e5]
mat <- open_matrix_dir("../data/mouse_1M_neurons_counts")[,1:1e5]
devtools::load_all("/brahms/haoy//package/MetricPatch/")
mat <- ConvertEnsembleToSymbol(mat = mat, species = 'mouse')
mat <- Azimuth::ConvertEnsembleToSymbol(mat = mat, species = 'mouse')
options(Seurat.object.assay.version = "v5", Seurat.object.assay.calcn = T)
obj <- CreateSeuratObject(counts = mat )
Expand Down Expand Up @@ -152,6 +180,23 @@ obj.sub <- FindClusters(obj.sub)
```

```{r}
DimPlot(obj.sub, label = T) + NoLegend()
p <- DimPlot(obj.sub, label = T) + NoLegend()
p
```

```{r save.img, include=TRUE}
library(ggplot2)
ggsave(filename = "../output/images/MouseBrain_sketch_clustering.jpg", height = 10, width = 7, plot = p, quality = 50)
```

```{r save.times, include=TRUE}
print(as.data.frame(all_times))
write.csv(x = t(as.data.frame(all_times)), file = "../output/timings/MouseBrain_sketch_clustering.csv")
```

<details>
<summary>**Session Info**</summary>
```{r}
sessionInfo()
```
</details>
73 changes: 51 additions & 22 deletions vignettes/ParseBio_sketch_integration.Rmd
Original file line number Diff line number Diff line change
@@ -1,8 +1,37 @@
---
title: "R Notebook"
output: html_notebook
title: "Sketch integration using a 1 million cell dataset from Parse Biosciences"
output:
html_document:
theme: united
df_print: kable
pdf_document: default
date: 'Compiled: `r Sys.Date()`'
---

```{r setup, include=TRUE}
# Per-chunk timings, keyed by chunk label; filled in by the `time_it` hook below.
all_times <- list() # store the time for each chunk
# Register a knitr chunk hook named `time_it`. Wrapping the hook in local()
# gives it a private `now` variable that persists between calls.
knitr::knit_hooks$set(time_it = local({
now <- NULL
function(before, options) {
if (before) {
# before = TRUE: record the start time in the enclosing local() environment.
now <<- Sys.time()
} else {
# before = FALSE: elapsed seconds since the recorded start time.
res <- difftime(Sys.time(), now, units = "secs")
# `<<-` makes the assignment reach the `all_times` list defined outside the
# hook; the entry is stored under the current chunk's label.
all_times[[options$label]] <<- res
}
}
}))
# Document-wide default chunk options. `time_it = TRUE` sets the option that
# shares its name with the hook registered above.
knitr::opts_chunk$set(
tidy = TRUE,
tidy.opts = list(width.cutoff = 95),
message = FALSE,
warning = FALSE,
fig.width = 10,
time_it = TRUE,
error = TRUE
)
```

## load package

```{r, warning=F, message=F}
Expand All @@ -12,15 +41,12 @@ library(dplyr)
```

## load matrix

```{r, warning=F, message=F}
time0_loadMatrix <- system.time({
mat <- open_matrix_dir('/brahms/haoy/test/pbmc_150k_sparse/')
meta <- readRDS('/brahms/haoy/seurat5/S5_object/ParseBio_PBMC_meta_100K.rds')
mat <- open_matrix_dir('../data/pbmc_150k_sparse/')
meta <- readRDS('../data/ParseBio_PBMC_meta_100K.rds')
meta$disease <- sample(c('H','D'), nrow(meta), replace = T)
#mat <- open_matrix_dir('/brahms/haoy/test/pbmc_ParseBio_sparse//')
#meta <- readRDS('/brahms/haoy/seurat5/S5_object/ParseBio_PBMC_meta.rds')
})
```

Expand All @@ -31,12 +57,6 @@ options(Seurat.object.assay.version = "v5", Seurat.object.assay.calcn = T)
time1_normalize <- system.time({
object <- CreateSeuratObject(counts = mat, meta.data = meta)
object <- NormalizeData(object, verbose = FALSE)
# object[['RNA']]$data <- write_matrix_dir(
# mat = object[['RNA']]$data,
# dir = '/brahms/haoy/test/pbmc_ParseBio_sparse_data'
# )
#object[['RNA']]$data <- open_matrix_dir(dir = '/brahms/haoy/test/pbmc_ParseBio_sparse_data')
})
Expand Down Expand Up @@ -111,10 +131,6 @@ p1<- DimPlot(object, reduction = 'umap.orig', group.by = 'sample',alpha = 0.5) +
p2<- DimPlot(object, reduction = 'umap.orig', group.by = 'celltype.weight', label = T, alpha = 0.5) + NoLegend()
p1+p2
```
## save object
```{r}
#time7_saveRDS <- system.time(saveRDS(object, "/brahms/haoy/test/pbmc_ParseBio_seurat.rds"))
```

## pseudo-bulk
Expand Down Expand Up @@ -179,17 +195,30 @@ for (i in 1:length(all_T)) {
print(paste('Total time ', round(overall, digits = 1), 'mins' ))
```




```{r,fig.height = 20, fig.width = 15}
Idents(bulk) <- 'celltype'
marker <- FindAllMarkers(object = bulk, only.pos = TRUE, verbose = FALSE)
marker %>%
group_by(cluster) %>%
top_n(n = -5, wt = p_val) -> top5
bulk <- ScaleData(bulk, features = top5$gene)
DoHeatmap(bulk, features = top5$gene) + NoLegend()
```

```{r save.img, include=TRUE}
library(ggplot2)
plot <- DoHeatmap(bulk, features = top5$gene) + NoLegend()
ggsave(filename = "../output/images/ParseBio_sketch_integration.jpg", height = 10, width = 7, plot = plot, quality = 50)
```

```{r save.times, include=TRUE}
print(as.data.frame(all_times))
write.csv(x = t(as.data.frame(all_times)), file = "../output/timings/ParseBio_sketch_integration.csv")
```

<details>
<summary>**Session Info**</summary>
```{r}
sessionInfo()
```
</details>
12 changes: 0 additions & 12 deletions vignettes/vignettes.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -88,18 +88,6 @@
Examples of how to perform normalization, feature selection, integration, and differential expression with an updated version of sctransform.
image: assets/sctransform_v2.png

- title: Cross-modality Bridge Integration
name: bridge_integration_vignette
summary: |
Map scATAC-seq onto an scRNA-seq reference using a multi-omic bridge dataset.
image: bridge_integration.png

- title: Atomic sketch integration for scRNA-seq
name: atomic_integration
summary: |
Perform community-scale integration of scRNA-seq datasets by atomic sketch integration.
image: atomic_integration.png

- category: Other
vignettes:
- title: Visualization
Expand Down
40 changes: 11 additions & 29 deletions vignettes/vignettes_v5.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -42,38 +42,20 @@
Mitigate the effects of cell cycle heterogeneity by computing cell cycle phase scores based on marker genes.
image: cell_cycle_vignette.jpg

- title: BPCells Sketch Clustering (Log)
name: BPCells_sketch_clustering_mouse_brain
- title: Sketch Clustering (BPCells)
name: MouseBrain_sketch_clustering
summary: |
Analyze a large mouse brain dataset using the on-disk capabilities introduced in Seurat5.
image: BPCells_sketch_clustering_mouse_brain.png
Analyze a large mouse brain dataset using the on-disk capabilities introduced in Seurat v5.
image: MouseBrain_sketch_clustering.jpg

- title: BPCells Sketch Clustering (SCTransform)
name: BPCells_sketch_clustering_mouse_brain_SCT
- title: COVID Mapping (BPCells)
name: COVID_SCTMapping
summary: |
Analyze a large mouse brain dataset using the on-disk capabilities introduced in Seurat5.
image: BPCells_sketch_clustering_mouse_brain.png
Map PBMC datasets from COVID-19 patients to a healthy PBMC reference.
image: COVID_SCTMapping.jpg

- title: BPCells Sketch integration (Log)
name: BPCells_sketch_integration_1M
- title: Sketch Integration (BPCells)
name: ParseBio_sketch_integration
summary: |
Perform sketch integration on a large dataset from Parse Biosciences.
image: BPCells_sketch_inte_1M.png

- title: BPCells Sketch integration (SCTransform)
name: BPCells_sketch_integration_1M_SCT
summary: |
Perform sketch integration on a large dataset from Parse Biosciences.
image: BPCells_sketch_inte_1M.png

- title: Chunked mapping (Log)
name: BPCells_COVID_logMapping
summary: |
Iteratively map large COVID datasets onto a reference for cell type annotation.
image: BPCells_COVID.png

- title: Chunked mapping (SCTransform)
name: BPCells_COVID_SCTMapping
summary: |
Iteratively map large COVID datasets onto a reference for cell type annotation.
image: BPCells_COVID.png
image: ParseBio_sketch_integration.jpg

0 comments on commit 983edac

Please sign in to comment.