-
Notifications
You must be signed in to change notification settings - Fork 10
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #4 from satijalab/feat/lung
Add human lung reference
- Loading branch information
Showing
11 changed files
with
271 additions
and
22 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -21,4 +21,5 @@ src/*.dll | |
**/.snakemake | ||
**/.snakemake_timestamp | ||
**/.wget-hsts | ||
**/.synapseCache | ||
**/**/.cache |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,13 +1,11 @@ | ||
# Dockerfile for building references | ||
FROM satijalab/seurat:4.0.0 | ||
FROM satijalab/azimuth:0.3.2 | ||
|
||
# Install other R dependencies | ||
RUN echo "options(repos = 'https://cloud.r-project.org')" > $(R --no-echo --no-save -e "cat(Sys.getenv('R_HOME'))")/etc/Rprofile.site | ||
RUN R --no-echo --no-restore --no-save -e "install.packages('feather')" | ||
|
||
# Install Azimuth (0.3.0) | ||
RUN R --no-echo --no-restore --no-save -e "remotes::install_github('immunogenomics/presto')" | ||
RUN R --no-echo --no-restore --no-save -e "install.packages(c('DT', 'googlesheets4', 'shinyBS', 'shinydashboard', 'shinyjs'))" | ||
RUN R --no-echo --no-restore --no-save -e "BiocManager::install('glmGamPoi')" | ||
RUN R --no-echo --no-restore --no-save -e "remotes::install_github('satijalab/azimuth', ref = 'v0.3.0', dependencies = FALSE)" | ||
# Install synapse for download | ||
RUN pip3 install synapseclient | ||
|
||
CMD [ "R" ] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,104 @@ | ||
############################## Config ######################################### | ||
container: "docker://satijalab/azimuth-references:latest" | ||
envvars: | ||
"SYNAPSE_ID", | ||
"SYNAPSE_KEY" | ||
|
||
############################## All ############################################ | ||
# Default target: requests the exported reference object, its Annoy index,
# and the demo dataset, which pulls in every rule needed to build them.
rule all:
    input:
        "reference/ref.Rds",
        "reference/idx.annoy",
        "reference/braga_lung_demo.Rds"
|
||
############################## Reference ###################################### | ||
# Download the Krasnow HLCA 10x counts and metadata CSVs from Synapse.
# Credentials come from the SYNAPSE_ID / SYNAPSE_KEY environment variables.
rule download:
    params:
        user = os.environ["SYNAPSE_ID"],
        key = os.environ["SYNAPSE_KEY"]
    output:
        "data/krasnow_hlca_10x_UMIs.csv",
        "data/krasnow_hlca_10x_metadata.csv"
    shell:
        """
        mkdir -p data/
        mkdir -p logs
        cd data
        # The :q modifier shell-quotes the credentials so that whitespace or
        # shell metacharacters in them cannot split or alter the command.
        synapse -u {params.user:q} -p {params.key:q} get syn21560510
        synapse -u {params.user:q} -p {params.key:q} get syn21560409
        echo "Krasnow data downloaded on: $(date)" > ../logs/download_data.log
        """
|
||
# Run scripts/setup.R on the downloaded counts and metadata to produce the
# processed Seurat object.
rule setup:
    input:
        script = "scripts/setup.R",
        data = "data/krasnow_hlca_10x_UMIs.csv",
        metadata = "data/krasnow_hlca_10x_metadata.csv"
    output:
        "seurat_objects/krasnow_lung.rds"
    # Declaring the log lets Snakemake create logs/ before the job starts;
    # a bare redirect fails when the directory does not exist yet (e.g. when
    # the download rule was skipped because the data files are already present).
    log:
        "logs/setup.Rout"
    shell:
        """
        Rscript {input.script} {input.data} {input.metadata} {output} > {log} 2>&1
        """
|
||
# Run scripts/export.R on the processed Seurat object to write the Azimuth
# reference, its Annoy index, and the annotated full object.
rule export:
    input:
        script = "scripts/export.R",
        ob = "seurat_objects/krasnow_lung.rds"
    output:
        "reference/ref.Rds",
        "reference/idx.annoy",
        "seurat_objects/fullref.Rds"
    # Declaring the log lets Snakemake create logs/ before the job starts,
    # instead of relying on an earlier rule having created it.
    log:
        "logs/export.Rout"
    shell:
        """
        Rscript {input.script} {input.ob} > {log} 2>&1
        """
|
||
############################## Demo ########################################## | ||
# Fetch the GSE130148 demo counts and cell-type table from the GEO FTP site,
# decompress the counts, and record the download time in a log file.
rule download_demo:
    params:
        data_url = "https://ftp.ncbi.nlm.nih.gov/geo/series/GSE130nnn/GSE130148/suppl/GSE130148%5Fraw%5Fcounts%2ERData%2Egz",
        metadata_url = "https://ftp.ncbi.nlm.nih.gov/geo/series/GSE130nnn/GSE130148/suppl/GSE130148%5Fbarcodes%5Fcell%5Ftypes%2Etxt%2Egz"
    output:
        "logs/download_demo_data.log",
        "data/GSE130148_raw_counts.RData",
        "data/GSE130148_barcodes_cell_types.txt.gz"
    shell:
        """
        wget --directory-prefix=data {params.data_url}
        wget --directory-prefix=data {params.metadata_url}
        gunzip data/GSE130148_raw_counts.RData.gz
        echo "Lung demo data downloaded on: $(date)" > logs/download_demo_data.log
        """
|
||
# Run scripts/setup_demo.R on the downloaded demo counts and cell-type table
# to produce the demo Seurat object.
rule setup_demo:
    input:
        script = "scripts/setup_demo.R",
        data = "data/GSE130148_raw_counts.RData",
        metadata = "data/GSE130148_barcodes_cell_types.txt.gz"
    output:
        "reference/braga_lung_demo.Rds"
    # Declaring the log lets Snakemake create logs/ before the job starts,
    # instead of relying on an earlier rule having created it.
    log:
        "logs/setup_demo.log"
    shell:
        """
        Rscript {input.script} {input.data} {input.metadata} {output} > {log} 2>&1
        """
|
||
############################## Explorer ######################################## | ||
# Convert the reference to h5Seurat/h5ad with scripts/convert_to_h5ad.R and
# then to a Zarr store with scripts/convert_to_zarr.py. Runs in its own
# container image rather than the workflow-wide one.
rule export_zarr:
    input:
        ref = "reference/ref.Rds",
        fullref = "seurat_objects/fullref.Rds",
        script1 = "scripts/convert_to_h5ad.R",
        script2 = "scripts/convert_to_zarr.py"
    output:
        h5Seurat = "vitessce/vitessce_ref.h5Seurat",
        h5ad = "vitessce/vitessce_ref.h5ad",
        zarr = directory("vitessce/vitessce_ref.zarr")
    # Declaring the log lets Snakemake create logs/ before the job starts,
    # instead of relying on an earlier rule having created it.
    log:
        "logs/export_zarr_anndata.Rout"
    container:
        "docker://satijalab/azimuth-references:vitessce"
    shell:
        """
        mkdir -p vitessce
        Rscript {input.script1} {input.ref} {input.fullref} {output.h5Seurat} > {log} 2>&1
        python3 {input.script2} {output.h5ad} {output.zarr}
        """
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
{ | ||
"Azimuth.app.reference": "/reference-data/", | ||
"Azimuth.app.demodataset": "/reference-data/braga_lung_demo.Rds", | ||
"Azimuth.app.max_cells": 100000, | ||
"Azimuth.app.default_gene": "EMP2", | ||
"Azimuth.app.default_metadata": "annotation.l1", | ||
"Azimuth.app.homologs": "/reference-data/homologs.rds", | ||
"Azimuth.app.welcomebox": "div(\n h3(HTML(\"Please upload a dataset to map to our <b>human lung</b> reference\")),\n \"Upload a counts matrix from an scRNA-seq dataset of human lung in one\n of the following formats: hdf5, rds, h5ad, h5seurat\",\n width = 12\n )", | ||
"Azimuth.app.refdescriptor": "<div class='refdescriptor'><br>This reference consists of 65,662 human lung cells (10x Genomics v2) from <a href='https://www.nature.com/articles/s41586-020-2922-4' target='_blank'>Travaglini et al. 2020</a>. Cells were sourced from three donor patients and from all lung tissue compartments (bronchi, bronchiole, alveoli) as well as circulating blood. For testing, we also provide a demo dataset of 10,360 human lung cells from <a href='https://www.nature.com/articles/s41591-019-0468-5' target='_blank'>Vieira-Braga et al. 2019</a> (Drop-seq), which is loaded automatically with the 'Load demo dataset' button or available for download <a href='https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE130148' target='_blank'>here</a>.</div>", | ||
"shiny.maxRequestSize": 1048576000, | ||
"Azimuth.app.refuri": "https://seurat.nygenome.org/azimuth/references/v1.0.0/human_lung", | ||
"Azimuth.app.do_adt": "FALSE" | ||
} | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,37 @@ | ||
#!/usr/bin/env Rscript
# Build a slimmed-down copy of the full reference object, save it as an
# h5Seurat file, and convert that file to h5ad.
#
# Usage: convert_to_h5ad.R <ref.Rds> <fullref.Rds> <out.h5Seurat>
library(Seurat)
library(SeuratDisk)
args <- commandArgs(trailingOnly = TRUE)
if (length(x = args) < 3) {
  stop(
    "Usage: convert_to_h5ad.R <ref.Rds> <fullref.Rds> <out.h5Seurat>",
    call. = FALSE
  )
}

ref <- readRDS(file = args[1])
fullref <- readRDS(file = args[2])
# Restrict the full object to the cells present in the reference and reuse
# the reference UMAP as its 'umap' reduction
fullref <- subset(x = fullref, cells = Cells(x = ref))
fullref[['umap']] <- ref[['refUMAP']]
Key(object = fullref[['umap']]) <- "umap_"
DefaultAssay(object = fullref[['umap']]) <- "RNA"

DefaultAssay(object = fullref) <- "RNA"
fullref <- NormalizeData(object = fullref)
# Keep only the RNA assay and the UMAP reduction
fullref <- DietSeurat(
  object = fullref,
  dimreducs = "umap",
  assays = "RNA"
)
# Drop every existing metadata column, then attach the reference's metadata
for (i in colnames(x = fullref[[]])) {
  fullref[[i]] <- NULL
}
fullref <- AddMetaData(object = fullref, metadata = ref[[]])
Misc(object = fullref[['umap']], slot = "model") <- NULL

# seq_len() stays correct for an empty object, where 1:0 would yield c(1, 0)
fullref <- RenameCells(
  object = fullref,
  new.names = paste0("cell", seq_len(length.out = ncol(x = fullref)))
)

# Store factor metadata columns as plain character vectors
for (i in colnames(x = fullref[[]])) {
  if (is.factor(x = fullref[[i, drop = TRUE]])) {
    fullref[[i]] <- as.character(x = fullref[[i, drop = TRUE]])
  }
}

SaveH5Seurat(object = fullref, file = args[3], overwrite = TRUE)
Convert(args[3], dest = "h5ad", overwrite = TRUE)
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
#!/usr/bin/python
"""Convert an h5ad file into a Zarr store.

Usage: convert_to_zarr.py <input.h5ad> <output.zarr>
"""
import scanpy as sc
import sys
import zarr
from scipy import sparse

if len(sys.argv) < 3:
    sys.exit("Usage: convert_to_zarr.py <input.h5ad> <output.zarr>")

adata = sc.read_h5ad(sys.argv[1])
del adata.raw
# Force string-typed gene and cell names before writing
adata.var.index = adata.var.index.astype('str')
adata.obs.index = adata.obs.index.astype('str')
adata.var_names = adata.var_names.astype(str)
# csc_matrix() accepts dense arrays as well as other sparse formats, whereas
# calling .tocsc() on X raises AttributeError when X was stored dense.
adata.X = sparse.csc_matrix(adata.X)
# Chunk shape: all rows by 10 columns
adata.write_zarr(sys.argv[2], [adata.shape[0], 10])
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,64 @@ | ||
#!/usr/bin/env Rscript | ||
library(Seurat) | ||
library(Azimuth) | ||
|
||
# Helper fxn
# Collapse groups of current identities into new labels via RenameIdents().
#   obj  - object handed to RenameIdents()
#   curr - a vector of current identities, or a list of such vectors with
#          one element per group; numeric vectors are converted to character
#   new  - one new label per group in `curr`
# Returns whatever RenameIdents() returns for the old -> new mapping.
annotate <- function(obj, curr, new) {
  if (!is.list(x = curr)) curr <- list(curr)
  curr <- lapply(X = curr, FUN = function(vec) {
    if (is.numeric(x = vec)) as.character(x = vec) else vec
  })
  stopifnot(length(x = curr) == length(x = new))
  # lengths() always yields an integer vector; sapply(curr, length) returns
  # list() for an empty `curr`, which is not a valid `times` argument
  new <- rep(x = new, times = lengths(x = curr))
  RenameIdents(obj, setNames(as.list(x = new), nm = unlist(x = curr)))
}
|
||
# Usage: export.R <seurat_object.rds>
args <- commandArgs(trailingOnly = TRUE)
if (length(x = args) < 1) {
  stop("Usage: export.R <seurat_object.rds>", call. = FALSE)
}
ref.dir <- "reference/"
ob.dir <- "seurat_objects/"
# The outputs at the end of this script are written into these directories;
# create them up front so the script also works on a fresh checkout
for (out.dir in c(ref.dir, ob.dir)) {
  dir.create(path = out.dir, showWarnings = FALSE, recursive = TRUE)
}

ob <- readRDS(file = args[1])
# Level-2 annotation is the 'free_annotation' metadata column as-is
ob[['annotation.l2']] <- ob[['free_annotation']]
Idents(object = ob) <- 'annotation.l2'

# Groups of level-2 labels; each group is merged into the level-1 label at
# the same position in l1.id
l2.id <- list(
  c('Basophil/Mast 1','Basophil/Mast 2'),
  c('Ciliated','Proximal Ciliated'),
  c('Signaling Alveolar Epithelial Type 2','Alveolar Epithelial Type 2'),
  c('Bronchial Vessel 1','Bronchial Vessel 2'),
  c('Capillary Intermediate 1','Capillary Intermediate 2'),
  c('Proximal Basal','Proliferating Basal','Differentiating Basal'),
  c('TREM2+ Dendritic','IGSF21+ Dendritic','Myeloid Dendritic Type 1',
    'Myeloid Dendritic Type 2','EREG+ Dendritic'),
  c('Nonclassical Monocyte','Intermediate Monocyte'),
  c('Classical Monocyte','OLR1+ Classical Monocyte'),
  c('Airway Smooth Muscle','Vascular Smooth Muscle'),
  c('Alveolar Fibroblast','Adventitial Fibroblast'),
  c('CD8+ Naive T','CD8+ Memory/Effector T'),
  c('CD4+ Memory/Effector T','CD4+ Naive T')
)

l1.id <- c(
  'Basophil/Mast','Ciliated','Alveolar Epithelial Type 2','Bronchial Vessel',
  'Capillary Intermediate','Basal','Dendritic','CD16+ Monocyte','CD14+ Monocyte',
  'Smooth Muscle','Fibroblast',
  'CD8 T','CD4 T'
)

# Identities not listed in l2.id are not part of the rename mapping
ob <- annotate(ob, l2.id, l1.id)
ob[['annotation.l1']] <- Idents(object = ob)

ref <- AzimuthReference(
  object = ob,
  refUMAP = "umap",
  refDR = "pca",
  refAssay = "SCT",
  metadata = c("annotation.l1", "annotation.l2"),
  dims = 1:50,
  k.param = 31,
  reference.version = "1.0.0"
)
SaveAnnoyIndex(object = ref[["refdr.annoy.neighbors"]], file = file.path(ref.dir, "idx.annoy"))
saveRDS(object = ref, file = file.path(ref.dir, "ref.Rds"))
saveRDS(object = ob, file = file.path(ob.dir, "fullref.Rds"))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
#!/usr/bin/env Rscript
# Build the lung Seurat object from a counts CSV and a metadata CSV, run
# SCTransform, PCA and UMAP on it, and save the result.
#
# Arguments: <counts.csv> <metadata.csv> <output.rds>
library(Seurat)
library(data.table)

cli <- commandArgs(trailingOnly = TRUE)

# Counts: read the CSV, use its first column as row names, store as sparse.
# The same variable is overwritten at each step so no extra copy stays alive.
counts <- fread(file = cli[1], header = TRUE, sep = ",")
counts <- as.matrix(x = counts, rownames = 1)
counts <- as(object = counts, Class = 'dgCMatrix')

# Metadata: first column supplies the row names
cell.info <- read.csv(file = cli[2], row.names = 1)

lung <- CreateSeuratObject(counts = counts, meta.data = cell.info)
lung <- SCTransform(object = lung)
lung <- RunPCA(object = lung, verbose = FALSE)
# return.model = TRUE keeps the UMAP model on the object
lung <- RunUMAP(object = lung, dims = 1:50, return.model = TRUE)

saveRDS(object = lung, file = cli[3])
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
#!/usr/bin/env Rscript
# Build the demo Seurat object from an RData file of counts and a
# tab-separated cell-type table.
#
# Usage: setup_demo.R <raw_counts.RData> <barcodes_cell_types.txt[.gz]> <output.rds>
library(Seurat)

args <- commandArgs(trailingOnly = TRUE)
if (length(x = args) < 3) {
  stop(
    "Usage: setup_demo.R <raw_counts.RData> <barcodes_cell_types.txt[.gz]> <output.rds>",
    call. = FALSE
  )
}

# load() returns the names of the objects it restored; check that the
# expected 'raw_counts' object is among them instead of failing later with
# an "object not found" error
loaded <- load(file = args[1])
if (!"raw_counts" %in% loaded) {
  stop("Expected an object named 'raw_counts' in ", args[1], call. = FALSE)
}
# TRUE rather than T: T is an ordinary variable that can be reassigned
meta <- read.table(file = args[2], header = TRUE, sep = "\t", row.names = 1)
meta <- meta[, c("ID", "location", "celltype")]

ob <- CreateSeuratObject(counts = raw_counts, meta.data = meta)
saveRDS(object = ob, file = args[3])
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters