Skip to content

Commit

Permalink
report number of HVF #26 and provide Parse conversion script #21
Browse files Browse the repository at this point in the history
  • Loading branch information
sreichl committed Nov 14, 2024
1 parent d437efd commit 743200a
Show file tree
Hide file tree
Showing 8 changed files with 80 additions and 8 deletions.
69 changes: 69 additions & 0 deletions helpers/parse_to_10x_conversion.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
# code adapted from
# https://support.parsebiosciences.com/hc/en-us/articles/360053078092-Seurat-Tutorial-65k-PBMCs
# https://rdrr.io/github/MarioniLab/DropletUtils/src/R/write10xCounts.R

# libraries
library(Seurat)
library(dplyr)
library(Matrix)

# config
# data_path: directory containing the PARSE output files that are read below
#            (count_matrix.mtx, cell_metadata.csv, all_genes.csv)
# result_path: directory that receives all files written by this script
data_path <- "/path/to/PARSE/data/DGE_filtered"
result_path <- "/path/to/result/directory"

# only create the result directory when it is missing, so that re-running the
# script does not emit an "already exists" warning while genuine creation
# failures are still reported
if (!dir.exists(result_path)) {
  dir.create(result_path, recursive = TRUE)
}

#### LOAD DATA

# input files expected inside data_path
mtx_file <- file.path(data_path, "count_matrix.mtx")
cells_file <- file.path(data_path, "cell_metadata.csv")
genes_file <- file.path(data_path, "all_genes.csv")

# load PARSE count data
# - cell names come from column 1 of the cell metadata file
# - feature names come from column 2 of the gene file
# - both files are comma-separated and their first line is skipped
# - the matrix is transposed on load (mtx.transpose = TRUE)
data <- ReadMtx(
  mtx = mtx_file,
  cells = cells_file,
  features = genes_file,
  cell.column = 1,
  feature.column = 2,
  cell.sep = ",",
  feature.sep = ",",
  skip.cell = 1,
  skip.feature = 1,
  mtx.transpose = TRUE,
  unique.features = TRUE,
  strip.suffix = FALSE
)

# load cell metadata from PARSE, using the first column as row names
cell_meta <- read.csv(cells_file, row.names = 1)

#### TRANSFORM DATA

# check to see if empty gene names are present, add name if so.
# (the replacement has to be applied to `data`, the matrix loaded above)
empty_gene_names <- rownames(data) == ""
print(table(empty_gene_names))
rownames(data)[empty_gene_names] <- "unknown"

# transform metadata into desired format (e.g., split or add columns)
# <ADD YOUR CODE HERE>

# create pre-filtered Seurat object to reduce size
# - min.cells: keep features detected in at least this many cells
# - min.features: keep cells with at least this many detected features
#   (this argument was called `min.genes` in Seurat v2; current Seurat
#   versions, which provide ReadMtx(), expect `min.features`)
data_object <- CreateSeuratObject(
  data,
  project = "project_name",
  meta.data = cell_meta,
  names.field = 0,
  min.cells = 100,
  min.features = 100
)

# check the created Seurat object
print(data_object)

#### SAVE DATA

# output files, all written into result_path
rds_file <- file.path(result_path, "seurat_object.rds")
metadata_file <- file.path(result_path, "metadata.csv")
matrix_file <- file.path(result_path, "matrix.mtx")
barcodes_file <- file.path(result_path, "barcodes.tsv")
features_file <- file.path(result_path, "features.tsv")

# save Seurat object as RDS file
saveRDS(data_object, file = rds_file)

# save metadata
write.csv(data_object@meta.data, metadata_file)

# save RNA counts in Matrix Market format
# NOTE(review): direct access to the `counts` slot presumably requires a
# Seurat v3/v4 style assay -- confirm before running with Seurat v5
counts_RNA <- data_object@assays$RNA@counts
writeMM(counts_RNA, file = matrix_file)

# save barcodes (i.e., cells), one per line
write(colnames(counts_RNA), file = barcodes_file)

# save features (i.e., genes) as three tab-separated columns without header:
# gene name, gene name again, and the constant type "Gene Expression"
gene_names <- rownames(counts_RNA)
gene.info <- data.frame(gene_names, gene_names, stringsAsFactors = FALSE)
gene.info$gene.type <- rep("Gene Expression", length.out = nrow(gene.info))
write.table(
  gene.info,
  file = features_file,
  row.names = FALSE,
  col.names = FALSE,
  quote = FALSE,
  sep = "\t"
)
1 change: 0 additions & 1 deletion workflow/rules/normalize_correct_score.smk
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,6 @@ rule normalize:


# correct data
#if len(config["variables_to_regress"])>0:
rule correct:
input:
# NORMALIZED object as input as only it contains post-normalization calculated scores to be regressed out e.g., cell-cycle scores
Expand Down
2 changes: 2 additions & 0 deletions workflow/rules/process.smk
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,8 @@ rule merge:
"../envs/seurat.yaml"
log:
os.path.join("logs","rules","merge.log"),
params:
extra_metadata = config["extra_metadata"]
script:
"../scripts/merge.R"

Expand Down
2 changes: 1 addition & 1 deletion workflow/scripts/merge.R
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ snakemake@source("./utils.R")

# inputs
sample_object_paths <- snakemake@input
extra_metadata_path <- snakemake@config[["extra_metadata"]]
extra_metadata_path <- snakemake@params[["extra_metadata"]]

# outputs
merged_object <- snakemake@output[["merged_object"]]
Expand Down
4 changes: 2 additions & 2 deletions workflow/scripts/pseudobulk.R
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ metadata_aggregated <- metadata %>%
select(-where(~ any(is.na(.)))) # Remove columns that have NAs
# format metadata
metadata_aggregated <- as.data.frame(metadata_aggregated)
rownames(metadata_aggregated) <- apply(metadata_aggregated[, pseudobulk_by], 1, function(x) paste(x, collapse = "_"))
rownames(metadata_aggregated) <- apply(metadata_aggregated[, pseudobulk_by, drop = FALSE], 1, function(x) paste(x, collapse = "_"))
# filter by cell_count_th
metadata_aggregated <- metadata_aggregated[metadata_aggregated$cell_count>=pseudobulk_th, ]

Expand Down Expand Up @@ -75,7 +75,7 @@ for (modality in c("RNA", ab_flag, crispr_flag, custom_flag)){

# reformat df
tmp_pseudobulk <- as.data.frame(tmp_pseudobulk)
rownames(tmp_pseudobulk) <- apply(tmp_pseudobulk[, pseudobulk_by], 1, function(x) paste(x, collapse = "_"))
rownames(tmp_pseudobulk) <- apply(tmp_pseudobulk[, pseudobulk_by, drop = FALSE], 1, function(x) paste(x, collapse = "_"))
tmp_pseudobulk[,pseudobulk_by] <- NULL
tmp_pseudobulk <- as.data.frame(t(tmp_pseudobulk))

Expand Down
4 changes: 2 additions & 2 deletions workflow/scripts/sctransform_cellScore.R
Original file line number Diff line number Diff line change
Expand Up @@ -113,8 +113,8 @@ if (cell_cycle['s_phase_genes']!=""){
for (gene_list_name in names(gene_lists)){

# skip if no features in the data, to avoid Error
if(length(intersect(gene_lists[[gene_list_name]], data_features)==0)){
print(paste0("None of ",gene_list_name," features are present in the data."))
if(length(intersect(gene_lists[[gene_list_name]], data_features))<3){
print(paste0("Less than 3 of ",gene_list_name," features are present in the data."))
next
}

Expand Down
2 changes: 1 addition & 1 deletion workflow/scripts/seurat_plots.R
Original file line number Diff line number Diff line change
Expand Up @@ -178,7 +178,7 @@ if (plot_type=="Heatmap"){
set.seed(42)
data_object <- subset(data_object, downsample = maxcells)
# update heatmap width accordingly
width <- length(colnames(data_object))*width_col + 2
width <- length(colnames(data_object))*width_col + 4
}

# cluster features (rows) using hclust
Expand Down
4 changes: 3 additions & 1 deletion workflow/scripts/utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,9 @@ save_seurat_object <- function (seurat_obj, result_dir){
# save metadata
fwrite(as.data.frame(seurat_obj[[]]), file = file.path(result_dir, "metadata.csv"), row.names=TRUE)
# save stats
stats <- paste0("cells: ",ncol(seurat_obj),"\nfeatures: ",nrow(seurat_obj))
stats <- paste0("cells: ",ncol(seurat_obj),
"\nfeatures: ",nrow(seurat_obj),
"\nvariable features: ",length(VariableFeatures(seurat_obj)))
write(stats, file=file.path(result_dir, "stats.txt"))
}

Expand Down

0 comments on commit 743200a

Please sign in to comment.