diff --git a/DESCRIPTION b/DESCRIPTION index 5ff711df..df53764f 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -26,6 +26,7 @@ Imports: ggplot2, glue, jsonlite, + kableExtra, magrittr, Matrix, Matrix.utils, diff --git a/docker/scripts/install_scpca_deps.sh b/docker/scripts/install_scpca_deps.sh index f0da7b99..0b27de95 100644 --- a/docker/scripts/install_scpca_deps.sh +++ b/docker/scripts/install_scpca_deps.sh @@ -44,6 +44,7 @@ install2.r --error --skipinstalled -n $NCPUS \ DBI \ devtools \ here \ + kableExtra \ matrixStats \ optparse \ rmarkdown \ diff --git a/inst/rmd/qc_report.rmd b/inst/rmd/qc_report.rmd index 2977f307..ceff9c22 100644 --- a/inst/rmd/qc_report.rmd +++ b/inst/rmd/qc_report.rmd @@ -13,7 +13,7 @@ output: toc: true toc_depth: 2 toc_float: true - number_sections: true + number_sections: false code_download: true --- @@ -27,13 +27,13 @@ library(SingleCellExperiment) library(dplyr) library(ggplot2) -# Set standard ggplot theme +# Set default ggplot theme theme_set(theme_bw()) ``` ```{r sce_setup} # save some typing later -sample <- params$sample +sample_id <- params$sample unfiltered_sce <- params$unfiltered_sce filtered_sce <- params$filtered_sce @@ -47,11 +47,22 @@ if (!has_filtered){ if (is.null(unfiltered_sce$sum)){ unfiltered_sce <- scuttle::addPerCellQCMetrics(unfiltered_sce) } +if (is.null(filtered_sce$sum)){ + filtered_sce <- scuttle::addPerCellQCMetrics(filtered_sce) + filtered_sce$subsets_mito_percent <- NA_real_ + skip_miQC <- TRUE +} else{ + skip_miQC <- FALSE +} +# add miQC model if missing +if (is.null(metadata(filtered_sce)$miQC_model) & !skip_miQC){ + metadata(filtered_sce)$miQC_model <- miQC::mixtureModel(filtered_sce) +} ``` # Introduction -# Processing Information for `r sample` +# Processing Information for `r sample_id` ## Raw Sample Metrics @@ -60,39 +71,48 @@ if (is.null(unfiltered_sce$sum)){ unfiltered_meta <- metadata(unfiltered_sce) sample_information <- tibble::tibble( - "Sample id" = sample, - "Tech version" = unfiltered_meta$tech_version, + "Sample id" = sample_id, + "Tech version" = format(unfiltered_meta$tech_version), # format to keep nulls "Number of reads sequenced" = format(unfiltered_meta$total_reads, big.mark = ',', scientific = FALSE), "Number of mapped reads" = format(unfiltered_meta$mapped_reads, big.mark = ',', scientific = FALSE), "Number of cells reported by alevin-fry" = format(unfiltered_meta$af_num_cells, big.mark = ',', scientific = FALSE) ) %>% + mutate(across(.fns = ~ifelse(.x == "NULL", "N/A", .x))) %>% # reformat nulls t() # make table with sample information -knitr::kable(sample_information, "simple") +knitr::kable(sample_information) %>% + kableExtra::kable_styling(bootstrap_options = "striped", + full_width = FALSE, + position = "left") ``` ## Pre-Processing Information ```{r } processing_info <- tibble::tibble( - "Salmon version" = unfiltered_meta$salmon_version, - "Alevin-fry version" = unfiltered_meta$alevinfry_version, - "Transcriptome index" = unfiltered_meta$reference_index, - "Filtering method" = unfiltered_meta$af_permit_type, - "Resolution" = unfiltered_meta$af_resolution, + "Salmon version" = format(unfiltered_meta$salmon_version), + "Alevin-fry version" = format(unfiltered_meta$alevinfry_version), + "Transcriptome index" = format(unfiltered_meta$reference_index), + "Filtering method" = format(unfiltered_meta$af_permit_type), + "Resolution" = format(unfiltered_meta$af_resolution), "Transcripts included" = dplyr::case_when( - unfiltered_meta$transcript_type == "spliced" ~ "Spliced only", - unfiltered_meta$transcript_type == "unspliced" ~ "Spliced and unspliced" ) + format(unfiltered_meta$transcript_type) == "spliced" ~ "Spliced only", + format(unfiltered_meta$transcript_type) == "unspliced" ~ "Spliced and unspliced", + TRUE ~ format(unfiltered_meta$transcript_type)) ) %>% + mutate(across(.fns = ~ifelse(.x == "NULL", "N/A", .x))) %>% t() # make table with processing information -knitr::kable(processing_info, "simple") +knitr::kable(processing_info) %>% + kableExtra::kable_styling(bootstrap_options = "striped", + full_width = FALSE, + position = "left") ``` -# `r sample` Experiment Summary +# `r sample_id` Experiment Summary This sample has `r ncol(unfiltered_sce)` cells, assayed for `r nrow(unfiltered_sce)` genes. @@ -103,32 +123,85 @@ if(has_filtered){ ``` ## Knee plot -```{r} +```{r, fig.alt="Knee plot of filtered and unfiltered droplets"} unfiltered_celldata <- data.frame(colData(unfiltered_sce)) %>% mutate( - rank = rank(- unfiltered_sce$sum), # using full spec for clarity + rank = rank(- unfiltered_sce$sum, ties.method = "first"), # using full spec for clarity filter_status = factor(ifelse(colnames(unfiltered_sce) %in% colnames(filtered_sce), "Passed", "Excluded"), levels = c("Passed", "Excluded")) - ) + ) %>% + filter(sum > 0) %>% # remove zeros for plotting + arrange(desc(rank)) ggplot(unfiltered_celldata, aes(x = rank, y = sum, color = filter_status))+ - geom_point(aes(shape = filter_status)) + - scale_y_log10() + - scale_color_manual(values = c("navyblue", "grey")) + + geom_point(aes(shape = filter_status),alpha = 0.2, size = 2) + + scale_x_log10(labels = scales::label_number()) + + scale_y_log10(labels = scales::label_number()) + + scale_color_manual(values = c("darkgreen", "grey80")) + scale_shape_manual(values = c(16, 1)) + # filled and unfilled circles labs( x = "Rank", - y = "UMI count", + y = "Total UMI count", color = "Filter status", shape = "Filter status" - ) + ) + + guides(color = guide_legend(override.aes = list(alpha = 1))) + + theme(plot.margin = margin(rep(20, 4)), + legend.position = c(0, 0), + legend.justification = c(0,0), + legend.background = element_rect(color = "grey20", size = 0.25), + legend.box.margin = margin(rep(5, 4))) +``` + + +## Cell read metrics + +```{r, fig.alt="Total UMI x genes expressed"} +filtered_celldata <- data.frame(colData(filtered_sce)) + +ggplot(filtered_celldata, + aes (x = sum, + y = detected, + color = subsets_mito_percent)) + + geom_point(alpha = 0.3) + + scale_color_viridis_c(limits = c(0, 100)) + + labs(x = "Total UMI count", + y = "Number of genes detected", + color = "Percent reads\nmitochondrial") + + theme(plot.margin = margin(rep(20, 4)), + legend.position = c(0, 1), + legend.justification = c(0,1), + legend.background = element_rect(color = "grey20", size = 0.25), + legend.box.margin = margin(rep(5, 4))) +``` + +## miQC model diagnostics + +```{r, fig.alt="miQC model diagnostics plot", results='asis', warning=FALSE} +if(skip_miQC){ + cat("miQC model not created, skipping miQC plot. Usually this is because mitochondrial gene data was not available.") +}else{ + # remove prob_compromised if it exists, as this will cause errors with plotModel + filtered_sce$prob_compromised <- NULL + miQC::plotModel(filtered_sce, model = metadata(filtered_sce)$miQC_model) + + coord_cartesian(ylim = c(0,100)) + + labs(x = "Number of genes detected", + y = "Percent reads mitochondrial") + + theme(plot.margin = margin(rep(20, 4)), + legend.position = c(1, 1), + legend.justification = c(1,1), + legend.background = element_rect(color = "grey20", size = 0.25), + legend.box.margin = margin(rep(5, 4))) +} ``` # Session Info +
+R session information ```{r session_info} sessioninfo::session_info() ``` - +