From ef631aaad229b6b8ab2a2b25b7559bdbb0011696 Mon Sep 17 00:00:00 2001 From: Joshua Shapiro Date: Thu, 30 Sep 2021 13:55:02 -0400 Subject: [PATCH 1/9] Improve table formatting --- DESCRIPTION | 1 + inst/rmd/qc_report.rmd | 45 +++++++++++++++++++++++++----------------- 2 files changed, 28 insertions(+), 18 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 5ff711df..df53764f 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -26,6 +26,7 @@ Imports: ggplot2, glue, jsonlite, + kableExtra, magrittr, Matrix, Matrix.utils, diff --git a/inst/rmd/qc_report.rmd b/inst/rmd/qc_report.rmd index 2977f307..82a58d20 100644 --- a/inst/rmd/qc_report.rmd +++ b/inst/rmd/qc_report.rmd @@ -13,7 +13,7 @@ output: toc: true toc_depth: 2 toc_float: true - number_sections: true + number_sections: false code_download: true --- @@ -27,13 +27,13 @@ library(SingleCellExperiment) library(dplyr) library(ggplot2) -# Set standard ggplot theme +# Set default ggplot theme theme_set(theme_bw()) ``` ```{r sce_setup} # save some typing later -sample <- params$sample +sample_id <- params$sample unfiltered_sce <- params$unfiltered_sce filtered_sce <- params$filtered_sce @@ -51,7 +51,7 @@ if (is.null(unfiltered_sce$sum)){ # Introduction -# Processing Information for `r sample` +# Processing Information for `r sample_id` ## Raw Sample Metrics @@ -60,39 +60,45 @@ if (is.null(unfiltered_sce$sum)){ unfiltered_meta <- metadata(unfiltered_sce) sample_information <- tibble::tibble( - "Sample id" = sample, - "Tech version" = unfiltered_meta$tech_version, + "Sample id" = sample_id, + "Tech version" = format(unfiltered_meta$tech_version), "Number of reads sequenced" = format(unfiltered_meta$total_reads, big.mark = ',', scientific = FALSE), "Number of mapped reads" = format(unfiltered_meta$mapped_reads, big.mark = ',', scientific = FALSE), "Number of cells reported by alevin-fry" = format(unfiltered_meta$af_num_cells, big.mark = ',', scientific = FALSE) ) %>% + mutate(across(.fns = ~ifelse(.x == "NULL", "N/A", .x))) %>% t() # make table with sample information -knitr::kable(sample_information, "simple") +knitr::kable(sample_information, + caption = "Sample Information") %>% + kableExtra::kable_styling(bootstrap_options = "striped") ``` ## Pre-Processing Information ```{r } processing_info <- tibble::tibble( - "Salmon version" = unfiltered_meta$salmon_version, - "Alevin-fry version" = unfiltered_meta$alevinfry_version, - "Transcriptome index" = unfiltered_meta$reference_index, - "Filtering method" = unfiltered_meta$af_permit_type, - "Resolution" = unfiltered_meta$af_resolution, + "Salmon version" = format(unfiltered_meta$salmon_version), + "Alevin-fry version" = format(unfiltered_meta$alevinfry_version), + "Transcriptome index" = format(unfiltered_meta$reference_index), + "Filtering method" = format(unfiltered_meta$af_permit_type), + "Resolution" = format(unfiltered_meta$af_resolution), "Transcripts included" = dplyr::case_when( - unfiltered_meta$transcript_type == "spliced" ~ "Spliced only", - unfiltered_meta$transcript_type == "unspliced" ~ "Spliced and unspliced" ) + format(unfiltered_meta$transcript_type) == "spliced" ~ "Spliced only", + format(unfiltered_meta$transcript_type) == "unspliced" ~ "Spliced and unspliced", + TRUE ~ format(unfiltered_meta$transcript_type)) ) %>% + mutate(across(.fns = ~ifelse(.x == "NULL", "N/A", .x))) %>% t() # make table with processing information -knitr::kable(processing_info, "simple") +knitr::kable(processing_info, caption ="Processing information") %>% + kableExtra::kable_styling(bootstrap_options = "striped") ``` -# `r sample` Experiment Summary +# `r sample_id` Experiment Summary This sample has `r ncol(unfiltered_sce)` cells, assayed for `r nrow(unfiltered_sce)` genes. @@ -114,7 +120,8 @@ unfiltered_celldata <- data.frame(colData(unfiltered_sce)) %>% ggplot(unfiltered_celldata, aes(x = rank, y = sum, color = filter_status))+ geom_point(aes(shape = filter_status)) + - scale_y_log10() + + scale_y_log10() + + scale_x_log10() + scale_color_manual(values = c("navyblue", "grey")) + scale_shape_manual(values = c(16, 1)) + # filled and unfilled circles labs( @@ -127,8 +134,10 @@ ggplot(unfiltered_celldata, aes(x = rank, y = sum, color = filter_status))+ # Session Info +
+R session information ```{r session_info} sessioninfo::session_info() ``` - +
From afa3812d64e9044c845d127b678bf51081314948 Mon Sep 17 00:00:00 2001 From: Joshua Shapiro Date: Thu, 30 Sep 2021 15:01:43 -0400 Subject: [PATCH 2/9] Add UMI/gene/mito plot --- inst/rmd/qc_report.rmd | 38 ++++++++++++++++++++++++++++++++++---- 1 file changed, 34 insertions(+), 4 deletions(-) diff --git a/inst/rmd/qc_report.rmd b/inst/rmd/qc_report.rmd index 82a58d20..2c53a962 100644 --- a/inst/rmd/qc_report.rmd +++ b/inst/rmd/qc_report.rmd @@ -46,6 +46,11 @@ if (!has_filtered){ # add cell stats if missing if (is.null(unfiltered_sce$sum)){ unfiltered_sce <- scuttle::addPerCellQCMetrics(unfiltered_sce) + unfiltered_sce$subsets_mito_percent <- NA_real_ +} +if (is.null(filtered_sce$sum)){ + filtered_sce <- scuttle::addPerCellQCMetrics(filtered_sce) + filtered_sce$subsets_mito_percent <- NA_real_ } ``` @@ -61,12 +66,12 @@ unfiltered_meta <- metadata(unfiltered_sce) sample_information <- tibble::tibble( "Sample id" = sample_id, - "Tech version" = format(unfiltered_meta$tech_version), + "Tech version" = format(unfiltered_meta$tech_version), # format to keep nulls "Number of reads sequenced" = format(unfiltered_meta$total_reads, big.mark = ',', scientific = FALSE), "Number of mapped reads" = format(unfiltered_meta$mapped_reads, big.mark = ',', scientific = FALSE), "Number of cells reported by alevin-fry" = format(unfiltered_meta$af_num_cells, big.mark = ',', scientific = FALSE) ) %>% - mutate(across(.fns = ~ifelse(.x == "NULL", "N/A", .x))) %>% + mutate(across(.fns = ~ifelse(.x == "NULL", "N/A", .x))) %>% # reformat nulls t() # make table with sample information @@ -109,7 +114,7 @@ if(has_filtered){ ``` ## Knee plot -```{r} +```{r, fig.cap="Knee plot of filtered and unfiltered droplets"} unfiltered_celldata <- data.frame(colData(unfiltered_sce)) %>% mutate( rank = rank(- unfiltered_sce$sum), # using full spec for clarity @@ -129,7 +134,32 @@ ggplot(unfiltered_celldata, aes(x = rank, y = sum, color = filter_status))+ y = "UMI count", color = "Filter status", shape = "Filter status" - ) + ) + + theme(legend.position = c(0, 0), + legend.justification = c(0,0), + legend.background = element_rect(color = "grey20", size = 0.25), + legend.box.margin = margin(rep(5, 4))) +``` + + +## UMI-gene count plot + +```{r, fig.cap="Total UMI x genes expressed"} +filtered_celldata <- data.frame(colData(filtered_sce)) + +ggplot(filtered_celldata, + aes (x = sum, + y = detected, + color = subsets_mito_percent)) + + geom_point(alpha = 0.3) + + scale_color_viridis_c(limits = c(0, 100)) + + labs(x = "Total UMI Count", + y = "Number of Genes Expressed", + color = "Mitochondrial\nPercent") + + theme(legend.position = c(0, 1), + legend.justification = c(0,1), + legend.background = element_rect(color = "grey20", size = 0.25), + legend.box.margin = margin(rep(5, 4))) ``` From 4ac7e861b4851adaf58f5bbc1e7332dfc2e74418 Mon Sep 17 00:00:00 2001 From: Joshua Shapiro Date: Thu, 30 Sep 2021 15:18:22 -0400 Subject: [PATCH 3/9] Update table formatting --- inst/rmd/qc_report.rmd | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/inst/rmd/qc_report.rmd b/inst/rmd/qc_report.rmd index 2c53a962..fadf0aea 100644 --- a/inst/rmd/qc_report.rmd +++ b/inst/rmd/qc_report.rmd @@ -75,9 +75,10 @@ sample_information <- tibble::tibble( t() # make table with sample information -knitr::kable(sample_information, - caption = "Sample Information") %>% - kableExtra::kable_styling(bootstrap_options = "striped") +knitr::kable(sample_information) %>% + kableExtra::kable_styling(bootstrap_options = "striped", + full_width = FALSE, + position = "left") ``` ## Pre-Processing Information @@ -99,8 +100,10 @@ processing_info <- tibble::tibble( # make table with processing information -knitr::kable(processing_info, caption ="Processing information") %>% - kableExtra::kable_styling(bootstrap_options = "striped") +knitr::kable(processing_info) %>% + kableExtra::kable_styling(bootstrap_options = "striped", + full_width = FALSE, + position = "left") ``` # `r sample_id` Experiment Summary From aa8ec0d6e69fc37814062fe86272ec458e432882 Mon Sep 17 00:00:00 2001 From: Joshua Shapiro Date: Thu, 30 Sep 2021 15:18:36 -0400 Subject: [PATCH 4/9] add kableExtra to docker --- docker/scripts/install_scpca_deps.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/docker/scripts/install_scpca_deps.sh b/docker/scripts/install_scpca_deps.sh index f0da7b99..0b27de95 100644 --- a/docker/scripts/install_scpca_deps.sh +++ b/docker/scripts/install_scpca_deps.sh @@ -44,6 +44,7 @@ install2.r --error --skipinstalled -n $NCPUS \ DBI \ devtools \ here \ + kableExtra \ matrixStats \ optparse \ rmarkdown \ From 7aadede97f1ccff1a1cb5e5b29e6bd12cec09584 Mon Sep 17 00:00:00 2001 From: Joshua Shapiro Date: Thu, 30 Sep 2021 16:03:23 -0400 Subject: [PATCH 5/9] clean up plots --- inst/rmd/qc_report.rmd | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/inst/rmd/qc_report.rmd b/inst/rmd/qc_report.rmd index fadf0aea..24666fb9 100644 --- a/inst/rmd/qc_report.rmd +++ b/inst/rmd/qc_report.rmd @@ -120,17 +120,19 @@ if(has_filtered){ ```{r, fig.cap="Knee plot of filtered and unfiltered droplets"} unfiltered_celldata <- data.frame(colData(unfiltered_sce)) %>% mutate( - rank = rank(- unfiltered_sce$sum), # using full spec for clarity + rank = rank(- unfiltered_sce$sum, ties.method = "first"), # using full spec for clarity filter_status = factor(ifelse(colnames(unfiltered_sce) %in% colnames(filtered_sce), "Passed", "Excluded"), levels = c("Passed", "Excluded")) - ) + ) %>% + filter(sum > 0) %>% # remove zeros for plotting + arrange(desc(rank)) ggplot(unfiltered_celldata, aes(x = rank, y = sum, color = filter_status))+ - geom_point(aes(shape = filter_status)) + + geom_point(aes(shape = filter_status),alpha = 0.2, size = 2) + scale_y_log10() + scale_x_log10() + - scale_color_manual(values = c("navyblue", "grey")) + + scale_color_manual(values = c("navyblue", "grey40")) + scale_shape_manual(values = c(16, 1)) + # filled and unfilled circles labs( x = "Rank", @@ -138,6 +140,7 @@ ggplot(unfiltered_celldata, aes(x = rank, y = sum, color = filter_status))+ color = "Filter status", shape = "Filter status" ) + + guides(color = guide_legend(override.aes = list(alpha = 1))) + theme(legend.position = c(0, 0), legend.justification = c(0,0), legend.background = element_rect(color = "grey20", size = 0.25), @@ -145,7 +148,7 @@ ggplot(unfiltered_celldata, aes(x = rank, y = sum, color = filter_status))+ ``` -## UMI-gene count plot +## UMI - expressed gene count plot ```{r, fig.cap="Total UMI x genes expressed"} filtered_celldata <- data.frame(colData(filtered_sce)) From c13e290110f728a0f45d1f0ab0f002087d717762 Mon Sep 17 00:00:00 2001 From: Joshua Shapiro Date: Fri, 1 Oct 2021 16:35:42 -0400 Subject: [PATCH 6/9] Add miQC plot --- inst/rmd/qc_report.rmd | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/inst/rmd/qc_report.rmd b/inst/rmd/qc_report.rmd index 24666fb9..a983cb09 100644 --- a/inst/rmd/qc_report.rmd +++ b/inst/rmd/qc_report.rmd @@ -46,11 +46,17 @@ if (!has_filtered){ # add cell stats if missing if (is.null(unfiltered_sce$sum)){ unfiltered_sce <- scuttle::addPerCellQCMetrics(unfiltered_sce) - unfiltered_sce$subsets_mito_percent <- NA_real_ } if (is.null(filtered_sce$sum)){ filtered_sce <- scuttle::addPerCellQCMetrics(filtered_sce) filtered_sce$subsets_mito_percent <- NA_real_ + skip_miQC <- TRUE +} else{ + skip_miQC <- FALSE +} +# add miQC model if missing +if (is.null(metadata(filtered_sce)$miQC_model) & !skip_miQC){ + metadata(filtered_sce)$miQC_model <- miQC::mixtureModel(filtered_sce) } ``` @@ -168,6 +174,22 @@ ggplot(filtered_celldata, legend.box.margin = margin(rep(5, 4))) ``` +## miQC model diagnostics + +```{r, fig.cap="miQC model diagnostics plot", results='asis', warning=FALSE} +if(skip_miQC){ + cat("miQC model not created, skipping miQC plot. Usually this is because mitochondrial gene data was not available.") +}else{ + # remove prob_compromised if it exists, as this will cause errors with plotModel + filtered_sce$prob_compromised <- NULL + miQC::plotModel(filtered_sce, model = metadata(filtered_sce)$miQC_model) + + theme(legend.position = c(1, 1), + legend.justification = c(1,1), + legend.background = element_rect(color = "grey20", size = 0.25), + legend.box.margin = margin(rep(5, 4))) +} +``` + # Session Info
From 986b5a006441e780803929cc4fb1b5835e4d228b Mon Sep 17 00:00:00 2001 From: Joshua Shapiro Date: Fri, 1 Oct 2021 16:36:57 -0400 Subject: [PATCH 7/9] Remove setting mito_percent for unfiltered --- inst/rmd/qc_report.rmd | 1 - 1 file changed, 1 deletion(-) diff --git a/inst/rmd/qc_report.rmd b/inst/rmd/qc_report.rmd index 24666fb9..59f37582 100644 --- a/inst/rmd/qc_report.rmd +++ b/inst/rmd/qc_report.rmd @@ -46,7 +46,6 @@ if (!has_filtered){ # add cell stats if missing if (is.null(unfiltered_sce$sum)){ unfiltered_sce <- scuttle::addPerCellQCMetrics(unfiltered_sce) - unfiltered_sce$subsets_mito_percent <- NA_real_ } if (is.null(filtered_sce$sum)){ filtered_sce <- scuttle::addPerCellQCMetrics(filtered_sce) From 509af713509be0b6f5c62751cdffd4afcf20b153 Mon Sep 17 00:00:00 2001 From: Joshua Shapiro Date: Mon, 4 Oct 2021 13:55:16 -0400 Subject: [PATCH 8/9] Legend and graphical updates --- inst/rmd/qc_report.rmd | 40 +++++++++++++++++++++++----------------- 1 file changed, 23 insertions(+), 17 deletions(-) diff --git a/inst/rmd/qc_report.rmd b/inst/rmd/qc_report.rmd index a983cb09..aba9dcba 100644 --- a/inst/rmd/qc_report.rmd +++ b/inst/rmd/qc_report.rmd @@ -123,7 +123,7 @@ if(has_filtered){ ``` ## Knee plot -```{r, fig.cap="Knee plot of filtered and unfiltered droplets"} +```{r, fig.alt="Knee plot of filtered and unfiltered droplets"} unfiltered_celldata <- data.frame(colData(unfiltered_sce)) %>% mutate( rank = rank(- unfiltered_sce$sum, ties.method = "first"), # using full spec for clarity @@ -136,27 +136,28 @@ unfiltered_celldata <- data.frame(colData(unfiltered_sce)) %>% ggplot(unfiltered_celldata, aes(x = rank, y = sum, color = filter_status))+ geom_point(aes(shape = filter_status),alpha = 0.2, size = 2) + - scale_y_log10() + - scale_x_log10() + - scale_color_manual(values = c("navyblue", "grey40")) + + scale_x_log10(labels = scales::label_number()) + + scale_y_log10(labels = scales::label_number()) + + scale_color_manual(values = c("darkgreen", "grey80")) + scale_shape_manual(values = c(16, 1)) + # filled and unfilled circles labs( x = "Rank", - y = "UMI count", + y = "Total UMI count", color = "Filter status", shape = "Filter status" ) + guides(color = guide_legend(override.aes = list(alpha = 1))) + - theme(legend.position = c(0, 0), + theme(plot.margin = margin(rep(20, 4)), + legend.position = c(0, 0), legend.justification = c(0,0), legend.background = element_rect(color = "grey20", size = 0.25), legend.box.margin = margin(rep(5, 4))) ``` -## UMI - expressed gene count plot +## Cell read metrics -```{r, fig.cap="Total UMI x genes expressed"} +```{r, fig.alt="Total UMI x genes expressed"} filtered_celldata <- data.frame(colData(filtered_sce)) ggplot(filtered_celldata, @@ -165,10 +166,11 @@ ggplot(filtered_celldata, color = subsets_mito_percent)) + geom_point(alpha = 0.3) + scale_color_viridis_c(limits = c(0, 100)) + - labs(x = "Total UMI Count", - y = "Number of Genes Expressed", - color = "Mitochondrial\nPercent") + - theme(legend.position = c(0, 1), + labs(x = "Total UMI count", + y = "Number of genes detected", + color = "Percent reads\nmitochondrial") + + theme(plot.margin = margin(rep(20, 4)), + legend.position = c(0, 1), legend.justification = c(0,1), legend.background = element_rect(color = "grey20", size = 0.25), legend.box.margin = margin(rep(5, 4))) @@ -176,17 +178,21 @@ ggplot(filtered_celldata, ## miQC model diagnostics -```{r, fig.cap="miQC model diagnostics plot", results='asis', warning=FALSE} +```{r, fig.alt="miQC model diagnostics plot", results='asis', warning=FALSE} if(skip_miQC){ cat("miQC model not created, skipping miQC plot. Usually this is because mitochondrial gene data was not available.") }else{ # remove prob_compromised if it exists, as this will cause errors with plotModel filtered_sce$prob_compromised <- NULL miQC::plotModel(filtered_sce, model = metadata(filtered_sce)$miQC_model) + - theme(legend.position = c(1, 1), - legend.justification = c(1,1), - legend.background = element_rect(color = "grey20", size = 0.25), - legend.box.margin = margin(rep(5, 4))) + coord_cartesian(ylim = c(0,100)) + + labs(x = "Number of genes expressed", + y = "Percent reads mitochondrial") + + theme(plot.margin = margin(rep(20, 4)), + legend.position = c(1, 1), + legend.justification = c(1,1), + legend.background = element_rect(color = "grey20", size = 0.25), + legend.box.margin = margin(rep(5, 4))) } ``` From cee0c9a5179f0961b0fc8f2f31c63a4b42d4dfbc Mon Sep 17 00:00:00 2001 From: Joshua Shapiro Date: Mon, 4 Oct 2021 14:50:56 -0400 Subject: [PATCH 9/9] Update inst/rmd/qc_report.rmd Co-authored-by: Ally Hawkins <54039191+allyhawkins@users.noreply.github.com> --- inst/rmd/qc_report.rmd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/inst/rmd/qc_report.rmd b/inst/rmd/qc_report.rmd index aba9dcba..ceff9c22 100644 --- a/inst/rmd/qc_report.rmd +++ b/inst/rmd/qc_report.rmd @@ -186,7 +186,7 @@ if(skip_miQC){ filtered_sce$prob_compromised <- NULL miQC::plotModel(filtered_sce, model = metadata(filtered_sce)$miQC_model) + coord_cartesian(ylim = c(0,100)) + - labs(x = "Number of genes expressed", + labs(x = "Number of genes detected", y = "Percent reads mitochondrial") + theme(plot.margin = margin(rep(20, 4)), legend.position = c(1, 1),