Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add plots to QC report #57

Merged
merged 10 commits into from
Oct 4, 2021
Merged
Show file tree
Hide file tree
Changes from 9 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ Imports:
ggplot2,
glue,
jsonlite,
kableExtra,
magrittr,
Matrix,
Matrix.utils,
Expand Down
1 change: 1 addition & 0 deletions docker/scripts/install_scpca_deps.sh
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ install2.r --error --skipinstalled -n $NCPUS \
DBI \
devtools \
here \
kableExtra \
matrixStats \
optparse \
rmarkdown \
Expand Down
123 changes: 98 additions & 25 deletions inst/rmd/qc_report.rmd
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ output:
toc: true
toc_depth: 2
toc_float: true
number_sections: true
number_sections: false
code_download: true
---

Expand All @@ -27,13 +27,13 @@ library(SingleCellExperiment)
library(dplyr)
library(ggplot2)

# Set standard ggplot theme
# Set default ggplot theme
theme_set(theme_bw())
```

```{r sce_setup}
# save some typing later
sample <- params$sample
sample_id <- params$sample
unfiltered_sce <- params$unfiltered_sce
filtered_sce <- params$filtered_sce

Expand All @@ -47,11 +47,22 @@ if (!has_filtered){
if (is.null(unfiltered_sce$sum)){
unfiltered_sce <- scuttle::addPerCellQCMetrics(unfiltered_sce)
}
if (is.null(filtered_sce$sum)){
filtered_sce <- scuttle::addPerCellQCMetrics(filtered_sce)
filtered_sce$subsets_mito_percent <- NA_real_
skip_miQC <- TRUE
} else{
skip_miQC <- FALSE
}
# add miQC model if missing
if (is.null(metadata(filtered_sce)$miQC_model) & !skip_miQC){
metadata(filtered_sce)$miQC_model <- miQC::mixtureModel(filtered_sce)
}
```

# Introduction

# Processing Information for `r sample`
# Processing Information for `r sample_id`

## Raw Sample Metrics

Expand All @@ -60,39 +71,48 @@ if (is.null(unfiltered_sce$sum)){
unfiltered_meta <- metadata(unfiltered_sce)

sample_information <- tibble::tibble(
"Sample id" = sample,
"Tech version" = unfiltered_meta$tech_version,
"Sample id" = sample_id,
"Tech version" = format(unfiltered_meta$tech_version), # format to keep nulls
"Number of reads sequenced" = format(unfiltered_meta$total_reads, big.mark = ',', scientific = FALSE),
"Number of mapped reads" = format(unfiltered_meta$mapped_reads, big.mark = ',', scientific = FALSE),
"Number of cells reported by alevin-fry" = format(unfiltered_meta$af_num_cells, big.mark = ',', scientific = FALSE)
) %>%
mutate(across(.fns = ~ifelse(.x == "NULL", "N/A", .x))) %>% # reformat nulls
t()

# make table with sample information
knitr::kable(sample_information, "simple")
knitr::kable(sample_information) %>%
kableExtra::kable_styling(bootstrap_options = "striped",
full_width = FALSE,
position = "left")
```

## Pre-Processing Information

```{r }
processing_info <- tibble::tibble(
"Salmon version" = unfiltered_meta$salmon_version,
"Alevin-fry version" = unfiltered_meta$alevinfry_version,
"Transcriptome index" = unfiltered_meta$reference_index,
"Filtering method" = unfiltered_meta$af_permit_type,
"Resolution" = unfiltered_meta$af_resolution,
"Salmon version" = format(unfiltered_meta$salmon_version),
"Alevin-fry version" = format(unfiltered_meta$alevinfry_version),
"Transcriptome index" = format(unfiltered_meta$reference_index),
"Filtering method" = format(unfiltered_meta$af_permit_type),
"Resolution" = format(unfiltered_meta$af_resolution),
"Transcripts included" = dplyr::case_when(
unfiltered_meta$transcript_type == "spliced" ~ "Spliced only",
unfiltered_meta$transcript_type == "unspliced" ~ "Spliced and unspliced" )
format(unfiltered_meta$transcript_type) == "spliced" ~ "Spliced only",
format(unfiltered_meta$transcript_type) == "unspliced" ~ "Spliced and unspliced",
TRUE ~ format(unfiltered_meta$transcript_type))
) %>%
mutate(across(.fns = ~ifelse(.x == "NULL", "N/A", .x))) %>%
t()


# make table with processing information
knitr::kable(processing_info, "simple")
knitr::kable(processing_info) %>%
kableExtra::kable_styling(bootstrap_options = "striped",
full_width = FALSE,
position = "left")
```

# `r sample` Experiment Summary
# `r sample_id` Experiment Summary

This sample has `r ncol(unfiltered_sce)` cells, assayed for `r nrow(unfiltered_sce)` genes.

Expand All @@ -103,32 +123,85 @@ if(has_filtered){
```

## Knee plot
```{r}
```{r, fig.alt="Knee plot of filtered and unfiltered droplets"}
unfiltered_celldata <- data.frame(colData(unfiltered_sce)) %>%
mutate(
rank = rank(- unfiltered_sce$sum), # using full spec for clarity
rank = rank(- unfiltered_sce$sum, ties.method = "first"), # using full spec for clarity
filter_status = factor(ifelse(colnames(unfiltered_sce) %in% colnames(filtered_sce),
"Passed", "Excluded"),
levels = c("Passed", "Excluded"))
)
) %>%
filter(sum > 0) %>% # remove zeros for plotting
arrange(desc(rank))

ggplot(unfiltered_celldata, aes(x = rank, y = sum, color = filter_status))+
geom_point(aes(shape = filter_status)) +
scale_y_log10() +
scale_color_manual(values = c("navyblue", "grey")) +
geom_point(aes(shape = filter_status),alpha = 0.2, size = 2) +
scale_x_log10(labels = scales::label_number()) +
scale_y_log10(labels = scales::label_number()) +
scale_color_manual(values = c("darkgreen", "grey80")) +
scale_shape_manual(values = c(16, 1)) + # filled and unfilled circles
labs(
x = "Rank",
y = "UMI count",
y = "Total UMI count",
color = "Filter status",
shape = "Filter status"
)
) +
guides(color = guide_legend(override.aes = list(alpha = 1))) +
theme(plot.margin = margin(rep(20, 4)),
legend.position = c(0, 0),
legend.justification = c(0,0),
legend.background = element_rect(color = "grey20", size = 0.25),
legend.box.margin = margin(rep(5, 4)))
```


## Cell read metrics

```{r, fig.alt="Total UMI x genes expressed"}
filtered_celldata <- data.frame(colData(filtered_sce))

ggplot(filtered_celldata,
aes (x = sum,
y = detected,
color = subsets_mito_percent)) +
geom_point(alpha = 0.3) +
scale_color_viridis_c(limits = c(0, 100)) +
labs(x = "Total UMI count",
y = "Number of genes detected",
color = "Percent reads\nmitochondrial") +
theme(plot.margin = margin(rep(20, 4)),
legend.position = c(0, 1),
legend.justification = c(0,1),
legend.background = element_rect(color = "grey20", size = 0.25),
legend.box.margin = margin(rep(5, 4)))
```

## miQC model diagnostics

```{r, fig.alt="miQC model diagnostics plot", results='asis', warning=FALSE}
if(skip_miQC){
cat("miQC model not created, skipping miQC plot. Usually this is because mitochondrial gene data was not available.")
}else{
# remove prob_compromised if it exists, as this will cause errors with plotModel
filtered_sce$prob_compromised <- NULL
miQC::plotModel(filtered_sce, model = metadata(filtered_sce)$miQC_model) +
coord_cartesian(ylim = c(0,100)) +
labs(x = "Number of genes expressed",
jashapiro marked this conversation as resolved.
Show resolved Hide resolved
y = "Percent reads mitochondrial") +
theme(plot.margin = margin(rep(20, 4)),
legend.position = c(1, 1),
legend.justification = c(1,1),
legend.background = element_rect(color = "grey20", size = 0.25),
legend.box.margin = margin(rep(5, 4)))
}
```


# Session Info
<details>
<summary>R session information</summary>
```{r session_info}
sessioninfo::session_info()
```

</details>