Skip to content

Commit

Permalink
Merge branch 'sjspielman/817-scadvanced-readmes' of github.com:AlexsL…
Browse files Browse the repository at this point in the history
…emonade/training-modules into sjspielman/817-scadvanced-readmes
  • Loading branch information
sjspielman committed Dec 4, 2024
2 parents a8bb412 + 6f2174f commit 0c1132a
Show file tree
Hide file tree
Showing 13 changed files with 1,013 additions and 204 deletions.
4 changes: 4 additions & 0 deletions components/dictionary.txt
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ CellMarker
centric
cheatsheet
cheatsheets
clustering's
clusterProfiler
Cmd
colData
Expand All @@ -70,6 +71,7 @@ ComplexHeatmap
concordantly
conda
config
connectedness
CPMs
csv
Ctrl
Expand Down Expand Up @@ -338,6 +340,7 @@ PBMCs
pDC
PDX
ped
permalink
phenotypes
Phred
Picelli
Expand Down Expand Up @@ -397,6 +400,7 @@ Sca
scater
SCE
SCE's
ScPCA
scran
scRNA
Sebire
Expand Down
4 changes: 2 additions & 2 deletions renv.lock
Original file line number Diff line number Diff line change
Expand Up @@ -4416,13 +4416,13 @@
},
"renv": {
"Package": "renv",
"Version": "1.0.7",
"Version": "1.0.11",
"Source": "Repository",
"Repository": "RSPM",
"Requirements": [
"utils"
],
"Hash": "397b7b2a265bc5a7a06852524dabae20"
"Hash": "47623f66b4e80b3b0587bc5d7b309888"
},
"reprex": {
"Package": "reprex",
Expand Down
105 changes: 95 additions & 10 deletions renv/activate.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
local({

# the requested version of renv
version <- "1.0.7"
version <- "1.0.11"
attr(version, "sha") <- NULL

# the project directory
Expand Down Expand Up @@ -98,6 +98,66 @@ local({
unloadNamespace("renv")

# load bootstrap tools
ansify <- function(text) {
if (renv_ansify_enabled())
renv_ansify_enhanced(text)
else
renv_ansify_default(text)
}

renv_ansify_enabled <- function() {

override <- Sys.getenv("RENV_ANSIFY_ENABLED", unset = NA)
if (!is.na(override))
return(as.logical(override))

pane <- Sys.getenv("RSTUDIO_CHILD_PROCESS_PANE", unset = NA)
if (identical(pane, "build"))
return(FALSE)

testthat <- Sys.getenv("TESTTHAT", unset = "false")
if (tolower(testthat) %in% "true")
return(FALSE)

iderun <- Sys.getenv("R_CLI_HAS_HYPERLINK_IDE_RUN", unset = "false")
if (tolower(iderun) %in% "false")
return(FALSE)

TRUE

}

renv_ansify_default <- function(text) {
text
}

renv_ansify_enhanced <- function(text) {

# R help links
pattern <- "`\\?(renv::(?:[^`])+)`"
replacement <- "`\033]8;;ide:help:\\1\a?\\1\033]8;;\a`"
text <- gsub(pattern, replacement, text, perl = TRUE)

# runnable code
pattern <- "`(renv::(?:[^`])+)`"
replacement <- "`\033]8;;ide:run:\\1\a\\1\033]8;;\a`"
text <- gsub(pattern, replacement, text, perl = TRUE)

# return ansified text
text

}

renv_ansify_init <- function() {

envir <- renv_envir_self()
if (renv_ansify_enabled())
assign("ansify", renv_ansify_enhanced, envir = envir)
else
assign("ansify", renv_ansify_default, envir = envir)

}

`%||%` <- function(x, y) {
if (is.null(x)) y else x
}
Expand Down Expand Up @@ -142,7 +202,10 @@ local({
# compute common indent
indent <- regexpr("[^[:space:]]", lines)
common <- min(setdiff(indent, -1L)) - leave
paste(substring(lines, common), collapse = "\n")
text <- paste(substring(lines, common), collapse = "\n")

# substitute in ANSI links for executable renv code
ansify(text)

}

Expand Down Expand Up @@ -305,8 +368,11 @@ local({
quiet = TRUE
)

if ("headers" %in% names(formals(utils::download.file)))
args$headers <- renv_bootstrap_download_custom_headers(url)
if ("headers" %in% names(formals(utils::download.file))) {
headers <- renv_bootstrap_download_custom_headers(url)
if (length(headers) && is.character(headers))
args$headers <- headers
}

do.call(utils::download.file, args)

Expand Down Expand Up @@ -385,10 +451,21 @@ local({
for (type in types) {
for (repos in renv_bootstrap_repos()) {

# build arguments for utils::available.packages() call
args <- list(type = type, repos = repos)

# add custom headers if available -- note that
# utils::available.packages() will pass this to download.file()
if ("headers" %in% names(formals(utils::download.file))) {
headers <- renv_bootstrap_download_custom_headers(repos)
if (length(headers) && is.character(headers))
args$headers <- headers
}

# retrieve package database
db <- tryCatch(
as.data.frame(
utils::available.packages(type = type, repos = repos),
do.call(utils::available.packages, args),
stringsAsFactors = FALSE
),
error = identity
Expand Down Expand Up @@ -470,23 +547,31 @@ local({

}

renv_bootstrap_github_token <- function() {
for (envvar in c("GITHUB_TOKEN", "GITHUB_PAT", "GH_TOKEN")) {
envval <- Sys.getenv(envvar, unset = NA)
if (!is.na(envval))
return(envval)
}
}

renv_bootstrap_download_github <- function(version) {

enabled <- Sys.getenv("RENV_BOOTSTRAP_FROM_GITHUB", unset = "TRUE")
if (!identical(enabled, "TRUE"))
return(FALSE)

# prepare download options
pat <- Sys.getenv("GITHUB_PAT")
if (nzchar(Sys.which("curl")) && nzchar(pat)) {
token <- renv_bootstrap_github_token()
if (nzchar(Sys.which("curl")) && nzchar(token)) {
fmt <- "--location --fail --header \"Authorization: token %s\""
extra <- sprintf(fmt, pat)
extra <- sprintf(fmt, token)
saved <- options("download.file.method", "download.file.extra")
options(download.file.method = "curl", download.file.extra = extra)
on.exit(do.call(base::options, saved), add = TRUE)
} else if (nzchar(Sys.which("wget")) && nzchar(pat)) {
} else if (nzchar(Sys.which("wget")) && nzchar(token)) {
fmt <- "--header=\"Authorization: token %s\""
extra <- sprintf(fmt, pat)
extra <- sprintf(fmt, token)
saved <- options("download.file.method", "download.file.extra")
options(download.file.method = "wget", download.file.extra = extra)
on.exit(do.call(base::options, saved), add = TRUE)
Expand Down
5 changes: 2 additions & 3 deletions scRNA-seq-advanced/01-read_filter_normalize_scRNA.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -114,9 +114,8 @@ Some of these functions for other common data formats are discussed in [Chapter
```{r read SCE, live=TRUE}
# read SCE from matrix directory
raw_sce <- DropletUtils::read10xCounts(
raw_matrix_dir,
# ensure barcodes are set as column names in the SCE object
col.names = TRUE
raw_matrix_dir,
col.names = TRUE # ensure barcodes are set as column names in the SCE object
)
```

Expand Down
6 changes: 2 additions & 4 deletions scRNA-seq-advanced/02-dataset_integration.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -467,10 +467,8 @@ scater::plotReducedDim(merged_sce,
color_by = "sample",
point_size = 0.5,
point_alpha = 0.2) +
# Use a CVD-friendly color scheme and specify legend name
scale_color_brewer(palette = "Dark2", name = "sample") +
# Modify the legend key so its points are larger and easier to see
guides(color = guide_legend(override.aes = list(size = 3, alpha = 1))) +
scale_color_brewer(palette = "Dark2", name = "sample") + # Use a CVD-friendly color scheme and specify legend name
guides(color = guide_legend(override.aes = list(size = 3, alpha = 1))) + # Modify the legend key with larger, easier to see points
ggtitle("UMAP calculated on each sample separately")
```
Expand Down
2 changes: 1 addition & 1 deletion scRNA-seq-advanced/05-aucell.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -263,7 +263,7 @@ Far fewer genes in the gene set are ranked above the threshold, yielding a lower

Once we have the rankings, we can calculate the AUC scores for both gene sets in all cells with the `AUCell_calcAUC()` function.

```{r calc_auc, live=TRUE}
```{r calc_auc, live = TRUE}
cell_auc <- AUCell_calcAUC(geneSets = ewing_gene_set_collection,
rankings = cell_rankings,
aucMaxRank = auc_max_rank)
Expand Down
2 changes: 1 addition & 1 deletion scRNA-seq-advanced/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ The notebooks that comprise this module are:

This directory also contains pathway analysis modules customized to these data:

- [Gene-set enrichment analysis](04-gene_set_enrichment_analysis.nb.html)
- [Gene Set Enrichment Analysis](04-gene_set_enrichment_analysis.nb.html)
- [Pathway analysis with AUCell](05-aucell.nb.html)

Additional exercise notebooks:
Expand Down
5 changes: 3 additions & 2 deletions scRNA-seq-advanced/exercise_01-citeseq.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ Finally, in this chunk, include code to check if the `output_dir` directory exis
# Define the output RDS file
# Check if the output directory exists, and if not, create it
# Create the output directory if it doesn't exist
```

Expand All @@ -85,6 +85,7 @@ Next, set the random seed to ensure reproducibility of steps involving randomnes


In the following chunk, read in the Cell Ranger results files from `pbmc_dir` using the function `DropletUtils::read10xCounts()`, saving the result as `raw_sce`.
Make sure to also specify the argument `col.names = TRUE` to ensure barcodes are set as column names in the resulting SCE object.

```{r read cellranger, solution = TRUE}
# Read in the raw 10x dataset
Expand Down Expand Up @@ -650,7 +651,7 @@ We expect that no cells have any `IgG1` expression, since we filtered out those
```{r plot umap igg1}
# Plot the UMAP colored by IgG1 expression
scater::plotUMAP(normalized_sce,
colour_by = "IgG1")
color_by = "IgG1")
```
Indeed, this is a very boring plot – all cells have the same expression of 0!

Expand Down
10 changes: 8 additions & 2 deletions scRNA-seq-advanced/exercise_02-integration.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ Make sure that directory actually exists, and create it if it doesn't!
```{r output, solution = TRUE}
# Define a directory to save the integrated SCE object
# Create output directory if it doesn't exist
# Create the output directory if it doesn't exist
```

Expand Down Expand Up @@ -246,11 +246,13 @@ As our PCA is stored in the `"PCA"` `reducedDim` slot, we will similarly store t
```

Now we can use `scater::plotReducedDim()` or `scater::plotUMAP()` to visualize our merged but uncorrected results.
Use the chunk below to create a UMAP plot of the merged data, colored (`colour`ed) by donor.
Use the chunk below to create a UMAP plot of the merged data, colored by donor.

```{r plot uncorrected UMAP, solution = TRUE}
# Plot UMAP colored by donor
# add more CVD-friendly color scale and legend title
```

What do you see in this plot?
Expand Down Expand Up @@ -337,6 +339,8 @@ Ideally, we would see that the donors are all mixed within each "blob" of cells.
```{r fastMNN UMAP by donor, solution = TRUE}
# UMAP plot colored by donor
# add more CVD-friendly color scale and legend title
```

It looks like there is probably some good overlap there, but now we have a different problem.
Expand All @@ -358,6 +362,8 @@ Use the chunk below to do that!
```{r plot shuffled SCE object, solution = TRUE}
# shuffled UMAP plot colored by donor
# add more CVD-friendly color scale and legend title
```

Are you satisfied with this integration result?
Expand Down
11 changes: 5 additions & 6 deletions scRNA-seq-advanced/exercise_03-diffexp.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -63,11 +63,10 @@ data_dir <- file.path("data", "rms")
# as created during instruction
rms_sce_file <- file.path(data_dir, "integrated", "rms_subset_sce.rds")
# analysis results directory, which should exist from instruction
# ensure analysis results directory has been created
# it should exist already from instruction
deseq_dir <- file.path("analysis", "rms", "deseq")
if(!dir.exists(deseq_dir)){
dir.create(deseq_dir, recursive = TRUE)
}
fs::dir_create(deseq_dir)
# File where we will output results from mesoderm DE analysis
deseq_mesoderm_file <- file.path(deseq_dir, "rms_mesoderm_deseq_results.tsv")
Expand Down Expand Up @@ -396,7 +395,7 @@ Let's do the same with the gene that is upregulated in ERMS, and again think abo
# Plot UMAP showing ENSG00000115762 expression across diagnosis groups
scater::plotReducedDim(mesoderm_sce,
dimred = "fastmnn_UMAP",
colour_by = "ENSG00000115762",
color_by = "ENSG00000115762",
other_fields = "diagnosis_group") +
facet_wrap(vars(diagnosis_group)) +
theme_bw()
Expand Down Expand Up @@ -580,7 +579,7 @@ deseq_results_all |>
```

Something you'll see in these results are some pretty different P-values between cell types (also note that the `NA` genes here are lncRNAs with no formally assigned gene symbol).
Specifically, the myoblast P-values are all 3-15 orders of magnitude lower than their mesoderm counterparts, which _may_ be a result of the relatively higher sample size for myoblast tests - larger sample sizes lead to more extreme P-values.
Specifically, the myoblast P-values are all 3-7 orders of magnitude lower than their mesoderm counterparts, which _may_ be a result of the relatively higher sample size for myoblast tests - larger sample sizes lead to more extreme P-values.
Importantly, we do _not_ want to compare these P-values directly and conclude that a given gene was "more or less significant" in one cell type or another, since P-values cannot be compared across tests (again, for a more robust assessment of differential expression, use a multivariate model that accounts for cell types!).

To wrap up, feel free to perform some quick visualization of some of these genes!
Expand Down
Loading

0 comments on commit 0c1132a

Please sign in to comment.