Merge branch 'sjspielman/817-scadvanced-readmes' of github.com:AlexsL…

…emonade/training-modules into sjspielman/817-scadvanced-readmes
AlexsLemonade · Dec 4, 2024 · 0c1132a · 0c1132a
2 parents a8bb412 + 6f2174f
commit 0c1132a
Show file tree

Hide file tree

Showing 13 changed files with 1,013 additions and 204 deletions.
diff --git a/components/dictionary.txt b/components/dictionary.txt
@@ -62,6 +62,7 @@ CellMarker
 centric
 cheatsheet
 cheatsheets
+clustering's
 clusterProfiler
 Cmd
 colData
@@ -70,6 +71,7 @@ ComplexHeatmap
 concordantly
 conda
 config
+connectedness
 CPMs
 csv
 Ctrl
@@ -338,6 +340,7 @@ PBMCs
 pDC
 PDX
 ped
+permalink
 phenotypes
 Phred
 Picelli
@@ -397,6 +400,7 @@ Sca
 scater
 SCE
 SCE's
+ScPCA
 scran
 scRNA
 Sebire

diff --git a/renv.lock b/renv.lock
@@ -4416,13 +4416,13 @@
     },
     "renv": {
       "Package": "renv",
-      "Version": "1.0.7",
+      "Version": "1.0.11",
       "Source": "Repository",
       "Repository": "RSPM",
       "Requirements": [
         "utils"
       ],
-      "Hash": "397b7b2a265bc5a7a06852524dabae20"
+      "Hash": "47623f66b4e80b3b0587bc5d7b309888"
     },
     "reprex": {
       "Package": "reprex",

diff --git a/renv/activate.R b/renv/activate.R
@@ -2,7 +2,7 @@
 local({
 
   # the requested version of renv
-  version <- "1.0.7"
+  version <- "1.0.11"
   attr(version, "sha") <- NULL
 
   # the project directory
@@ -98,6 +98,66 @@ local({
     unloadNamespace("renv")
 
   # load bootstrap tools   
+  ansify <- function(text) {
+    if (renv_ansify_enabled())
+      renv_ansify_enhanced(text)
+    else
+      renv_ansify_default(text)
+  }
+
+  renv_ansify_enabled <- function() {
+
+    override <- Sys.getenv("RENV_ANSIFY_ENABLED", unset = NA)
+    if (!is.na(override))
+      return(as.logical(override))
+
+    pane <- Sys.getenv("RSTUDIO_CHILD_PROCESS_PANE", unset = NA)
+    if (identical(pane, "build"))
+      return(FALSE)
+
+    testthat <- Sys.getenv("TESTTHAT", unset = "false")
+    if (tolower(testthat) %in% "true")
+      return(FALSE)
+
+    iderun <- Sys.getenv("R_CLI_HAS_HYPERLINK_IDE_RUN", unset = "false")
+    if (tolower(iderun) %in% "false")
+      return(FALSE)
+
+    TRUE
+
+  }
+
+  renv_ansify_default <- function(text) {
+    text
+  }
+
+  renv_ansify_enhanced <- function(text) {
+
+    # R help links
+    pattern <- "`\\?(renv::(?:[^`])+)`"
+    replacement <- "`\033]8;;ide:help:\\1\a?\\1\033]8;;\a`"
+    text <- gsub(pattern, replacement, text, perl = TRUE)
+
+    # runnable code
+    pattern <- "`(renv::(?:[^`])+)`"
+    replacement <- "`\033]8;;ide:run:\\1\a\\1\033]8;;\a`"
+    text <- gsub(pattern, replacement, text, perl = TRUE)
+
+    # return ansified text
+    text
+
+  }
+
+  renv_ansify_init <- function() {
+
+    envir <- renv_envir_self()
+    if (renv_ansify_enabled())
+      assign("ansify", renv_ansify_enhanced, envir = envir)
+    else
+      assign("ansify", renv_ansify_default, envir = envir)
+
+  }
+
   `%||%` <- function(x, y) {
     if (is.null(x)) y else x
   }
@@ -142,7 +202,10 @@ local({
     # compute common indent
     indent <- regexpr("[^[:space:]]", lines)
     common <- min(setdiff(indent, -1L)) - leave
-    paste(substring(lines, common), collapse = "\n")
+    text <- paste(substring(lines, common), collapse = "\n")
+
+    # substitute in ANSI links for executable renv code
+    ansify(text)
 
   }
 
@@ -305,8 +368,11 @@ local({
       quiet    = TRUE
     )
 
-    if ("headers" %in% names(formals(utils::download.file)))
-      args$headers <- renv_bootstrap_download_custom_headers(url)
+    if ("headers" %in% names(formals(utils::download.file))) {
+      headers <- renv_bootstrap_download_custom_headers(url)
+      if (length(headers) && is.character(headers))
+        args$headers <- headers
+    }
 
     do.call(utils::download.file, args)
 
@@ -385,10 +451,21 @@ local({
     for (type in types) {
       for (repos in renv_bootstrap_repos()) {
 
+        # build arguments for utils::available.packages() call
+        args <- list(type = type, repos = repos)
+
+        # add custom headers if available -- note that
+        # utils::available.packages() will pass this to download.file()
+        if ("headers" %in% names(formals(utils::download.file))) {
+          headers <- renv_bootstrap_download_custom_headers(repos)
+          if (length(headers) && is.character(headers))
+            args$headers <- headers
+        }
+
         # retrieve package database
         db <- tryCatch(
           as.data.frame(
-            utils::available.packages(type = type, repos = repos),
+            do.call(utils::available.packages, args),
             stringsAsFactors = FALSE
           ),
           error = identity
@@ -470,23 +547,31 @@ local({
 
   }
 
+  renv_bootstrap_github_token <- function() {
+    for (envvar in c("GITHUB_TOKEN", "GITHUB_PAT", "GH_TOKEN")) {
+      envval <- Sys.getenv(envvar, unset = NA)
+      if (!is.na(envval))
+        return(envval)
+    }
+  }
+
   renv_bootstrap_download_github <- function(version) {
 
     enabled <- Sys.getenv("RENV_BOOTSTRAP_FROM_GITHUB", unset = "TRUE")
     if (!identical(enabled, "TRUE"))
       return(FALSE)
 
     # prepare download options
-    pat <- Sys.getenv("GITHUB_PAT")
-    if (nzchar(Sys.which("curl")) && nzchar(pat)) {
+    token <- renv_bootstrap_github_token()
+    if (nzchar(Sys.which("curl")) && nzchar(token)) {
       fmt <- "--location --fail --header \"Authorization: token %s\""
-      extra <- sprintf(fmt, pat)
+      extra <- sprintf(fmt, token)
       saved <- options("download.file.method", "download.file.extra")
       options(download.file.method = "curl", download.file.extra = extra)
       on.exit(do.call(base::options, saved), add = TRUE)
-    } else if (nzchar(Sys.which("wget")) && nzchar(pat)) {
+    } else if (nzchar(Sys.which("wget")) && nzchar(token)) {
       fmt <- "--header=\"Authorization: token %s\""
-      extra <- sprintf(fmt, pat)
+      extra <- sprintf(fmt, token)
       saved <- options("download.file.method", "download.file.extra")
       options(download.file.method = "wget", download.file.extra = extra)
       on.exit(do.call(base::options, saved), add = TRUE)

diff --git a/scRNA-seq-advanced/01-read_filter_normalize_scRNA.Rmd b/scRNA-seq-advanced/01-read_filter_normalize_scRNA.Rmd
@@ -114,9 +114,8 @@ Some of these functions for other common data formats are discussed in [Chapter
 ```{r read SCE, live=TRUE}
 # read SCE from matrix directory
 raw_sce <- DropletUtils::read10xCounts(
-  raw_matrix_dir, 
-  # ensure barcodes are set as column names in the SCE object
-  col.names = TRUE
+  raw_matrix_dir,
+  col.names = TRUE # ensure barcodes are set as column names in the SCE object
 )
 ```
 

diff --git a/scRNA-seq-advanced/02-dataset_integration.Rmd b/scRNA-seq-advanced/02-dataset_integration.Rmd
@@ -467,10 +467,8 @@ scater::plotReducedDim(merged_sce,
                        color_by = "sample",
                        point_size = 0.5,
                        point_alpha = 0.2) +
-  # Use a CVD-friendly color scheme and specify legend name
-  scale_color_brewer(palette = "Dark2", name = "sample") +
-  # Modify the legend key so its points are larger and easier to see
-  guides(color = guide_legend(override.aes = list(size = 3, alpha = 1))) +
+  scale_color_brewer(palette = "Dark2", name = "sample") + # Use a CVD-friendly color scheme and specify legend name
+  guides(color = guide_legend(override.aes = list(size = 3, alpha = 1))) + # Modify the legend key with larger, easier to see points
   ggtitle("UMAP calculated on each sample separately")
 
 ```

diff --git a/scRNA-seq-advanced/05-aucell.Rmd b/scRNA-seq-advanced/05-aucell.Rmd
@@ -263,7 +263,7 @@ Far fewer genes in the gene set are ranked above the threshold, yielding a lower
 
 Once we have the rankings, we can calculate the AUC scores for both gene sets in all cells with the `AUCell_calcAUC()` function.
 
-```{r calc_auc, live=TRUE}
+```{r calc_auc, live = TRUE}
 cell_auc <- AUCell_calcAUC(geneSets = ewing_gene_set_collection, 
                            rankings = cell_rankings,
                            aucMaxRank = auc_max_rank)

diff --git a/scRNA-seq-advanced/README.md b/scRNA-seq-advanced/README.md
@@ -13,7 +13,7 @@ The notebooks that comprise this module are:
 
 This directory also contains pathway analysis modules customized to these data:
 
-- [Gene-set enrichment analysis](04-gene_set_enrichment_analysis.nb.html)
+- [Gene Set Enrichment Analysis](04-gene_set_enrichment_analysis.nb.html)
 - [Pathway analysis with AUCell](05-aucell.nb.html)
 
 Additional exercise notebooks:

diff --git a/scRNA-seq-advanced/exercise_01-citeseq.Rmd b/scRNA-seq-advanced/exercise_01-citeseq.Rmd
@@ -72,7 +72,7 @@ Finally, in this chunk, include code to check if the `output_dir` directory exis
 
 # Define the output RDS file
 
-# Check if the output directory exists, and if not, create it
+# Create the output directory if it doesn't exist
 
 ```
 
@@ -85,6 +85,7 @@ Next, set the random seed to ensure reproducibility of steps involving randomnes
 
 
 In the following chunk, read in the Cell Ranger results files from `pbmc_dir` using the function `DropletUtils::read10xCounts()`, saving the result as `raw_sce`.
+Make sure to also specify the argument `col.names = TRUE` to ensure barcodes are set as column names in the resulting SCE object.
 
 ```{r read cellranger, solution = TRUE}
 # Read in the raw 10x dataset
@@ -650,7 +651,7 @@ We expect that no cells have any `IgG1` expression, since we filtered out those
 ```{r plot umap igg1}
 # Plot the UMAP colored by IgG1 expression
 scater::plotUMAP(normalized_sce,
-                 colour_by = "IgG1")
+                 color_by = "IgG1")
 ```
 Indeed, this is a very boring plot – all cells have the same expression of 0!
 

diff --git a/scRNA-seq-advanced/exercise_02-integration.Rmd b/scRNA-seq-advanced/exercise_02-integration.Rmd
@@ -58,7 +58,7 @@ Make sure that directory actually exists, and create it if it doesn't!
 ```{r output, solution = TRUE}
 # Define a directory to save the integrated SCE object
 
-# Create output directory if it doesn't exist
+# Create the output directory if it doesn't exist
 
 ```
 
@@ -246,11 +246,13 @@ As our PCA is stored in the `"PCA"` `reducedDim` slot, we will similarly store t
 ```
 
 Now we can use `scater::plotReducedDim()` or `scater::plotUMAP()` to visualize our merged but uncorrected results.
-Use the chunk below to create a UMAP plot of the merged data, colored (`colour`ed) by donor.
+Use the chunk below to create a UMAP plot of the merged data, colored by donor.
 
 ```{r plot uncorrected UMAP, solution = TRUE}
 # Plot UMAP colored by donor
 
+  # add more CVD-friendly color scale and legend title
+
 ```
 
 What do you see in this plot?
@@ -337,6 +339,8 @@ Ideally, we would see that the donors are all mixed within each "blob" of cells.
 ```{r fastMNN UMAP by donor, solution = TRUE}
 # UMAP plot colored by donor
 
+  # add more CVD-friendly color scale and legend title
+
 ```
 
 It looks like there is probably some good overlap there, but now we have a different problem.
@@ -358,6 +362,8 @@ Use the chunk below to do that!
 ```{r plot shuffled SCE object, solution = TRUE}
 # shuffled UMAP plot colored by donor
 
+  # add more CVD-friendly color scale and legend title
+
 ```
 
 Are you satisfied with this integration result?

diff --git a/scRNA-seq-advanced/exercise_03-diffexp.Rmd b/scRNA-seq-advanced/exercise_03-diffexp.Rmd
@@ -63,11 +63,10 @@ data_dir <- file.path("data", "rms")
 #  as created during instruction
 rms_sce_file <- file.path(data_dir, "integrated",  "rms_subset_sce.rds")
 
-# analysis results directory, which should exist from instruction
+# ensure analysis results directory has been created
+# it should exist already from instruction
 deseq_dir <- file.path("analysis", "rms", "deseq")
-if(!dir.exists(deseq_dir)){
-  dir.create(deseq_dir, recursive = TRUE)
-}
+fs::dir_create(deseq_dir)
 
 # File where we will output results from mesoderm DE analysis
 deseq_mesoderm_file <- file.path(deseq_dir, "rms_mesoderm_deseq_results.tsv")
@@ -396,7 +395,7 @@ Let's do the same with the gene that is upregulated in ERMS, and again think abo
 # Plot UMAP showing ENSG00000115762 expression across diagnosis groups
 scater::plotReducedDim(mesoderm_sce,
                        dimred = "fastmnn_UMAP",
-                       colour_by = "ENSG00000115762",
+                       color_by = "ENSG00000115762",
                        other_fields = "diagnosis_group") +
   facet_wrap(vars(diagnosis_group)) +
   theme_bw()
@@ -580,7 +579,7 @@ deseq_results_all |>
 ```
 
 Something you'll see in these results are some pretty different P-values between cell types (also note that the `NA` genes here are lncRNAs with no formally assigned gene symbol).
-Specifically, the myoblast P-values are all 3-15 orders of magnitude lower than their mesoderm counterparts, which _may_ be a result of the relatively higher sample size for myoblast tests - larger sample sizes lead to more extreme P-values.
+Specifically, the myoblast P-values are all 3-7 orders of magnitude lower than their mesoderm counterparts, which _may_ be a result of the relatively higher sample size for myoblast tests - larger sample sizes lead to more extreme P-values.
 Importantly, we do _not_ want to compare these P-values directly and conclude that a given gene was "more or less significant" in one cell type or another, since P-values cannot be compared across tests (again, for a more robust assessment of differential expression, use a multivariate model that accounts for cell types!).
 
 To wrap up, feel free to perform some quick visualization of some of these genes!