Merge pull request #759 from joshua-d-campbell/master

Bioc update v2.14.0
compbiomed · May 3, 2024 · 578583f · 578583f
2 parents 14c9213 + b2d2806
commit 578583f
Show file tree

Hide file tree

Showing 423 changed files with 1,612 additions and 1,666 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,7 +1,7 @@
 Package: singleCellTK
 Type: Package
 Title: Comprehensive and Interactive Analysis of Single Cell RNA-Seq Data
-Version: 2.12.2
+Version: 2.14.0
 Authors@R: c(person(given="Yichen", family="Wang", email="[email protected]", role=c("aut"),
                     comment = c(ORCID = "0000-0003-4347-5199")),
              person(given="Irzam", family="Sarfraz", email="[email protected]", role=c("aut"),
@@ -62,13 +62,13 @@ Imports:
     ggrepel,
     ggtree,
     gridExtra,
-    GSVA (>= 1.26.0),
+    GSVA (>= 1.50.0),
     GSVAdata,
     igraph,
     KernSmooth,
     limma,
     MAST,
-    Matrix (>= 1.5-3),
+    Matrix (>= 1.6-1),
     matrixStats,
     methods,
     msigdbr,
@@ -119,7 +119,7 @@ Imports:
     utils,
     stats,
     zellkonverter
-RoxygenNote: 7.2.3
+RoxygenNote: 7.3.1
 Suggests:
     testthat,
     Rsubread,

diff --git a/Dockerfile b/Dockerfile
@@ -22,7 +22,7 @@ RUN echo "deb [signed-by=/usr/share/keyrings/cloud.google.gpg] http://packages.c
 RUN mkdir -p /SCTK_docker/ && mkdir /SCTK_docker/script && mkdir /SCTK_docker/modes 
 
 #ADD ./install_packages.R /SCTK_docker/script
-ADD ./exec/SCTK_runQC.R /SCTK_docker/script
+ADD ./exec/SCTK_runQC.R ./SCTK_docker/script
 
 #Install necessary R packages
 RUN R -e "install.packages('BiocManager')"
@@ -42,10 +42,12 @@ RUN R -e "BiocManager::install('scRNAseq')"
 RUN R -e "BiocManager::install('celda')"
 #RUN R -e "devtools::install_github('wleepang/shiny-directory-input')"
 RUN R -e "options(timeout=360000)" \
-	&& R -e "devtools::install_github('compbiomed/singleCellTK', force = TRUE, dependencies = TRUE)"
-
+	&& R -e "devtools::install_github('compbiomed/[email protected]', force = TRUE, dependencies = TRUE)"
+RUN R -e "install.packages('Matrix', version = '1.6-1')"
+RUN R -e "install.packages('irlba', type = 'source')"
+RUN R -e "install.packages('SeuratObject', type = 'source')"
 RUN R -e "install.packages('reticulate')"
 RUN R -e "Sys.setenv(RETICULATE_PYTHON = '/usr/bin/python3')"
 RUN R -e "reticulate::py_config()"
 
-ENTRYPOINT ["Rscript", "/usr/local/lib/R/site-library/singleCellTK/exec/SCTK_runQC.R"]
+ENTRYPOINT ["Rscript", "/usr/local/lib/R/site-library/singleCellTK/exec/SCTK_runQC.R"]
diff --git a/NEWS.md b/NEWS.md
@@ -1,3 +1,8 @@
+Changes in Version 2.14.0 (2024-05-03)
+================================================================================
+* Updated version to match Bioconductor 3.19
+* Update runGSVA function to work with newer GSVA package
+
 Changes in Version 2.12.2 (2024-01-28)
 ================================================================================
 * Added support for Seurat V5

diff --git a/R/getTopHVG.R b/R/getTopHVG.R
@@ -15,8 +15,8 @@
 #' \code{"seurat_v3"}. Default \code{"vst"}
 #' @param hvgNumber Specify the number of top variable genes to extract.
 #' @param useFeatureSubset Get the feature names in the HVG list set by
-#' \code{setTopHVG}. Will ignore \code{method} and \code{hvgNumber} if not
-#' \code{NULL}. Default \code{NULL}.
+#' \code{setTopHVG}. \code{method} and \code{hvgNumber} will not be used if
+#' this is not \code{NULL}. Default \code{"hvf"}.
 #' @param featureDisplay A character string for the \code{rowData} variable name
 #' to indicate what type of feature ID should be displayed. If set by
 #' \code{\link{setSCTKDisplayRow}}, will by default use it. If \code{NULL}, will
@@ -31,8 +31,7 @@
 #' the selected HVGs and store this subset in the \code{altExps} slot, named by
 #' \code{hvgListName}. Default \code{FALSE}.
 #' @param featureSubsetName A character string for the \code{rowData} variable
-#' name to store a logical index of selected features. Default \code{NULL}, will
-#' be determined basing on other parameters.
+#' name to store a logical index of selected features. Default \code{"hvg2000"}.
 #' @return
 #' \item{getTopHVG}{A character vector of the top \code{hvgNumber} variable
 #' feature names}
@@ -44,9 +43,23 @@
 #' @author Irzam Sarfraz, Yichen Wang
 #' @examples
 #' data("scExample", package = "singleCellTK")
-#' sce <- runSeuratFindHVG(sce)
-#' hvgs <- getTopHVG(sce, hvgNumber = 10)
-#' sce <- setTopHVG(sce, method = "vst", hvgNumber = 5)
+#' 
+#' # Create a "highly variable feature" subset using Seurat's vst method:
+#' sce <- runSeuratFindHVG(sce,  method = "vst", hvgNumber = 2000,
+#'        createFeatureSubset = "hvf")
+#'        
+#' # Get the list of genes for a feature subset:
+#' hvgs <- getTopHVG(sce, useFeatureSubset = "hvf")
+#' 
+#' # Create a new feature subset on the fly without rerunning the algorithm:
+#' sce <- setTopHVG(sce, method = "vst", hvgNumber = 100,
+#'                 featureSubsetName = "hvf100")
+#' hvgs <- getTopHVG(sce, useFeatureSubset = "hvf100")
+#' 
+#' # Get a list of variable features without creating a new feature subset:
+#' hvgs <- getTopHVG(sce, useFeatureSubset = NULL,
+#'                   method = "vst", hvgNumber = 10)
+#' 
 #' @seealso \code{\link{runFeatureSelection}}, \code{\link{runSeuratFindHVG}},
 #' \code{\link{runModelGeneVar}}, \code{\link{plotTopHVG}}
 #' @importFrom SummarizedExperiment rowData
@@ -56,7 +69,7 @@ getTopHVG <- function(inSCE,
                                  "mean.var.plot", "modelGeneVar", "seurat", 
                                  "seurat_v3", "cell_ranger"),
                       hvgNumber = 2000,
-                      useFeatureSubset = NULL,
+                      useFeatureSubset = "hvf",
                       featureDisplay = metadata(inSCE)$featureDisplay) {
     method <- match.arg(method)
     topGenes <- character()
@@ -85,6 +98,10 @@ getTopHVG <- function(inSCE,
         geneIdx <- featureIndex(topGenes, inSCE)
         topGenes <- rowData(inSCE)[[featureDisplay]][geneIdx]
     }
+
+    topGenes <- topGenes[!is.na(topGenes)]
+    topGenes <- topGenes[1:hvgNumber]
+
     return(topGenes)
 }
 
@@ -93,14 +110,14 @@ getTopHVG <- function(inSCE,
 #' @importFrom SingleCellExperiment rowSubset
 #' @importFrom S4Vectors metadata<-
 setTopHVG <- function(inSCE,
-                      method =  c("vst", "dispersion",
-                                  "mean.var.plot", "modelGeneVar", "seurat", 
-                                  "seurat_v3", "cell_ranger"),
+                      method = c("vst", "dispersion",
+                                 "mean.var.plot", "modelGeneVar", "seurat", 
+                                 "seurat_v3", "cell_ranger"),
                       hvgNumber = 2000,
-                      featureSubsetName = NULL,
+                      featureSubsetName = "hvg2000",
                       genes = NULL, genesBy = NULL,
                       altExp = FALSE) {
-    method <- match.arg(method)
+    method <- match.arg(method, choices = c("vst", "dispersion", "mean.var.plot", "modelGeneVar", "seurat", "seurat_v3", "cell_ranger"))
     features <- character()
     useAssay <- NULL
     if (!is.null(genes)) {
@@ -113,7 +130,7 @@ setTopHVG <- function(inSCE,
         }
     } else {
         # Use pre-calculated variability metrics
-        features <- getTopHVG(inSCE, method = method, hvgNumber = hvgNumber,
+        features <- getTopHVG(inSCE, method = method, hvgNumber = hvgNumber,useFeatureSubset = NULL,
                               featureDisplay = NULL)
         useAssay <- metadata(inSCE)$sctk$runFeatureSelection[[method]]$useAssay
     }

diff --git a/R/importGeneSets.R b/R/importGeneSets.R
@@ -315,7 +315,8 @@ importGeneSetsFromCollection <- function(inSCE, geneSetCollection,
 #' @param inSCE Input \linkS4class{SingleCellExperiment} object.
 #' @param categoryIDs Character vector containing the MSigDB gene set ids.
 #' The column \code{ID} in the table returned by \code{getMSigDBTable()} shows
-#' the list of possible gene set IDs that can be obtained.
+#' the list of possible gene set IDs that can be obtained. 
+#' Default is \code{"H"}.
 #' @param species Character. Species available can be found using the function
 #' \code{\link[msigdbr]{msigdbr_show_species}}. Default \code{"Homo sapiens"}.
 #' @param mapping Character. One of "gene_symbol", "human_gene_symbol", or
@@ -357,7 +358,7 @@ importGeneSetsFromCollection <- function(inSCE, geneSetCollection,
 #'                                 by = "feature_name")
 #' @export
 #' @importFrom SummarizedExperiment rowData
-importGeneSetsFromMSigDB <- function(inSCE, categoryIDs,
+importGeneSetsFromMSigDB <- function(inSCE, categoryIDs = "H",
                                      species = "Homo sapiens",
                                      mapping = c("gene_symbol",
                                             "human_gene_symbol",

diff --git a/R/plotBatchVariance.R b/R/plotBatchVariance.R
@@ -124,7 +124,7 @@ plotBatchCorrCompare <- function(inSCE, corrMat, batch = NULL, condition = NULL,
                                  title = "Batch Variance before correction") +
     ggplot2::theme(text=ggplot2::element_text(size=10))
 
-  inSCE <- runUMAP(inSCE, useAssay = origAssay, useReducedDim = NULL, 
+  inSCE <- runUMAP(inSCE, useAssay = origAssay, useReducedDim = NULL, initialDims = 25,
                    reducedDimName = "umap.before")
   umap.before <- plotSCEDimReduceColData(inSCE, batch, "umap.before",
                                          shape = condition, axisLabelSize = 9,
@@ -146,10 +146,10 @@ plotBatchCorrCompare <- function(inSCE, corrMat, batch = NULL, condition = NULL,
       ggplot2::theme(text=ggplot2::element_text(size=10))
 
     if (method == "ComBatSeq") {
-      inSCE <- runUMAP(inSCE, useAssay = corrMat, useReducedDim = NULL, 
+      inSCE <- runUMAP(inSCE, useAssay = corrMat, useReducedDim = NULL, initialDims = 25,
                        logNorm = TRUE, reducedDimName = "umap.after")
     } else {
-      inSCE <- runUMAP(inSCE, useAssay = corrMat, useReducedDim = NULL,
+      inSCE <- runUMAP(inSCE, useAssay = corrMat, useReducedDim = NULL, initialDims = 25,
                        logNorm = FALSE, reducedDimName = "umap.after")
     }
   } else if (matType == "altExp") {
@@ -175,7 +175,7 @@ plotBatchCorrCompare <- function(inSCE, corrMat, batch = NULL, condition = NULL,
       SingleCellExperiment::reducedDim(inSCE, "umap.after") <-
         SingleCellExperiment::reducedDim(inSCE, corrMat)
     } else {
-      inSCE <- runUMAP(inSCE, useReducedDim = corrMat,
+      inSCE <- runUMAP(inSCE, useReducedDim = corrMat, initialDims = 25,
                        reducedDimName = "umap.after")
     }
   } else {

diff --git a/R/plotDEAnalysis.R b/R/plotDEAnalysis.R
@@ -59,7 +59,8 @@
 #' data("sceBatches")
 #' logcounts(sceBatches) <- log1p(counts(sceBatches))
 #' sce.w <- subsetSCECols(sceBatches, colData = "batch == 'w'")
-#' sce.w <- runWilcox(sce.w, class = "cell_type", classGroup1 = "alpha",
+#' sce.w <- runWilcox(sce.w, class = "cell_type",
+#'                    classGroup1 = "alpha", classGroup2 = "beta",
 #'                    groupName1 = "w.alpha", groupName2 = "w.beta",
 #'                    analysisName = "w.aVSb")
 #' plotDEGViolin(sce.w, "w.aVSb")
@@ -169,7 +170,8 @@ plotDEGViolin <- function(inSCE, useResult, threshP = FALSE, labelBy = NULL,
 #' data("sceBatches")
 #' logcounts(sceBatches) <- log1p(counts(sceBatches))
 #' sce.w <- subsetSCECols(sceBatches, colData = "batch == 'w'")
-#' sce.w <- runWilcox(sce.w, class = "cell_type", classGroup1 = "alpha",
+#' sce.w <- runWilcox(sce.w, class = "cell_type",
+#'                    classGroup1 = "alpha", classGroup2 = "beta",
 #'                    groupName1 = "w.alpha", groupName2 = "w.beta",
 #'                    analysisName = "w.aVSb")
 #' plotDEGRegression(sce.w, "w.aVSb")
@@ -309,7 +311,8 @@ plotDEGRegression <- function(inSCE, useResult, threshP = FALSE, labelBy = NULL,
 #' data("sceBatches")
 #' sceBatches <- scaterlogNormCounts(sceBatches, "logcounts")
 #' sce.w <- subsetSCECols(sceBatches, colData = "batch == 'w'")
-#' sce.w <- runWilcox(sce.w, class = "cell_type", classGroup1 = "alpha",
+#' sce.w <- runWilcox(sce.w, class = "cell_type",
+#'                    classGroup1 = "alpha", classGroup2 = "beta",
 #'                    groupName1 = "w.alpha", groupName2 = "w.beta",
 #'                    analysisName = "w.aVSb")
 #' getDEGTopTable(sce.w, "w.aVSb")
@@ -404,7 +407,8 @@ getDEGTopTable <- function(inSCE, useResult,
 #' data("sceBatches")
 #' logcounts(sceBatches) <- log1p(counts(sceBatches))
 #' sce.w <- subsetSCECols(sceBatches, colData = "batch == 'w'")
-#' sce.w <- runWilcox(sce.w, class = "cell_type", classGroup1 = "alpha",
+#' sce.w <- runWilcox(sce.w, class = "cell_type",
+#'                    classGroup1 = "alpha", classGroup2 = "beta",
 #'                    groupName1 = "w.alpha", groupName2 = "w.beta",
 #'                    analysisName = "w.aVSb")
 #' plotDEGHeatmap(sce.w, "w.aVSb")
@@ -583,7 +587,8 @@ plotDEGHeatmap <- function(inSCE, useResult, onlyPos = FALSE,
 #' data("sceBatches")
 #' sceBatches <- scaterlogNormCounts(sceBatches, "logcounts")
 #' sce.w <- subsetSCECols(sceBatches, colData = "batch == 'w'")
-#' sce.w <- runWilcox(sce.w, class = "cell_type", classGroup1 = "alpha",
+#' sce.w <- runWilcox(sce.w, class = "cell_type",
+#'                    classGroup1 = "alpha", classGroup2 = "beta",
 #'                    groupName1 = "w.alpha", groupName2 = "w.beta",
 #'                    analysisName = "w.aVSb")
 #' plotDEGVolcano(sce.w, "w.aVSb")

diff --git a/R/plotDimRed.R b/R/plotDimRed.R
@@ -2,7 +2,7 @@
 #' and UMAP
 #'
 #' @param inSCE Input SCE object
-#' @param useReduction Reduction to plot
+#' @param useReduction Reduction to plot. Default is \code{"PCA"}.
 #' @param showLegend If legends should be plotted or not
 #' @param xDim Numeric value indicating the dimension to use for X-axis.
 #'  Default is 1 (refers to PC1).
@@ -17,7 +17,7 @@
 #' @examples
 #' data("mouseBrainSubsetSCE", package = "singleCellTK")
 #' plotDimRed(mouseBrainSubsetSCE, "PCA_logcounts")
-plotDimRed <- function(inSCE, useReduction,
+plotDimRed <- function(inSCE, useReduction = "PCA",
                        showLegend = FALSE,
                        xDim = 1,
                        yDim = 2,

diff --git a/R/plotPCA.R b/R/plotPCA.R
@@ -24,7 +24,7 @@ plotPCA <- function(inSCE, colorBy=NULL, shape=NULL, pcX="PC1",
   if(!(reducedDimName %in% names(SingleCellExperiment::reducedDims(inSCE)))){
     if (runPCA){
       inSCE <- scaterPCA(inSCE, useAssay = useAssay,
-                      reducedDimName = reducedDimName)
+                      reducedDimName = reducedDimName, useFeatureSubset = NULL)
     } else {
       stop(reducedDimName,
            " dimension not found. Run scaterPCA() or set runPCA to TRUE.")

diff --git a/R/plotTopHVG.R b/R/plotTopHVG.R
@@ -8,9 +8,9 @@
 #' name to store a logical index of selected features. Default \code{NULL}. See
 #' details.
 #' @param hvgNumber Specify the number of top genes to highlight in red. Default
-#' \code{NULL}. See details.
+#' \code{2000}. See details.
 #' @param labelsCount Specify the number of data points/genes to label. Should
-#' be less than \code{hvgNumber}. Default \code{20}. See details.
+#' be less than \code{hvgNumber}. Default \code{10}. See details.
 #' @param featureDisplay A character string for the \code{rowData} variable name
 #' to indicate what type of feature ID should be displayed. If set by
 #' \code{\link{setSCTKDisplayRow}}, will by default use it. If \code{NULL}, will
@@ -37,16 +37,16 @@
 #' @importFrom SummarizedExperiment rowData
 #' @importFrom S4Vectors metadata
 plotTopHVG <- function(inSCE,
-                       method = c("vst", "mean.var.plot", "dispersion",
-                                  "modelGeneVar"),
-                       hvgNumber = NULL,
+                       method = "modelGeneVar",
+                       hvgNumber = 2000,
                        useFeatureSubset = NULL,
-                       labelsCount = 20,
+                       labelsCount = 10,
                        featureDisplay = metadata(inSCE)$featureDisplay,
                        labelSize = 2, dotSize = 2, textSize = 12
                        )
 {
-  method <- match.arg(method)
+  method <- match.arg(method, choices = c("vst", "mean.var.plot", "dispersion",
+                                          "modelGeneVar"))
   metric <- .dfFromHVGMetric(inSCE, method)
   yLabelChoice <- list(vst = "Standardized Variance",
                        mean.var.plot = "Dispersion", dispersion = "Dispersion",
@@ -62,7 +62,7 @@ plotTopHVG <- function(inSCE,
     hvgNumber <- length(hvgList)
   } else if (!is.null(hvgNumber)) {
     hvgList <- getTopHVG(inSCE = inSCE, method = method, hvgNumber = hvgNumber,
-                         featureDisplay = NULL)
+                         featureDisplay = NULL, useFeatureSubset = NULL)
   }
   if (is.null(hvgNumber) || hvgNumber == 0) {
     redIdx <- logical()

diff --git a/R/runCluster.R b/R/runCluster.R
@@ -6,7 +6,7 @@
 #' @param inSCE A \linkS4class{SingleCellExperiment} object.
 #' @param useReducedDim A single \code{character}, specifying which
 #' low-dimension representation (\code{\link{reducedDim}})
-#' to perform the clustering algorithm on. Default \code{NULL}.
+#' to perform the clustering algorithm on. Default \code{"PCA"}.
 #' @param useAssay A single \code{character}, specifying which
 #' \code{\link{assay}} to perform the clustering algorithm
 #' on. Default \code{NULL}.
@@ -23,15 +23,15 @@
 #' \code{NULL}.
 #' @param clusterName A single \code{character}, specifying the name to store
 #' the cluster label in \code{\link{colData}}. Default
-#' \code{"scranSNN_cluster"}.
+#' \code{"cluster"}.
 #' @param k An \code{integer}, the number of nearest neighbors used to construct
 #' the graph. Smaller value indicates higher resolution and larger number of
-#' clusters. Default \code{8}.
+#' clusters. Default \code{14}.
 #' @param nComp An \code{integer}. The number of components to use for graph
 #' construction. Default \code{10}. See Detail.
 #' @param weightType A single \code{character}, that specifies the edge weighing
 #' scheme when constructing the Shared Nearest-Neighbor (SNN) graph. Choose from
-#' \code{"rank"}, \code{"number"}, \code{"jaccard"}. Default \code{"rank"}.
+#' \code{"rank"}, \code{"number"}, \code{"jaccard"}. Default \code{"jaccard"}.
 #' @param algorithm A single \code{character}, that specifies the community
 #' detection algorithm to work on the SNN graph. Choose from \code{"leiden"},
 #' \code{"louvain"}, \code{"walktrap"}, \code{"infomap"}, \code{"fastGreedy"},
@@ -77,12 +77,12 @@
 #' data("mouseBrainSubsetSCE")
 #' mouseBrainSubsetSCE <- runScranSNN(mouseBrainSubsetSCE,
 #'                                    useReducedDim = "PCA_logcounts")
-runScranSNN <- function(inSCE, useReducedDim = NULL, useAssay = NULL,
+runScranSNN <- function(inSCE, useReducedDim = "PCA", useAssay = NULL,
                         useAltExp = NULL, altExpAssay = "counts",
                         altExpRedDim = NULL,
-                        clusterName = "scranSNN_cluster",
-                        k = 8, nComp = 10,
-                        weightType = c("rank", "number", "jaccard"),
+                        clusterName = "cluster",
+                        k = 14, nComp = 10,
+                        weightType = "jaccard",
                         algorithm = c("louvain", "leiden", "walktrap",
                                       "infomap", "fastGreedy", "labelProp",
                                       "leadingEigen"),
@@ -113,7 +113,7 @@ runScranSNN <- function(inSCE, useReducedDim = NULL, useAssay = NULL,
             stop("Scran SNN clustering requires one and only one of ",
                  "'useAssay', 'useReducedDim', and 'useAltExp'.")
         }
-        weightType <- match.arg(weightType)
+        weightType <- match.arg(weightType, choices = c("rank", "number", "jaccard"))
         algorithm <- match.arg(algorithm)
 
         graphClustAlgoList = list(leiden = igraph::cluster_leiden,