Merge pull request satijalab#777 from satijalab/seurat5_devel

Seurat5 devel
nigiord · Jul 14, 2023 · 96dc134 · 96dc134
2 parents c8e2e67 + dbe7947
commit 96dc134
Show file tree

Hide file tree

Showing 37 changed files with 741 additions and 162 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: Seurat
-Version: 4.9.9.9050
-Date: 2023-06-30
+Version: 4.9.9.9058
+Date: 2023-07-14
 Title: Tools for Single Cell Genomics
 Description: A toolkit for quality control, analysis, and exploration of single cell RNA sequencing data. 'Seurat' aims to enable users to identify and interpret sources of heterogeneity from single cell transcriptomic measurements, and to integrate diverse types of single cell data. See Satija R, Farrell J, Gennert D, et al (2015) <doi:10.1038/nbt.3192>, Macosko E, Basu A, Satija R, et al (2015) <doi:10.1016/j.cell.2015.05.002>, Stuart T, Butler A, et al (2019) <doi:10.1016/j.cell.2019.05.031>, and Hao, Hao, et al (2020) <doi:10.1101/2020.10.12.335331> for more details.
 Authors@R: c(
@@ -33,7 +33,7 @@ Remotes:
 Depends:
     R (>= 4.0.0),
     methods,
-    SeuratObject (>= 4.9.9.9049)
+    SeuratObject (>= 4.9.9.9091)
 Imports:
     cluster,
     cowplot,

diff --git a/NAMESPACE b/NAMESPACE
@@ -41,6 +41,7 @@ S3method(FindVariableFeatures,V3Matrix)
 S3method(FindVariableFeatures,default)
 S3method(FoldChange,Assay)
 S3method(FoldChange,DimReduc)
+S3method(FoldChange,SCTAssay)
 S3method(FoldChange,Seurat)
 S3method(FoldChange,StdAssay)
 S3method(FoldChange,default)
@@ -494,6 +495,7 @@ importFrom(Matrix,rowMeans)
 importFrom(Matrix,rowSums)
 importFrom(Matrix,sparse.model.matrix)
 importFrom(Matrix,sparseMatrix)
+importFrom(Matrix,summary)
 importFrom(Matrix,t)
 importFrom(RANN,nn2)
 importFrom(RColorBrewer,brewer.pal)

diff --git a/NEWS.md b/NEWS.md
@@ -1,9 +1,16 @@
 # Unreleased
+
+## Added
+- Added parallelization support with speed improvements for `PrepSCTFindMarkers` 
+
 ## Changes
 - Fix bug in `as.Seurat.SingleCellExperiment()` ([#6692](https://github.com/satijalab/seurat/issues/6692))
 - Support for Visium probe information introduced in Spaceranger 2.1 ([#7141](https://github.com/satijalab/seurat/pull/7141))
 - Add `LoadCurioSeeker` to load sequencing-based spatial datasets generated using the Curio Seeker ([#744](https://github.com/satijalab/seurat-private/pull/744))
 - Fix fold change calculation for assays ([#739](https://github.com/satijalab/seurat-private/pull/739))
+- Fix `pt.size` bug when rasterization is set to true ([#7379](https://github.com/satijalab/seurat/issues/7379)) 
+- Fix `FoldChange` and `FindMarkers` to support all normalization approaches ([#7115](https://github.com/satijalab/seurat/pull/7115),[#7110](https://github.com/satijalab/seurat/issues/7110),[#7095](https://github.com/satijalab/seurat/issues/7095),[#6976](https://github.com/satijalab/seurat/issues/6976),[#6654](https://github.com/satijalab/seurat/issues/6654),[#6701](https://github.com/satijalab/seurat/issues/6701),[#6773](https://github.com/satijalab/seurat/issues/6773), [#7107](https://github.com/satijalab/seurat/issues/7107))
+- Fix for handling newer ParseBio formats in `ReadParseBio` ([#7565](https://github.com/satijalab/seurat/pull/7565))
 
 # Seurat 4.3.0 (2022-11-18)
 

diff --git a/R/clustering.R b/R/clustering.R
@@ -507,7 +507,6 @@ FindClusters.Seurat <- function(
 #' @param nn.eps Error bound when performing nearest neighbor search using RANN;
 #' default of 0.0 implies exact nearest neighbor search
 #' @param verbose Whether or not to print output to the console
-#' @param force.recalc Force recalculation of (S)NN.
 #' @param l2.norm Take L2Norm of the data
 #' @param cache.index Include cached index in returned Neighbor object
 #' (only relevant if return.neighbor = TRUE)
@@ -535,7 +534,6 @@ FindNeighbors.default <- function(
   annoy.metric = "euclidean",
   nn.eps = 0,
   verbose = TRUE,
-  force.recalc = FALSE,
   l2.norm = FALSE,
   cache.index = FALSE,
   index = NULL,
@@ -645,7 +643,6 @@ FindNeighbors.Assay <- function(
   annoy.metric = "euclidean",
   nn.eps = 0,
   verbose = TRUE,
-  force.recalc = FALSE,
   l2.norm = FALSE,
   cache.index = FALSE,
   ...
@@ -663,7 +660,6 @@ FindNeighbors.Assay <- function(
     annoy.metric = annoy.metric,
     nn.eps = nn.eps,
     verbose = verbose,
-    force.recalc = force.recalc,
     l2.norm = l2.norm,
     return.neighbor = return.neighbor,
     cache.index = cache.index,
@@ -688,7 +684,6 @@ FindNeighbors.dist <- function(
   annoy.metric = "euclidean",
   nn.eps = 0,
   verbose = TRUE,
-  force.recalc = FALSE,
   l2.norm = FALSE,
   cache.index = FALSE,
   ...
@@ -705,7 +700,6 @@ FindNeighbors.dist <- function(
     n.trees = n.trees,
     annoy.metric = annoy.metric,
     verbose = verbose,
-    force.recalc = force.recalc,
     l2.norm = l2.norm,
     return.neighbor = return.neighbor,
     cache.index = cache.index,
@@ -750,7 +744,6 @@ FindNeighbors.Seurat <- function(
   annoy.metric = "euclidean",
   nn.eps = 0,
   verbose = TRUE,
-  force.recalc = FALSE,
   do.plot = FALSE,
   graph.name = NULL,
   l2.norm = FALSE,
@@ -775,7 +768,6 @@ FindNeighbors.Seurat <- function(
       annoy.metric = annoy.metric,
       nn.eps = nn.eps,
       verbose = verbose,
-      force.recalc = force.recalc,
       l2.norm = l2.norm,
       return.neighbor = return.neighbor,
       cache.index = cache.index,
@@ -794,7 +786,6 @@ FindNeighbors.Seurat <- function(
       annoy.metric = annoy.metric,
       nn.eps = nn.eps,
       verbose = verbose,
-      force.recalc = force.recalc,
       l2.norm = l2.norm,
       return.neighbor = return.neighbor,
       cache.index = cache.index,

diff --git a/R/convenience.R b/R/convenience.R
@@ -391,7 +391,8 @@ SpecificDimPlot <- function(object, ...) {
 #' @export
 #'
 ReadParseBio <- function(data.dir, ...) {
-  mtx <- file.path(data.dir, "DGE.mtx")
+  file.dir <- list.files(path = data.dir, pattern = ".mtx")
+  mtx <- file.path(data.dir, file.dir)
   cells <- file.path(data.dir, "cell_metadata.csv")
   features <- file.path(data.dir, "all_genes.csv")
   return(ReadMtx(

diff --git a/R/differential_expression.R b/R/differential_expression.R
@@ -59,7 +59,6 @@ FindAllMarkers <- function(
   latent.vars = NULL,
   min.cells.feature = 3,
   min.cells.group = 3,
-  pseudocount.use = 1,
   mean.fxn = NULL,
   fc.name = NULL,
   base = 2,
@@ -102,18 +101,6 @@ FindAllMarkers <- function(
     new.nodes <- unique(x = tree$edge[, 1, drop = TRUE])
     idents.all <- (tree$Nnode + 2):max(tree$edge)
   }
-  # Default mean function assumes data has been log transformed (such as post NormalizeData or SCT data slot)
-  default.mean.fxn <- function(x) {
-    return(log(x = rowMeans(x = expm1(x = x)) + pseudocount.use, base = base))
-  }
-  mean.fxn <- mean.fxn %||% switch(
-    EXPR = slot,
-    'counts' = function(x) {
-      return(log(x = rowMeans(x = x) + pseudocount.use, base = base))
-    },
-    'scale.data' = rowMeans,
-    default.mean.fxn
-  )
   genes.de <- list()
   messages <- list()
   for (i in 1:length(x = idents.all)) {
@@ -741,6 +728,13 @@ FindMarkers.SCTAssay <- function(
     yes = 'counts',
     no = slot
   )
+  if (test.use %in% DEmethods_counts()){
+    # set slot to counts
+    if (slot !="counts") {
+      message(paste0("Setting slot to counts for ", test.use, " (counts based test: "))
+      slot <- "counts"
+    }
+  }
   if (recorrect_umi && length(x = levels(x = object)) > 1) {
     cell_attributes <- SCTResults(object = object, slot = "cell.attributes")
     observed_median_umis <- lapply(
@@ -770,7 +764,7 @@ FindMarkers.SCTAssay <- function(
     'scale.data' = GetAssayData(object = object, slot = "counts"),
     numeric()
   )
-  # Default assumes FindMarkers was invoked with log2(corrected counts) - SCT data slot
+  # Default assumes the input is log1p(corrected counts)
   default.mean.fxn <- function(x) {
     return(log(x = rowMeans(x = expm1(x = x)) + pseudocount.use, base = base))
   }
@@ -1125,30 +1119,94 @@ FoldChange.Assay <- function(
   ...
 ) {
   data <- GetAssayData(object = object, slot = slot)
-  # Default assumes FoldChange was invoked with log2(corrected counts) - SCT data slot
+  # By default, behave as if the data has been log-normalized (LogNormalize)
+  log1pdata.mean.fxn <- function(x) {
+    return(log(x = rowMeans(x = expm1(x = x)) + pseudocount.use, base = base))
+  }
+  scaledata.mean.fxn <- rowMeans
+  counts.mean.fxn <- function(x) {
+    return(log(x = rowMeans(x = x) + pseudocount.use, base = base))
+  }
+  if (!is.null(x = norm.method)) {
+    # For anything other than LogNormalize, use log(rowMeans + pseudocount)
+    if (norm.method!="LogNormalize") {
+      new.mean.fxn <- counts.mean.fxn
+    } else {
+      new.mean.fxn <- counts.mean.fxn
+      if (slot == "data") {
+        new.mean.fxn <- log1pdata.mean.fxn
+      }  else if (slot == "scale.data") {
+        new.mean.fxn <- scaledata.mean.fxn
+      }
+    }
+  } else {
+    # If no normalization method is passed use slots to decide mean function
+    new.mean.fxn <- switch(
+      EXPR = slot,
+      'data' = log1pdata.mean.fxn,
+      'scale.data' = scaledata.mean.fxn,
+      'counts' = counts.mean.fxn,
+      log1pdata.mean.fxn
+    )
+  }
+  mean.fxn <- mean.fxn %||% new.mean.fxn
+  # Omit the decimal value of e from the column name if base == exp(1)
+  base.text <- ifelse(
+    test = base == exp(1),
+    yes = "",
+    no = base
+  )
+  fc.name <- fc.name %||% ifelse(
+    test = slot == "scale.data",
+    yes = "avg_diff",
+    no = paste0("avg_log", base.text, "FC")
+  )
+  FoldChange(
+    object = data,
+    cells.1 = cells.1,
+    cells.2 = cells.2,
+    features = features,
+    mean.fxn = mean.fxn,
+    fc.name = fc.name
+  )
+}
+
+#' @method FoldChange StdAssay
+#' @export
+#'
+FoldChange.StdAssay <- FoldChange.Assay
+
+#' @importFrom Matrix rowMeans
+#' @rdname FoldChange
+#' @concept differential_expression
+#' @export
+#' @method FoldChange SCTAssay
+FoldChange.SCTAssay <- function(
+    object,
+    cells.1,
+    cells.2,
+    features = NULL,
+    slot = "data",
+    pseudocount.use = 1,
+    fc.name = NULL,
+    mean.fxn = NULL,
+    base = 2,
+    ...
+) {
+  pseudocount.use <- pseudocount.use %||% 1
+  data <- GetAssayData(object = object, slot = slot)
   default.mean.fxn <- function(x) {
     return(log(x = rowMeans(x = expm1(x = x)) + pseudocount.use, base = base))
   }
   mean.fxn <- mean.fxn %||% switch(
     EXPR = slot,
+    'data' = default.mean.fxn,
+    'scale.data' = rowMeans,
     'counts' = function(x) {
       return(log(x = rowMeans(x = x) + pseudocount.use, base = base))
     },
-    'scale.data' = rowMeans,
     default.mean.fxn
   )
-  # mean.fxn <- mean.fxn %||% switch(
-  #   EXPR = slot,
-  #   'data' = switch(
-  #     EXPR = norm.method %||% '',
-  #     'LogNormalize' = function(x) {
-  #       return(log(x = rowMeans(x = expm1(x = x)) + pseudocount.use, base = base))
-  #     },
-  #     default.mean.fxn
-  #   ),
-  #   'scale.data' = rowMeans,
-  #   default.mean.fxn
-  # )
   # Omit the decimal value of e from the column name if base == exp(1)
   base.text <- ifelse(
     test = base == exp(1),
@@ -1170,10 +1228,6 @@ FoldChange.Assay <- function(
   )
 }
 
-#' @method FoldChange StdAssay
-#' @export
-#'
-FoldChange.StdAssay <- FoldChange.Assay
 
 #' @importFrom Matrix rowMeans
 #' @rdname FoldChange
@@ -2087,12 +2141,17 @@ PerformDE <- function(
 #' @param assay Name of the assay where the SCT objects are stored; default is 'SCT'
 #' @param verbose Print messages and progress
 #' @importFrom Matrix Matrix
+#' @importFrom pbapply pblapply
+#' @importFrom future.apply future_lapply
+#' @importFrom future nbrOfWorkers
 #' @importFrom sctransform correct_counts
 #'
 #' @return Returns a Seurat object with recorrected counts and data in the SCT assay.
 #' @export
 #'
 #' @concept differential_expression
+#' @template section-progressr
+#' @template section-future
 #' @examples
 #' data("pbmc_small")
 #' pbmc_small1 <- SCTransform(object = pbmc_small, variable.features.n = 20)
@@ -2115,6 +2174,11 @@ PerformDE <- function(
 #' )
 #'
 PrepSCTFindMarkers <- function(object, assay = "SCT", verbose = TRUE) {
+  if (verbose && nbrOfWorkers() == 1) {
+    my.lapply <- pblapply
+  } else {
+    my.lapply <- future_lapply
+  }
   if (length(x = levels(x = object[[assay]])) == 1) {
     if (verbose) {
       message("Only one SCT model is stored - skipping recalculating corrected counts")
@@ -2186,27 +2250,34 @@ PrepSCTFindMarkers <- function(object, assay = "SCT", verbose = TRUE) {
   set_median_umi <- rep(min_median_umi, length(levels(x = object[[assay]])))
   names(set_median_umi) <- levels(x = object[[assay]])
   set_median_umi <- as.list(set_median_umi)
+
   # correct counts
-  for (model_name in levels(x = object[[assay]])) {
+  my.correct_counts <- function(model_name){
     model_genes <- rownames(x = model_pars_fit[[model_name]])
-    x <- list(
-      model_str = model_str[[model_name]],
-      arguments = arguments[[model_name]],
-      model_pars_fit = as.matrix(x = model_pars_fit[[model_name]]),
-      cell_attr = cell_attr[[model_name]]
-    )
-    cells <- rownames(x = cell_attr[[model_name]])
-    umi <- raw_umi[model_genes, cells]
+      x <- list(
+        model_str = model_str[[model_name]],
+        arguments = arguments[[model_name]],
+        model_pars_fit = as.matrix(x = model_pars_fit[[model_name]]),
+        cell_attr = cell_attr[[model_name]]
+      )
+      cells <- rownames(x = cell_attr[[model_name]])
+      umi <- raw_umi[model_genes, cells]
 
-    umi_corrected <- correct_counts(
-      x = x,
-      umi = umi,
-      verbosity = 0,
-      scale_factor = min_median_umi
-    )
-    corrected_counts[rownames(umi_corrected), colnames(umi_corrected)] <- umi_corrected
+      umi_corrected <- correct_counts(
+        x = x,
+        umi = umi,
+        verbosity = 0,
+        scale_factor = min_median_umi
+      )
+      return(umi_corrected)
   }
-  corrected_data <- log1p(corrected_counts)
+  corrected_counts.list <- my.lapply(X = levels(x = object[[assay]]),
+                                     FUN = my.correct_counts)
+  names(x = corrected_counts.list) <- levels(x = object[[assay]])
+  corrected_counts <- do.call(what = MergeSparseMatrices, args = corrected_counts.list)
+  corrected_counts.list <- NULL
+
+  corrected_data <- log1p(x = corrected_counts)
   suppressWarnings({object <- SetAssayData(object = object,
                                            assay = assay,
                                            slot = "counts",
@@ -2216,7 +2287,6 @@ PrepSCTFindMarkers <- function(object, assay = "SCT", verbose = TRUE) {
                                            slot = "data",
                                            new.data = corrected_data)})
   SCTResults(object = object[[assay]], slot = "median_umi") <- set_median_umi
-
   return(object)
 }