Merge branch 'develop' into rharao-findallmarkers-groupby

satijalab · Dec 20, 2024 · ebbcc97 · ebbcc97
2 parents 586d28f + 6278779
commit ebbcc97
Show file tree

Hide file tree

Showing 47 changed files with 527 additions and 183 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,5 +1,5 @@
 Package: Seurat
-Version: 5.1.0.9008
+Version: 5.1.0.9017
 Title: Tools for Single Cell Genomics
 Description: A toolkit for quality control, analysis, and exploration of single cell RNA sequencing data. 'Seurat' aims to enable users to identify and interpret sources of heterogeneity from single cell transcriptomic measurements, and to integrate diverse types of single cell data. See Satija R, Farrell J, Gennert D, et al (2015) <doi:10.1038/nbt.3192>, Macosko E, Basu A, Satija R, et al (2015) <doi:10.1016/j.cell.2015.05.002>, Stuart T, Butler A, et al (2019) <doi:10.1016/j.cell.2019.05.031>, and Hao, Hao, et al (2020) <doi:10.1101/2020.10.12.335331> for more details.
 Authors@R: c(
@@ -56,7 +56,7 @@ Imports:
     irlba,
     jsonlite,
     KernSmooth,
-    leiden (>= 0.3.1),
+    leidenbase,
     lifecycle,
     lmtest,
     MASS,
@@ -68,7 +68,6 @@ Imports:
     plotly (>= 4.9.0),
     png,
     progressr,
-    purrr,
     RANN,
     RColorBrewer,
     Rcpp (>= 1.0.7),

diff --git a/NAMESPACE b/NAMESPACE
@@ -49,6 +49,7 @@ S3method(GetAssay,Seurat)
 S3method(GetImage,STARmap)
 S3method(GetImage,SlideSeq)
 S3method(GetImage,VisiumV1)
+S3method(GetImage,VisiumV2)
 S3method(GetTissueCoordinates,STARmap)
 S3method(GetTissueCoordinates,SlideSeq)
 S3method(GetTissueCoordinates,VisiumV1)
@@ -385,6 +386,7 @@ export(RunCCA)
 export(RunGraphLaplacian)
 export(RunICA)
 export(RunLDA)
+export(RunLeiden)
 export(RunMarkVario)
 export(RunMixscape)
 export(RunMoransI)
@@ -754,7 +756,6 @@ importFrom(igraph,plot.igraph)
 importFrom(irlba,irlba)
 importFrom(jsonlite,fromJSON)
 importFrom(jsonlite,read_json)
-importFrom(leiden,leiden)
 importFrom(lifecycle,deprecate_soft)
 importFrom(lifecycle,deprecate_stop)
 importFrom(lifecycle,deprecate_warn)
@@ -796,7 +797,6 @@ importFrom(plotly,plot_ly)
 importFrom(plotly,raster2uri)
 importFrom(png,readPNG)
 importFrom(progressr,progressor)
-importFrom(purrr,imap)
 importFrom(reticulate,import)
 importFrom(reticulate,py_module_available)
 importFrom(reticulate,py_set_seed)

diff --git a/NEWS.md b/NEWS.md
@@ -1,6 +1,11 @@
 # Unreleased
 
 ## Changes
+- Added `image.type` parameter to `Read10X_Image` enabling `VisiumV1` instances to be populated instead of instances of the default `VisiumV2` class ([#9556](https://github.com/satijalab/seurat/pull/9556))
+- Fixed `IntegrateLayers` to respect the `dims.to.integrate` parameter.
+- Added `stroke.size` parameter to `DimPlot` ([#8180](https://github.com/satijalab/seurat/pull/8180))
+- Updated `RunLeiden` to use the `leidenbase` package instead of `leiden`; deprecated the `method` parameter for `RunLeiden` and `FindClusters`; updated `RunLeiden` to reset `random.seed` to 1 if the value is 0 or less ([#6792](https://github.com/satijalab/seurat/pull/6792))
+- Updated `RunUMAP` to support `umap-learn` version >= 0.5.0 ([#9559](https://github.com/satijalab/seurat/pull/9559))
 - Surfaced more fine-grained control over what parts of a Xenium experiment are loaded in `LoadXenium`
 - Added ability to load Xenium nucleus segmentation masks
 - Updated `LoadXenium` to also read some run metadata (run start time, preservation method, panel used, organism, tissue type, instrument software version and stain kit used) into `misc` slot
@@ -13,6 +18,8 @@
 - Fixed `RunPCA` to avoid converting `BPCells` matrices into dense matrices - significantly reduces the function's memory usage when running on `BPCells` matrices
 - Added `features` parameter to `LeverageScore` and `SketchData`
 - Updated `SketchData`'s `ncells` parameter to accept integer vector
+- Updated `JackStraw` to support `BPCells` matrices
+- Updated `RunPCA` to use the `BPCells`-provided SVD solver on `BPCells` matrices
 
 # Seurat 5.1.0 (2024-05-08)
 
@@ -238,6 +245,7 @@
 - Add `rlsi` option for `FindIntegrationAnchors()`
 
 ## Changes
+- 
 - Preserve feature metadata when converting from `SingleCellExperiment` to `SeuratObject` class
 ([#4205](https://github.com/satijalab/seurat/issues/4205))
 - Preserve multiple assays when converting from `SingleCellExperiment` to `SeuratObject` class

diff --git a/R/clustering.R b/R/clustering.R
@@ -190,7 +190,7 @@ FindSubCluster <- function(
 #' embeddings from dimensional reductions.
 #'
 #' @param object The object used to calculate knn
-#' @param nn.idx k near neighbour indices. A cells x k matrix.
+#' @param nn.idx k near neighbor indices. A cells x k matrix.
 #' @param assay Assay used for prediction
 #' @param reduction Cell embedding of the reduction used for prediction
 #' @param dims Number of dimensions of cell embedding
@@ -275,14 +275,16 @@ PredictAssay <- function(
 #' @importFrom future nbrOfWorkers
 #'
 #' @param modularity.fxn Modularity function (1 = standard; 2 = alternative).
-#' @param initial.membership,node.sizes Parameters to pass to the Python leidenalg function.
+#' @param initial.membership Passed to the `initial_membership` parameter
+#' of `leidenbase::leiden_find_partition`.
+#' @param node.sizes Passed to the `node_sizes` parameter of
+#' `leidenbase::leiden_find_partition`.
 #' @param resolution Value of the resolution parameter, use a value above
 #' (below) 1.0 if you want to obtain a larger (smaller) number of communities.
 #' @param algorithm Algorithm for modularity optimization (1 = original Louvain
 #' algorithm; 2 = Louvain algorithm with multilevel refinement; 3 = SLM
-#' algorithm; 4 = Leiden algorithm). Leiden requires the leidenalg python.
-#' @param method Method for running leiden (defaults to matrix which is fast for small datasets).
-#' Enable method = "igraph" to avoid casting large data to a dense matrix.
+#' algorithm; 4 = Leiden algorithm).
+#' @param method DEPRECATED.
 #' @param n.start Number of random starts.
 #' @param n.iter Maximal number of iterations per random start.
 #' @param random.seed Seed of the random number generator.
@@ -303,7 +305,7 @@ FindClusters.default <- function(
   initial.membership = NULL,
   node.sizes = NULL,
   resolution = 0.8,
-  method = "matrix",
+  method = deprecated(),
   algorithm = 1,
   n.start = 10,
   n.iter = 10,
@@ -315,6 +317,14 @@ FindClusters.default <- function(
   ...
 ) {
   CheckDots(...)
+  # The `method` parameter was only meaningful for `RunLeiden` and has been
+  # deprecated; see `RunLeiden` for more details.
+  if (is_present(method)) {
+    deprecate_soft(
+      when = "5.2.0",
+      what = "FindClusters(method)"
+    )
+  }
   if (is.null(x = object)) {
     stop("Please provide an SNN graph")
   }
@@ -344,7 +354,6 @@ FindClusters.default <- function(
         } else if (algorithm == 4) {
           ids <- RunLeiden(
             object = object,
-            method = method,
             partition.type = "RBConfigurationVertexPartition",
             initial.membership = initial.membership,
             node.sizes = node.sizes,
@@ -418,7 +427,8 @@ FindClusters.Seurat <- function(
   initial.membership = NULL,
   node.sizes = NULL,
   resolution = 0.8,
-  method = "matrix",
+  # ToDo: Update `LogSeuratCommand` to accommodate deprecated parameters.
+  method = NULL,
   algorithm = 1,
   n.start = 10,
   n.iter = 10,
@@ -430,6 +440,15 @@ FindClusters.Seurat <- function(
   ...
 ) {
   CheckDots(...)
+  # Since we're throwing a soft deprecation warning, it needs to be duplicated
+  # for each implementation of the `FindClusters` generic, see
+  # `FindClusters.default` for more details.
+  if (!is.null(method)) {
+    deprecate_soft(
+      when = "5.2.0",
+      what = "FindClusters(method)"
+    )
+  }
   graph.name <- graph.name %||% paste0(DefaultAssay(object = object), "_snn")
   if (!graph.name %in% names(x = object)) {
     stop("Provided graph.name not present in Seurat object")
@@ -443,7 +462,6 @@ FindClusters.Seurat <- function(
     initial.membership = initial.membership,
     node.sizes = node.sizes,
     resolution = resolution,
-    method = method,
     algorithm = algorithm,
     n.start = n.start,
     n.iter = n.iter,
@@ -504,7 +522,7 @@ FindClusters.Seurat <- function(
 #' cosine, manhattan, and hamming
 #' @param n.trees More trees gives higher precision when using annoy approximate
 #' nearest neighbor search
-#' @param nn.eps Error bound when performing nearest neighbor seach using RANN;
+#' @param nn.eps Error bound when performing nearest neighbor search using RANN;
 #' default of 0.0 implies exact nearest neighbor search
 #' @param verbose Whether or not to print output to the console
 #' @param l2.norm Take L2Norm of the data
@@ -850,7 +868,7 @@ FindNeighbors.Seurat <- function(
 # @param n.trees More trees gives higher precision when querying
 # @param k Number of neighbors
 # @param search.k During the query it will inspect up to search_k nodes which
-# gives you a run-time tradeoff between better accuracy and speed.
+# gives you a run-time trade-off between better accuracy and speed.
 # @param include.distance Include the corresponding distances
 # @param index optional index object, will be recomputed if not provided
 #
@@ -910,7 +928,7 @@ AnnoyBuildIndex <- function(data, metric = "euclidean", n.trees = 50) {
 # @param query A set of data to be queried against the index
 # @param k Number of neighbors
 # @param search.k During the query it will inspect up to search_k nodes which
-# gives you a run-time tradeoff between better accuracy and speed.
+# gives you a run-time trade-off between better accuracy and speed.
 # @param include.distance Include the corresponding distances in the result
 #
 # @return A list with 'nn.idx' (for each element in 'query', the index of the
@@ -1629,38 +1647,35 @@ NNHelper <- function(data, query = data, k, method, cache.index = FALSE, ...) {
   return(n.ob)
 }
 
-# Run Leiden clustering algorithm
-#
-# Implements the Leiden clustering algorithm in R using reticulate
-# to run the Python version. Requires the python "leidenalg" and "igraph" modules
-# to be installed. Returns a vector of partition indices.
-#
-# @param adj_mat An adjacency matrix or SNN matrix
-# @param partition.type Type of partition to use for Leiden algorithm.
-# Defaults to RBConfigurationVertexPartition. Options include: ModularityVertexPartition,
-# RBERVertexPartition, CPMVertexPartition, MutableVertexPartition,
-# SignificanceVertexPartition, SurpriseVertexPartition (see the Leiden python
-# module documentation for more details)
-# @param initial.membership,node.sizes Parameters to pass to the Python leidenalg function.
-# @param resolution.parameter A parameter controlling the coarseness of the clusters
-# for Leiden algorithm. Higher values lead to more clusters. (defaults to 1.0 for
-# partition types that accept a resolution parameter)
-# @param random.seed Seed of the random number generator
-# @param n.iter Maximal number of iterations per random start
-#
-# @keywords graph network igraph mvtnorm simulation
-#
-#' @importFrom leiden leiden
-#' @importFrom reticulate py_module_available
+#' Run Leiden clustering algorithm
+#'
+#' Returns a vector of partition indices.
+#'
+#' @param object An adjacency matrix or adjacency list. 
+#' @param method DEPRECATED.
+#' @param partition.type Type of partition to use for Leiden algorithm.
+#' Defaults to "RBConfigurationVertexPartition", see 
+#' https://cran.rstudio.com/web/packages/leidenbase/leidenbase.pdf for more options.
+#' @param initial.membership Passed to the `initial_membership` parameter
+#' of `leidenbase::leiden_find_partition`.
+#' @param node.sizes Passed to the `node_sizes` parameter of
+#' `leidenbase::leiden_find_partition`.
+#' @param resolution.parameter A parameter controlling the coarseness of the clusters
+#' for Leiden algorithm. Higher values lead to more clusters. (defaults to 1.0 for
+#' partition types that accept a resolution parameter)
+#' @param random.seed Seed of the random number generator, must be greater than 0.
+#' @param n.iter Maximal number of iterations per random start
+#'
 #' @importFrom igraph graph_from_adjacency_matrix graph_from_adj_list
-#
-# @author Tom Kelly
-#
-# @export
-#
+#'
+#' @export
+#' 
+#' @rdname RunLeiden
+#' @concept clustering
+#' 
 RunLeiden <- function(
   object,
-  method = c("matrix", "igraph"),
+  method = deprecated(),
   partition.type = c(
     'RBConfigurationVertexPartition',
     'ModularityVertexPartition',
@@ -1673,50 +1688,69 @@ RunLeiden <- function(
   initial.membership = NULL,
   node.sizes = NULL,
   resolution.parameter = 1,
-  random.seed = 0,
+  random.seed = 1,
   n.iter = 10
 ) {
-  if (!py_module_available(module = 'leidenalg')) {
+  # `leidenbase::leiden_find_partition` requires its `seed` parameter to be
+  # greater than 0 (or NULL) but the default value for `FindClusters` is 0. 
+  # If `random.seed` is 0 or less, throw a warning and reset the value to 1. 
+   if (!is.null(random.seed) && random.seed <= 0) {
+    warning(
+      paste0(
+        "`random.seed` must be greater than 0 for leiden clustering, ",
+        "resetting `random.seed` to 1."
+      )
+    )
+    random.seed <- 1
+  }
+
+  # The `method` parameter was deprecated after switching from the `leiden`
+  # package to `leidenbase` to run the algorithm. Unlike `leiden`, `leidenbase`
+  # _requires_ an `igraph` input, so the parameter no longer makes sense. The
+  # good news is that `leidenbase` is much faster than `leiden` so it shouldn't
+  # really matter. 
+  if (is_present(method)) {
+    deprecate_soft(
+      when = "5.2.0",
+      what = "RunLeiden(method)"
+    )
+  }
+
+  # Convert `object` into an `igraph`.
+  # If `object` is already an `igraph` no conversion is necessary.
+  if (inherits(object, what = "igraph")) { 
+    input <- object
+  # Otherwise, if `object` is a list, assume it is an adjacency list...
+  } else if (inherits(object, what = "list")) {
+    # And convert it to an `igraph` with the appropriate method. 
+    input <- graph_from_adj_list(object)
+  # Or, if `object` is a matrix...
+  } else if (inherits(object, what = c("dgCMatrix", "matrix", "Matrix"))) {
+    # Make sure the matrix is sparse.
+    if (inherits(object, what = "Graph")) {
+      object <- as.sparse(object)
+    }
+    # And then convert it to an `igraph`.
+    input <- graph_from_adjacency_matrix(object, weighted = TRUE)
+  # Throw an error if `object` is of an unknown type. 
+  } else {
     stop(
-      "Cannot find Leiden algorithm, please install through pip (e.g. pip install leidenalg).",
+      "Method for Leiden not found for class", class(object),
       call. = FALSE
     )
   }
-  switch(
-    EXPR = method,
-    "matrix" = {
-      input <- as(object = object, Class = "matrix")
-    },
-    "igraph" = {
-      input <- if (inherits(x = object, what = 'list')) {
-        graph_from_adj_list(adjlist = object)
-      } else if (inherits(x = object, what = c('dgCMatrix', 'matrix', 'Matrix'))) {
-        if (inherits(x = object, what = 'Graph')) {
-          object <- as.sparse(x = object)
-        }
-        graph_from_adjacency_matrix(adjmatrix = object, weighted = TRUE)
-      } else if (inherits(x = object, what = 'igraph')) {
-        object
-      } else {
-        stop(
-          "Method for Leiden not found for class", class(x = object),
-          call. = FALSE
-        )
-      }
-    },
-    stop("Method for Leiden must be either 'matrix' or igraph'")
-  )
-  #run leiden from CRAN package (calls python with reticulate)
-  partition <- leiden(
-    object = input,
+
+  # Run clustering with `leidenbase`.
+  partition <- leidenbase::leiden_find_partition(
+    input,
     partition_type = partition.type,
     initial_membership = initial.membership,
-    weights = NULL,
+    edge_weights = NULL,
     node_sizes = node.sizes,
     resolution_parameter = resolution.parameter,
     seed = random.seed,
-    n_iterations = n.iter
-  )
+    num_iter = n.iter
+  )$membership
   return(partition)
 }