From d9881018e545508e6edc6bb56acfd0ed4001cc56 Mon Sep 17 00:00:00 2001 From: rharao Date: Thu, 12 Dec 2024 14:12:56 -0500 Subject: [PATCH 01/11] Implement group.by arg to FindAllMarkers --- R/differential_expression.R | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/R/differential_expression.R b/R/differential_expression.R index 515491545..0e245cfd0 100644 --- a/R/differential_expression.R +++ b/R/differential_expression.R @@ -46,6 +46,7 @@ FindAllMarkers <- function( object, assay = NULL, features = NULL, + group.by = NULL, logfc.threshold = 0.1, test.use = 'wilcox', slot = 'data', @@ -75,6 +76,9 @@ FindAllMarkers <- function( return.thresh <- 0.7 } if (is.null(x = node)) { + if (!is.null(x = group.by)) { + Idents(object = object) <- group.by + } idents.all <- sort(x = unique(x = Idents(object = object))) } else { if (!PackageCheck('ape', error = FALSE)) { From 28af73f63150e6d06e0f621e48046215e2f6806d Mon Sep 17 00:00:00 2001 From: rharao Date: Fri, 13 Dec 2024 10:12:02 -0500 Subject: [PATCH 02/11] Handle "ident"; guard against group.by not in metadata --- R/differential_expression.R | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/R/differential_expression.R b/R/differential_expression.R index 0e245cfd0..8d67c8393 100644 --- a/R/differential_expression.R +++ b/R/differential_expression.R @@ -76,9 +76,12 @@ FindAllMarkers <- function( return.thresh <- 0.7 } if (is.null(x = node)) { - if (!is.null(x = group.by)) { - Idents(object = object) <- group.by + if (!is.null(x = group.by) && group.by != "ident") { + if (length(x = group.by) == 1 && ! group.by %in% colnames(x = object@meta.data)) { + stop("'", group.by, "' not found in object metadata") } + Idents(object = object) <- group.by + } idents.all <- sort(x = unique(x = Idents(object = object))) } else { if (!PackageCheck('ape', error = FALSE)) { @@ -900,7 +903,7 @@ FindMarkers.DimReduc <- function( #' use all other cells for comparison; if an object of class \code{phylo} or #' 'clustertree' is passed to \code{ident.1}, must pass a node to find markers for #' @param group.by Regroup cells into a different identity class prior to -#' performing differential expression (see example) +#' performing differential expression (see example); \code{"ident"} to use Idents #' @param subset.ident Subset a particular identity class prior to regrouping. #' Only relevant if group.by is set (see example) #' @param assay Assay to use in differential expression testing @@ -923,10 +926,13 @@ FindMarkers.Seurat <- function( reduction = NULL, ... ) { - if (!is.null(x = group.by)) { + if (!is.null(x = group.by) && group.by != "ident") { if (!is.null(x = subset.ident)) { object <- subset(x = object, idents = subset.ident) } + if (length(x = group.by) == 1 && ! group.by %in% colnames(x = object@meta.data)) { + stop("'", group.by, "' not found in object metadata") + } Idents(object = object) <- group.by } if (!is.null(x = assay) && !is.null(x = reduction)) { From e93d0f51db2c8804a862e76a8710a2cdfe9811f6 Mon Sep 17 00:00:00 2001 From: rharao <18608184+rharao@users.noreply.github.com> Date: Fri, 13 Dec 2024 12:08:57 -0500 Subject: [PATCH 03/11] roxygenize --- man/FindAllMarkers.Rd | 4 ++++ man/FindMarkers.Rd | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) mode change 100644 => 100755 man/FindAllMarkers.Rd mode change 100644 => 100755 man/FindMarkers.Rd diff --git a/man/FindAllMarkers.Rd b/man/FindAllMarkers.Rd old mode 100644 new mode 100755 index a2f9f764e..35183b632 --- a/man/FindAllMarkers.Rd +++ b/man/FindAllMarkers.Rd @@ -9,6 +9,7 @@ FindAllMarkers( object, assay = NULL, features = NULL, + group.by = NULL, logfc.threshold = 0.1, test.use = "wilcox", slot = "data", @@ -37,6 +38,9 @@ FindAllMarkers( \item{features}{Genes to test. Default is to use all genes} +\item{group.by}{Regroup cells into a different identity class prior to +performing differential expression (see example); \code{"ident"} to use Idents} + \item{logfc.threshold}{Limit testing to genes which show, on average, at least X-fold difference (log-scale) between the two groups of cells. Default is 0.1 Increasing logfc.threshold speeds up the function, but can miss weaker signals. diff --git a/man/FindMarkers.Rd b/man/FindMarkers.Rd old mode 100644 new mode 100755 index 373d58190..4a437fc8f --- a/man/FindMarkers.Rd +++ b/man/FindMarkers.Rd @@ -231,7 +231,7 @@ use all other cells for comparison; if an object of class \code{phylo} or 'clustertree' is passed to \code{ident.1}, must pass a node to find markers for} \item{group.by}{Regroup cells into a different identity class prior to -performing differential expression (see example)} +performing differential expression (see example); \code{"ident"} to use Idents} \item{subset.ident}{Subset a particular identity class prior to regrouping. Only relevant if group.by is set (see example)} From 0e9b6ffb19701482c27ea00d2711640dd433ccdb Mon Sep 17 00:00:00 2001 From: rharao <18608184+rharao@users.noreply.github.com> Date: Fri, 13 Dec 2024 12:13:29 -0500 Subject: [PATCH 04/11] stray file mode change --- man/FindAllMarkers.Rd | 0 man/FindMarkers.Rd | 0 2 files changed, 0 insertions(+), 0 deletions(-) mode change 100755 => 100644 man/FindAllMarkers.Rd mode change 100755 => 100644 man/FindMarkers.Rd diff --git a/man/FindAllMarkers.Rd b/man/FindAllMarkers.Rd old mode 100755 new mode 100644 diff --git a/man/FindMarkers.Rd b/man/FindMarkers.Rd old mode 100755 new mode 100644 From 44e500bbfb670ac9f03fd880101f9f25163f0ff5 Mon Sep 17 00:00:00 2001 From: rharao <18608184+rharao@users.noreply.github.com> Date: Mon, 16 Dec 2024 09:19:48 -0500 Subject: [PATCH 05/11] Ensure condition length 1 --- R/differential_expression.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/differential_expression.R b/R/differential_expression.R index 8d67c8393..486171e4f 100644 --- a/R/differential_expression.R +++ b/R/differential_expression.R @@ -76,7 +76,7 @@ FindAllMarkers <- function( return.thresh <- 0.7 } if (is.null(x = node)) { - if (!is.null(x = group.by) && group.by != "ident") { + if (!is.null(x = group.by) && !identical(x = group.by, y = "ident")) { if (length(x = group.by) == 1 && ! group.by %in% colnames(x = object@meta.data)) { stop("'", group.by, "' not found in object metadata") } From ffd4a20beec5d506eeba71dd41f9828bd1aab013 Mon Sep 17 00:00:00 2001 From: rharao <18608184+rharao@users.noreply.github.com> Date: Mon, 16 Dec 2024 09:26:46 -0500 Subject: [PATCH 06/11] ensure condition length 1 --- R/differential_expression.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/differential_expression.R b/R/differential_expression.R index 486171e4f..778fb0431 100644 --- a/R/differential_expression.R +++ b/R/differential_expression.R @@ -926,7 +926,7 @@ FindMarkers.Seurat <- function( reduction = NULL, ... ) { - if (!is.null(x = group.by) && group.by != "ident") { + if (!is.null(x = group.by) && !identical(x = group.by, y = "ident")) { if (!is.null(x = subset.ident)) { object <- subset(x = object, idents = subset.ident) } From 51654e54938019e952247ca890e96c97783ad668 Mon Sep 17 00:00:00 2001 From: David Collins Date: Thu, 19 Dec 2024 17:38:22 -0500 Subject: [PATCH 07/11] Move FindAllMarkers calls inside test_that --- tests/testthat/test_differential_expression.R | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/tests/testthat/test_differential_expression.R b/tests/testthat/test_differential_expression.R index f0c685670..ce21a6659 100644 --- a/tests/testthat/test_differential_expression.R +++ b/tests/testthat/test_differential_expression.R @@ -366,12 +366,13 @@ test_that("BPCells FindMarkers gives same results", { # Tests for FindAllMarkers # ------------------------------------------------------------------------------- -results <- suppressMessages(suppressWarnings(FindAllMarkers(object = pbmc_small,pseudocount.use=1))) -results.clr <- suppressMessages(suppressWarnings(FindAllMarkers(object = clr.obj, pseudocount.use=1))) -results.sct <- suppressMessages(suppressWarnings(FindAllMarkers(object = sct.obj, pseudocount.use=1, vst.flavor = "v1"))) -results.pseudo <- suppressMessages(suppressWarnings(FindAllMarkers(object = pbmc_small, pseudocount.use = 0.1))) test_that("FindAllMarkers works as expected", { + results <- suppressMessages(suppressWarnings(FindAllMarkers(object = pbmc_small, pseudocount.use = 1))) + results.clr <- suppressMessages(suppressWarnings(FindAllMarkers(object = clr.obj, pseudocount.use = 1))) + results.sct <- suppressMessages(suppressWarnings(FindAllMarkers(object = sct.obj, pseudocount.use = 1, vst.flavor = "v1"))) + results.pseudo <- suppressMessages(suppressWarnings(FindAllMarkers(object = pbmc_small, pseudocount.use = 0.1))) + expect_equal(colnames(x = results), c("p_val", "avg_log2FC", "pct.1", "pct.2", "p_val_adj", "cluster", "gene")) expect_equal(results[1, "p_val"], 9.572778e-13, tolerance = 1e-18) expect_equal(results[1, "avg_log2FC"], -6.030507, tolerance = 1e-6) From 1ae260bcdda2056cc97d84f592a0ecdec6f36f01 Mon Sep 17 00:00:00 2001 From: David Collins Date: Thu, 19 Dec 2024 17:59:49 -0500 Subject: [PATCH 08/11] Add test case for FindMarkers group.by --- tests/testthat/test_differential_expression.R | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tests/testthat/test_differential_expression.R b/tests/testthat/test_differential_expression.R index ce21a6659..060d485e1 100644 --- a/tests/testthat/test_differential_expression.R +++ b/tests/testthat/test_differential_expression.R @@ -368,10 +368,14 @@ test_that("BPCells FindMarkers gives same results", { # ------------------------------------------------------------------------------- test_that("FindAllMarkers works as expected", { + pbmc_copy <- pbmc_small + Idents(pbmc_copy) <- "orig.ident" + results <- suppressMessages(suppressWarnings(FindAllMarkers(object = pbmc_small, pseudocount.use = 1))) results.clr <- suppressMessages(suppressWarnings(FindAllMarkers(object = clr.obj, pseudocount.use = 1))) results.sct <- suppressMessages(suppressWarnings(FindAllMarkers(object = sct.obj, pseudocount.use = 1, vst.flavor = "v1"))) results.pseudo <- suppressMessages(suppressWarnings(FindAllMarkers(object = pbmc_small, pseudocount.use = 0.1))) + results.gb <- suppressMessages(suppressWarnings(FindAllMarkers(object = pbmc_copy, pseudocount.use = 1, group.by = "RNA_snn_res.1"))) expect_equal(colnames(x = results), c("p_val", "avg_log2FC", "pct.1", "pct.2", "p_val_adj", "cluster", "gene")) expect_equal(results[1, "p_val"], 9.572778e-13, tolerance = 1e-18) @@ -408,6 +412,10 @@ test_that("FindAllMarkers works as expected", { expect_equal(results.pseudo[1, "p_val_adj"], 2.201739e-10, tolerance = 1e-15) expect_equal(nrow(x = results.pseudo), 222) expect_equal(rownames(results.pseudo)[1], "HLA-DPB1") + + # Setting `group.by` the group by parameter is equivalent + # to setting the object's `Idents` before running `FindAllMarkers`. + expect_equal(results.gb, results) }) From f93380a057b36490f2101c3fc21058dcc544fb35 Mon Sep 17 00:00:00 2001 From: David Collins Date: Thu, 19 Dec 2024 18:16:37 -0500 Subject: [PATCH 09/11] Raise warning when `node` and `group.by` are both set --- R/differential_expression.R | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/R/differential_expression.R b/R/differential_expression.R index 778fb0431..6e45e8cfe 100644 --- a/R/differential_expression.R +++ b/R/differential_expression.R @@ -87,6 +87,14 @@ FindAllMarkers <- function( if (!PackageCheck('ape', error = FALSE)) { stop(cluster.ape, call. = FALSE) } + if (!is.null(group.by)) { + warning( + paste0( + "The `group.by` parameter for `FindAllMarkers` ", + "is ignored when `node` is set." + ) + ) + } tree <- Tool(object = object, slot = 'BuildClusterTree') if (is.null(x = tree)) { stop("Please run 'BuildClusterTree' before finding markers on nodes") From 8723dcc4d4c83fd50c41819df8875eaef0891c81 Mon Sep 17 00:00:00 2001 From: David Collins Date: Fri, 20 Dec 2024 12:53:04 -0500 Subject: [PATCH 10/11] Update changelog --- NEWS.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/NEWS.md b/NEWS.md index f385e8135..abfd8dbd8 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,6 +1,8 @@ # Unreleased ## Changes +- Added `group.by` parameter to `FindAllMarkers`, allowing users to regroup their data using a non-default identity class prior to performing differential expression ([#9550](https://github.com/satijalab/seurat/pull/9550)) +#' performing differential expression (see example); \code{"ident"} to use Idents - Added `image.type` parameter to `Read10X_Image` enabling `VisiumV1` instances to be populated instead of instances of the default `VisiumV2` class ([#9556](https://github.com/satijalab/seurat/pull/9556)) - Fixed `IntegrateLayers` to respect the `dims.to.integrate` parameter. - Added `stroke.size` parameter to `DimPlot` ([#8180](https://github.com/satijalab/seurat/pull/8180)) From 9c9a859b13c5a125fa6b9d329f281cbbfd5f2eae Mon Sep 17 00:00:00 2001 From: David Collins Date: Fri, 20 Dec 2024 12:53:22 -0500 Subject: [PATCH 11/11] Bump version --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 2cdac3190..bdf90336f 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,5 +1,5 @@ Package: Seurat -Version: 5.1.0.9017 +Version: 5.1.0.9018 Title: Tools for Single Cell Genomics Description: A toolkit for quality control, analysis, and exploration of single cell RNA sequencing data. 'Seurat' aims to enable users to identify and interpret sources of heterogeneity from single cell transcriptomic measurements, and to integrate diverse types of single cell data. See Satija R, Farrell J, Gennert D, et al (2015) , Macosko E, Basu A, Satija R, et al (2015) , Stuart T, Butler A, et al (2019) , and Hao, Hao, et al (2020) for more details. Authors@R: c(