From f36ce013d0d155392d000eaff45aa9dbb6c896ad Mon Sep 17 00:00:00 2001 From: Jura Pintar Date: Sat, 2 Dec 2023 19:36:17 -0500 Subject: [PATCH 1/6] Expose random_state in leiden feat(leiden): expose additional parameter 1) expose 'random_state' parameter to better align with Scanpy 2) pass it to cuGraph via 'culeiden' 3) save its value in '.uns["leiden"]["params"]' 4) describe it in docstring 5) make multiple small improvements to docstring N.b., the 'theta' parameter was not exposed because a bug in cuGraph (present at commit 1e446c4) makes it have no effect; this should be re-visited when cuGraph is updated. --- src/rapids_singlecell/tools/_clustering.py | 50 +++++++++++++++------- 1 file changed, 35 insertions(+), 15 deletions(-) diff --git a/src/rapids_singlecell/tools/_clustering.py b/src/rapids_singlecell/tools/_clustering.py index 375df41d..4ffee976 100644 --- a/src/rapids_singlecell/tools/_clustering.py +++ b/src/rapids_singlecell/tools/_clustering.py @@ -14,6 +14,7 @@ def leiden( adata: AnnData, resolution: float = 1.0, *, + random_state: Union[int, None] = 0, restrict_to: Optional[Tuple[str, Sequence[str]]] = None, key_added: str = "leiden", adjacency: Optional[sparse.spmatrix] = None, @@ -24,7 +25,12 @@ def leiden( copy: bool = False, ) -> Optional[AnnData]: """ - Performs Leiden Clustering using cuGraph + Performs Leiden clustering using cuGraph, which implements the method + described in: + + Traag, V.A., Waltman, L., & van Eck, N.J. (2019). From Louvain to + Leiden: guaranteeing well-connected communities. Sci. Rep., 9(1), 5233. + DOI: 10.1038/s41598-019-41695-z Parameters ---------- @@ -33,38 +39,46 @@ def leiden( resolution A parameter value controlling the coarseness of the clustering. - Higher values lead to more clusters. + (called gamma in the modularity formula). Higher values lead to + more clusters. + + random_state + Change the initialization of the optimization. Defaults to 0. 
restrict_to - Restrict the clustering to the categories within the key for sample - annotation, tuple needs to contain `(obs_key, list_of_categories)`. + Restrict the clustering to the categories within the key for + sample annotation, tuple needs to contain + `(obs_key, list_of_categories)`. key_added `adata.obs` key under which to add the cluster labels. adjacency - Sparse adjacency matrix of the graph, defaults to neighbors connectivities. + Sparse adjacency matrix of the graph, defaults to neighbors + connectivities. n_iterations - This controls the maximum number of levels/iterations of the Leiden algorithm. - When specified the algorithm will terminate after no more than the specified number of iterations. - No error occurs when the algorithm terminates early in this manner. + This controls the maximum number of levels/iterations of the + Leiden algorithm. When specified, the algorithm will terminate + after no more than the specified number of iterations. No error + occurs when the algorithm terminates early in this manner. use_weights - If `True`, edge weights from the graph are used in the computation - (placing more emphasis on stronger edges). + If `True`, edge weights from the graph are used in the + computation (placing more emphasis on stronger edges). neighbors_key - If not specified, `leiden` looks at `.obsp['connectivities']` for neighbors connectivities - If specified, `leiden` looks at `.obsp['neighbors_key_ connectivities']` for neighbors connectivities + If not specified, `leiden` looks at `.obsp['connectivities']` + for neighbors connectivities. If specified, `leiden` looks at + `.obsp[.uns[neighbors_key]['connectivities_key']]` for neighbors + connectivities. obsp Use .obsp[obsp] as adjacency. You can't specify both `obsp` and `neighbors_key` at the same time. copy - Whether to copy `adata` or modify it inplace. - + Whether to copy `adata` or modify it in place. 
""" # Adjacency graph from cugraph import Graph @@ -94,7 +108,12 @@ def leiden( g.from_cudf_adjlist(offsets, indices, weights) # Cluster - leiden_parts, _ = culeiden(g, resolution=resolution, max_iter=n_iterations) + leiden_parts, _ = culeiden( + g, + resolution=resolution, + random_state=random_state, + max_iter=n_iterations, + ) # Format output groups = ( @@ -119,6 +138,7 @@ def leiden( adata.uns["leiden"] = {} adata.uns["leiden"]["params"] = { "resolution": resolution, + "random_state": random_state, "n_iterations": n_iterations, } return adata if copy else None From 371139907b1d859e01ca1824360e88af6bac835c Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sun, 3 Dec 2023 01:29:03 +0000 Subject: [PATCH 2/6] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/rapids_singlecell/tools/_clustering.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/rapids_singlecell/tools/_clustering.py b/src/rapids_singlecell/tools/_clustering.py index 4ffee976..62121be1 100644 --- a/src/rapids_singlecell/tools/_clustering.py +++ b/src/rapids_singlecell/tools/_clustering.py @@ -64,11 +64,11 @@ def leiden( occurs when the algorithm terminates early in this manner. use_weights - If `True`, edge weights from the graph are used in the + If `True`, edge weights from the graph are used in the computation (placing more emphasis on stronger edges). neighbors_key - If not specified, `leiden` looks at `.obsp['connectivities']` + If not specified, `leiden` looks at `.obsp['connectivities']` for neighbors connectivities. If specified, `leiden` looks at `.obsp[.uns[neighbors_key]['connectivities_key']]` for neighbors connectivities. 
From 8945c92d47f8ea6f4cfb957332e6604aeaa7a75d Mon Sep 17 00:00:00 2001 From: Jura Pintar Date: Sat, 2 Dec 2023 20:39:45 -0500 Subject: [PATCH 3/6] Fix undefined type fix(leiden): fix undefined type Import 'Union' from 'typing' to fix error when defining 'random_state' (line 17) --- src/rapids_singlecell/tools/_clustering.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/rapids_singlecell/tools/_clustering.py b/src/rapids_singlecell/tools/_clustering.py index 62121be1..ad8c8a9d 100644 --- a/src/rapids_singlecell/tools/_clustering.py +++ b/src/rapids_singlecell/tools/_clustering.py @@ -1,4 +1,4 @@ -from typing import Optional, Sequence, Tuple +from typing import Optional, Sequence, Tuple, Union import cudf import numpy as np From f7026349d8b48df61bc0a80363984c3b7bf0f011 Mon Sep 17 00:00:00 2001 From: Severin Dicks <37635888+Intron7@users.noreply.github.com> Date: Tue, 12 Dec 2023 16:44:59 -0300 Subject: [PATCH 4/6] Update _clustering.py --- src/rapids_singlecell/tools/_clustering.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/rapids_singlecell/tools/_clustering.py b/src/rapids_singlecell/tools/_clustering.py index ad8c8a9d..3c510934 100644 --- a/src/rapids_singlecell/tools/_clustering.py +++ b/src/rapids_singlecell/tools/_clustering.py @@ -20,7 +20,7 @@ def leiden( adjacency: Optional[sparse.spmatrix] = None, n_iterations: int = 100, use_weights: bool = True, - neighbors_key: Optional[int] = None, + neighbors_key: Optional[str] = None, obsp: Optional[str] = None, copy: bool = False, ) -> Optional[AnnData]: From d6522f46ab4f4ee31fe57b624f83caefe6b0e07e Mon Sep 17 00:00:00 2001 From: Jura Pintar Date: Tue, 12 Dec 2023 18:27:45 -0500 Subject: [PATCH 5/6] Create 0.9.4.md Release note describing addition of `random_state` parameter to `leiden` --- docs/release-notes/0.9.4.md | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 docs/release-notes/0.9.4.md diff --git a/docs/release-notes/0.9.4.md 
b/docs/release-notes/0.9.4.md new file mode 100644 index 00000000..5e009f82 --- /dev/null +++ b/docs/release-notes/0.9.4.md @@ -0,0 +1,5 @@ +### 0.9.4 + +```{rubric} Features +``` +* {func}`~rapids_singlecell.tl.leiden` now provides a `random_state` parameter for initializing the optimization {pr}`102` {smaller}`J Pintar & S Dicks` From 97e740dda50d9829c4efbf8eafbb803157268866 Mon Sep 17 00:00:00 2001 From: Jura Pintar Date: Tue, 12 Dec 2023 18:51:16 -0500 Subject: [PATCH 6/6] Update index.md Include 0.9.4.md --- docs/release-notes/index.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/docs/release-notes/index.md b/docs/release-notes/index.md index 6fd9653f..4660b37d 100644 --- a/docs/release-notes/index.md +++ b/docs/release-notes/index.md @@ -3,6 +3,9 @@ # Release notes ## Version 0.9.0 +```{include} /release-notes/0.9.4.md +`````` + ```{include} /release-notes/0.9.3.md ``````