From 18fa40be09d0543c3849ef8e9f5b6a6444738539 Mon Sep 17 00:00:00 2001 From: Nico Trummer Date: Sun, 14 Apr 2024 00:54:07 +0200 Subject: [PATCH 01/12] Implement FAISS GPU support --- scHPL/faissKNeighbors.py | 6 +++++- scHPL/learn.py | 26 +++++++++++++++++--------- scHPL/train.py | 18 +++++++++++------- scHPL/utils.py | 2 +- 4 files changed, 34 insertions(+), 18 deletions(-) diff --git a/scHPL/faissKNeighbors.py b/scHPL/faissKNeighbors.py index 6273abe..a73c3a5 100644 --- a/scHPL/faissKNeighbors.py +++ b/scHPL/faissKNeighbors.py @@ -9,13 +9,17 @@ import numpy as np class FaissKNeighbors: - def __init__(self, k=50): + def __init__(self, k=50, gpu=None): self.index = None self.y = None self.k = k + self.gpu = gpu def fit(self, X, y): self.index = faiss.IndexFlatL2(X.shape[1]) + if self.gpu is not None: + res = faiss.StandardGpuResources() + self.index = faiss.index_cpu_to_gpu(res, self.gpu, self.index) self.index.add(X.astype(np.float32)) self.y = y diff --git a/scHPL/learn.py b/scHPL/learn.py index 1bef064..0ddf6d6 100644 --- a/scHPL/learn.py +++ b/scHPL/learn.py @@ -9,7 +9,7 @@ from anndata import AnnData from .train import train_tree -from .utils import TreeNode, create_tree, print_tree +from .utils import TreeNode, create_tree, print_tree as print_tree_func from .predict import predict_labels from .update import update_tree # from train import train_tree @@ -36,11 +36,13 @@ def learn_tree(data: AnnData, distkNN: int = 99, dimred: bool = False, useRE: bool = True, - FN: float = 0.5, + FN: float = 0.5, rej_threshold: float = 0.5, match_threshold: float = 0.25, attach_missing: bool = False, - print_conf: bool = False + print_conf: bool = False, + print_tree: bool = True, + gpu: int | None = None ): '''Learn a classification tree based on multiple labeled datasets. @@ -93,6 +95,10 @@ def learn_tree(data: AnnData, If 'True' missing nodes are attached to the root node. print_conf: Boolean = False Whether to print the confusion matrices during the matching step. 
+ print_tree: Boolean = True + Whether to print the tree during the training. + gpu: int = None + GPU index to use for the Faiss library (only used when classifier='knn') Returns ------- @@ -120,8 +126,9 @@ def learn_tree(data: AnnData, labels_1 = labels[idx_1] data_1 = xx[idx_1] - print('Starting tree:') - print_tree(tree) + if print_tree: + print('Starting tree:') + print_tree_func(tree) for b in batch_order: @@ -137,13 +144,13 @@ def learn_tree(data: AnnData, if retrain: tree = train_tree(data_1, labels_1, tree, classifier, dimred, useRE, FN, n_neighbors, dynamic_neighbors, - distkNN) + distkNN, gpu=gpu) else: retrain = True tree_2 = train_tree(data_2, labels_2, tree_2, classifier, dimred, useRE, FN, n_neighbors, dynamic_neighbors, - distkNN) + distkNN, gpu=gpu) # Predict labels other dataset labels_2_pred,_ = predict_labels(data_2, tree, threshold=rej_threshold) @@ -160,8 +167,9 @@ def learn_tree(data: AnnData, missing_pop.extend(mis_pop) - print('\nUpdated tree:') - print_tree(tree, np.unique(labels_2)) + if print_tree: + print('\nUpdated tree:') + print_tree_func(tree, np.unique(labels_2)) #concatenate the two datasets data_1 = np.concatenate((data_1, data_2), axis = 0) diff --git a/scHPL/train.py b/scHPL/train.py index 40ef7ff..a43970a 100644 --- a/scHPL/train.py +++ b/scHPL/train.py @@ -34,7 +34,8 @@ def train_tree(data, FN: float = 0.5, n_neighbors: int = 50, dynamic_neighbors: bool = True, - distkNN: int = 99): + distkNN: int = 99, + gpu=None): '''Train a hierarchical classifier. Parameters @@ -66,6 +67,8 @@ def train_tree(data, cell and it's closest neighbor of the training set. 
Threshold is set to the distkNN's percentile of distances within the training set + gpu: int | None = None + GPU index to use for the Faiss library (only used when classifier='knn') Returns @@ -129,7 +132,7 @@ def train_tree(data, except: None _,_ = _train_parentnode(data, labels_train, tree[0], n_neighbors, - dynamic_neighbors, distkNN) + dynamic_neighbors, distkNN, gpu=gpu) else: for n in tree[0].descendants: _ = _train_node(data, labels, n, classifier, dimred, numgenes) @@ -175,7 +178,7 @@ def _train_node(data, labels, n, classifier, dimred, numgenes): return group -def _train_parentnode(data, labels, n, n_neighbors, dynamic_neighbors, distkNN): +def _train_parentnode(data, labels, n, n_neighbors, dynamic_neighbors, distkNN, gpu=None): '''Train a knn classifier. In contrast to the linear svm and oc svm, this is trained for each parent node instead of each child node @@ -187,6 +190,7 @@ def _train_parentnode(data, labels, n, n_neighbors, dynamic_neighbors, distkNN): classifier: which classifier to use dimred: dimensionality reduction numgenes: number of genes in the training data + gpu: GPU index to use for the Faiss library (only used when classifier='knn') Return ------ @@ -203,7 +207,7 @@ def _train_parentnode(data, labels, n, n_neighbors, dynamic_neighbors, distkNN): for j in n.descendants: group_new, labels_new = _train_parentnode(data, labels, j, n_neighbors, dynamic_neighbors, - distkNN) + distkNN, gpu=gpu) group[np.where(group_new == 1)[0]] = 1 labels[np.where(group_new == 1)[0]] = labels_new[np.where(group_new == 1)[0]] if n.name != None: @@ -211,7 +215,7 @@ def _train_parentnode(data, labels, n, n_neighbors, dynamic_neighbors, distkNN): if len(n.descendants) == 1: group[np.squeeze(np.isin(labels, n.name))] = 1 # train_knn - _train_knn(data,labels,group,n,n_neighbors,dynamic_neighbors,distkNN) + _train_knn(data,labels,group,n,n_neighbors,dynamic_neighbors,distkNN,gpu=gpu) # rename all group == 1 to node.name group[np.squeeze(np.isin(labels, n.name))] = 1 
labels[group==1] = n.name[0] @@ -271,7 +275,7 @@ def _train_svm(data, labels, group, n): n.set_classifier(clf) #save classifier to the node -def _train_knn(data, labels, group, n, n_neighbors, dynamic_neighbors, distkNN): +def _train_knn(data, labels, group, n, n_neighbors, dynamic_neighbors, distkNN, gpu=None): '''Train a linear svm and attach to the node Parameters: @@ -300,7 +304,7 @@ def _train_knn(data, labels, group, n, n_neighbors, dynamic_neighbors, distkNN): try: import faiss from .faissKNeighbors import FaissKNeighbors - clf = FaissKNeighbors(k=k) + clf = FaissKNeighbors(k=k, gpu=gpu) clf.fit(data_knn, labels_knn) #print('Using FAISS library') diff --git a/scHPL/utils.py b/scHPL/utils.py index eecd3b0..cf9f3d1 100644 --- a/scHPL/utils.py +++ b/scHPL/utils.py @@ -53,7 +53,7 @@ def set_classifier(self, classifier): """ Add a classifier to the node. """ - self.classifier = copy.deepcopy(classifier) + self.classifier = classifier def get_classifier(self): return self.classifier From a59e895fee5c80da04a56a0bf176e90d402f6885 Mon Sep 17 00:00:00 2001 From: Nico Trummer Date: Sun, 14 Apr 2024 11:02:14 +0200 Subject: [PATCH 02/12] Version bump --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 7205552..40895a3 100644 --- a/setup.py +++ b/setup.py @@ -14,7 +14,7 @@ setuptools.setup( name="scHPL", - version="1.0.3", + version="1.0.4", author="Lieke Michielsen", author_email="l.c.m.michielsen@tudelft.nl", description="Hierarchical progressive learning pipeline for single-cell RNA-sequencing datasets", From 1dc16b7288ba9503ff088f903594fb3e65845048 Mon Sep 17 00:00:00 2001 From: Nico Trummer Date: Sun, 14 Apr 2024 11:18:01 +0200 Subject: [PATCH 03/12] Fix gpu parameter typing bug --- scHPL/learn.py | 6 +++--- scHPL/train.py | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/scHPL/learn.py b/scHPL/learn.py index 0ddf6d6..cb9042b 100644 --- a/scHPL/learn.py +++ b/scHPL/learn.py @@ -18,9 +18,9 @@ 
# from update import update_tree try: - from typing import Literal + from typing import Literal, Optional except ImportError: - from typing_extensions import Literal + from typing_extensions import Literal, Optional def learn_tree(data: AnnData, @@ -42,7 +42,7 @@ def learn_tree(data: AnnData, attach_missing: bool = False, print_conf: bool = False, print_tree: bool = True, - gpu: int | None = None + gpu: Optional[int] = None ): '''Learn a classification tree based on multiple labeled datasets. diff --git a/scHPL/train.py b/scHPL/train.py index a43970a..a8746a1 100644 --- a/scHPL/train.py +++ b/scHPL/train.py @@ -19,9 +19,9 @@ import copy as cp try: - from typing import Literal + from typing import Literal, Optional except ImportError: - from typing_extensions import Literal + from typing_extensions import Literal, Optional @ignore_warnings(category=ConvergenceWarning) @@ -35,7 +35,7 @@ def train_tree(data, n_neighbors: int = 50, dynamic_neighbors: bool = True, distkNN: int = 99, - gpu=None): + gpu: Optional[int] = None): '''Train a hierarchical classifier. 
Parameters From e75ff9f7cb5d084543d2bd061d817e6cd047cb18 Mon Sep 17 00:00:00 2001 From: Nico Trummer Date: Sun, 14 Apr 2024 14:21:18 +0200 Subject: [PATCH 04/12] Loosen pandas dependency --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 40895a3..8f0328b 100644 --- a/setup.py +++ b/setup.py @@ -27,7 +27,7 @@ "numpy>=1.19.2", "scipy>=1.5.2", "scikit-learn>=0.23.2", - "pandas>=1.1.2,<2.0.0", + "pandas>=1.1.2,<=2.2.2", "newick~=1.0.0", "anndata>=0.7.4", "matplotlib>=3.3.1", From 64a2f6a508382b865d1d0e8da92a5f758d331eed Mon Sep 17 00:00:00 2001 From: Nico Trummer Date: Tue, 16 Apr 2024 08:22:22 +0200 Subject: [PATCH 05/12] Add option for compressed FAISS index --- scHPL/faissKNeighbors.py | 8 +++++++- scHPL/learn.py | 9 ++++++--- scHPL/train.py | 19 ++++++++++++------- 3 files changed, 25 insertions(+), 11 deletions(-) diff --git a/scHPL/faissKNeighbors.py b/scHPL/faissKNeighbors.py index a73c3a5..dd6a9d6 100644 --- a/scHPL/faissKNeighbors.py +++ b/scHPL/faissKNeighbors.py @@ -9,17 +9,23 @@ import numpy as np class FaissKNeighbors: - def __init__(self, k=50, gpu=None): + def __init__(self, k=50, gpu=None, compress=False): self.index = None self.y = None self.k = k self.gpu = gpu + self.compress = compress def fit(self, X, y): self.index = faiss.IndexFlatL2(X.shape[1]) + + if self.compress: + self.index = faiss.IndexIVFPQ(self.index, X.shape[1], 100, 16, 8) + if self.gpu is not None: res = faiss.StandardGpuResources() self.index = faiss.index_cpu_to_gpu(res, self.gpu, self.index) + self.index.add(X.astype(np.float32)) self.y = y diff --git a/scHPL/learn.py b/scHPL/learn.py index cb9042b..967f2d2 100644 --- a/scHPL/learn.py +++ b/scHPL/learn.py @@ -42,7 +42,8 @@ def learn_tree(data: AnnData, attach_missing: bool = False, print_conf: bool = False, print_tree: bool = True, - gpu: Optional[int] = None + gpu: Optional[int] = None, + compress: bool = False ): '''Learn a classification tree based on multiple labeled 
datasets. @@ -99,6 +100,8 @@ def learn_tree(data: AnnData, Whether to print the tree during the training. gpu: int = None GPU index to use for the Faiss library (only used when classifier='knn') + compress: Boolean = False + If 'True', the Faiss index is compressed (only used when classifier='knn') Returns ------- @@ -144,13 +147,13 @@ def learn_tree(data: AnnData, if retrain: tree = train_tree(data_1, labels_1, tree, classifier, dimred, useRE, FN, n_neighbors, dynamic_neighbors, - distkNN, gpu=gpu) + distkNN, gpu=gpu, compress=compress) else: retrain = True tree_2 = train_tree(data_2, labels_2, tree_2, classifier, dimred, useRE, FN, n_neighbors, dynamic_neighbors, - distkNN, gpu=gpu) + distkNN, gpu=gpu, compress=compress) # Predict labels other dataset labels_2_pred,_ = predict_labels(data_2, tree, threshold=rej_threshold) diff --git a/scHPL/train.py b/scHPL/train.py index a8746a1..594c9f0 100644 --- a/scHPL/train.py +++ b/scHPL/train.py @@ -35,7 +35,8 @@ def train_tree(data, n_neighbors: int = 50, dynamic_neighbors: bool = True, distkNN: int = 99, - gpu: Optional[int] = None): + gpu: Optional[int] = None, + compress: bool = False): '''Train a hierarchical classifier. Parameters @@ -69,6 +70,9 @@ def train_tree(data, set gpu: int | None = None GPU index to use for the Faiss library (only used when classifier='knn') + compress: bool = False + If 'True', the Faiss library will use a compressed index for the kNN + classifier. 
Returns @@ -132,7 +136,7 @@ def train_tree(data, except: None _,_ = _train_parentnode(data, labels_train, tree[0], n_neighbors, - dynamic_neighbors, distkNN, gpu=gpu) + dynamic_neighbors, distkNN, gpu=gpu, compress=compress) else: for n in tree[0].descendants: _ = _train_node(data, labels, n, classifier, dimred, numgenes) @@ -178,7 +182,7 @@ def _train_node(data, labels, n, classifier, dimred, numgenes): return group -def _train_parentnode(data, labels, n, n_neighbors, dynamic_neighbors, distkNN, gpu=None): +def _train_parentnode(data, labels, n, n_neighbors, dynamic_neighbors, distkNN, gpu=None, compress=False): '''Train a knn classifier. In contrast to the linear svm and oc svm, this is trained for each parent node instead of each child node @@ -191,6 +195,7 @@ def _train_parentnode(data, labels, n, n_neighbors, dynamic_neighbors, distkNN, dimred: dimensionality reduction numgenes: number of genes in the training data gpu: GPU index to use for the Faiss library (only used when classifier='knn') + compress: If 'True', the Faiss library will use a compressed index for the kNN classifier. 
Return ------ @@ -207,7 +212,7 @@ def _train_parentnode(data, labels, n, n_neighbors, dynamic_neighbors, distkNN, for j in n.descendants: group_new, labels_new = _train_parentnode(data, labels, j, n_neighbors, dynamic_neighbors, - distkNN, gpu=gpu) + distkNN, gpu=gpu, compress=compress) group[np.where(group_new == 1)[0]] = 1 labels[np.where(group_new == 1)[0]] = labels_new[np.where(group_new == 1)[0]] if n.name != None: @@ -215,7 +220,7 @@ def _train_parentnode(data, labels, n, n_neighbors, dynamic_neighbors, distkNN, if len(n.descendants) == 1: group[np.squeeze(np.isin(labels, n.name))] = 1 # train_knn - _train_knn(data,labels,group,n,n_neighbors,dynamic_neighbors,distkNN,gpu=gpu) + _train_knn(data,labels,group,n,n_neighbors,dynamic_neighbors,distkNN,gpu=gpu, compress=compress) # rename all group == 1 to node.name group[np.squeeze(np.isin(labels, n.name))] = 1 labels[group==1] = n.name[0] @@ -275,7 +280,7 @@ def _train_svm(data, labels, group, n): n.set_classifier(clf) #save classifier to the node -def _train_knn(data, labels, group, n, n_neighbors, dynamic_neighbors, distkNN, gpu=None): +def _train_knn(data, labels, group, n, n_neighbors, dynamic_neighbors, distkNN, gpu=None, compress=False): '''Train a linear svm and attach to the node Parameters: @@ -304,7 +309,7 @@ def _train_knn(data, labels, group, n, n_neighbors, dynamic_neighbors, distkNN, try: import faiss from .faissKNeighbors import FaissKNeighbors - clf = FaissKNeighbors(k=k, gpu=gpu) + clf = FaissKNeighbors(k=k, gpu=gpu, compress=compress) clf.fit(data_knn, labels_knn) #print('Using FAISS library') From a209a9b8c28c5d4930f396fc1072755b5fea7744 Mon Sep 17 00:00:00 2001 From: Nico Trummer Date: Tue, 16 Apr 2024 16:48:06 +0200 Subject: [PATCH 06/12] Add GPU support to prediction --- scHPL/faissKNeighbors.py | 7 +++++-- scHPL/predict.py | 8 +++++++- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/scHPL/faissKNeighbors.py b/scHPL/faissKNeighbors.py index dd6a9d6..2e52364 100644 --- 
a/scHPL/faissKNeighbors.py +++ b/scHPL/faissKNeighbors.py @@ -23,12 +23,15 @@ def fit(self, X, y): self.index = faiss.IndexIVFPQ(self.index, X.shape[1], 100, 16, 8) if self.gpu is not None: - res = faiss.StandardGpuResources() - self.index = faiss.index_cpu_to_gpu(res, self.gpu, self.index) + self.to_gpu(self.gpu) self.index.add(X.astype(np.float32)) self.y = y + def to_gpu(self, gpu): + res = faiss.StandardGpuResources() + self.index = faiss.index_cpu_to_gpu(res, gpu, self.index) + def predict(self, X): distances, indices = self.index.search(X.astype(np.float32), k=self.k) votes = self.y[indices] diff --git a/scHPL/predict.py b/scHPL/predict.py index 590f7ec..4e45b45 100644 --- a/scHPL/predict.py +++ b/scHPL/predict.py @@ -11,7 +11,8 @@ def predict_labels(testdata, tree: TreeNode, - threshold: float = 0.5): + threshold: float = 0.5, + gpu=None): '''Use the trained tree to predict the labels of a new dataset. Parameters @@ -51,6 +52,11 @@ def predict_labels(testdata, pca, pcs = tree[0].get_pca() testdata = pca.transform(testdata) dimred = True + + if (tree[0].classifier and + tree[0].classifier.__class__ == FaissKNeighbors and + gpu is not None): + tree[0].classifier.to_gpu(gpu) labels_all = [] prob_all = np.zeros((np.shape(testdata)[0],1)) From 94d3a5f6243d93ca048d46c0bfb2fe85a0025125 Mon Sep 17 00:00:00 2001 From: Nico Trummer Date: Tue, 16 Apr 2024 16:50:11 +0200 Subject: [PATCH 07/12] Add tqdm support to prediction --- scHPL/predict.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/scHPL/predict.py b/scHPL/predict.py index 4e45b45..3ba174a 100644 --- a/scHPL/predict.py +++ b/scHPL/predict.py @@ -7,6 +7,12 @@ import numpy as np from numpy import linalg as LA from .utils import TreeNode +from .faissKNeighbors import FaissKNeighbors +try: + from tqdm import tqdm +except ImportError: + def tqdm(x): + return x # from utils import TreeNode def predict_labels(testdata, @@ -60,7 +66,7 @@ def predict_labels(testdata, labels_all = [] prob_all = 
np.zeros((np.shape(testdata)[0],1)) - for idx, testpoint in enumerate(testdata): + for idx, testpoint in enumerate(tqdm(testdata)): if useRE: if rej_RE[idx]: labels_all.append('Rejected (RE)') From 1150261611d9cd5d6c9294613a4a4d317203d873 Mon Sep 17 00:00:00 2001 From: Nico Trummer Date: Tue, 16 Apr 2024 17:02:47 +0200 Subject: [PATCH 08/12] Revert "Version bump" This reverts commit a59e895fee5c80da04a56a0bf176e90d402f6885. --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 8f0328b..dfde56c 100644 --- a/setup.py +++ b/setup.py @@ -14,7 +14,7 @@ setuptools.setup( name="scHPL", - version="1.0.4", + version="1.0.3", author="Lieke Michielsen", author_email="l.c.m.michielsen@tudelft.nl", description="Hierarchical progressive learning pipeline for single-cell RNA-sequencing datasets", From 2f988d9c762680cb6e39b97b8d6d9a2b9dddacb5 Mon Sep 17 00:00:00 2001 From: Nico Trummer Date: Tue, 16 Apr 2024 17:03:08 +0200 Subject: [PATCH 09/12] Revert "Loosen pandas dependency" This reverts commit e75ff9f7cb5d084543d2bd061d817e6cd047cb18. 
--- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index dfde56c..7205552 100644 --- a/setup.py +++ b/setup.py @@ -27,7 +27,7 @@ "numpy>=1.19.2", "scipy>=1.5.2", "scikit-learn>=0.23.2", - "pandas>=1.1.2,<=2.2.2", + "pandas>=1.1.2,<2.0.0", "newick~=1.0.0", "anndata>=0.7.4", "matplotlib>=3.3.1", From bbb0f49baf27c76e4c65c1aba6f7d0b98763b721 Mon Sep 17 00:00:00 2001 From: Nico Trummer Date: Tue, 16 Apr 2024 17:30:19 +0200 Subject: [PATCH 10/12] Remove option for disabling tree printing --- scHPL/learn.py | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/scHPL/learn.py b/scHPL/learn.py index 967f2d2..8df860b 100644 --- a/scHPL/learn.py +++ b/scHPL/learn.py @@ -9,7 +9,7 @@ from anndata import AnnData from .train import train_tree -from .utils import TreeNode, create_tree, print_tree as print_tree_func +from .utils import TreeNode, create_tree, print_tree from .predict import predict_labels from .update import update_tree # from train import train_tree @@ -41,7 +41,6 @@ def learn_tree(data: AnnData, match_threshold: float = 0.25, attach_missing: bool = False, print_conf: bool = False, - print_tree: bool = True, gpu: Optional[int] = None, compress: bool = False ): @@ -96,8 +95,6 @@ def learn_tree(data: AnnData, If 'True' missing nodes are attached to the root node. print_conf: Boolean = False Whether to print the confusion matrices during the matching step. - print_tree: Boolean = True - Whether to print the tree during the training. 
gpu: int = None GPU index to use for the Faiss library (only used when classifier='knn') compress: Boolean = False @@ -129,9 +126,8 @@ def learn_tree(data: AnnData, labels_1 = labels[idx_1] data_1 = xx[idx_1] - if print_tree: - print('Starting tree:') - print_tree_func(tree) + print('Starting tree:') + print_tree(tree) for b in batch_order: @@ -170,9 +166,8 @@ def learn_tree(data: AnnData, missing_pop.extend(mis_pop) - if print_tree: - print('\nUpdated tree:') - print_tree_func(tree, np.unique(labels_2)) + print('\nUpdated tree:') + print_tree(tree, np.unique(labels_2)) #concatenate the two datasets data_1 = np.concatenate((data_1, data_2), axis = 0) From 6d931a9cbf8dd8e9d96b89b84b661b61f039c43c Mon Sep 17 00:00:00 2001 From: Nico Trummer Date: Tue, 16 Apr 2024 18:42:37 +0200 Subject: [PATCH 11/12] Fix problems with underscores in bold text --- scHPL/utils.py | 23 +++++++++-------------- 1 file changed, 9 insertions(+), 14 deletions(-) diff --git a/scHPL/utils.py b/scHPL/utils.py index cf9f3d1..f6d52cb 100644 --- a/scHPL/utils.py +++ b/scHPL/utils.py @@ -371,20 +371,15 @@ def _print_node(node, hor, ver_steps, fig, new_nodes): x, y = ([np.max([0.05, hor-0.045]), hor], [ver, ver]) line = mlines.Line2D(x,y, lw=1) fig.add_artist(line) - - # Add textbox - if np.isin(node.name[0], new_nodes): - txt = r"$\bf{" + node.name[0] + "}$" - else: - txt = node.name[0] - - for n in node.name: - if(n != node.name[0]): - if np.isin(n, new_nodes): - txt = txt + ' & ' + r"$\bf{" + n + "}$" - else: - txt = txt + ' & ' + n - + + def format_node(name): + if np.isin(name, new_nodes): + return r"$\bf{" + name.replace("_", r"\_") + "}$" + else: + return name + + txt = " & ".join([format_node(n) for n in node.name]) + fig.text(hor,ver, txt, size=10, ha = 'left', va='center', bbox = dict(boxstyle='round', fc='w', ec='k')) From ac6652ddcde596e2f0da2f22d0eedb53e2d98a3b Mon Sep 17 00:00:00 2001 From: Nico Trummer Date: Wed, 17 Apr 2024 11:44:12 +0200 Subject: [PATCH 12/12] Revert "Add option for
compressed FAISS index" This reverts commit 64a2f6a508382b865d1d0e8da92a5f758d331eed. --- scHPL/faissKNeighbors.py | 7 +------ scHPL/learn.py | 9 +++------ scHPL/train.py | 19 +++++++------------ 3 files changed, 11 insertions(+), 24 deletions(-) diff --git a/scHPL/faissKNeighbors.py b/scHPL/faissKNeighbors.py index 2e52364..0ac24ae 100644 --- a/scHPL/faissKNeighbors.py +++ b/scHPL/faissKNeighbors.py @@ -9,19 +9,14 @@ import numpy as np class FaissKNeighbors: - def __init__(self, k=50, gpu=None, compress=False): + def __init__(self, k=50, gpu=None): self.index = None self.y = None self.k = k self.gpu = gpu - self.compress = compress def fit(self, X, y): self.index = faiss.IndexFlatL2(X.shape[1]) - - if self.compress: - self.index = faiss.IndexIVFPQ(self.index, X.shape[1], 100, 16, 8) - if self.gpu is not None: self.to_gpu(self.gpu) diff --git a/scHPL/learn.py b/scHPL/learn.py index 8df860b..b8c7da0 100644 --- a/scHPL/learn.py +++ b/scHPL/learn.py @@ -41,8 +41,7 @@ def learn_tree(data: AnnData, match_threshold: float = 0.25, attach_missing: bool = False, print_conf: bool = False, - gpu: Optional[int] = None, - compress: bool = False + gpu: Optional[int] = None ): '''Learn a classification tree based on multiple labeled datasets. @@ -97,8 +96,6 @@ def learn_tree(data: AnnData, Whether to print the confusion matrices during the matching step. 
gpu: int = None GPU index to use for the Faiss library (only used when classifier='knn') - compress: Boolean = False - If 'True', the Faiss index is compressed (only used when classifier='knn') Returns ------- @@ -143,13 +140,13 @@ def learn_tree(data: AnnData, if retrain: tree = train_tree(data_1, labels_1, tree, classifier, dimred, useRE, FN, n_neighbors, dynamic_neighbors, - distkNN, gpu=gpu, compress=compress) + distkNN, gpu=gpu) else: retrain = True tree_2 = train_tree(data_2, labels_2, tree_2, classifier, dimred, useRE, FN, n_neighbors, dynamic_neighbors, - distkNN, gpu=gpu, compress=compress) + distkNN, gpu=gpu) # Predict labels other dataset labels_2_pred,_ = predict_labels(data_2, tree, threshold=rej_threshold) diff --git a/scHPL/train.py b/scHPL/train.py index 594c9f0..a8746a1 100644 --- a/scHPL/train.py +++ b/scHPL/train.py @@ -35,8 +35,7 @@ def train_tree(data, n_neighbors: int = 50, dynamic_neighbors: bool = True, distkNN: int = 99, - gpu: Optional[int] = None, - compress: bool = False): + gpu: Optional[int] = None): '''Train a hierarchical classifier. Parameters @@ -70,9 +69,6 @@ def train_tree(data, set gpu: int | None = None GPU index to use for the Faiss library (only used when classifier='knn') - compress: bool = False - If 'True', the Faiss library will use a compressed index for the kNN - classifier. Returns @@ -136,7 +132,7 @@ def train_tree(data, except: None _,_ = _train_parentnode(data, labels_train, tree[0], n_neighbors, - dynamic_neighbors, distkNN, gpu=gpu, compress=compress) + dynamic_neighbors, distkNN, gpu=gpu) else: for n in tree[0].descendants: _ = _train_node(data, labels, n, classifier, dimred, numgenes) @@ -182,7 +178,7 @@ def _train_node(data, labels, n, classifier, dimred, numgenes): return group -def _train_parentnode(data, labels, n, n_neighbors, dynamic_neighbors, distkNN, gpu=None, compress=False): +def _train_parentnode(data, labels, n, n_neighbors, dynamic_neighbors, distkNN, gpu=None): '''Train a knn classifier. 
In contrast to the linear svm and oc svm, this is trained for each parent node instead of each child node @@ -195,7 +191,6 @@ def _train_parentnode(data, labels, n, n_neighbors, dynamic_neighbors, distkNN, dimred: dimensionality reduction numgenes: number of genes in the training data gpu: GPU index to use for the Faiss library (only used when classifier='knn') - compress: If 'True', the Faiss library will use a compressed index for the kNN classifier. Return ------ @@ -212,7 +207,7 @@ def _train_parentnode(data, labels, n, n_neighbors, dynamic_neighbors, distkNN, for j in n.descendants: group_new, labels_new = _train_parentnode(data, labels, j, n_neighbors, dynamic_neighbors, - distkNN, gpu=gpu, compress=compress) + distkNN, gpu=gpu) group[np.where(group_new == 1)[0]] = 1 labels[np.where(group_new == 1)[0]] = labels_new[np.where(group_new == 1)[0]] if n.name != None: @@ -220,7 +215,7 @@ def _train_parentnode(data, labels, n, n_neighbors, dynamic_neighbors, distkNN, if len(n.descendants) == 1: group[np.squeeze(np.isin(labels, n.name))] = 1 # train_knn - _train_knn(data,labels,group,n,n_neighbors,dynamic_neighbors,distkNN,gpu=gpu, compress=compress) + _train_knn(data,labels,group,n,n_neighbors,dynamic_neighbors,distkNN,gpu=gpu) # rename all group == 1 to node.name group[np.squeeze(np.isin(labels, n.name))] = 1 labels[group==1] = n.name[0] @@ -280,7 +275,7 @@ def _train_svm(data, labels, group, n): n.set_classifier(clf) #save classifier to the node -def _train_knn(data, labels, group, n, n_neighbors, dynamic_neighbors, distkNN, gpu=None, compress=False): +def _train_knn(data, labels, group, n, n_neighbors, dynamic_neighbors, distkNN, gpu=None): '''Train a linear svm and attach to the node Parameters: @@ -309,7 +304,7 @@ def _train_knn(data, labels, group, n, n_neighbors, dynamic_neighbors, distkNN, try: import faiss from .faissKNeighbors import FaissKNeighbors - clf = FaissKNeighbors(k=k, gpu=gpu, compress=compress) + clf = FaissKNeighbors(k=k, gpu=gpu) 
clf.fit(data_knn, labels_knn) #print('Using FAISS library')