Update extraction to properly filter the cell id list, also accounting for dict lists
MannLabs · Jan 19, 2024 · ab6baf7 · ab6baf7
1 parent 8052a8d
commit ab6baf7
Showing 1 changed file with 18 additions and 7 deletions.
diff --git a/src/sparcscore/pipeline/extraction.py b/src/sparcscore/pipeline/extraction.py
@@ -61,7 +61,7 @@ def __init__(self,
         self.input_segmentation_path = os.path.join(base_directory, self.DEFAULT_SEGMENTATION_DIR, self.DEFAULT_SEGMENTATION_FILE)
 
         #get path to filtered classes
-        if os.path.isfile(os.path.join(base_directory, self.DEFAULT_SEGMENTATION_DIR, "needs_filtering.txt")):
+        if os.path.isfile(os.path.join(base_directory, self.DEFAULT_SEGMENTATION_DIR, "needs_additional_filtering.txt")):
             try:
                 self.classes_path = os.path.join(base_directory, self.DEFAULT_SEGMENTATION_DIR, self.DEFAULT_FILTERED_CLASSES_FILE)
                 self.log(f"Loading classes from filtered classes path: {self.classes_path}")
@@ -584,21 +584,32 @@ def process(self, input_segmentation_path, filtered_classes_path = None):
         px_centers, _cell_ids = self._calculate_centers(hdf_labels)
 
         #get classes to extract
-        class_list = self.get_classes(filtered_classes_path)    
-        class_list = set(class_list)
+        class_list = self.get_classes(filtered_classes_path)
+
+        if type(class_list[0]) == str:
+            lookup_dict = {x.split(":")[0]:x.split(":")[1] for x in class_list}
+            nuclei_ids = set(list(lookup_dict.keys()))
+        else:
+            nuclei_ids = set(class_list)
 
         #filter cell ids found using center into those that we actually want to extract
         _cell_ids = list(_cell_ids)
-        filter = [x in class_list for x in _cell_ids]
+        filter = [x in nuclei_ids for x in _cell_ids]
 
         px_centers = np.array(list(compress(px_centers, filter)))
         _cell_ids = list(compress(_cell_ids, filter))
 
-        #update number of classes
+        #generate new class list 
+        if type(class_list[0]) == str:
+            class_list = [f"{x}:{lookup_dict[str(x)]}" for x in _cell_ids]
+            del lookup_dict
+        else:
+            class_list = _cell_ids
+
         self.log(f"Number of classes found in filtered classes list {len(class_list)} vs number of classes for which centers were calculated {len(_cell_ids)}")
-        class_list = _cell_ids
-        del _cell_ids, filter
+        del _cell_ids, filter, nuclei_ids
 
+        #update number of classes
         self.num_classes = len(class_list)
 
         # setup cache