Skip to content

Commit

Permalink
Update extraction to properly filter the cell id list, also accounting for dict lists
Browse files (browse the repository at this point in the history)
  • Loading branch information
sophiamaedler committed Jan 19, 2024
1 parent 8052a8d commit ab6baf7
Showing 1 changed file with 18 additions and 7 deletions.
25 changes: 18 additions & 7 deletions src/sparcscore/pipeline/extraction.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -61,7 +61,7 @@ def __init__(self,
self.input_segmentation_path = os.path.join(base_directory, self.DEFAULT_SEGMENTATION_DIR, self.DEFAULT_SEGMENTATION_FILE)

#get path to filtered classes
if os.path.isfile(os.path.join(base_directory, self.DEFAULT_SEGMENTATION_DIR, "needs_filtering.txt")):
if os.path.isfile(os.path.join(base_directory, self.DEFAULT_SEGMENTATION_DIR, "needs_additional_filtering.txt")):
try:
self.classes_path = os.path.join(base_directory, self.DEFAULT_SEGMENTATION_DIR, self.DEFAULT_FILTERED_CLASSES_FILE)
self.log(f"Loading classes from filtered classes path: {self.classes_path}")
Expand Down Expand Up @@ -584,21 +584,32 @@ def process(self, input_segmentation_path, filtered_classes_path = None):
px_centers, _cell_ids = self._calculate_centers(hdf_labels)

#get classes to extract
class_list = self.get_classes(filtered_classes_path)
class_list = set(class_list)
class_list = self.get_classes(filtered_classes_path)

if type(class_list[0]) == str:
lookup_dict = {x.split(":")[0]:x.split(":")[1] for x in class_list}
nuclei_ids = set(list(lookup_dict.keys()))
else:
nuclei_ids = set(class_list)

#filter cell ids found using center into those that we actually want to extract
_cell_ids = list(_cell_ids)
filter = [x in class_list for x in _cell_ids]
filter = [x in nuclei_ids for x in _cell_ids]

px_centers = np.array(list(compress(px_centers, filter)))
_cell_ids = list(compress(_cell_ids, filter))

#update number of classes
#generate new class list
if type(class_list[0]) == str:
class_list = [f"{x}:{lookup_dict[str(x)]}" for x in _cell_ids]
del lookup_dict
else:
class_list = _cell_ids

self.log(f"Number of classes found in filtered classes list {len(class_list)} vs number of classes for which centers were calculated {len(_cell_ids)}")
class_list = _cell_ids
del _cell_ids, filter
del _cell_ids, filter, nuclei_ids

#update number of classes
self.num_classes = len(class_list)

# setup cache
Expand Down

0 comments on commit ab6baf7

Please sign in to comment.