Skip to content

Commit

Permalink
update extraction to properly filter cell id list also accounting for…
Browse files (browse the repository at this point in the history)
… dict lists
  • Loading branch information
sophiamaedler committed Jan 19, 2024
1 parent 8052a8d commit 9063153
Showing 1 changed file with 17 additions and 6 deletions.
23 changes: 17 additions & 6 deletions src/sparcscore/pipeline/extraction.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -584,21 +584,32 @@ def process(self, input_segmentation_path, filtered_classes_path = None):
px_centers, _cell_ids = self._calculate_centers(hdf_labels)

#get classes to extract
class_list = self.get_classes(filtered_classes_path)
class_list = set(class_list)
class_list = self.get_classes(filtered_classes_path)

if type(class_list[0]) == str:
lookup_dict = {x.split(":")[0]:x.split(":")[1] for x in class_list}
nuclei_ids = set(list(lookup_dict.keys()))
else:
nuclei_ids = set(class_list)

#filter cell ids found using center into those that we actually want to extract
_cell_ids = list(_cell_ids)
filter = [x in class_list for x in _cell_ids]
filter = [x in nuclei_ids for x in _cell_ids]

px_centers = np.array(list(compress(px_centers, filter)))
_cell_ids = list(compress(_cell_ids, filter))

#update number of classes
#generate new class list
if type(class_list[0]) == str:
class_list = [f"{x}:{lookup_dict[str(x)]}" for x in _cell_ids]
del lookup_dict
else:
class_list = _cell_ids

self.log(f"Number of classes found in filtered classes list {len(class_list)} vs number of classes for which centers were calculated {len(_cell_ids)}")
class_list = _cell_ids
del _cell_ids, filter
del _cell_ids, filter, nuclei_ids

#update number of classes
self.num_classes = len(class_list)

# setup cache
Expand Down

0 comments on commit 9063153

Please sign in to comment.