From eeeaf7b4cc28f42db874d3f7cd77ac966c2586b0 Mon Sep 17 00:00:00 2001 From: John Davis Date: Tue, 28 Nov 2023 14:46:16 -0500 Subject: [PATCH] Use distinct only for dataset states/extensions summary --- lib/galaxy/model/__init__.py | 33 ++++++++------------------- test/unit/data/test_galaxy_mapping.py | 14 ++++++------ 2 files changed, 16 insertions(+), 31 deletions(-) diff --git a/lib/galaxy/model/__init__.py b/lib/galaxy/model/__init__.py index 0c91431f4e3a..60794fbdcbe3 100644 --- a/lib/galaxy/model/__init__.py +++ b/lib/galaxy/model/__init__.py @@ -6203,9 +6203,7 @@ def attribute_columns(column_collection, attributes, nesting_level=None): if entity == DatasetCollectionElement: q = q.filter(entity.id == dce.c.id) - q = q.distinct().order_by(*order_by_columns) - # With DISTINCT, all columns that appear in the ORDER BY clause must appear in the SELECT clause. - q = q.add_columns(*order_by_columns) + q = q.order_by(*order_by_columns) return q @property @@ -6214,11 +6212,15 @@ def dataset_states_and_extensions_summary(self): stmt = self._build_nested_collection_attributes_stmt( hda_attributes=("extension",), dataset_attributes=("state",) ) + # With DISTINCT, all columns that appear in the ORDER BY clause must appear in the SELECT clause. + stmt = stmt.add_columns(*stmt._order_by_clauses) + stmt = stmt.distinct() + tuples = object_session(self).execute(stmt) extensions = set() states = set() - for extension, state, *_ in tuples: + for extension, state, *_ in tuples: # we discard the added columns from the order-by clause states.add(state) extensions.add(extension) @@ -6234,8 +6236,7 @@ def has_deferred_data(self): # TODO: Optimize by just querying without returning the states... stmt = self._build_nested_collection_attributes_stmt(dataset_attributes=("state",)) tuples = object_session(self).execute(stmt) - - for state, *_ in tuples: + for (state,) in tuples: if state == Dataset.states.DEFERRED: has_deferred_data = True break @@ -6283,28 +6284,13 @@ def dataset_action_tuples(self): if not hasattr(self, "_dataset_action_tuples"): stmt = self._build_nested_collection_attributes_stmt(dataset_permission_attributes=("action", "role_id")) tuples = object_session(self).execute(stmt) - self._dataset_action_tuples = [(action, role_id) for action, role_id, *_ in tuples if action is not None] + self._dataset_action_tuples = [(action, role_id) for action, role_id in tuples if action is not None] return self._dataset_action_tuples @property def element_identifiers_extensions_paths_and_metadata_files( self, ) -> List[List[Any]]: - def find_identifiers(row): - # Assume row has this structure: [id1, id2, ..., idn, extension, model, anything]; - # model is an instance of a Dataset or a HistoryDatasetAssociation; - # extension is a string that always preceeds the model; - # we look for the first model, then pick the preceeding items minus the extention. - identifiers = [] - i = 0 - while i + 1 < len(row): - item, next_item = row[i], row[i + 1] - if isinstance(next_item, HistoryDatasetAssociation) or isinstance(next_item, Dataset): - break - identifiers.append(item) - i += 1 - return tuple(identifiers) - results = [] if object_session(self): stmt = self._build_nested_collection_attributes_stmt( @@ -6315,7 +6301,7 @@ def find_identifiers(row): tuples = object_session(self).execute(stmt) # element_identifiers, extension, path for row in tuples: - result = [find_identifiers(row), row.extension, row.Dataset.get_file_name()] + result = [row[:-3], row.extension, row.Dataset.get_file_name()] hda = row.HistoryDatasetAssociation result.append(hda.get_metadata_file_paths_and_extensions()) results.append(result) @@ -6468,7 +6454,6 @@ def replace_failed_elements(self, replacements): return_entities=[DatasetCollectionElement], hda_attributes=["id"] ) tuples = object_session(self).execute(stmt).all() - tuples = [(element, id) for element, id, *_ in tuples] hda_id_to_element = dict(tuples) for failed, replacement in replacements.items(): element = hda_id_to_element.get(failed.id) diff --git a/test/unit/data/test_galaxy_mapping.py b/test/unit/data/test_galaxy_mapping.py index e8b27e140c8f..4a0f4a16b663 100644 --- a/test/unit/data/test_galaxy_mapping.py +++ b/test/unit/data/test_galaxy_mapping.py @@ -414,25 +414,25 @@ def test_nested_collection_attributes(self): ) result = self.model.session.execute(stmt).all() assert [(r._fields) for r in result] == [ - ("element_identifier_0", "element_identifier_1", "extension", "state", "element_index", "element_index_1"), - ("element_identifier_0", "element_identifier_1", "extension", "state", "element_index", "element_index_1"), + ("element_identifier_0", "element_identifier_1", "extension", "state"), + ("element_identifier_0", "element_identifier_1", "extension", "state"), ] stmt = c2._build_nested_collection_attributes_stmt( element_attributes=("element_identifier",), hda_attributes=("extension",), dataset_attributes=("state",) ) result = self.model.session.execute(stmt).all() - assert result == [("inner_list", "forward", "bam", "new", 0, 0), ("inner_list", "reverse", "txt", "new", 0, 1)] + assert result == [("inner_list", "forward", "bam", "new"), ("inner_list", "reverse", "txt", "new")] stmt = c2._build_nested_collection_attributes_stmt(return_entities=(model.HistoryDatasetAssociation,)) result = self.model.session.execute(stmt).all() - assert result == [(d1, 0, 0), (d2, 0, 1)] + assert result == [(d1,), (d2,)] stmt = c2._build_nested_collection_attributes_stmt( return_entities=(model.HistoryDatasetAssociation, model.Dataset) ) result = self.model.session.execute(stmt).all() - assert result == [(d1, d1.dataset, 0, 0), (d2, d2.dataset, 0, 1)] + assert result == [(d1, d1.dataset), (d2, d2.dataset)] # Assert properties that use _get_nested_collection_attributes return correct content assert c2.dataset_instances == [d1, d2] assert c2.dataset_elements == [dce1, dce2] @@ -455,8 +455,8 @@ def test_nested_collection_attributes(self): stmt = c4._build_nested_collection_attributes_stmt(element_attributes=("element_identifier",)) result = self.model.session.execute(stmt).all() assert result == [ - ("outer_list", "inner_list", "forward", 0, 0, 0), - ("outer_list", "inner_list", "reverse", 0, 0, 1), + ("outer_list", "inner_list", "forward"), + ("outer_list", "inner_list", "reverse"), ] assert c4.dataset_elements == [dce1, dce2]