From fc45b4031ad833a6655caf555e59c2a0248f4a15 Mon Sep 17 00:00:00 2001 From: Keith James Date: Thu, 2 Jan 2025 17:07:35 +0000 Subject: [PATCH 1/2] Add support for data objects having metadata for >1 sample --- src/npg_irods/common.py | 49 ++++++++++++++--------------- tests/conftest.py | 57 +++++++++++++++++++++++++++------ tests/test_common.py | 70 ++++++++++++++++++++++++++++++++--------- tests/test_utilities.py | 24 +++++++------- 4 files changed, 138 insertions(+), 62 deletions(-) diff --git a/src/npg_irods/common.py b/src/npg_irods/common.py index d4eea22..30bc90a 100644 --- a/src/npg_irods/common.py +++ b/src/npg_irods/common.py @@ -401,37 +401,34 @@ def ensure_secondary_metadata_updated( True if updated. """ secondary_metadata, acl = [], [] - sample, study = None, None + samples, study = [], None if item.metadata(TrackedStudy.ID): - study_id = item.avu(TrackedStudy.ID).value - study = find_study_by_study_id(mlwh_session, study_id) + study = find_study_by_study_id(mlwh_session, item.avu(TrackedStudy.ID).value) - if item.metadata(TrackedSample.ID): - sample_id = item.avu(TrackedSample.ID).value - sample = find_sample_by_sample_id(mlwh_session, sample_id) - - if study is None and sample is None: - raise ValueError( - f"Failed to update metadata on {item}. No {TrackedStudy.ID} or " - f"{TrackedSample.ID} or metadata are present" - ) + for avu in item.metadata(TrackedSample.ID): + sample = find_sample_by_sample_id(mlwh_session, avu.value) + samples.append(sample) zone = infer_zone(item) - if sample is None: - secondary_metadata.extend(make_study_metadata(study)) - acl.extend(make_study_acl(study, zone=zone)) - elif study is None: - secondary_metadata.extend(make_sample_metadata(sample)) - log.warn( - f"Item has no {TrackedStudy.ID} metadata to allow permissions to be set. " - "It will be set as unreadable by default.", - path=item, - ) - else: - secondary_metadata.extend(make_study_metadata(study)) - secondary_metadata.extend(make_sample_metadata(sample)) - acl.extend(make_sample_acl(sample, study, zone=zone)) + + match study, samples: + case None, []: + raise ValueError( + f"Failed to update metadata on {item}. No {TrackedStudy.ID} or " + f"{TrackedSample.ID} or metadata are present" + ) + case study, []: + secondary_metadata.extend(make_study_metadata(study)) + acl.extend(make_study_acl(study, zone=zone)) + case None, [*sm]: + for s in sm: + secondary_metadata.extend(make_sample_metadata(s)) + case study, [*sm]: + secondary_metadata.extend(make_study_metadata(study)) + for s in sm: + secondary_metadata.extend(make_sample_metadata(s)) + acl.extend(make_sample_acl(s, study, zone=zone)) secondary_metadata = sorted(set(secondary_metadata)) acl = sorted(set(acl)) diff --git a/tests/conftest.py b/tests/conftest.py index 2871e1b..171ac82 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -127,7 +127,7 @@ def mlwh_session() -> Session: drop_database(engine.url) -def initialize_mlwh_study_and_sample(session: Session): +def initialize_mlwh_study_and_samples(session: Session): """ Insert ML warehouse test data for synthetic runs. @@ -148,7 +148,7 @@ def initialize_mlwh_study_and_sample(session: Session): ) session.add(study_x) - sample_y = Sample( + sample_1 = Sample( id_lims="LIMS_01", common_name="common_name1", donor_id="donor_id1", @@ -160,15 +160,29 @@ def initialize_mlwh_study_and_sample(session: Session): uuid_sample_lims="82429892-0ab6-11ee-b5ba-fa163eac3af7", **default_timestamps, ) - session.add(sample_y) + session.add(sample_1) + + sample_2 = Sample( + id_lims="LIMS_01", + common_name="common_name2", + donor_id="donor_id2", + id_sample_lims="id_sample_lims2", + accession_number="Test Accession", + name="name2", + public_name="public_name2", + supplier_name="supplier_name2", + **default_timestamps, + ) + session.add(sample_2) session.commit() @pytest.fixture(scope="function") -def simple_study_and_sample_mlwh(mlwh_session) -> Session: - """An ML warehouse database fixture populated with test study records.""" - initialize_mlwh_study_and_sample(mlwh_session) +def study_and_samples_mlwh(mlwh_session) -> Session: + """An ML warehouse database fixture populated with a single test study and some + sample records.""" + initialize_mlwh_study_and_samples(mlwh_session) yield mlwh_session @@ -229,11 +243,11 @@ def annotated_data_object(tmp_path): @pytest.fixture(scope="function") -def simple_study_and_sample_data_object(tmp_path, irods_groups): +def single_study_and_single_sample_data_object(tmp_path, irods_groups): """A fixture providing a collection containing a single data object with sample - and study metadata""" + and study metadata.""" root_path = PurePath( - "/testZone/home/irods/test/simple_study_and_sample_data_object" + "/testZone/home/irods/test/single_study_and_single_sample_data_object" ) rods_path = add_rods_path(root_path, tmp_path) @@ -251,6 +265,31 @@ def simple_study_and_sample_data_object(tmp_path, irods_groups): remove_rods_path(rods_path) +@pytest.fixture(scope="function") +def single_study_and_multi_sample_data_object(tmp_path, irods_groups): + """A fixture providing a collection containing a single data object with multiple + sample and single study metadata.""" + root_path = PurePath( + "/testZone/home/irods/test/single_study_and_multi_sample_data_object" + ) + rods_path = add_rods_path(root_path, tmp_path) + + obj_path = rods_path / "lorem.txt" + iput("./tests/data/simple/data_object/lorem.txt", obj_path) + + obj = DataObject(obj_path) + obj.add_metadata( + AVU(TrackedSample.ID, "id_sample_lims1"), + AVU(TrackedSample.ID, "id_sample_lims2"), + AVU(TrackedStudy.ID, "1000"), + ) + + try: + yield obj_path + finally: + remove_rods_path(rods_path) + + @pytest.fixture(scope="function") def consent_withdrawn_gapi_data_object(tmp_path): root_path = PurePath("/testZone/home/irods/test/consent_withdrawn_gapi_data_object") diff --git a/tests/test_common.py b/tests/test_common.py index 5c4c5b5..9f43777 100644 --- a/tests/test_common.py +++ b/tests/test_common.py @@ -23,11 +23,11 @@ class TestCommonFunctions: - @m.context("When an iRODS object has both study and sample ID metadata") + @m.context("When an iRODS object has both one study and one sample ID metadata") @m.context("When it wants both study and sample metadata enhanced") @m.it("Updates study and sample metadata from MLWH") def test_ensure_secondary_metadata_updated( - self, simple_study_and_sample_data_object, simple_study_and_sample_mlwh + self, single_study_and_single_sample_data_object, study_and_samples_mlwh ): sample_id = "id_sample_lims1" study_id = "1000" @@ -48,8 +48,48 @@ def test_ensure_secondary_metadata_updated( AVU(TrackedSample.UUID, "82429892-0ab6-11ee-b5ba-fa163eac3af7"), ] - obj = DataObject(simple_study_and_sample_data_object) - assert ensure_secondary_metadata_updated(obj, simple_study_and_sample_mlwh) + obj = DataObject(single_study_and_single_sample_data_object) + assert ensure_secondary_metadata_updated(obj, study_and_samples_mlwh) + + for avu in expected_avus: + assert avu in obj.metadata() + + @m.context("When an iRODS object has one study ID and multiple sample ID metadata") + @m.context("When it wants both study and sample metadata enhanced") + @m.it("Updates study and sample metadata from MLWH") + def test_ensure_secondary_metadata_updated_multiple_samples( + self, + single_study_and_multi_sample_data_object, + study_and_samples_mlwh, + ): + sample_id1 = "id_sample_lims1" + sample_id2 = "id_sample_lims2" + study_id = "1000" + + expected_avus = [ + AVU(TrackedStudy.ID, study_id), + AVU(TrackedStudy.NAME, "Study X"), + AVU(TrackedStudy.TITLE, "Test Study Title"), + AVU(TrackedStudy.ACCESSION_NUMBER, "Test Accession"), + AVU(TrackedSample.ID, sample_id1), + AVU(TrackedSample.ACCESSION_NUMBER, "Test Accession"), + AVU(TrackedSample.COMMON_NAME, "common_name1"), + AVU(TrackedSample.DONOR_ID, "donor_id1"), + AVU(TrackedSample.NAME, "name1"), + AVU(TrackedSample.PUBLIC_NAME, "public_name1"), + AVU(TrackedSample.SUPPLIER_NAME, "supplier_name1"), + AVU(TrackedSample.ID, sample_id2), + AVU(TrackedSample.ACCESSION_NUMBER, "Test Accession"), + AVU(TrackedSample.COMMON_NAME, "common_name2"), + AVU(TrackedSample.DONOR_ID, "donor_id2"), + AVU(TrackedSample.NAME, "name2"), + AVU(TrackedSample.PUBLIC_NAME, "public_name2"), + AVU(TrackedSample.SUPPLIER_NAME, "supplier_name2"), + ] + + obj = DataObject(single_study_and_multi_sample_data_object) + assert ensure_secondary_metadata_updated(obj, study_and_samples_mlwh) + for avu in expected_avus: assert avu in obj.metadata() @@ -57,45 +97,45 @@ def test_ensure_secondary_metadata_updated( @m.context("When it wants both study and sample metadata enhanced") @m.it("Updates permissions according to the study") def test_ensure_secondary_metadata_permissions_updated( - self, simple_study_and_sample_data_object, simple_study_and_sample_mlwh + self, single_study_and_single_sample_data_object, study_and_samples_mlwh ): zone = "testZone" - obj = DataObject(simple_study_and_sample_data_object) + obj = DataObject(single_study_and_single_sample_data_object) assert obj.permissions() == [AC("irods", perm=Permission.OWN, zone=zone)] - assert ensure_secondary_metadata_updated(obj, simple_study_and_sample_mlwh) + assert ensure_secondary_metadata_updated(obj, study_and_samples_mlwh) assert obj.permissions() == [ AC("irods", perm=Permission.OWN, zone=zone), AC("ss_1000", perm=Permission.READ, zone=zone), ] - @m.context("When an iRODS object has study ID, but not sample ID metadata") + @m.context("When an iRODS object has one study ID, but no sample ID metadata") @m.context("When it wants study metadata enhanced") @m.it("Updates permissions according to the study") def test_ensure_secondary_metadata_permissions_updated_no_sample( - self, simple_study_and_sample_data_object, simple_study_and_sample_mlwh + self, single_study_and_single_sample_data_object, study_and_samples_mlwh ): zone = "testZone" - obj = DataObject(simple_study_and_sample_data_object) + obj = DataObject(single_study_and_single_sample_data_object) obj.remove_metadata(AVU(TrackedSample.ID, "id_sample_lims1")) assert obj.permissions() == [AC("irods", perm=Permission.OWN, zone=zone)] - assert ensure_secondary_metadata_updated(obj, simple_study_and_sample_mlwh) + assert ensure_secondary_metadata_updated(obj, study_and_samples_mlwh) assert obj.permissions() == [ AC("irods", perm=Permission.OWN, zone=zone), AC("ss_1000", perm=Permission.READ, zone=zone), ] - @m.context("When an iRODS object has no study ID") + @m.context("When an iRODS object has no study ID and one sample ID metadata") @m.context("When it wants study metadata enhanced") @m.it("Removes access permissions") def test_ensure_secondary_metadata_permissions_updated_no_study( - self, simple_study_and_sample_data_object, simple_study_and_sample_mlwh + self, single_study_and_single_sample_data_object, study_and_samples_mlwh ): zone = "testZone" - obj = DataObject(simple_study_and_sample_data_object) + obj = DataObject(single_study_and_single_sample_data_object) obj.remove_metadata(AVU(TrackedStudy.ID, "1000")) obj.add_permissions( AC("ss_1000", perm=Permission.READ, zone=zone), @@ -105,5 +145,5 @@ def test_ensure_secondary_metadata_permissions_updated_no_study( AC("irods", perm=Permission.OWN, zone=zone), AC("ss_1000", perm=Permission.READ, zone=zone), ] - assert ensure_secondary_metadata_updated(obj, simple_study_and_sample_mlwh) + assert ensure_secondary_metadata_updated(obj, study_and_samples_mlwh) assert obj.permissions() == [AC("irods", perm=Permission.OWN, zone=zone)] diff --git a/tests/test_utilities.py b/tests/test_utilities.py index ed6bd29..71e11ab 100644 --- a/tests/test_utilities.py +++ b/tests/test_utilities.py @@ -645,13 +645,13 @@ class TestMetadataUtilities: @m.context("When it wants both study and sample metadata enhanced") @m.it("Counts successes correctly") def test_update_secondary_metadata( - self, simple_study_and_sample_data_object, simple_study_and_sample_mlwh + self, single_study_and_single_sample_data_object, study_and_samples_mlwh ): - obj_path = simple_study_and_sample_data_object.as_posix() + obj_path = single_study_and_single_sample_data_object.as_posix() with StringIO("\n".join([obj_path])) as reader: with StringIO() as writer: - engine = simple_study_and_sample_mlwh.get_bind() + engine = study_and_samples_mlwh.get_bind() num_processed, num_updated, num_errors = update_secondary_metadata( reader, writer, engine, print_update=True ) @@ -678,7 +678,7 @@ def test_update_secondary_metadata( AVU(TrackedSample.UUID, "82429892-0ab6-11ee-b5ba-fa163eac3af7"), ] - obj = DataObject(simple_study_and_sample_data_object) + obj = DataObject(single_study_and_single_sample_data_object) for avu in expected_avus: assert avu in obj.metadata() @@ -686,16 +686,16 @@ def test_update_secondary_metadata( @m.context("When it wants study metadata enhanced") @m.it("Counts successes correctly") def test_update_secondary_metadata_with_just_study( - self, simple_study_and_sample_data_object, simple_study_and_sample_mlwh + self, single_study_and_single_sample_data_object, study_and_samples_mlwh ): - obj_path = simple_study_and_sample_data_object.as_posix() - obj = DataObject(simple_study_and_sample_data_object) + obj_path = single_study_and_single_sample_data_object.as_posix() + obj = DataObject(single_study_and_single_sample_data_object) assert obj.remove_metadata(AVU(TrackedSample.ID, "id_sample_lims1")) == 1 with StringIO("\n".join([obj_path])) as reader: with StringIO() as writer: - engine = simple_study_and_sample_mlwh.get_bind() + engine = study_and_samples_mlwh.get_bind() num_processed, num_updated, num_errors = update_secondary_metadata( reader, writer, engine, print_update=True ) @@ -720,16 +720,16 @@ def test_update_secondary_metadata_with_just_study( @m.context("When it wants sample metadata enhanced") @m.it("Counts successes correctly") def test_update_general_metadata_with_just_study_err( - self, simple_study_and_sample_data_object, simple_study_and_sample_mlwh + self, single_study_and_single_sample_data_object, study_and_samples_mlwh ): - obj_path = simple_study_and_sample_data_object.as_posix() - obj = DataObject(simple_study_and_sample_data_object) + obj_path = single_study_and_single_sample_data_object.as_posix() + obj = DataObject(single_study_and_single_sample_data_object) assert obj.remove_metadata(AVU(TrackedStudy.ID, "1000")) == 1 with StringIO("\n".join([obj_path])) as reader: with StringIO() as writer: - engine = simple_study_and_sample_mlwh.get_bind() + engine = study_and_samples_mlwh.get_bind() num_processed, num_updated, num_errors = update_secondary_metadata( reader, writer, engine, print_update=True ) From 1a1b1e189f9726f817c7c35a7ec9ea8b01654fa0 Mon Sep 17 00:00:00 2001 From: Keith James Date: Wed, 8 Jan 2025 11:15:19 +0000 Subject: [PATCH 2/2] Add missing sample UUID in test fixture Also change the UUID stem to avoid potential clashes where the same stem is used to generate UUIDs elsewhere in the fixtures. --- tests/conftest.py | 3 ++- tests/test_common.py | 2 +- tests/test_utilities.py | 2 +- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index 171ac82..0318688 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -157,7 +157,7 @@ def initialize_mlwh_study_and_samples(session: Session): name="name1", public_name="public_name1", supplier_name="supplier_name1", - uuid_sample_lims="82429892-0ab6-11ee-b5ba-fa163eac3af7", + uuid_sample_lims="82429892-0ab6-11ee-b5ba-fa163eac3ag7", **default_timestamps, ) session.add(sample_1) @@ -171,6 +171,7 @@ def initialize_mlwh_study_and_samples(session: Session): name="name2", public_name="public_name2", supplier_name="supplier_name2", + uuid_sample_lims="82429892-0ab6-11ee-b5ba-fa163eac3ag8", **default_timestamps, ) session.add(sample_2) diff --git a/tests/test_common.py b/tests/test_common.py index 9f43777..6eb9b67 100644 --- a/tests/test_common.py +++ b/tests/test_common.py @@ -45,7 +45,7 @@ def test_ensure_secondary_metadata_updated( AVU(TrackedSample.NAME, "name1"), AVU(TrackedSample.PUBLIC_NAME, "public_name1"), AVU(TrackedSample.SUPPLIER_NAME, "supplier_name1"), - AVU(TrackedSample.UUID, "82429892-0ab6-11ee-b5ba-fa163eac3af7"), + AVU(TrackedSample.UUID, "82429892-0ab6-11ee-b5ba-fa163eac3ag7"), ] obj = DataObject(single_study_and_single_sample_data_object) diff --git a/tests/test_utilities.py b/tests/test_utilities.py index 71e11ab..e6ac162 100644 --- a/tests/test_utilities.py +++ b/tests/test_utilities.py @@ -675,7 +675,7 @@ def test_update_secondary_metadata( AVU(TrackedSample.NAME, "name1"), AVU(TrackedSample.PUBLIC_NAME, "public_name1"), AVU(TrackedSample.SUPPLIER_NAME, "supplier_name1"), - AVU(TrackedSample.UUID, "82429892-0ab6-11ee-b5ba-fa163eac3af7"), + AVU(TrackedSample.UUID, "82429892-0ab6-11ee-b5ba-fa163eac3ag7"), ] obj = DataObject(single_study_and_single_sample_data_object)