Skip to content

Commit

Permalink
Merge pull request #396 from kjsanger/feature/multi-sample-metadata
Browse files Browse the repository at this point in the history
Add support for data objects having metadata for >1 sample
  • Loading branch information
mgcam authored Jan 8, 2025
2 parents 36a8506 + 1a1b1e1 commit b2ddae7
Show file tree
Hide file tree
Showing 4 changed files with 142 additions and 65 deletions.
49 changes: 23 additions & 26 deletions src/npg_irods/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -401,37 +401,34 @@ def ensure_secondary_metadata_updated(
True if updated.
"""
secondary_metadata, acl = [], []
sample, study = None, None
samples, study = [], None

if item.metadata(TrackedStudy.ID):
study_id = item.avu(TrackedStudy.ID).value
study = find_study_by_study_id(mlwh_session, study_id)
study = find_study_by_study_id(mlwh_session, item.avu(TrackedStudy.ID).value)

if item.metadata(TrackedSample.ID):
sample_id = item.avu(TrackedSample.ID).value
sample = find_sample_by_sample_id(mlwh_session, sample_id)

if study is None and sample is None:
raise ValueError(
f"Failed to update metadata on {item}. No {TrackedStudy.ID} or "
f"{TrackedSample.ID} or metadata are present"
)
for avu in item.metadata(TrackedSample.ID):
sample = find_sample_by_sample_id(mlwh_session, avu.value)
samples.append(sample)

zone = infer_zone(item)
if sample is None:
secondary_metadata.extend(make_study_metadata(study))
acl.extend(make_study_acl(study, zone=zone))
elif study is None:
secondary_metadata.extend(make_sample_metadata(sample))
log.warn(
f"Item has no {TrackedStudy.ID} metadata to allow permissions to be set. "
"It will be set as unreadable by default.",
path=item,
)
else:
secondary_metadata.extend(make_study_metadata(study))
secondary_metadata.extend(make_sample_metadata(sample))
acl.extend(make_sample_acl(sample, study, zone=zone))

match study, samples:
case None, []:
raise ValueError(
f"Failed to update metadata on {item}. No {TrackedStudy.ID} or "
f"{TrackedSample.ID} or metadata are present"
)
case study, []:
secondary_metadata.extend(make_study_metadata(study))
acl.extend(make_study_acl(study, zone=zone))
case None, [*sm]:
for s in sm:
secondary_metadata.extend(make_sample_metadata(s))
case study, [*sm]:
secondary_metadata.extend(make_study_metadata(study))
for s in sm:
secondary_metadata.extend(make_sample_metadata(s))
acl.extend(make_sample_acl(s, study, zone=zone))

secondary_metadata = sorted(set(secondary_metadata))
acl = sorted(set(acl))
Expand Down
60 changes: 50 additions & 10 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,7 @@ def mlwh_session() -> Session:
drop_database(engine.url)


def initialize_mlwh_study_and_sample(session: Session):
def initialize_mlwh_study_and_samples(session: Session):
"""
Insert ML warehouse test data for synthetic runs.
Expand All @@ -148,7 +148,7 @@ def initialize_mlwh_study_and_sample(session: Session):
)
session.add(study_x)

sample_y = Sample(
sample_1 = Sample(
id_lims="LIMS_01",
common_name="common_name1",
donor_id="donor_id1",
Expand All @@ -157,18 +157,33 @@ def initialize_mlwh_study_and_sample(session: Session):
name="name1",
public_name="public_name1",
supplier_name="supplier_name1",
uuid_sample_lims="82429892-0ab6-11ee-b5ba-fa163eac3af7",
uuid_sample_lims="82429892-0ab6-11ee-b5ba-fa163eac3ag7",
**default_timestamps,
)
session.add(sample_y)
session.add(sample_1)

sample_2 = Sample(
id_lims="LIMS_01",
common_name="common_name2",
donor_id="donor_id2",
id_sample_lims="id_sample_lims2",
accession_number="Test Accession",
name="name2",
public_name="public_name2",
supplier_name="supplier_name2",
uuid_sample_lims="82429892-0ab6-11ee-b5ba-fa163eac3ag8",
**default_timestamps,
)
session.add(sample_2)

session.commit()


@pytest.fixture(scope="function")
def simple_study_and_sample_mlwh(mlwh_session) -> Session:
"""An ML warehouse database fixture populated with test study records."""
initialize_mlwh_study_and_sample(mlwh_session)
def study_and_samples_mlwh(mlwh_session) -> Session:
"""An ML warehouse database fixture populated with a single test study and some
sample records."""
initialize_mlwh_study_and_samples(mlwh_session)
yield mlwh_session


Expand Down Expand Up @@ -229,11 +244,11 @@ def annotated_data_object(tmp_path):


@pytest.fixture(scope="function")
def simple_study_and_sample_data_object(tmp_path, irods_groups):
def single_study_and_single_sample_data_object(tmp_path, irods_groups):
"""A fixture providing a collection containing a single data object with sample
and study metadata"""
and study metadata."""
root_path = PurePath(
"/testZone/home/irods/test/simple_study_and_sample_data_object"
"/testZone/home/irods/test/single_study_and_single_sample_data_object"
)
rods_path = add_rods_path(root_path, tmp_path)

Expand All @@ -251,6 +266,31 @@ def simple_study_and_sample_data_object(tmp_path, irods_groups):
remove_rods_path(rods_path)


@pytest.fixture(scope="function")
def single_study_and_multi_sample_data_object(tmp_path, irods_groups):
"""A fixture providing a collection containing a single data object with multiple
sample and single study metadata."""
root_path = PurePath(
"/testZone/home/irods/test/single_study_and_multi_sample_data_object"
)
rods_path = add_rods_path(root_path, tmp_path)

obj_path = rods_path / "lorem.txt"
iput("./tests/data/simple/data_object/lorem.txt", obj_path)

obj = DataObject(obj_path)
obj.add_metadata(
AVU(TrackedSample.ID, "id_sample_lims1"),
AVU(TrackedSample.ID, "id_sample_lims2"),
AVU(TrackedStudy.ID, "1000"),
)

try:
yield obj_path
finally:
remove_rods_path(rods_path)


@pytest.fixture(scope="function")
def consent_withdrawn_gapi_data_object(tmp_path):
root_path = PurePath("/testZone/home/irods/test/consent_withdrawn_gapi_data_object")
Expand Down
72 changes: 56 additions & 16 deletions tests/test_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,11 +23,11 @@


class TestCommonFunctions:
@m.context("When an iRODS object has both study and sample ID metadata")
@m.context("When an iRODS object has both one study and one sample ID metadata")
@m.context("When it wants both study and sample metadata enhanced")
@m.it("Updates study and sample metadata from MLWH")
def test_ensure_secondary_metadata_updated(
self, simple_study_and_sample_data_object, simple_study_and_sample_mlwh
self, single_study_and_single_sample_data_object, study_and_samples_mlwh
):
sample_id = "id_sample_lims1"
study_id = "1000"
Expand All @@ -45,57 +45,97 @@ def test_ensure_secondary_metadata_updated(
AVU(TrackedSample.NAME, "name1"),
AVU(TrackedSample.PUBLIC_NAME, "public_name1"),
AVU(TrackedSample.SUPPLIER_NAME, "supplier_name1"),
AVU(TrackedSample.UUID, "82429892-0ab6-11ee-b5ba-fa163eac3af7"),
AVU(TrackedSample.UUID, "82429892-0ab6-11ee-b5ba-fa163eac3ag7"),
]

obj = DataObject(simple_study_and_sample_data_object)
assert ensure_secondary_metadata_updated(obj, simple_study_and_sample_mlwh)
obj = DataObject(single_study_and_single_sample_data_object)
assert ensure_secondary_metadata_updated(obj, study_and_samples_mlwh)

for avu in expected_avus:
assert avu in obj.metadata()

@m.context("When an iRODS object has one study ID and multiple sample ID metadata")
@m.context("When it wants both study and sample metadata enhanced")
@m.it("Updates study and sample metadata from MLWH")
def test_ensure_secondary_metadata_updated_multiple_samples(
self,
single_study_and_multi_sample_data_object,
study_and_samples_mlwh,
):
sample_id1 = "id_sample_lims1"
sample_id2 = "id_sample_lims2"
study_id = "1000"

expected_avus = [
AVU(TrackedStudy.ID, study_id),
AVU(TrackedStudy.NAME, "Study X"),
AVU(TrackedStudy.TITLE, "Test Study Title"),
AVU(TrackedStudy.ACCESSION_NUMBER, "Test Accession"),
AVU(TrackedSample.ID, sample_id1),
AVU(TrackedSample.ACCESSION_NUMBER, "Test Accession"),
AVU(TrackedSample.COMMON_NAME, "common_name1"),
AVU(TrackedSample.DONOR_ID, "donor_id1"),
AVU(TrackedSample.NAME, "name1"),
AVU(TrackedSample.PUBLIC_NAME, "public_name1"),
AVU(TrackedSample.SUPPLIER_NAME, "supplier_name1"),
AVU(TrackedSample.ID, sample_id2),
AVU(TrackedSample.ACCESSION_NUMBER, "Test Accession"),
AVU(TrackedSample.COMMON_NAME, "common_name2"),
AVU(TrackedSample.DONOR_ID, "donor_id2"),
AVU(TrackedSample.NAME, "name2"),
AVU(TrackedSample.PUBLIC_NAME, "public_name2"),
AVU(TrackedSample.SUPPLIER_NAME, "supplier_name2"),
]

obj = DataObject(single_study_and_multi_sample_data_object)
assert ensure_secondary_metadata_updated(obj, study_and_samples_mlwh)

for avu in expected_avus:
assert avu in obj.metadata()

@m.context("When an iRODS object has both study and sample ID metadata")
@m.context("When it wants both study and sample metadata enhanced")
@m.it("Updates permissions according to the study")
def test_ensure_secondary_metadata_permissions_updated(
self, simple_study_and_sample_data_object, simple_study_and_sample_mlwh
self, single_study_and_single_sample_data_object, study_and_samples_mlwh
):
zone = "testZone"

obj = DataObject(simple_study_and_sample_data_object)
obj = DataObject(single_study_and_single_sample_data_object)
assert obj.permissions() == [AC("irods", perm=Permission.OWN, zone=zone)]
assert ensure_secondary_metadata_updated(obj, simple_study_and_sample_mlwh)
assert ensure_secondary_metadata_updated(obj, study_and_samples_mlwh)
assert obj.permissions() == [
AC("irods", perm=Permission.OWN, zone=zone),
AC("ss_1000", perm=Permission.READ, zone=zone),
]

@m.context("When an iRODS object has study ID, but not sample ID metadata")
@m.context("When an iRODS object has one study ID, but no sample ID metadata")
@m.context("When it wants study metadata enhanced")
@m.it("Updates permissions according to the study")
def test_ensure_secondary_metadata_permissions_updated_no_sample(
self, simple_study_and_sample_data_object, simple_study_and_sample_mlwh
self, single_study_and_single_sample_data_object, study_and_samples_mlwh
):
zone = "testZone"

obj = DataObject(simple_study_and_sample_data_object)
obj = DataObject(single_study_and_single_sample_data_object)
obj.remove_metadata(AVU(TrackedSample.ID, "id_sample_lims1"))

assert obj.permissions() == [AC("irods", perm=Permission.OWN, zone=zone)]
assert ensure_secondary_metadata_updated(obj, simple_study_and_sample_mlwh)
assert ensure_secondary_metadata_updated(obj, study_and_samples_mlwh)
assert obj.permissions() == [
AC("irods", perm=Permission.OWN, zone=zone),
AC("ss_1000", perm=Permission.READ, zone=zone),
]

@m.context("When an iRODS object has no study ID")
@m.context("When an iRODS object has no study ID and one sample ID metadata")
@m.context("When it wants study metadata enhanced")
@m.it("Removes access permissions")
def test_ensure_secondary_metadata_permissions_updated_no_study(
self, simple_study_and_sample_data_object, simple_study_and_sample_mlwh
self, single_study_and_single_sample_data_object, study_and_samples_mlwh
):
zone = "testZone"

obj = DataObject(simple_study_and_sample_data_object)
obj = DataObject(single_study_and_single_sample_data_object)
obj.remove_metadata(AVU(TrackedStudy.ID, "1000"))
obj.add_permissions(
AC("ss_1000", perm=Permission.READ, zone=zone),
Expand All @@ -105,5 +145,5 @@ def test_ensure_secondary_metadata_permissions_updated_no_study(
AC("irods", perm=Permission.OWN, zone=zone),
AC("ss_1000", perm=Permission.READ, zone=zone),
]
assert ensure_secondary_metadata_updated(obj, simple_study_and_sample_mlwh)
assert ensure_secondary_metadata_updated(obj, study_and_samples_mlwh)
assert obj.permissions() == [AC("irods", perm=Permission.OWN, zone=zone)]
26 changes: 13 additions & 13 deletions tests/test_utilities.py
Original file line number Diff line number Diff line change
Expand Up @@ -645,13 +645,13 @@ class TestMetadataUtilities:
@m.context("When it wants both study and sample metadata enhanced")
@m.it("Counts successes correctly")
def test_update_secondary_metadata(
self, simple_study_and_sample_data_object, simple_study_and_sample_mlwh
self, single_study_and_single_sample_data_object, study_and_samples_mlwh
):
obj_path = simple_study_and_sample_data_object.as_posix()
obj_path = single_study_and_single_sample_data_object.as_posix()

with StringIO("\n".join([obj_path])) as reader:
with StringIO() as writer:
engine = simple_study_and_sample_mlwh.get_bind()
engine = study_and_samples_mlwh.get_bind()
num_processed, num_updated, num_errors = update_secondary_metadata(
reader, writer, engine, print_update=True
)
Expand All @@ -675,27 +675,27 @@ def test_update_secondary_metadata(
AVU(TrackedSample.NAME, "name1"),
AVU(TrackedSample.PUBLIC_NAME, "public_name1"),
AVU(TrackedSample.SUPPLIER_NAME, "supplier_name1"),
AVU(TrackedSample.UUID, "82429892-0ab6-11ee-b5ba-fa163eac3af7"),
AVU(TrackedSample.UUID, "82429892-0ab6-11ee-b5ba-fa163eac3ag7"),
]

obj = DataObject(simple_study_and_sample_data_object)
obj = DataObject(single_study_and_single_sample_data_object)
for avu in expected_avus:
assert avu in obj.metadata()

@m.context("When an iRODS object has study ID, but not sample ID metadata")
@m.context("When it wants study metadata enhanced")
@m.it("Counts successes correctly")
def test_update_secondary_metadata_with_just_study(
self, simple_study_and_sample_data_object, simple_study_and_sample_mlwh
self, single_study_and_single_sample_data_object, study_and_samples_mlwh
):
obj_path = simple_study_and_sample_data_object.as_posix()
obj = DataObject(simple_study_and_sample_data_object)
obj_path = single_study_and_single_sample_data_object.as_posix()
obj = DataObject(single_study_and_single_sample_data_object)

assert obj.remove_metadata(AVU(TrackedSample.ID, "id_sample_lims1")) == 1

with StringIO("\n".join([obj_path])) as reader:
with StringIO() as writer:
engine = simple_study_and_sample_mlwh.get_bind()
engine = study_and_samples_mlwh.get_bind()
num_processed, num_updated, num_errors = update_secondary_metadata(
reader, writer, engine, print_update=True
)
Expand All @@ -720,16 +720,16 @@ def test_update_secondary_metadata_with_just_study(
@m.context("When it wants sample metadata enhanced")
@m.it("Counts successes correctly")
def test_update_general_metadata_with_just_study_err(
self, simple_study_and_sample_data_object, simple_study_and_sample_mlwh
self, single_study_and_single_sample_data_object, study_and_samples_mlwh
):
obj_path = simple_study_and_sample_data_object.as_posix()
obj = DataObject(simple_study_and_sample_data_object)
obj_path = single_study_and_single_sample_data_object.as_posix()
obj = DataObject(single_study_and_single_sample_data_object)

assert obj.remove_metadata(AVU(TrackedStudy.ID, "1000")) == 1

with StringIO("\n".join([obj_path])) as reader:
with StringIO() as writer:
engine = simple_study_and_sample_mlwh.get_bind()
engine = study_and_samples_mlwh.get_bind()
num_processed, num_updated, num_errors = update_secondary_metadata(
reader, writer, engine, print_update=True
)
Expand Down

0 comments on commit b2ddae7

Please sign in to comment.