Skip to content

Commit

Permalink
Sample metadata in ONT tests (#404)
Browse files Browse the repository at this point in the history
* Add sample_uuid and sample_lims in ONT tests

* Make barcodes visible in test files through fixture

* Add a small barcode set as fixture for rebasecalled experiments
  • Loading branch information
marcomoscasgr authored Jan 29, 2025
1 parent 8f455f3 commit 3379a68
Show file tree
Hide file tree
Showing 2 changed files with 84 additions and 51 deletions.
77 changes: 43 additions & 34 deletions tests/ont/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,14 +40,17 @@
# Sizing constants for the synthetic ONT test data.
#
# NOTE(review): presumably the number of simple (non-multiplexed) synthetic
# experiments; its use is outside the visible part of the file - confirm.
NUM_SIMPLE_EXPTS = 5
# Upper bound of the experiment loop that builds multiplexed flowcell records.
NUM_MULTIPLEXED_EXPTS = 3
# Upper bound of the instrument slot loop nested inside the multiplexed
# experiment loop (other uses may exist outside the visible part of the file).
NUM_INSTRUMENT_SLOTS = 5
# Experiment and slot loop bounds for the rebasecalled multiplexed test data,
# used both for the ML warehouse records and for the iRODS collections.
NUM_MULTIPLEXED_REBASECALLED_EXPTS = 1
NUM_MULTIPLEXED_REBASECALLED_SLOTS = 1
# Slice bound applied to the barcode list: only this many leading barcodes are
# used for the rebasecalled multiplexed experiments.
MAX_NUM_BARCODES_MULTIPLEXED_EXPTS = 4


def ont_tag_identifier(tag_index: int) -> str:
    """Return an ONT tag identifier in tag set EXP-NBD104, given a tag index.

    Args:
        tag_index: The index of the tag. Callers in the test suite pass
            1-based indices.

    Returns:
        The string "NB" followed by the index zero-padded to at least two
        digits, e.g. 1 -> "NB01" and 12 -> "NB12".
    """
    return f"NB{tag_index:02d}"


def initialize_mlwh_ont_synthetic(session: Session):
def initialize_mlwh_ont_synthetic(session: Session, ont_barcodes):
"""Insert ML warehouse test data for all synthetic simple and multiplexed
ONT experiments.
Expand Down Expand Up @@ -86,7 +89,7 @@ def make_sample(n):
name=f"name{n}",
public_name=f"public_name{n}",
supplier_name=f"supplier_name{n}",
uuid_sample_lims=f"62429892-0ab6-11ee-b5ba-fa163eac3af{n}",
uuid_sample_lims=f"62429892-0ab6-11ee-b5ba-fa163eac3{n:0>3}",
**default_timestamps,
)

Expand Down Expand Up @@ -148,25 +151,9 @@ def make_mplex_flowcell(ex_name, ex_n, fc_start, sl, tid, bc, n):
recorded_at=when,
)

barcodes = [
"CACAAAGACACCGACAACTTTCTT",
"ACAGACGACTACAAACGGAATCGA",
"CCTGGTAACTGGGACACAAGACTC",
"TAGGGAAACACGATAGAATCCGAA",
"AAGGTTACACAAACCCTGGACAAG",
"GACTACTTTCTGCCTTTGCGAGAA",
"AAGGATTCATTCCCACGGTAACAC",
"ACGTAACTTGGTTTGTTCCCTGAA",
"AACCAAGACTCGCTGTGCCTAGTT",
"GAGAGGACAAAGGTTTCAACGCTT",
"TCCATTCCCTCCGATAGATGAAAC",
"TCCGATTCTGCTTCTTTCTACCTG",
]

msample_idx = 0
for expt in range(1, NUM_MULTIPLEXED_EXPTS + 1):
for slot in range(1, NUM_INSTRUMENT_SLOTS + 1):
for barcode_idx, barcode in enumerate(barcodes):
for barcode_idx, barcode in enumerate(ont_barcodes):
# The tag_id format and tag_set_name are taken from the Guppy barcode
# arrangement file barcode_arrs_nb12.toml distributed with Guppy and
# MinKNOW.
Expand All @@ -179,15 +166,15 @@ def make_mplex_flowcell(ex_name, ex_n, fc_start, sl, tid, bc, n):
slot,
tag_id,
barcode,
msample_idx,
barcode_idx,
)
)
msample_idx += 1

msample_idx = 0
for expt in range(1, 2):
for slot in range(1, 2):
for barcode_idx, barcode in enumerate(barcodes[:4]):
for expt in range(1, NUM_MULTIPLEXED_REBASECALLED_EXPTS + 1):
for slot in range(1, NUM_MULTIPLEXED_REBASECALLED_SLOTS + 1):
for barcode_idx, barcode in enumerate(
ont_barcodes[:MAX_NUM_BARCODES_MULTIPLEXED_EXPTS]
):
tag_id = ont_tag_identifier(barcode_idx + 1)
flowcells.extend(
[
Expand All @@ -198,7 +185,7 @@ def make_mplex_flowcell(ex_name, ex_n, fc_start, sl, tid, bc, n):
slot,
tag_id,
barcode,
msample_idx,
barcode_idx,
),
make_mplex_flowcell(
"rebasecalled_multiplexed_experiment",
Expand All @@ -207,20 +194,42 @@ def make_mplex_flowcell(ex_name, ex_n, fc_start, sl, tid, bc, n):
slot,
tag_id,
barcode,
msample_idx,
barcode_idx,
),
]
)
msample_idx += 1

session.add_all(flowcells) # Simple and multiplexed
session.commit()


@pytest.fixture(scope="function")
def ont_synthetic_mlwh(mlwh_session) -> Session:
def ont_barcodes() -> list[str]:
    """Return the barcode sequences used for the synthetic multiplexed ONT data.

    The list holds twelve 24-base sequences. A new list is built on every
    call, so a caller may slice or modify its copy without affecting others.

    Returns:
        The barcode sequences, in a fixed order. Where the data builder
        enumerates them, the barcode at 0-based position i is paired with the
        tag identifier for index i + 1.
    """
    return [
        "CACAAAGACACCGACAACTTTCTT",
        "ACAGACGACTACAAACGGAATCGA",
        "CCTGGTAACTGGGACACAAGACTC",
        "TAGGGAAACACGATAGAATCCGAA",
        "AAGGTTACACAAACCCTGGACAAG",
        "GACTACTTTCTGCCTTTGCGAGAA",
        "AAGGATTCATTCCCACGGTAACAC",
        "ACGTAACTTGGTTTGTTCCCTGAA",
        "AACCAAGACTCGCTGTGCCTAGTT",
        "GAGAGGACAAAGGTTTCAACGCTT",
        "TCCATTCCCTCCGATAGATGAAAC",
        "TCCGATTCTGCTTCTTTCTACCTG",
    ]


@pytest.fixture(scope="function")
def ont_smallset_barcodes(ont_barcodes) -> list[str]:
    """Return the leading subset of barcodes used by the rebasecalled experiments.

    Args:
        ont_barcodes: The full barcode list, supplied by the fixture of the
            same name.

    Returns:
        The first MAX_NUM_BARCODES_MULTIPLEXED_EXPTS entries of ont_barcodes,
        which is the same slice the ML warehouse data builder uses for the
        rebasecalled multiplexed experiments.
    """
    return ont_barcodes[:MAX_NUM_BARCODES_MULTIPLEXED_EXPTS]


@pytest.fixture(scope="function")
def ont_synthetic_mlwh(mlwh_session, ont_barcodes) -> Session:
"""An ML warehouse database fixture populated with ONT-related records."""
initialize_mlwh_ont_synthetic(mlwh_session)
initialize_mlwh_ont_synthetic(mlwh_session, ont_barcodes)
yield mlwh_session


Expand Down Expand Up @@ -279,8 +288,8 @@ def ont_synthetic_irods(tmp_path, irods_groups):
]
coll.add_metadata(*meta)

for expt in range(1, 2):
for slot in range(1, 2):
for expt in range(1, NUM_MULTIPLEXED_REBASECALLED_EXPTS + 1):
for slot in range(1, NUM_MULTIPLEXED_REBASECALLED_SLOTS + 1):
expt_name = f"old_rebasecalled_multiplexed_experiment_{expt:0>3}"
id_flowcell = f"flowcell{slot + 200:0>3}"
run_folder = f"20190904_1514_GA{slot}0000_{id_flowcell}_b4a1fd79"
Expand All @@ -303,8 +312,8 @@ def ont_synthetic_irods(tmp_path, irods_groups):
]
coll.add_metadata(*meta)

for expt in range(1, 2):
for slot in range(1, 2):
for expt in range(1, NUM_MULTIPLEXED_REBASECALLED_EXPTS + 1):
for slot in range(1, NUM_MULTIPLEXED_REBASECALLED_SLOTS + 1):
expt_name = f"rebasecalled_multiplexed_experiment_{expt:0>3}"
id_flowcell = f"flowcell{slot + 300:0>3}"
run_folder = f"20190904_1514_GA{slot}0000_{id_flowcell}_08c179cd"
Expand Down
58 changes: 41 additions & 17 deletions tests/ont/test_metadata_update.py
Original file line number Diff line number Diff line change
Expand Up @@ -182,9 +182,11 @@ def test_add_new_sample_metadata(self, ont_synthetic_irods, ont_synthetic_mlwh):
AVU(TrackedSample.COMMON_NAME, "common_name1"),
AVU(TrackedSample.DONOR_ID, "donor_id1"),
AVU(TrackedSample.ID, "id_sample_lims1"),
AVU(TrackedSample.LIMS, "LIMS_01"),
AVU(TrackedSample.NAME, "name1"),
AVU(TrackedSample.SUPPLIER_NAME, "supplier_name1"),
AVU(TrackedSample.PUBLIC_NAME, "public_name1"),
AVU(TrackedSample.UUID, "62429892-0ab6-11ee-b5ba-fa163eac3001"),
AVU(TrackedStudy.ID, "2000"),
AVU(TrackedStudy.NAME, "Study Y"),
]:
Expand All @@ -202,7 +204,9 @@ def test_add_new_sample_metadata(self, ont_synthetic_irods, ont_synthetic_mlwh):
@m.context("When an ONT experiment collection is annotated")
@m.context("When the experiment is multiplexed")
@m.it("Adds {tag_index_from_id => <n>} metadata to barcode<0n> sub-collections")
def test_add_new_plex_metadata(self, ont_synthetic_irods, ont_synthetic_mlwh):
def test_add_new_plex_metadata(
self, ont_synthetic_irods, ont_synthetic_mlwh, ont_barcodes
):
expt = "multiplexed_experiment_001"
slot = 1
path = ont_synthetic_irods / expt / "20190904_1514_GA10000_flowcell101_cf751ba1"
Expand All @@ -212,7 +216,7 @@ def test_add_new_plex_metadata(self, ont_synthetic_irods, ont_synthetic_mlwh):
assert annotate_results_collection(path, c, mlwh_session=ont_synthetic_mlwh)

for subcoll in ["fast5_fail", "fast5_pass", "fastq_fail", "fastq_pass"]:
for tag_index in range(1, 12):
for tag_index in range(1, len(ont_barcodes) + 1):
tag_identifier = ont_tag_identifier(tag_index)
bc_coll = Collection(
path / subcoll / ont.barcode_name_from_id(tag_identifier)
Expand All @@ -225,19 +229,20 @@ def test_add_new_plex_metadata(self, ont_synthetic_irods, ont_synthetic_mlwh):
@m.context("When the experiment is multiplexed")
@m.it("Adds sample and study metadata to barcode<0n> sub-collections")
def test_add_new_plex_sample_metadata(
self, ont_synthetic_irods, ont_synthetic_mlwh
self, ont_synthetic_irods, ont_synthetic_mlwh, ont_barcodes
):
zone = "testZone"
expt = "multiplexed_experiment_001"
slot = 1
path = ont_synthetic_irods / expt / "20190904_1514_GA10000_flowcell101_cf751ba1"
num_barcodes = len(ont_barcodes)

c = Component(experiment_name=expt, instrument_slot=slot)

assert annotate_results_collection(path, c, mlwh_session=ont_synthetic_mlwh)

for subcoll in ["fast5_fail", "fast5_pass", "fastq_fail", "fastq_pass"]:
for tag_index in range(1, 12):
for tag_index in range(1, num_barcodes + 1):
tag_id = ont_tag_identifier(tag_index)
bc_coll = Collection(path / subcoll / ont.barcode_name_from_id(tag_id))

Expand All @@ -246,9 +251,14 @@ def test_add_new_plex_sample_metadata(
AVU(TrackedSample.COMMON_NAME, f"common_name{tag_index}"),
AVU(TrackedSample.DONOR_ID, f"donor_id{tag_index}"),
AVU(TrackedSample.ID, f"id_sample_lims{tag_index}"),
AVU(TrackedSample.LIMS, "LIMS_01"),
AVU(TrackedSample.NAME, f"name{tag_index}"),
AVU(TrackedSample.PUBLIC_NAME, f"public_name{tag_index}"),
AVU(TrackedSample.SUPPLIER_NAME, f"supplier_name{tag_index}"),
AVU(
TrackedSample.UUID,
f"62429892-0ab6-11ee-b5ba-fa163eac3{tag_index:0>3}",
),
AVU(TrackedStudy.ID, "3000"),
AVU(TrackedStudy.NAME, "Study Z"),
]:
Expand All @@ -268,10 +278,11 @@ def test_add_new_plex_sample_metadata(
@m.context("When experiments are multiplexed")
@m.it("Adds tag_index, sample and study metadata to barcode<0n> sub-collections")
def test_add_new_plex_metadata_on_rebasecalled(
self, ont_synthetic_irods, ont_synthetic_mlwh
self, ont_synthetic_irods, ont_synthetic_mlwh, ont_smallset_barcodes
):
zone = "testZone"
slot = 1
max_num_barcodes = len(ont_smallset_barcodes)

subpath = PurePath(
"dorado",
Expand Down Expand Up @@ -302,7 +313,7 @@ def test_add_new_plex_metadata_on_rebasecalled(

assert annotate_results_collection(path, c, mlwh_session=ont_synthetic_mlwh)

for tag_index in range(1, 5):
for tag_index in range(1, max_num_barcodes + 1):
tag_identifier = ont_tag_identifier(tag_index)
bpath = path / ont.barcode_name_from_id(tag_identifier)
bc_coll = Collection(bpath)
Expand All @@ -313,9 +324,14 @@ def test_add_new_plex_metadata_on_rebasecalled(
AVU(TrackedSample.COMMON_NAME, f"common_name{tag_index}"),
AVU(TrackedSample.DONOR_ID, f"donor_id{tag_index}"),
AVU(TrackedSample.ID, f"id_sample_lims{tag_index}"),
AVU(TrackedSample.LIMS, "LIMS_01"),
AVU(TrackedSample.NAME, f"name{tag_index}"),
AVU(TrackedSample.PUBLIC_NAME, f"public_name{tag_index}"),
AVU(TrackedSample.SUPPLIER_NAME, f"supplier_name{tag_index}"),
AVU(
TrackedSample.UUID,
f"62429892-0ab6-11ee-b5ba-fa163eac3{tag_index:0>3}",
),
AVU(TrackedStudy.ID, "3000"),
AVU(TrackedStudy.NAME, "Study Z"),
]:
Expand Down Expand Up @@ -450,9 +466,10 @@ def test_updates_annotated_collection(
@m.context("When an iRODS path has metadata identifying its run component")
@m.it("Updates the metadata")
def test_updates_rebasecalled_annotated_collection(
self, ont_synthetic_irods, ont_synthetic_mlwh
self, ont_synthetic_irods, ont_synthetic_mlwh, ont_smallset_barcodes
):
slot = 1
max_num_barcodes = len(ont_smallset_barcodes)
subpath = PurePath(
"dorado",
"7.2.13",
Expand Down Expand Up @@ -489,7 +506,7 @@ def test_updates_rebasecalled_annotated_collection(
)

samples_paths = []
for tag_index in range(1, 5):
for tag_index in range(1, max_num_barcodes + 1):
tag_identifier = ont_tag_identifier(tag_index)
bpath = (
path
Expand Down Expand Up @@ -705,38 +722,44 @@ class TestBarcodeRelatedFunctions(object):
@m.context("When barcode folders lie one level down in the output folder")
@m.it("Barcode collections number is correct")
def test_barcode_collections_under_subfolder(self):
expected_bcolls = 5
num_expected_bcolls = 5
root_path = PurePath(
"/testZone/home/irods/test/ont_synthetic_irods/synthetic/barcode_collection_test"
)
expt = "multiplexed_folder_experiment_001"
path = root_path / expt / "20190904_1514_GA10000_flowcell401_ba641ab1"
tag_identifiers = [ont_tag_identifier(tag_index) for tag_index in range(1, 6)]
tag_identifiers = [
ont_tag_identifier(tag_index)
for tag_index in range(1, num_expected_bcolls + 1)
]
for tag_identifier in tag_identifiers:
bpath = path / "pass" / ont.barcode_name_from_id(tag_identifier)
Collection(bpath).create(parents=True)

bcolls = barcode_collections(Collection(path), *tag_identifiers)
assert len(bcolls) == expected_bcolls
assert len(bcolls) == num_expected_bcolls
remove_rods_path(root_path)

@m.context("When rebasecalled ONT runs are plexed")
@m.context("When barcodes are right under the output folder")
@m.it("Barcode collections number is correct")
def test_barcode_collections_under_output_folder(self):
expected_bcolls = 5
num_expected_bcolls = 5
root_path = PurePath(
"/testZone/home/irods/test/ont_synthetic_irods/synthetic/barcode_collection_test"
)
expt = "multiplexed_folder_experiment_002"
path = root_path / expt / "20190904_1514_GA10000_flowcell402_ca641bc1"
tag_identifiers = [ont_tag_identifier(tag_index) for tag_index in range(1, 6)]
tag_identifiers = [
ont_tag_identifier(tag_index)
for tag_index in range(1, num_expected_bcolls + 1)
]
for tag_identifier in tag_identifiers:
bpath = path / ont.barcode_name_from_id(tag_identifier)
Collection(bpath).create(parents=True)

bcolls = barcode_collections(Collection(path), *tag_identifiers)
assert len(bcolls) == expected_bcolls
assert len(bcolls) == num_expected_bcolls
remove_rods_path(root_path)

@m.context("When rebasecalled ONT runs are plexed")
Expand All @@ -763,14 +786,15 @@ def test_barcode_collections_duplicates(self):
)
@m.it("Workflow continues with no error")
def test_barcode_collections_missing_folders(self):
expected_bcolls = 3
num_expected_bcolls = 3
num_total_tags = 5
root_path = PurePath(
"/testZone/home/irods/test/ont_synthetic_irods/synthetic/barcode_collection_test"
)
expt = "multiplexed_folder_experiment_004"
path = root_path / expt / "20190904_1514_GA10000_flowcell404_fg345hil"
expected_tag_identifiers = [
ont_tag_identifier(tag_index) for tag_index in range(1, 6)
ont_tag_identifier(tag_index) for tag_index in range(1, num_total_tags + 1)
]
actual_tag_identifiers = [
ont_tag_identifier(tag_index) for tag_index in [1, 3, 5]
Expand All @@ -780,5 +804,5 @@ def test_barcode_collections_missing_folders(self):
Collection(bpath).create(parents=True)

bcolls = barcode_collections(Collection(path), *expected_tag_identifiers)
assert len(bcolls) == expected_bcolls
assert len(bcolls) == num_expected_bcolls
remove_rods_path(root_path)

0 comments on commit 3379a68

Please sign in to comment.