From 805b1b704a5ca61d4ec9dc38b73882a36f9b49e7 Mon Sep 17 00:00:00 2001 From: Zhewei Shen Date: Wed, 12 Feb 2025 11:53:35 -0800 Subject: [PATCH] changed read_name and onlist audits on file sets --- src/igvfd/audit/analysis_set.py | 36 ------------------- src/igvfd/audit/file_set.py | 8 +++++ src/igvfd/audit/measurement_set.py | 9 ----- src/igvfd/mappings/analysis_set.json | 4 +-- src/igvfd/mappings/measurement_set.json | 4 +-- src/igvfd/tests/test_audit_analysis_set.py | 27 -------------- src/igvfd/tests/test_audit_file_set.py | 27 +++++++------- src/igvfd/tests/test_audit_measurement_set.py | 22 ++---------- 8 files changed, 29 insertions(+), 108 deletions(-) diff --git a/src/igvfd/audit/analysis_set.py b/src/igvfd/audit/analysis_set.py index f6f313f78..56e7176d5 100644 --- a/src/igvfd/audit/analysis_set.py +++ b/src/igvfd/audit/analysis_set.py @@ -255,39 +255,3 @@ def audit_analysis_set_multiplexed_samples(value, system): f'of the `samples`: {all_samples} of its `input_file_sets`: {input_file_sets}.' ) yield AuditFailure(audit_message_inconsistent_demultiplexed_sample.get('audit_category', ''), f'{detail} {audit_message_inconsistent_demultiplexed_sample.get("audit_description", "")}', level=audit_message_inconsistent_demultiplexed_sample.get('audit_level', '')) - - -@audit_checker('AnalysisSet', frame='object') -def audit_analysis_set_inconsistent_onlist_info(value, system): - ''' - [ - { - "audit_description": "Analysis sets for single cell uniform pipeline runs are expected to have measurement sets with the same barcode files.", - "audit_category": "inconsistent barcode onlist", - "audit_level": "WARNING" - }, - { - "audit_description": "Analysis sets for single cell uniform pipeline runs are expected to have measurement sets with the same barcode methods.", - "audit_category": "inconsistent barcode onlist", - "audit_level": "WARNING" - } - ] - ''' - audit_msg_inconsistent_onlist_files = get_audit_message(audit_analysis_set_inconsistent_onlist_info, index=0) - audit_msg_inconsistent_onlist_methods = get_audit_message(audit_analysis_set_inconsistent_onlist_info, index=1) - all_onlist_files = [] - all_onlist_methods = [] - input_file_sets = value.get('input_file_sets', []) - for input_file_set in input_file_sets: - if input_file_set.startswith('/measurement-sets/'): - input_file_set_object = system.get('request').embed(input_file_set + '@@object?skip_calculated=true') - single_cell_assay_status = single_cell_check(system, input_file_set_object, 'Measurement set') - if single_cell_assay_status: - all_onlist_files.append(sorted(input_file_set_object.get('onlist_files', ''))) - all_onlist_methods.append(input_file_set_object.get('onlist_method', '')) - # If there are multiple onlist methods from the input measurement sets, trigger audit - if len(set(all_onlist_methods)) > 1: - yield AuditFailure(audit_msg_inconsistent_onlist_methods.get('audit_category', ''), audit_msg_inconsistent_onlist_methods.get('audit_description', ''), level=audit_msg_inconsistent_onlist_methods.get('audit_level', '')) - # If the input measurement sets have different onlist files - elif not all(set(sublist) == set(all_onlist_files[0]) for sublist in all_onlist_files): - yield AuditFailure(audit_msg_inconsistent_onlist_files.get('audit_category', ''), audit_msg_inconsistent_onlist_files.get('audit_description', ''), level=audit_msg_inconsistent_onlist_files.get('audit_level', '')) diff --git a/src/igvfd/audit/file_set.py b/src/igvfd/audit/file_set.py index 751bff920..51f6d07a1 100644 --- a/src/igvfd/audit/file_set.py +++ b/src/igvfd/audit/file_set.py @@ -592,12 +592,20 @@ def audit_single_cell_read_names(value, system): for file in value['files']: if file.startswith('/sequence-files/'): sequence_file_object = system.get('request').embed(file) + applicable_read_types = ['R1', 'R2', 'R3'] # Skip Index 1 and Index 2 + # Get read type + illumina_read_type = sequence_file_object.get('illumina_read_type', '') + # If no read type or I-type, skip audit + if illumina_read_type not in applicable_read_types: + continue + # Check for read names read_names = sequence_file_object.get('read_names', '') if read_names: if any(read_name not in ['Read 1', 'Read 2', 'Barcode index'] for read_name in read_names): unexpected_read_names.append(file) else: missing_read_names.append(file) + # Audit for missing read names if missing_read_names: for file in missing_read_names: diff --git a/src/igvfd/audit/measurement_set.py b/src/igvfd/audit/measurement_set.py index e2108b5b0..72ec98a63 100644 --- a/src/igvfd/audit/measurement_set.py +++ b/src/igvfd/audit/measurement_set.py @@ -537,16 +537,10 @@ def audit_inconsistent_onlist_info(value, system): "audit_description": "Measurement sets with 2 or more barcode onlist files are expected to have an onlist method of either product or multi.", "audit_category": "inconsistent barcode onlist", "audit_level": "ERROR" - }, - { - "audit_description": "Measurement sets with only 1 barcode onlist files are expected to have an onlist method of no combination.", - "audit_category": "inconsistent barcode onlist", - "audit_level": "ERROR" } ] ''' audit_message_missing_method_mismatch_combo = get_audit_message(audit_inconsistent_onlist_info, index=0) - audit_message_missing_method_mismatch_nocombo = get_audit_message(audit_inconsistent_onlist_info, index=1) onlist_files = value.get('onlist_files') onlist_method = value.get('onlist_method') # Only check if both files and method properties are present @@ -554,9 +548,6 @@ def audit_inconsistent_onlist_info(value, system): # Check if multiple onlist files are submitted but the method is no combination if (len(onlist_files) > 1) and (onlist_method == 'no combination'): yield AuditFailure(audit_message_missing_method_mismatch_combo.get('audit_category', ''), audit_message_missing_method_mismatch_combo.get('audit_description', ''), level=audit_message_missing_method_mismatch_combo.get('audit_level', '')) - # Check if one onlist file is submitted but the method indicates combination - if (len(onlist_files) == 1) and (onlist_method != 'no combination'): - yield AuditFailure(audit_message_missing_method_mismatch_nocombo.get('audit_category', ''), audit_message_missing_method_mismatch_nocombo.get('audit_description', ''), level=audit_message_missing_method_mismatch_nocombo.get('audit_level', '')) @audit_checker('MeasurementSet', frame='object') diff --git a/src/igvfd/mappings/analysis_set.json b/src/igvfd/mappings/analysis_set.json index df56d518e..3c9eac5c7 100644 --- a/src/igvfd/mappings/analysis_set.json +++ b/src/igvfd/mappings/analysis_set.json @@ -1,6 +1,6 @@ { - "hash": "bc24d24de6fcfff86dd1f8d568b5d7c1", - "index_name": "analysis_set_bc24d24d", + "hash": "96af1dff59d5e0cbeb0b1e5a66cb6591", + "index_name": "analysis_set_96af1dff", "item_type": "analysis_set", "mapping": { "dynamic_templates": [ diff --git a/src/igvfd/mappings/measurement_set.json b/src/igvfd/mappings/measurement_set.json index 9b47991bf..a1d64cfc6 100644 --- a/src/igvfd/mappings/measurement_set.json +++ b/src/igvfd/mappings/measurement_set.json @@ -1,6 +1,6 @@ { - "hash": "657c2b6577fc959190f279a4f94ff912", - "index_name": "measurement_set_657c2b65", + "hash": "2cdb7cde70546f9e9c16bf50dcc19eb1", + "index_name": "measurement_set_2cdb7cde", "item_type": "measurement_set", "mapping": { "dynamic_templates": [ diff --git a/src/igvfd/tests/test_audit_analysis_set.py b/src/igvfd/tests/test_audit_analysis_set.py index 3f020c954..def5487df 100644 --- a/src/igvfd/tests/test_audit_analysis_set.py +++ b/src/igvfd/tests/test_audit_analysis_set.py @@ -303,30 +303,3 @@ def test_audit_analysis_set_demultiplexed_sample( error['category'] != 'inconsistent demultiplexed sample' for error in res.json['audit'].get('ERROR', []) ) - - -def test_audit_analysis_set_inconsistent_barcode_onlist(testapp, analysis_set_with_scrna_measurement_sets, measurement_set_one_onlist, measurement_set_two_onlists, tabular_file_onlist_1, tabular_file_onlist_2): - # Check if the audit can catch input MeaSets have multiple onlist methods and 2 different sets of onlist files - testapp.patch_json( - analysis_set_with_scrna_measurement_sets['@id'], - { - 'input_file_sets': [measurement_set_one_onlist['@id'], measurement_set_two_onlists['@id']] - } - ) - res = testapp.get(analysis_set_with_scrna_measurement_sets['@id'] + '@@audit') - assert any( - error['category'] == 'inconsistent barcode onlist' - for error in res.json['audit'].get('WARNING', []) - ) - # Check if an analysis set with 2 measurement sets have the same onlist info will be audit-free - testapp.patch_json( - measurement_set_one_onlist['@id'], - { - 'onlist_files': [tabular_file_onlist_1['@id'], tabular_file_onlist_2['@id']] - } - ) - res = testapp.get(analysis_set_with_scrna_measurement_sets['@id'] + '@@audit') - assert any( - error['category'] != 'inconsistent barcode onlist' - for error in res.json['audit'].get('WARNING', []) - ) diff --git a/src/igvfd/tests/test_audit_file_set.py b/src/igvfd/tests/test_audit_file_set.py index 420750713..a33b9bdf0 100644 --- a/src/igvfd/tests/test_audit_file_set.py +++ b/src/igvfd/tests/test_audit_file_set.py @@ -103,17 +103,24 @@ def test_audit_inconsistent_location_files(testapp, sequence_file_pod5, sequence def test_audit_single_cell_read_names(testapp, measurement_set_one_onlist, sequence_file, sequence_file_sequencing_run_2): - # Patch a single cell MeaSet SeqFiles without read_names (audit) + # Patch a single cell SeqFiles without read_names and I1 (no audit) testapp.patch_json( sequence_file['@id'], { - 'file_set': measurement_set_one_onlist['@id'] + 'file_set': measurement_set_one_onlist['@id'], + 'illumina_read_type': 'I1' } ) + res = testapp.get(measurement_set_one_onlist['@id'] + '@@audit') + assert all( + error['category'] != 'missing read names' + for error in res.json['audit'].get('NOT_COMPLIANT', []) + ) + # Patch a single cell SeqFiles without read_names and R1 (audit) testapp.patch_json( - sequence_file_sequencing_run_2['@id'], + sequence_file['@id'], { - 'file_set': measurement_set_one_onlist['@id'] + 'illumina_read_type': 'R1' } ) res = testapp.get(measurement_set_one_onlist['@id'] + '@@audit') @@ -121,22 +128,18 @@ def test_audit_single_cell_read_names(testapp, measurement_set_one_onlist, seque error['category'] == 'missing read names' for error in res.json['audit'].get('NOT_COMPLIANT', []) ) - # Patch the a MeaSet with one SeqFile with read_names and one without (audit) + # Patch SeqFiles with R-read type and read_names (no audit) testapp.patch_json( sequence_file['@id'], { - 'read_names': ['Read 1'], + 'read_names': ['Read 1'] } ) - res = testapp.get(measurement_set_one_onlist['@id'] + '@@audit') - assert any( - error['category'] == 'missing read names' - for error in res.json['audit'].get('NOT_COMPLIANT', []) - ) - # Patch both SeqFiles with read_names (no audit) testapp.patch_json( sequence_file_sequencing_run_2['@id'], { + 'file_set': measurement_set_one_onlist['@id'], + 'illumina_read_type': 'R2', 'read_names': ['Read 2', 'Barcode index'] } ) diff --git a/src/igvfd/tests/test_audit_measurement_set.py b/src/igvfd/tests/test_audit_measurement_set.py index 233f48c6b..276e9650e 100644 --- a/src/igvfd/tests/test_audit_measurement_set.py +++ b/src/igvfd/tests/test_audit_measurement_set.py @@ -1187,19 +1187,13 @@ def test_audit_onlist(testapp, measurement_set_one_onlist, measurement_set, assa def test_audit_inconsistent_barcode_onlist(testapp, measurement_set_one_onlist, measurement_set_two_onlists, tabular_file_onlist_1, tabular_file_onlist_2): - # Check if the measurement set fixture with one file and no combination method is audit-free - res = testapp.get(measurement_set_one_onlist['@id'] + '@@audit') - assert all( - error['category'] != 'inconsistent barcode onlist' - for error in res.json['audit'].get('ERROR', []) - ) - # Check if the measurement set fixture with two file and combination method is audit-free + # Check the MeaSet with two file and combination method (no audit) res = testapp.get(measurement_set_two_onlists['@id'] + '@@audit') assert all( error['category'] != 'inconsistent barcode onlist' for error in res.json['audit'].get('ERROR', []) ) - # Add another onlist file to a MeaSet that is no-combination for onlist method. + # Patch a MeaSet with 2 onlist files and no combination method (audit). testapp.patch_json( measurement_set_one_onlist['@id'], { @@ -1211,18 +1205,6 @@ def test_audit_inconsistent_barcode_onlist(testapp, measurement_set_one_onlist, error['category'] == 'inconsistent barcode onlist' for error in res.json['audit'].get('ERROR', []) ) - # Remove an onlist file to a MeaSet with a combination onlist method. - testapp.patch_json( - measurement_set_two_onlists['@id'], - { - 'onlist_files': [tabular_file_onlist_1['@id']] - } - ) - res = testapp.get(measurement_set_one_onlist['@id'] + '@@audit') - assert any( - error['category'] == 'inconsistent barcode onlist' - for error in res.json['audit'].get('ERROR', []) - ) def test_audit_unexpected_onlist_files(testapp, measurement_set_one_onlist, tabular_file_onlist_1):