Skip to content

Commit

Permalink
changed read_name and onlist audits on file sets
Browse files Browse the repository at this point in the history
  • Loading branch information
zhwshen committed Feb 13, 2025
1 parent 035a8af commit 805b1b7
Show file tree
Hide file tree
Showing 8 changed files with 29 additions and 108 deletions.
36 changes: 0 additions & 36 deletions src/igvfd/audit/analysis_set.py
Original file line number Diff line number Diff line change
Expand Up @@ -255,39 +255,3 @@ def audit_analysis_set_multiplexed_samples(value, system):
f'of the `samples`: {all_samples} of its `input_file_sets`: {input_file_sets}.'
)
yield AuditFailure(audit_message_inconsistent_demultiplexed_sample.get('audit_category', ''), f'{detail} {audit_message_inconsistent_demultiplexed_sample.get("audit_description", "")}', level=audit_message_inconsistent_demultiplexed_sample.get('audit_level', ''))


@audit_checker('AnalysisSet', frame='object')
def audit_analysis_set_inconsistent_onlist_info(value, system):
    '''
    [
        {
            "audit_description": "Analysis sets for single cell uniform pipeline runs are expected to have measurement sets with the same barcode files.",
            "audit_category": "inconsistent barcode onlist",
            "audit_level": "WARNING"
        },
        {
            "audit_description": "Analysis sets for single cell uniform pipeline runs are expected to have measurement sets with the same barcode methods.",
            "audit_category": "inconsistent barcode onlist",
            "audit_level": "WARNING"
        }
    ]
    '''
    # NOTE: the docstring above is data, not prose. Its entries are looked up
    # by position through get_audit_message (index 0 = onlist files message,
    # index 1 = onlist methods message), so explanatory text lives in comments.
    #
    # Purpose: compare the barcode onlist files and onlist method across the
    # single cell measurement sets listed in `input_file_sets`, and yield one
    # AuditFailure when they disagree. A method mismatch takes precedence; the
    # files comparison only runs when the methods are consistent.
    files_message = get_audit_message(audit_analysis_set_inconsistent_onlist_info, index=0)
    methods_message = get_audit_message(audit_analysis_set_inconsistent_onlist_info, index=1)

    onlist_files_per_set = []
    onlist_method_per_set = []
    for file_set_path in value.get('input_file_sets', []):
        # Only measurement sets carry onlist information relevant here.
        if not file_set_path.startswith('/measurement-sets/'):
            continue
        measurement_set = system.get('request').embed(file_set_path + '@@object?skip_calculated=true')
        # Measurement sets that do not pass the single cell check are ignored.
        if not single_cell_check(system, measurement_set, 'Measurement set'):
            continue
        onlist_files_per_set.append(sorted(measurement_set.get('onlist_files', '')))
        onlist_method_per_set.append(measurement_set.get('onlist_method', ''))

    # More than one distinct onlist method among the inputs: report and stop.
    distinct_methods = set(onlist_method_per_set)
    if len(distinct_methods) > 1:
        yield AuditFailure(
            methods_message.get('audit_category', ''),
            methods_message.get('audit_description', ''),
            level=methods_message.get('audit_level', '')
        )
        return

    # Methods agree; report if any input's onlist files differ from the first
    # input's. With no collected inputs the generator is empty and nothing is
    # indexed.
    if any(set(files) != set(onlist_files_per_set[0]) for files in onlist_files_per_set):
        yield AuditFailure(
            files_message.get('audit_category', ''),
            files_message.get('audit_description', ''),
            level=files_message.get('audit_level', '')
        )
8 changes: 8 additions & 0 deletions src/igvfd/audit/file_set.py
Original file line number Diff line number Diff line change
Expand Up @@ -592,12 +592,20 @@ def audit_single_cell_read_names(value, system):
for file in value['files']:
if file.startswith('/sequence-files/'):
sequence_file_object = system.get('request').embed(file)
applicable_read_types = ['R1', 'R2', 'R3'] # Skip Index 1 and Index 2
# Get read type
illumina_read_type = sequence_file_object.get('illumina_read_type', '')
# If no read type or I-type, skip audit
if illumina_read_type not in applicable_read_types:
continue
# Check for read names
read_names = sequence_file_object.get('read_names', '')
if read_names:
if any(read_name not in ['Read 1', 'Read 2', 'Barcode index'] for read_name in read_names):
unexpected_read_names.append(file)
else:
missing_read_names.append(file)

# Audit for missing read names
if missing_read_names:
for file in missing_read_names:
Expand Down
9 changes: 0 additions & 9 deletions src/igvfd/audit/measurement_set.py
Original file line number Diff line number Diff line change
Expand Up @@ -537,26 +537,17 @@ def audit_inconsistent_onlist_info(value, system):
"audit_description": "Measurement sets with 2 or more barcode onlist files are expected to have an onlist method of either product or multi.",
"audit_category": "inconsistent barcode onlist",
"audit_level": "ERROR"
},
{
"audit_description": "Measurement sets with only 1 barcode onlist files are expected to have an onlist method of no combination.",
"audit_category": "inconsistent barcode onlist",
"audit_level": "ERROR"
}
]
'''
audit_message_missing_method_mismatch_combo = get_audit_message(audit_inconsistent_onlist_info, index=0)
audit_message_missing_method_mismatch_nocombo = get_audit_message(audit_inconsistent_onlist_info, index=1)
onlist_files = value.get('onlist_files')
onlist_method = value.get('onlist_method')
# Only check if both files and method properties are present
if onlist_files and onlist_method:
# Check if multiple onlist files are submitted but the method is no combination
if (len(onlist_files) > 1) and (onlist_method == 'no combination'):
yield AuditFailure(audit_message_missing_method_mismatch_combo.get('audit_category', ''), audit_message_missing_method_mismatch_combo.get('audit_description', ''), level=audit_message_missing_method_mismatch_combo.get('audit_level', ''))
# Check if one onlist file is submitted but the method indicates combination
if (len(onlist_files) == 1) and (onlist_method != 'no combination'):
yield AuditFailure(audit_message_missing_method_mismatch_nocombo.get('audit_category', ''), audit_message_missing_method_mismatch_nocombo.get('audit_description', ''), level=audit_message_missing_method_mismatch_nocombo.get('audit_level', ''))


@audit_checker('MeasurementSet', frame='object')
Expand Down
4 changes: 2 additions & 2 deletions src/igvfd/mappings/analysis_set.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"hash": "bc24d24de6fcfff86dd1f8d568b5d7c1",
"index_name": "analysis_set_bc24d24d",
"hash": "96af1dff59d5e0cbeb0b1e5a66cb6591",
"index_name": "analysis_set_96af1dff",
"item_type": "analysis_set",
"mapping": {
"dynamic_templates": [
Expand Down
4 changes: 2 additions & 2 deletions src/igvfd/mappings/measurement_set.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"hash": "657c2b6577fc959190f279a4f94ff912",
"index_name": "measurement_set_657c2b65",
"hash": "2cdb7cde70546f9e9c16bf50dcc19eb1",
"index_name": "measurement_set_2cdb7cde",
"item_type": "measurement_set",
"mapping": {
"dynamic_templates": [
Expand Down
27 changes: 0 additions & 27 deletions src/igvfd/tests/test_audit_analysis_set.py
Original file line number Diff line number Diff line change
Expand Up @@ -303,30 +303,3 @@ def test_audit_analysis_set_demultiplexed_sample(
error['category'] != 'inconsistent demultiplexed sample'
for error in res.json['audit'].get('ERROR', [])
)


def test_audit_analysis_set_inconsistent_barcode_onlist(testapp, analysis_set_with_scrna_measurement_sets, measurement_set_one_onlist, measurement_set_two_onlists, tabular_file_onlist_1, tabular_file_onlist_2):
    '''
    Exercise the 'inconsistent barcode onlist' WARNING on an analysis set.

    First the analysis set is pointed at two measurement sets with different
    onlist information and the audit must be reported. Then the onlist
    information of the two measurement sets is aligned and the audit must no
    longer be reported.
    '''
    analysis_set_id = analysis_set_with_scrna_measurement_sets['@id']
    audit_category = 'inconsistent barcode onlist'

    # Input measurement sets with different onlist methods and different
    # sets of onlist files (audit).
    testapp.patch_json(
        analysis_set_id,
        {
            'input_file_sets': [measurement_set_one_onlist['@id'], measurement_set_two_onlists['@id']]
        }
    )
    res = testapp.get(analysis_set_id + '@@audit')
    assert any(
        error['category'] == audit_category
        for error in res.json['audit'].get('WARNING', [])
    )

    # Give both measurement sets the same onlist info (no audit).
    # The audit compares the onlist method as well as the onlist files, so the
    # method has to be aligned too, otherwise the method mismatch still fires.
    # NOTE(review): assumes the measurement_set_two_onlists fixture exposes
    # 'onlist_method' and uses tabular_file_onlist_1/2 as its onlist files;
    # confirm against the fixture definitions.
    testapp.patch_json(
        measurement_set_one_onlist['@id'],
        {
            'onlist_files': [tabular_file_onlist_1['@id'], tabular_file_onlist_2['@id']],
            'onlist_method': measurement_set_two_onlists['onlist_method']
        }
    )
    res = testapp.get(analysis_set_id + '@@audit')
    # all(), not any(): every remaining WARNING must belong to another
    # category. any() would pass as soon as a single unrelated WARNING exists
    # and would fail when there are no warnings at all.
    assert all(
        error['category'] != audit_category
        for error in res.json['audit'].get('WARNING', [])
    )
27 changes: 15 additions & 12 deletions src/igvfd/tests/test_audit_file_set.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,40 +103,43 @@ def test_audit_inconsistent_location_files(testapp, sequence_file_pod5, sequence


def test_audit_single_cell_read_names(testapp, measurement_set_one_onlist, sequence_file, sequence_file_sequencing_run_2):
# Patch a single cell MeaSet SeqFiles without read_names (audit)
# Patch a single cell SeqFiles without read_names and I1 (no audit)
testapp.patch_json(
sequence_file['@id'],
{
'file_set': measurement_set_one_onlist['@id']
'file_set': measurement_set_one_onlist['@id'],
'illumina_read_type': 'I1'
}
)
res = testapp.get(measurement_set_one_onlist['@id'] + '@@audit')
assert all(
error['category'] != 'missing read names'
for error in res.json['audit'].get('NOT_COMPLIANT', [])
)
# Patch a single cell SeqFiles without read_names and R1 (audit)
testapp.patch_json(
sequence_file_sequencing_run_2['@id'],
sequence_file['@id'],
{
'file_set': measurement_set_one_onlist['@id']
'illumina_read_type': 'R1'
}
)
res = testapp.get(measurement_set_one_onlist['@id'] + '@@audit')
assert any(
error['category'] == 'missing read names'
for error in res.json['audit'].get('NOT_COMPLIANT', [])
)
# Patch the a MeaSet with one SeqFile with read_names and one without (audit)
# Patch SeqFiles with R-read type and read_names (no audit)
testapp.patch_json(
sequence_file['@id'],
{
'read_names': ['Read 1'],
'read_names': ['Read 1']
}
)
res = testapp.get(measurement_set_one_onlist['@id'] + '@@audit')
assert any(
error['category'] == 'missing read names'
for error in res.json['audit'].get('NOT_COMPLIANT', [])
)
# Patch both SeqFiles with read_names (no audit)
testapp.patch_json(
sequence_file_sequencing_run_2['@id'],
{
'file_set': measurement_set_one_onlist['@id'],
'illumina_read_type': 'R2',
'read_names': ['Read 2', 'Barcode index']
}
)
Expand Down
22 changes: 2 additions & 20 deletions src/igvfd/tests/test_audit_measurement_set.py
Original file line number Diff line number Diff line change
Expand Up @@ -1187,19 +1187,13 @@ def test_audit_onlist(testapp, measurement_set_one_onlist, measurement_set, assa


def test_audit_inconsistent_barcode_onlist(testapp, measurement_set_one_onlist, measurement_set_two_onlists, tabular_file_onlist_1, tabular_file_onlist_2):
# Check if the measurement set fixture with one file and no combination method is audit-free
res = testapp.get(measurement_set_one_onlist['@id'] + '@@audit')
assert all(
error['category'] != 'inconsistent barcode onlist'
for error in res.json['audit'].get('ERROR', [])
)
# Check if the measurement set fixture with two file and combination method is audit-free
# Check the MeaSet with two file and combination method (no audit)
res = testapp.get(measurement_set_two_onlists['@id'] + '@@audit')
assert all(
error['category'] != 'inconsistent barcode onlist'
for error in res.json['audit'].get('ERROR', [])
)
# Add another onlist file to a MeaSet that is no-combination for onlist method.
# Patch a MeaSet with 2 onlist files and no combination method (audit).
testapp.patch_json(
measurement_set_one_onlist['@id'],
{
Expand All @@ -1211,18 +1205,6 @@ def test_audit_inconsistent_barcode_onlist(testapp, measurement_set_one_onlist,
error['category'] == 'inconsistent barcode onlist'
for error in res.json['audit'].get('ERROR', [])
)
# Remove an onlist file to a MeaSet with a combination onlist method.
testapp.patch_json(
measurement_set_two_onlists['@id'],
{
'onlist_files': [tabular_file_onlist_1['@id']]
}
)
res = testapp.get(measurement_set_one_onlist['@id'] + '@@audit')
assert any(
error['category'] == 'inconsistent barcode onlist'
for error in res.json['audit'].get('ERROR', [])
)


def test_audit_unexpected_onlist_files(testapp, measurement_set_one_onlist, tabular_file_onlist_1):
Expand Down

0 comments on commit 805b1b7

Please sign in to comment.