From 805b1b704a5ca61d4ec9dc38b73882a36f9b49e7 Mon Sep 17 00:00:00 2001
From: Zhewei Shen <zwshen@stanford.edu>
Date: Wed, 12 Feb 2025 11:53:35 -0800
Subject: [PATCH] changed read_name and onlist audits on file sets

---
 src/igvfd/audit/analysis_set.py               | 36 -------------------
 src/igvfd/audit/file_set.py                   |  8 +++++
 src/igvfd/audit/measurement_set.py            |  9 -----
 src/igvfd/mappings/analysis_set.json          |  4 +--
 src/igvfd/mappings/measurement_set.json       |  4 +--
 src/igvfd/tests/test_audit_analysis_set.py    | 27 --------------
 src/igvfd/tests/test_audit_file_set.py        | 27 +++++++-------
 src/igvfd/tests/test_audit_measurement_set.py | 22 ++----------
 8 files changed, 29 insertions(+), 108 deletions(-)

diff --git a/src/igvfd/audit/analysis_set.py b/src/igvfd/audit/analysis_set.py
index f6f313f78..56e7176d5 100644
--- a/src/igvfd/audit/analysis_set.py
+++ b/src/igvfd/audit/analysis_set.py
@@ -255,39 +255,3 @@ def audit_analysis_set_multiplexed_samples(value, system):
                 f'of the `samples`: {all_samples} of its `input_file_sets`: {input_file_sets}.'
             )
             yield AuditFailure(audit_message_inconsistent_demultiplexed_sample.get('audit_category', ''), f'{detail} {audit_message_inconsistent_demultiplexed_sample.get("audit_description", "")}', level=audit_message_inconsistent_demultiplexed_sample.get('audit_level', ''))
-
-
-@audit_checker('AnalysisSet', frame='object')
-def audit_analysis_set_inconsistent_onlist_info(value, system):
-    '''
-    [
-        {
-            "audit_description": "Analysis sets for single cell uniform pipeline runs are expected to have measurement sets with the same barcode files.",
-            "audit_category": "inconsistent barcode onlist",
-            "audit_level": "WARNING"
-        },
-        {
-            "audit_description": "Analysis sets for single cell uniform pipeline runs are expected to have measurement sets with the same barcode methods.",
-            "audit_category": "inconsistent barcode onlist",
-            "audit_level": "WARNING"
-        }
-    ]
-    '''
-    audit_msg_inconsistent_onlist_files = get_audit_message(audit_analysis_set_inconsistent_onlist_info, index=0)
-    audit_msg_inconsistent_onlist_methods = get_audit_message(audit_analysis_set_inconsistent_onlist_info, index=1)
-    all_onlist_files = []
-    all_onlist_methods = []
-    input_file_sets = value.get('input_file_sets', [])
-    for input_file_set in input_file_sets:
-        if input_file_set.startswith('/measurement-sets/'):
-            input_file_set_object = system.get('request').embed(input_file_set + '@@object?skip_calculated=true')
-            single_cell_assay_status = single_cell_check(system, input_file_set_object, 'Measurement set')
-            if single_cell_assay_status:
-                all_onlist_files.append(sorted(input_file_set_object.get('onlist_files', '')))
-                all_onlist_methods.append(input_file_set_object.get('onlist_method', ''))
-    # If there are multiple onlist methods from the input measurement sets, trigger audit
-    if len(set(all_onlist_methods)) > 1:
-        yield AuditFailure(audit_msg_inconsistent_onlist_methods.get('audit_category', ''), audit_msg_inconsistent_onlist_methods.get('audit_description', ''), level=audit_msg_inconsistent_onlist_methods.get('audit_level', ''))
-    # If the input measurement sets have different onlist files
-    elif not all(set(sublist) == set(all_onlist_files[0]) for sublist in all_onlist_files):
-        yield AuditFailure(audit_msg_inconsistent_onlist_files.get('audit_category', ''), audit_msg_inconsistent_onlist_files.get('audit_description', ''), level=audit_msg_inconsistent_onlist_files.get('audit_level', ''))
diff --git a/src/igvfd/audit/file_set.py b/src/igvfd/audit/file_set.py
index 751bff920..51f6d07a1 100644
--- a/src/igvfd/audit/file_set.py
+++ b/src/igvfd/audit/file_set.py
@@ -592,12 +592,20 @@ def audit_single_cell_read_names(value, system):
             for file in value['files']:
                 if file.startswith('/sequence-files/'):
                     sequence_file_object = system.get('request').embed(file)
+                    applicable_read_types = ['R1', 'R2', 'R3']  # Skip Index 1 and Index 2
+                    # Get read type
+                    illumina_read_type = sequence_file_object.get('illumina_read_type', '')
+                    # If no read type or I-type, skip audit
+                    if illumina_read_type not in applicable_read_types:
+                        continue
+                    # Check for read names
                     read_names = sequence_file_object.get('read_names', '')
                     if read_names:
                         if any(read_name not in ['Read 1', 'Read 2', 'Barcode index'] for read_name in read_names):
                             unexpected_read_names.append(file)
                     else:
                         missing_read_names.append(file)
+
         # Audit for missing read names
         if missing_read_names:
             for file in missing_read_names:
diff --git a/src/igvfd/audit/measurement_set.py b/src/igvfd/audit/measurement_set.py
index e2108b5b0..72ec98a63 100644
--- a/src/igvfd/audit/measurement_set.py
+++ b/src/igvfd/audit/measurement_set.py
@@ -537,16 +537,10 @@ def audit_inconsistent_onlist_info(value, system):
             "audit_description": "Measurement sets with 2 or more barcode onlist files are expected to have an onlist method of either product or multi.",
             "audit_category": "inconsistent barcode onlist",
             "audit_level": "ERROR"
-        },
-        {
-            "audit_description": "Measurement sets with only 1 barcode onlist files are expected to have an onlist method of no combination.",
-            "audit_category": "inconsistent barcode onlist",
-            "audit_level": "ERROR"
         }
     ]
     '''
     audit_message_missing_method_mismatch_combo = get_audit_message(audit_inconsistent_onlist_info, index=0)
-    audit_message_missing_method_mismatch_nocombo = get_audit_message(audit_inconsistent_onlist_info, index=1)
     onlist_files = value.get('onlist_files')
     onlist_method = value.get('onlist_method')
     # Only check if both files and method properties are present
@@ -554,9 +548,6 @@ def audit_inconsistent_onlist_info(value, system):
         # Check if multiple onlist files are submitted but the method is no combination
         if (len(onlist_files) > 1) and (onlist_method == 'no combination'):
             yield AuditFailure(audit_message_missing_method_mismatch_combo.get('audit_category', ''), audit_message_missing_method_mismatch_combo.get('audit_description', ''), level=audit_message_missing_method_mismatch_combo.get('audit_level', ''))
-        # Check if one onlist file is submitted but the method indicates combination
-        if (len(onlist_files) == 1) and (onlist_method != 'no combination'):
-            yield AuditFailure(audit_message_missing_method_mismatch_nocombo.get('audit_category', ''), audit_message_missing_method_mismatch_nocombo.get('audit_description', ''), level=audit_message_missing_method_mismatch_nocombo.get('audit_level', ''))
 
 
 @audit_checker('MeasurementSet', frame='object')
diff --git a/src/igvfd/mappings/analysis_set.json b/src/igvfd/mappings/analysis_set.json
index df56d518e..3c9eac5c7 100644
--- a/src/igvfd/mappings/analysis_set.json
+++ b/src/igvfd/mappings/analysis_set.json
@@ -1,6 +1,6 @@
 {
-    "hash": "bc24d24de6fcfff86dd1f8d568b5d7c1",
-    "index_name": "analysis_set_bc24d24d",
+    "hash": "96af1dff59d5e0cbeb0b1e5a66cb6591",
+    "index_name": "analysis_set_96af1dff",
     "item_type": "analysis_set",
     "mapping": {
         "dynamic_templates": [
diff --git a/src/igvfd/mappings/measurement_set.json b/src/igvfd/mappings/measurement_set.json
index 9b47991bf..a1d64cfc6 100644
--- a/src/igvfd/mappings/measurement_set.json
+++ b/src/igvfd/mappings/measurement_set.json
@@ -1,6 +1,6 @@
 {
-    "hash": "657c2b6577fc959190f279a4f94ff912",
-    "index_name": "measurement_set_657c2b65",
+    "hash": "2cdb7cde70546f9e9c16bf50dcc19eb1",
+    "index_name": "measurement_set_2cdb7cde",
     "item_type": "measurement_set",
     "mapping": {
         "dynamic_templates": [
diff --git a/src/igvfd/tests/test_audit_analysis_set.py b/src/igvfd/tests/test_audit_analysis_set.py
index 3f020c954..def5487df 100644
--- a/src/igvfd/tests/test_audit_analysis_set.py
+++ b/src/igvfd/tests/test_audit_analysis_set.py
@@ -303,30 +303,3 @@ def test_audit_analysis_set_demultiplexed_sample(
         error['category'] != 'inconsistent demultiplexed sample'
         for error in res.json['audit'].get('ERROR', [])
     )
-
-
-def test_audit_analysis_set_inconsistent_barcode_onlist(testapp, analysis_set_with_scrna_measurement_sets, measurement_set_one_onlist, measurement_set_two_onlists, tabular_file_onlist_1, tabular_file_onlist_2):
-    # Check if the audit can catch input MeaSets have multiple onlist methods and 2 different sets of onlist files
-    testapp.patch_json(
-        analysis_set_with_scrna_measurement_sets['@id'],
-        {
-            'input_file_sets': [measurement_set_one_onlist['@id'], measurement_set_two_onlists['@id']]
-        }
-    )
-    res = testapp.get(analysis_set_with_scrna_measurement_sets['@id'] + '@@audit')
-    assert any(
-        error['category'] == 'inconsistent barcode onlist'
-        for error in res.json['audit'].get('WARNING', [])
-    )
-    # Check if an analysis set with 2 measurement sets have the same onlist info will be audit-free
-    testapp.patch_json(
-        measurement_set_one_onlist['@id'],
-        {
-            'onlist_files': [tabular_file_onlist_1['@id'], tabular_file_onlist_2['@id']]
-        }
-    )
-    res = testapp.get(analysis_set_with_scrna_measurement_sets['@id'] + '@@audit')
-    assert any(
-        error['category'] != 'inconsistent barcode onlist'
-        for error in res.json['audit'].get('WARNING', [])
-    )
diff --git a/src/igvfd/tests/test_audit_file_set.py b/src/igvfd/tests/test_audit_file_set.py
index 420750713..a33b9bdf0 100644
--- a/src/igvfd/tests/test_audit_file_set.py
+++ b/src/igvfd/tests/test_audit_file_set.py
@@ -103,17 +103,24 @@ def test_audit_inconsistent_location_files(testapp, sequence_file_pod5, sequence
 
 
 def test_audit_single_cell_read_names(testapp, measurement_set_one_onlist, sequence_file, sequence_file_sequencing_run_2):
-    # Patch a single cell MeaSet SeqFiles without read_names (audit)
+    # Patch a single cell SeqFile with no read_names and read type I1 (no audit)
     testapp.patch_json(
         sequence_file['@id'],
         {
-            'file_set': measurement_set_one_onlist['@id']
+            'file_set': measurement_set_one_onlist['@id'],
+            'illumina_read_type': 'I1'
         }
     )
+    res = testapp.get(measurement_set_one_onlist['@id'] + '@@audit')
+    assert all(
+        error['category'] != 'missing read names'
+        for error in res.json['audit'].get('NOT_COMPLIANT', [])
+    )
+    # Patch a single cell SeqFile with no read_names and read type R1 (audit)
     testapp.patch_json(
-        sequence_file_sequencing_run_2['@id'],
+        sequence_file['@id'],
         {
-            'file_set': measurement_set_one_onlist['@id']
+            'illumina_read_type': 'R1'
         }
     )
     res = testapp.get(measurement_set_one_onlist['@id'] + '@@audit')
@@ -121,22 +128,18 @@ def test_audit_single_cell_read_names(testapp, measurement_set_one_onlist, seque
         error['category'] == 'missing read names'
         for error in res.json['audit'].get('NOT_COMPLIANT', [])
     )
-    # Patch the a MeaSet with one SeqFile with read_names and one without (audit)
+    # Patch SeqFiles with R-read type and read_names (no audit)
     testapp.patch_json(
         sequence_file['@id'],
         {
-            'read_names': ['Read 1'],
+            'read_names': ['Read 1']
         }
     )
-    res = testapp.get(measurement_set_one_onlist['@id'] + '@@audit')
-    assert any(
-        error['category'] == 'missing read names'
-        for error in res.json['audit'].get('NOT_COMPLIANT', [])
-    )
-    # Patch both SeqFiles with read_names (no audit)
     testapp.patch_json(
         sequence_file_sequencing_run_2['@id'],
         {
+            'file_set': measurement_set_one_onlist['@id'],
+            'illumina_read_type': 'R2',
             'read_names': ['Read 2', 'Barcode index']
         }
     )
diff --git a/src/igvfd/tests/test_audit_measurement_set.py b/src/igvfd/tests/test_audit_measurement_set.py
index 233f48c6b..276e9650e 100644
--- a/src/igvfd/tests/test_audit_measurement_set.py
+++ b/src/igvfd/tests/test_audit_measurement_set.py
@@ -1187,19 +1187,13 @@ def test_audit_onlist(testapp, measurement_set_one_onlist, measurement_set, assa
 
 
 def test_audit_inconsistent_barcode_onlist(testapp, measurement_set_one_onlist, measurement_set_two_onlists, tabular_file_onlist_1, tabular_file_onlist_2):
-    # Check if the measurement set fixture with one file and no combination method is audit-free
-    res = testapp.get(measurement_set_one_onlist['@id'] + '@@audit')
-    assert all(
-        error['category'] != 'inconsistent barcode onlist'
-        for error in res.json['audit'].get('ERROR', [])
-    )
-    # Check if the measurement set fixture with two file and combination method is audit-free
+    # Check the MeaSet with two files and a combination method (no audit)
     res = testapp.get(measurement_set_two_onlists['@id'] + '@@audit')
     assert all(
         error['category'] != 'inconsistent barcode onlist'
         for error in res.json['audit'].get('ERROR', [])
     )
-    # Add another onlist file to a MeaSet that is no-combination for onlist method.
+    # Patch a MeaSet to have 2 onlist files with the 'no combination' method (audit).
     testapp.patch_json(
         measurement_set_one_onlist['@id'],
         {
@@ -1211,18 +1205,6 @@ def test_audit_inconsistent_barcode_onlist(testapp, measurement_set_one_onlist,
         error['category'] == 'inconsistent barcode onlist'
         for error in res.json['audit'].get('ERROR', [])
     )
-    # Remove an onlist file to a MeaSet with a combination onlist method.
-    testapp.patch_json(
-        measurement_set_two_onlists['@id'],
-        {
-            'onlist_files': [tabular_file_onlist_1['@id']]
-        }
-    )
-    res = testapp.get(measurement_set_one_onlist['@id'] + '@@audit')
-    assert any(
-        error['category'] == 'inconsistent barcode onlist'
-        for error in res.json['audit'].get('ERROR', [])
-    )
 
 
 def test_audit_unexpected_onlist_files(testapp, measurement_set_one_onlist, tabular_file_onlist_1):