Add analyses test #849

Merged
merged 6 commits into from Dec 10, 2024
86 changes: 85 additions & 1 deletion tests/search_test.py
@@ -21,14 +21,16 @@
from opensearch_dsl import Search, Index
from datetime import datetime
import pytest
import os
from invenio_db import db
from unittest.mock import call

from hepdata.ext.opensearch.config.os_config import \
add_default_aggregations, sort_fields_mapping
from hepdata.ext.opensearch import api as os_api
from hepdata.ext.opensearch.config.os_config import get_filter_field
from hepdata.ext.opensearch.document_enhancers import add_data_keywords, process_cmenergies
from hepdata.ext.opensearch.document_enhancers import add_data_keywords, process_cmenergies, add_analyses
from hepdata.modules.records.utils.submission import process_submission_directory
from hepdata.utils.miscellaneous import get_resource_data
from hepdata.ext.opensearch.process_results import merge_results, match_tables_to_papers, \
get_basic_record_information, is_datatable
@@ -753,6 +755,88 @@ def test_add_data_keywords():
assert 'NOTAREALKEYWORD' not in doc['data_keywords']


def test_add_analyses(app):
"""
    Tests the add_analyses function to ensure that analysis resources attached
    to a submission are added to the doc object during document enhancement.

    Currently tests the NUISANCE, HistFactory and MadAnalysis analysis types.
"""
    # test_data must match the analysis resources defined in test_folder
test_folder = "test_data/test_analysis_submission"
test_data = [
{ # ProSelecta/NUISANCE
"type": "NUISANCE",
"filename": "test.ProSelecta"
},
{ # HistFactory entry
"type": "HistFactory",
"filename": "test.tar.gz"
},
]
    # TODO: build this from the SITE_URL config value rather than hard-coding it
analysis_url = "http://localhost:5000/record/resource/%s?landing_page=true"

with app.app_context():
        # Create and submit a test submission containing the analysis resources
        base_dir = os.path.dirname(os.path.realpath(__file__))

hepsubmission = HEPSubmission(publication_recid=123456,
overall_status="finished",
version=1,
doi="10.17182/hepdata.123456")
db.session.add(hepsubmission)
db.session.commit()

        # Process the test submission directory
        directory = os.path.join(base_dir, test_folder)
errors = process_submission_directory(
directory,
            os.path.join(directory, "submission.yaml"),
hepsubmission.publication_recid
)

        # Submission processing should not report any errors
assert not errors

        # Add a MadAnalysis DataResource separately, outside submission.yaml
        mad_analysis_resource = DataResource(
            file_location="placeholder",
            file_type="MadAnalysis",
            file_description="placeholder"
        )

        # Attach the resource to the submission and persist it
hepsubmission.resources.append(mad_analysis_resource)
db.session.add(mad_analysis_resource)
db.session.add(hepsubmission)
db.session.commit()

        # Set up a minimal doc object matching what add_analyses expects
        test_doc = {"analyses": [], "recid": hepsubmission.publication_recid}
        # Run the add_analyses document enhancer
add_analyses(test_doc)

        # Sorted IDs of all DataResource objects attached to the submission
data_ids = sorted([r.id for r in hepsubmission.resources])

# There should be 3 analyses and 3 resources
assert len(data_ids) == len(test_doc["analyses"]) == 3

        # Each test_data entry should correspond to one resource ID and one
        # analysis output; zip stops at the shorter test_data list, so the
        # MadAnalysis entry is checked separately below
        for test, d_id, analysis in zip(test_data, data_ids, test_doc["analyses"]):
            # Fill in the expected URL using the sorted resource ID
            test["analysis"] = analysis_url % d_id
            # Confirm the expected data has been added to the doc
assert analysis == test

        # The MadAnalysis resource added after submission processing should be last
mad_analysis = test_doc["analyses"][-1]
assert mad_analysis["type"] == "MadAnalysis"


def test_process_cmenergies():
test_keywords = {
"cmenergies": [
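The assertions in test_add_analyses above imply that add_analyses appends one dict per analysis resource to doc["analyses"], each carrying the analysis type, the resource filename and a landing-page URL built from the resource ID. A minimal sketch of the expected doc state, assuming placeholder resource IDs 1 to 3 (the real IDs are read from hepsubmission.resources and sorted by the test):

# Sketch only: the resource IDs below are placeholders, not values produced by the test.
expected_doc = {
    "recid": 123456,
    "analyses": [
        # The first two entries correspond to the resources declared in submission.yaml;
        # the resource declared with type ProSelecta is expected to surface as NUISANCE.
        {"type": "NUISANCE", "filename": "test.ProSelecta",
         "analysis": "http://localhost:5000/record/resource/1?landing_page=true"},
        {"type": "HistFactory", "filename": "test.tar.gz",
         "analysis": "http://localhost:5000/record/resource/2?landing_page=true"},
        # The MadAnalysis resource is attached after submission processing; the test
        # only asserts its "type", so any further keys depend on the enhancer.
        {"type": "MadAnalysis"},
    ],
}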
10 changes: 10 additions & 0 deletions tests/test_data/test_analysis_submission/data1.yaml
@@ -0,0 +1,10 @@
dependent_variables:
- header: {name: TestData1-dependent-V1, units: NA}
qualifiers:
- {name: TestData1-qualifier-V1, value: 0}
values:
- {value: 0}
independent_variables:
- header: {name: TestData1-independent-V1, units: NA}
values:
- value: 0
12 changes: 12 additions & 0 deletions tests/test_data/test_analysis_submission/submission.yaml
@@ -0,0 +1,12 @@
---
description: "TestSubmission1-V1"
comment: TestComment1-V1
additional_resources:
- {description: Test, location: test.ProSelecta, type: ProSelecta }
- {description: Test, location: test.tar.gz, type: HistFactory }
---
name: "TestTable1-V1"
description: TestTable1-description-V1
keywords:
- {name: cmenergies, values: [0]}
data_file: data1.yaml
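The two additional_resources above become DataResource rows when process_submission_directory runs; together with the MadAnalysis resource appended directly in the test they account for the three IDs that the test sorts and substitutes into the landing-page URLs. A quick way to inspect them, assuming HEPSubmission is imported from hepdata.modules.submission.models (the import is not shown in this hunk):

from hepdata.modules.submission.models import HEPSubmission

# Within an application context, fetch the test submission created above and
# list its attached resources (IDs, types and file locations).
submission = HEPSubmission.query.filter_by(publication_recid=123456).first()
for resource in submission.resources:
    print(resource.id, resource.file_type, resource.file_location)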
Empty file: tests/test_data/test_analysis_submission/test.ProSelecta
Empty file: tests/test_data/test_analysis_submission/test.tar.gz