Merge branch 'main' into fix_do_finalise

HEPData · Jul 16, 2024 · a57210d · a57210d
2 parents c2f0af3 + 32b9fb7
commit a57210d
Show file tree

Hide file tree

Showing 8 changed files with 231 additions and 33 deletions.
diff --git a/hepdata/config.py b/hepdata/config.py
@@ -312,7 +312,7 @@ def _(x):
 # ANALYSES_ENDPOINTS
 ANALYSES_ENDPOINTS = {
     'rivet': {
-        'endpoint_url': 'http://rivet.hepforge.org/analyses.json',
+        'endpoint_url': 'https://cedar-tools.web.cern.ch/rivet/analyses.json',
         'url_template': 'http://rivet.hepforge.org/analyses/{0}'
     },
     'MadAnalysis': {
@@ -326,6 +326,7 @@ def _(x):
 }
 
 HISTFACTORY_FILE_TYPE = 'HistFactory'
+NUISANCE_FILE_TYPE = 'ProSelecta'
 
 ADMIN_EMAIL = '[email protected]'
 SUBMISSION_FILE_NAME_PATTERN = 'HEPData-{}-v{}-yaml.zip'

diff --git a/hepdata/ext/opensearch/document_enhancers.py b/hepdata/ext/opensearch/document_enhancers.py
@@ -30,7 +30,7 @@
 from dateutil.parser import parse
 from flask import current_app
 
-from hepdata.config import CFG_PUB_TYPE, CFG_DATA_TYPE, HISTFACTORY_FILE_TYPE
+from hepdata.config import CFG_PUB_TYPE, CFG_DATA_TYPE, HISTFACTORY_FILE_TYPE, NUISANCE_FILE_TYPE
 from hepdata.ext.opensearch.config.record_mapping import mapping as os_mapping
 from hepdata.modules.permissions.models import SubmissionParticipant
 from hepdata.modules.submission.api import get_latest_hepsubmission
@@ -94,7 +94,7 @@ def add_shortened_authors(doc):
 
 def add_analyses(doc):
     """
-    Add analyses links such as Rivet, MadAnalysis 5 and HistFactory to the index.
+    Add analyses links such as Rivet, MadAnalysis 5, HistFactory and NUISANCE to the index.
 
     :param doc:
     :return:
@@ -106,11 +106,15 @@ def add_analyses(doc):
         for reference in latest_submission.resources:
             if reference.file_type in current_app.config['ANALYSES_ENDPOINTS']:
                 doc["analyses"].append({'type': reference.file_type, 'analysis': reference.file_location})
-            elif reference.file_type == HISTFACTORY_FILE_TYPE:
+            else:
                 site_url = current_app.config.get('SITE_URL', 'https://www.hepdata.net')
                 landing_page_url = f"{site_url}/record/resource/{reference.id}?landing_page=true"
-                doc["analyses"].append({'type': reference.file_type, 'analysis': landing_page_url,
-                                        'filename': os.path.basename(reference.file_location)})
+                if reference.file_type == HISTFACTORY_FILE_TYPE:
+                    doc["analyses"].append({'type': reference.file_type, 'analysis': landing_page_url,
+                                            'filename': os.path.basename(reference.file_location)})
+                elif reference.file_type == NUISANCE_FILE_TYPE:
+                    doc["analyses"].append({'type': 'NUISANCE', 'analysis': landing_page_url,
+                                            'filename': os.path.basename(reference.file_location)})
 
 
 def add_data_keywords(doc):

diff --git a/hepdata/modules/records/api.py b/hepdata/modules/records/api.py
@@ -1066,8 +1066,7 @@ def get_related_hepsubmissions(submission):
     related_submissions = []
     for related in submission.related_recids:
         data_submission = get_latest_hepsubmission(
-            publication_recid=related.related_recid,
-            overall_status='finished'
+            publication_recid=related.related_recid
         )
         if data_submission:
             related_submissions.append(data_submission)
@@ -1083,25 +1082,38 @@ def get_related_to_this_hepsubmissions(submission):
 
     :return: [list] List containing related records.
     """
-    related_sub_ids = (
+
+    # We use a subquery to get the max version/recid pairing
+    subquery = (
         HEPSubmission.query
-            .join(RelatedRecid, RelatedRecid.this_recid == HEPSubmission.publication_recid)
-            .filter(RelatedRecid.related_recid == submission.publication_recid)
-            .filter(HEPSubmission.overall_status == 'finished')  # Only finished submissions
-            .with_entities(HEPSubmission.publication_recid)
-            .all()
+        .with_entities(
+            HEPSubmission.publication_recid,
+            func.max(HEPSubmission.version).label('max_version')
+        )
+        .group_by(HEPSubmission.publication_recid)
+        .subquery()
     )
 
-    # Filter out the non-unique submission results returned by different versions
-    unique_recids =  set(sub[0] for sub in related_sub_ids)
+    # Use result of subquery to join and select the max submission where related
+    related_submissions = (
+        HEPSubmission.query
+        .join(subquery, (HEPSubmission.publication_recid == subquery.c.publication_recid) & (
+                HEPSubmission.version == subquery.c.max_version))
+        .join(RelatedRecid, RelatedRecid.this_recid == HEPSubmission.publication_recid)
+        .filter(RelatedRecid.related_recid == submission.publication_recid)
+        .all()
+    )
+
+    # Set comprehension to determine unique IDs where the max version object is 'finished'
+    unique_recids = {sub.publication_recid for sub in related_submissions if sub.overall_status == 'finished'}
+
     return [get_latest_hepsubmission(publication_recid=recid, overall_status='finished') for recid in unique_recids]
 
 
 def get_related_datasubmissions(data_submission):
     """
     Queries the database for all DataSubmission objects contained in
     this object's related DOI list.
-    Only returns an object if associated HEPSubmission status is 'finished'
     (All submissions this one is relating to)
 
     :param data_submission: The datasubmission object to find related data for.
@@ -1111,10 +1123,9 @@ def get_related_datasubmissions(data_submission):
     for related in data_submission.related_tables:
         submission = (
             DataSubmission.query
-                .filter(DataSubmission.doi == related.related_doi)
-                .join(HEPSubmission, HEPSubmission.publication_recid == DataSubmission.publication_recid)
-                .filter(HEPSubmission.overall_status == 'finished')
-                .first()
+            .filter(DataSubmission.doi == related.related_doi)
+            .join(HEPSubmission, HEPSubmission.publication_recid == DataSubmission.publication_recid)
+            .first()
         )
         if submission:
             related_submissions.append(submission)
@@ -1125,19 +1136,24 @@ def get_related_to_this_datasubmissions(data_submission):
     """
         Get the DataSubmission Objects with a RelatedTable entry
         where this doi is referred to in related_doi.
+        Only returns where associated HEPSubmission object is `finished`,
+        OR where it is within the same HEPSubmission
 
         :param data_submission: The datasubmission to find the related entries for.
         :return: [List] List of DataSubmission objects.
     """
     related_submissions = (
         DataSubmission.query
-            .join(RelatedTable, RelatedTable.table_doi == DataSubmission.doi)
-            .join(HEPSubmission,(HEPSubmission.publication_recid == DataSubmission.publication_recid))
-            .group_by(DataSubmission.id)
-            .having(func.max(HEPSubmission.version) == DataSubmission.version)
-            .filter(RelatedTable.related_doi == data_submission.doi)
-            .filter(HEPSubmission.overall_status == 'finished')
-            .all()
+        .join(RelatedTable, RelatedTable.table_doi == DataSubmission.doi)
+        .join(HEPSubmission, (HEPSubmission.publication_recid == DataSubmission.publication_recid))
+        .group_by(DataSubmission.id)
+        .having(func.max(HEPSubmission.version) == DataSubmission.version)
+        .filter(RelatedTable.related_doi == data_submission.doi)
+        # If finished, OR is part of the same submission
+        .filter(
+            (HEPSubmission.overall_status == 'finished') | (
+                HEPSubmission.publication_recid == data_submission.publication_recid))
+        .all()
     )
     return related_submissions
 

diff --git a/hepdata/modules/records/utils/common.py b/hepdata/modules/records/utils/common.py
@@ -30,7 +30,7 @@
 import os
 from sqlalchemy.orm.exc import NoResultFound
 
-from hepdata.config import HISTFACTORY_FILE_TYPE, SIZE_LOAD_CHECK_THRESHOLD
+from hepdata.config import HISTFACTORY_FILE_TYPE, NUISANCE_FILE_TYPE, SIZE_LOAD_CHECK_THRESHOLD
 from hepdata.ext.opensearch.api import get_record
 from hepdata.modules.submission.models import HEPSubmission, License, DataSubmission, DataResource
 
@@ -117,6 +117,8 @@ def infer_file_type(file, description, type=None):
         else:
             if is_histfactory(file, description, type):
                 return HISTFACTORY_FILE_TYPE
+            elif type and type.lower() == NUISANCE_FILE_TYPE.lower():
+                return NUISANCE_FILE_TYPE
             extension = file.rsplit(".", 1)[1]
             if extension in FILE_TYPES:
                 return FILE_TYPES[extension]

diff --git a/hepdata/modules/search/templates/hepdata_search/modals/search_help.html b/hepdata/modules/search/templates/hepdata_search/modals/search_help.html
@@ -243,6 +243,13 @@ <h4>Other useful searches</h4>
                                        (likelihoods in HistFactory format)
                                     </span>
                                 </li>
+                                <li>
+                                    <a href='/search?q=analysis:NUISANCE&sort_by=latest'
+                                       target="_new">analysis:NUISANCE</a>
+                                    <span class="text-muted">
+                                       (ProSelecta analysis for use with NUISANCE)
+                                    </span>
+                                </li>
                             </ul>
                         </li>
 

diff --git a/hepdata/version.py b/hepdata/version.py
@@ -28,4 +28,4 @@
 and parsed by ``setup.py``.
 """
 
-__version__ = "0.9.4dev20240628"
+__version__ = "0.9.4dev20240703"
diff --git a/tests/records_test.py b/tests/records_test.py
@@ -22,6 +22,7 @@
 # as an Intergovernmental Organization or submit itself to any jurisdiction.
 
 """HEPData records test cases."""
+import random
 from io import open, StringIO
 import os
 import re
@@ -45,7 +46,8 @@
     move_files, get_all_ids, has_upload_permissions, \
     has_coordinator_permissions, create_new_version, \
     get_resource_mimetype, create_breadcrumb_text, format_submission, \
-    format_resource, get_commit_message
+    format_resource, get_commit_message, get_related_to_this_hepsubmissions, \
+    get_related_hepsubmissions, get_related_datasubmissions, get_related_to_this_datasubmissions
 from hepdata.modules.records.importer.api import import_records
 from hepdata.modules.records.utils.analyses import update_analyses
 from hepdata.modules.records.utils.submission import get_or_create_hepsubmission, process_submission_directory, \
@@ -58,7 +60,7 @@
 from hepdata.modules.records.utils.workflow import update_record, create_record
 from hepdata.modules.records.views import set_data_review_status
 from hepdata.modules.submission.models import HEPSubmission, DataReview, \
-    DataSubmission, DataResource, License, RecordVersionCommitMessage
+    DataSubmission, DataResource, License, RecordVersionCommitMessage, RelatedRecid, RelatedTable
 from hepdata.modules.submission.views import process_submission_payload
 from hepdata.modules.submission.api import get_latest_hepsubmission
 from tests.conftest import TEST_EMAIL
@@ -1163,3 +1165,168 @@ def test_get_commit_message(app):
             # We know it's the most recent as message
             # is set to the highest inserted range, equal to insert_amount.
             ctx["revision_message"]["message"] = str(insert_amount)
+
+
+def test_version_related_functions(app):
+    """
+    Attempts to bulk test the related functions for both data tables and submissions (records/api):
+    Tests the functions:
+        - get_related_hepsubmissions
+        - get_related_to_this_hepsubmissions
+        - get_related_datasubmissions
+        - get_related_to_this_datasubmissions
+    Tests forward and backward relation for both HEPSubmission and DataSubmission objects, through
+    testing the RelatedRecId and RelatedTable relations and querying functions respectively.
+
+    Very similar to e2e/test_records::test_version_related_table, but tests core functionality.
+    """
+
+    # Set some random integers to use for record IDs
+    random_ints = [random.randint(300, 2147483648) for _ in range(0, 3)]
+    # We set alternating record IDs
+    test_data = [
+        {  # Record 1, which relates to 2
+            "recid": random_ints[0],  # This record ID
+            "other_recid": random_ints[1],  # Record to relate to
+            "overall_status": "finished"  # Chosen HEPSubmission status
+        },
+        {  # Record 2, which relates to 3
+            "recid": random_ints[1],
+            "other_recid": random_ints[0],
+            "overall_status": "finished"
+        },
+        {  # Record 3, which relates to 1, but is unfinished
+            "recid": random_ints[2],
+            "other_recid": random_ints[0],
+            "overall_status": "todo"
+        }
+    ]
+
+    # Insertion of test data
+    for test in test_data:
+        # We store any HEPSubmission versions in the `test` object
+        test["submissions"] = []
+        # We also store any related tables data
+        test["related_table_data"] = None
+        # For each version per test
+        for version in range(1, 3):
+            new_submission_data = {
+                "version": version,
+                "submission": HEPSubmission(
+                    publication_recid=test["recid"],
+                    version=version,
+                    overall_status=test["overall_status"]
+                ),
+                "data_submissions": []
+            }
+
+            for table_number in range(1, 3):
+                new_datasubmission = {
+                    "submission": DataSubmission(
+                        doi=f"10.17182/hepdata.{test['recid']}.v{version}/t{table_number}",
+                        publication_recid=new_submission_data["submission"].publication_recid,
+                        version=new_submission_data["submission"].version  # Also 'v'
+                    ),
+                    "number": table_number
+                }
+
+                new_submission_data["data_submissions"].append(new_datasubmission)
+                db.session.add(new_datasubmission["submission"])
+            db.session.add(new_submission_data["submission"])
+            test["submissions"].append(new_submission_data)
+
+    # Commit now as we need this data for more insertion
+    db.session.commit()
+
+    # Now we handle the related data insertion
+    for test in test_data:
+        latest_submission = test["submissions"][-1]["submission"]
+        related_recid = RelatedRecid(this_recid=test["recid"], related_recid=test["other_recid"])
+        latest_submission.related_recids.append(related_recid)
+        db.session.add_all([related_recid, latest_submission])
+
+        related_table_data = [
+            {
+                "table_doi": f"10.17182/hepdata.{test['recid']}.v2/t1",
+                "related_doi": f"10.17182/hepdata.{test['other_recid']}.v2/t1"
+            },
+            {
+                "table_doi": f"10.17182/hepdata.{test['recid']}.v2/t2",
+                "related_doi": f"10.17182/hepdata.{test['other_recid']}.v2/t2"
+            },
+            {
+                "table_doi": f"10.17182/hepdata.{test['recid']}.v2/t2",
+                "related_doi": f"10.17182/hepdata.{test['recid']}.v2/t1"
+            }
+        ]
+        test["related_table_data"] = related_table_data
+
+        for related in related_table_data:
+            datasub = DataSubmission.query.filter_by(
+                doi=related["table_doi"]
+            ).first()
+
+            related_datasub = RelatedTable(
+                table_doi=related["table_doi"],
+                related_doi=related["related_doi"]
+            )
+            datasub.related_tables.append(related_datasub)
+            db.session.add_all([related_datasub, datasub])
+
+    # Finally, we commit all the new data
+    db.session.commit()
+
+    # Test case checking
+    for test in test_data:
+        latest_submission = test["submissions"][-1]
+        # Get the HEPSubmission and DataSubmission objects for the test
+        test_submission = latest_submission["submission"]
+        test_datasubmissions = latest_submission["data_submissions"]
+
+        # Run the HEPSubmission functions to test
+        forward_sub_relations = get_related_hepsubmissions(test_submission)
+        backward_sub_relations = get_related_to_this_hepsubmissions(test_submission)
+
+        # This record should be referenced by the OTHER record,
+        #   and this record should reference the OTHER record
+        assert [sub.publication_recid for sub in forward_sub_relations] == [test["other_recid"]]
+
+        expected_backward_sub_relations = []
+
+        # Finished records will have other record references appear
+        if test["overall_status"] is not "todo":
+            expected_backward_sub_relations.append(test["other_recid"])
+
+        assert [sub.publication_recid for sub in backward_sub_relations] == expected_backward_sub_relations
+
+        for test_datasub in test_datasubmissions:
+            table_number = test_datasub["number"]
+            submission = test_datasub["submission"]
+
+            # Execute the DataSubmission functions to test
+            forward_dt_relations = [sub.doi for sub in get_related_datasubmissions(submission)]
+            backward_dt_relations = [sub.doi for sub in get_related_to_this_datasubmissions(submission)]
+
+            # The number of entries happens to match the table number
+            assert len(forward_dt_relations) == table_number
+
+            # This record should be referenced by the OTHER table,
+            #   and this table should reference the OTHER table
+            #   (matching the same table number)
+            expected_forward_dt_relations = [f"10.17182/hepdata.{test['other_recid']}.v2/t{table_number}"]
+            expected_backward_dt_relations = []
+
+            # We expect unfinished records to NOT have `other_recid` tables
+            if test["overall_status"] is not "todo":
+                expected_backward_dt_relations.append(f"10.17182/hepdata.{test['other_recid']}.v2/t{table_number}")
+
+            # Here we expect the second table to reference ITS OWN table one
+            if table_number == 2:
+                expected_forward_dt_relations.append(f"10.17182/hepdata.{test['recid']}.v2/t1")
+            else:
+                # For table 1, we expect it to be referenced by the table 2
+                expected_backward_dt_relations.append(f"10.17182/hepdata.{test['recid']}.v2/t2")
+
+            # Test that the forward/backward datatable relations work as expected
+            assert set(forward_dt_relations) == set(expected_forward_dt_relations)
+            assert set(backward_dt_relations) == set(expected_backward_dt_relations)