Skip to content

Commit

Permalink
Merge branch 'main' into fix_do_finalise
Browse files Browse the repository at this point in the history
  • Loading branch information
ItIsJordan committed Jul 16, 2024
2 parents c2f0af3 + 32b9fb7 commit a57210d
Show file tree
Hide file tree
Showing 8 changed files with 231 additions and 33 deletions.
3 changes: 2 additions & 1 deletion hepdata/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -312,7 +312,7 @@ def _(x):
# ANALYSES_ENDPOINTS
ANALYSES_ENDPOINTS = {
'rivet': {
'endpoint_url': 'http://rivet.hepforge.org/analyses.json',
'endpoint_url': 'https://cedar-tools.web.cern.ch/rivet/analyses.json',
'url_template': 'http://rivet.hepforge.org/analyses/{0}'
},
'MadAnalysis': {
Expand All @@ -326,6 +326,7 @@ def _(x):
}

HISTFACTORY_FILE_TYPE = 'HistFactory'
NUISANCE_FILE_TYPE = 'ProSelecta'

ADMIN_EMAIL = '[email protected]'
SUBMISSION_FILE_NAME_PATTERN = 'HEPData-{}-v{}-yaml.zip'
Expand Down
14 changes: 9 additions & 5 deletions hepdata/ext/opensearch/document_enhancers.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@
from dateutil.parser import parse
from flask import current_app

from hepdata.config import CFG_PUB_TYPE, CFG_DATA_TYPE, HISTFACTORY_FILE_TYPE
from hepdata.config import CFG_PUB_TYPE, CFG_DATA_TYPE, HISTFACTORY_FILE_TYPE, NUISANCE_FILE_TYPE
from hepdata.ext.opensearch.config.record_mapping import mapping as os_mapping
from hepdata.modules.permissions.models import SubmissionParticipant
from hepdata.modules.submission.api import get_latest_hepsubmission
Expand Down Expand Up @@ -94,7 +94,7 @@ def add_shortened_authors(doc):

def add_analyses(doc):
"""
Add analyses links such as Rivet, MadAnalysis 5 and HistFactory to the index.
Add analyses links such as Rivet, MadAnalysis 5, HistFactory and NUISANCE to the index.
:param doc:
:return:
Expand All @@ -106,11 +106,15 @@ def add_analyses(doc):
for reference in latest_submission.resources:
if reference.file_type in current_app.config['ANALYSES_ENDPOINTS']:
doc["analyses"].append({'type': reference.file_type, 'analysis': reference.file_location})
elif reference.file_type == HISTFACTORY_FILE_TYPE:
else:
site_url = current_app.config.get('SITE_URL', 'https://www.hepdata.net')
landing_page_url = f"{site_url}/record/resource/{reference.id}?landing_page=true"
doc["analyses"].append({'type': reference.file_type, 'analysis': landing_page_url,
'filename': os.path.basename(reference.file_location)})
if reference.file_type == HISTFACTORY_FILE_TYPE:
doc["analyses"].append({'type': reference.file_type, 'analysis': landing_page_url,
'filename': os.path.basename(reference.file_location)})
elif reference.file_type == NUISANCE_FILE_TYPE:
doc["analyses"].append({'type': 'NUISANCE', 'analysis': landing_page_url,
'filename': os.path.basename(reference.file_location)})


def add_data_keywords(doc):
Expand Down
60 changes: 38 additions & 22 deletions hepdata/modules/records/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -1066,8 +1066,7 @@ def get_related_hepsubmissions(submission):
related_submissions = []
for related in submission.related_recids:
data_submission = get_latest_hepsubmission(
publication_recid=related.related_recid,
overall_status='finished'
publication_recid=related.related_recid
)
if data_submission:
related_submissions.append(data_submission)
Expand All @@ -1083,25 +1082,38 @@ def get_related_to_this_hepsubmissions(submission):
:return: [list] List containing related records.
"""
related_sub_ids = (

# We use a subquery to get the max version/recid pairing
subquery = (
HEPSubmission.query
.join(RelatedRecid, RelatedRecid.this_recid == HEPSubmission.publication_recid)
.filter(RelatedRecid.related_recid == submission.publication_recid)
.filter(HEPSubmission.overall_status == 'finished') # Only finished submissions
.with_entities(HEPSubmission.publication_recid)
.all()
.with_entities(
HEPSubmission.publication_recid,
func.max(HEPSubmission.version).label('max_version')
)
.group_by(HEPSubmission.publication_recid)
.subquery()
)

# Filter out the non-unique submission results returned by different versions
unique_recids = set(sub[0] for sub in related_sub_ids)
# Use result of subquery to join and select the max submission where related
related_submissions = (
HEPSubmission.query
.join(subquery, (HEPSubmission.publication_recid == subquery.c.publication_recid) & (
HEPSubmission.version == subquery.c.max_version))
.join(RelatedRecid, RelatedRecid.this_recid == HEPSubmission.publication_recid)
.filter(RelatedRecid.related_recid == submission.publication_recid)
.all()
)

# Set comprehension to determine unique IDs where the max version object is 'finished'
unique_recids = {sub.publication_recid for sub in related_submissions if sub.overall_status == 'finished'}

return [get_latest_hepsubmission(publication_recid=recid, overall_status='finished') for recid in unique_recids]


def get_related_datasubmissions(data_submission):
"""
Queries the database for all DataSubmission objects contained in
this object's related DOI list.
Only returns an object if associated HEPSubmission status is 'finished'
(All submissions this one is relating to)
:param data_submission: The datasubmission object to find related data for.
Expand All @@ -1111,10 +1123,9 @@ def get_related_datasubmissions(data_submission):
for related in data_submission.related_tables:
submission = (
DataSubmission.query
.filter(DataSubmission.doi == related.related_doi)
.join(HEPSubmission, HEPSubmission.publication_recid == DataSubmission.publication_recid)
.filter(HEPSubmission.overall_status == 'finished')
.first()
.filter(DataSubmission.doi == related.related_doi)
.join(HEPSubmission, HEPSubmission.publication_recid == DataSubmission.publication_recid)
.first()
)
if submission:
related_submissions.append(submission)
Expand All @@ -1125,19 +1136,24 @@ def get_related_to_this_datasubmissions(data_submission):
"""
Get the DataSubmission Objects with a RelatedTable entry
where this doi is referred to in related_doi.
Only returns where associated HEPSubmission object is `finished`,
OR where it is within the same HEPSubmission
:param data_submission: The datasubmission to find the related entries for.
:return: [List] List of DataSubmission objects.
"""
related_submissions = (
DataSubmission.query
.join(RelatedTable, RelatedTable.table_doi == DataSubmission.doi)
.join(HEPSubmission,(HEPSubmission.publication_recid == DataSubmission.publication_recid))
.group_by(DataSubmission.id)
.having(func.max(HEPSubmission.version) == DataSubmission.version)
.filter(RelatedTable.related_doi == data_submission.doi)
.filter(HEPSubmission.overall_status == 'finished')
.all()
.join(RelatedTable, RelatedTable.table_doi == DataSubmission.doi)
.join(HEPSubmission, (HEPSubmission.publication_recid == DataSubmission.publication_recid))
.group_by(DataSubmission.id)
.having(func.max(HEPSubmission.version) == DataSubmission.version)
.filter(RelatedTable.related_doi == data_submission.doi)
# If finished, OR is part of the same submission
.filter(
(HEPSubmission.overall_status == 'finished') | (
HEPSubmission.publication_recid == data_submission.publication_recid))
.all()
)
return related_submissions

Expand Down
4 changes: 3 additions & 1 deletion hepdata/modules/records/utils/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@
import os
from sqlalchemy.orm.exc import NoResultFound

from hepdata.config import HISTFACTORY_FILE_TYPE, SIZE_LOAD_CHECK_THRESHOLD
from hepdata.config import HISTFACTORY_FILE_TYPE, NUISANCE_FILE_TYPE, SIZE_LOAD_CHECK_THRESHOLD
from hepdata.ext.opensearch.api import get_record
from hepdata.modules.submission.models import HEPSubmission, License, DataSubmission, DataResource

Expand Down Expand Up @@ -117,6 +117,8 @@ def infer_file_type(file, description, type=None):
else:
if is_histfactory(file, description, type):
return HISTFACTORY_FILE_TYPE
elif type and type.lower() == NUISANCE_FILE_TYPE.lower():
return NUISANCE_FILE_TYPE
extension = file.rsplit(".", 1)[1]
if extension in FILE_TYPES:
return FILE_TYPES[extension]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -243,6 +243,13 @@ <h4>Other useful searches</h4>
(likelihoods in HistFactory format)
</span>
</li>
<li>
<a href='/search?q=analysis:NUISANCE&sort_by=latest'
target="_new">analysis:NUISANCE</a>
<span class="text-muted">
(ProSelecta analysis for use with NUISANCE)
</span>
</li>
</ul>
</li>

Expand Down
2 changes: 1 addition & 1 deletion hepdata/version.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,4 +28,4 @@
and parsed by ``setup.py``.
"""

__version__ = "0.9.4dev20240628"
__version__ = "0.9.4dev20240703"
171 changes: 169 additions & 2 deletions tests/records_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
# as an Intergovernmental Organization or submit itself to any jurisdiction.

"""HEPData records test cases."""
import random
from io import open, StringIO
import os
import re
Expand All @@ -45,7 +46,8 @@
move_files, get_all_ids, has_upload_permissions, \
has_coordinator_permissions, create_new_version, \
get_resource_mimetype, create_breadcrumb_text, format_submission, \
format_resource, get_commit_message
format_resource, get_commit_message, get_related_to_this_hepsubmissions, \
get_related_hepsubmissions, get_related_datasubmissions, get_related_to_this_datasubmissions
from hepdata.modules.records.importer.api import import_records
from hepdata.modules.records.utils.analyses import update_analyses
from hepdata.modules.records.utils.submission import get_or_create_hepsubmission, process_submission_directory, \
Expand All @@ -58,7 +60,7 @@
from hepdata.modules.records.utils.workflow import update_record, create_record
from hepdata.modules.records.views import set_data_review_status
from hepdata.modules.submission.models import HEPSubmission, DataReview, \
DataSubmission, DataResource, License, RecordVersionCommitMessage
DataSubmission, DataResource, License, RecordVersionCommitMessage, RelatedRecid, RelatedTable
from hepdata.modules.submission.views import process_submission_payload
from hepdata.modules.submission.api import get_latest_hepsubmission
from tests.conftest import TEST_EMAIL
Expand Down Expand Up @@ -1163,3 +1165,168 @@ def test_get_commit_message(app):
# We know it's the most recent as message
# is set to the highest inserted range, equal to insert_amount.
ctx["revision_message"]["message"] = str(insert_amount)


def test_version_related_functions(app):
"""
Attempts to bulk test the related functions for both data tables and submissions (records/api):
Tests the functions:
- get_related_hepsubmissions
- get_related_to_this_hepsubmissions
- get_related_datasubmissions
- get_related_to_this_datasubmissions
Tests forward and backward relation for both HEPSubmission and DataSubmission objects, through
testing the RelatedRecId and RelatedTable relations and querying functions respectively.
Very similar to e2e/test_records::test_version_related_table, but tests core functionality.
"""

# Set some random integers to use for record IDs
random_ints = [random.randint(300, 2147483648) for _ in range(0, 3)]
# We set alternating record IDs
test_data = [
{ # Record 1, which relates to 2
"recid": random_ints[0], # This record ID
"other_recid": random_ints[1], # Record to relate to
"overall_status": "finished" # Chosen HEPSubmission status
},
{ # Record 2, which relates to 3
"recid": random_ints[1],
"other_recid": random_ints[0],
"overall_status": "finished"
},
{ # Record 3, which relates to 1, but is unfinished
"recid": random_ints[2],
"other_recid": random_ints[0],
"overall_status": "todo"
}
]

# Insertion of test data
for test in test_data:
# We store any HEPSubmission versions in the `test` object
test["submissions"] = []
# We also store any related tables data
test["related_table_data"] = None
# For each version per test
for version in range(1, 3):
new_submission_data = {
"version": version,
"submission": HEPSubmission(
publication_recid=test["recid"],
version=version,
overall_status=test["overall_status"]
),
"data_submissions": []
}

for table_number in range(1, 3):
new_datasubmission = {
"submission": DataSubmission(
doi=f"10.17182/hepdata.{test['recid']}.v{version}/t{table_number}",
publication_recid=new_submission_data["submission"].publication_recid,
version=new_submission_data["submission"].version # Also 'v'
),
"number": table_number
}

new_submission_data["data_submissions"].append(new_datasubmission)
db.session.add(new_datasubmission["submission"])
db.session.add(new_submission_data["submission"])
test["submissions"].append(new_submission_data)

# Commit now as we need this data for more insertion
db.session.commit()

# Now we handle the related data insertion
for test in test_data:
latest_submission = test["submissions"][-1]["submission"]
related_recid = RelatedRecid(this_recid=test["recid"], related_recid=test["other_recid"])
latest_submission.related_recids.append(related_recid)
db.session.add_all([related_recid, latest_submission])

related_table_data = [
{
"table_doi": f"10.17182/hepdata.{test['recid']}.v2/t1",
"related_doi": f"10.17182/hepdata.{test['other_recid']}.v2/t1"
},
{
"table_doi": f"10.17182/hepdata.{test['recid']}.v2/t2",
"related_doi": f"10.17182/hepdata.{test['other_recid']}.v2/t2"
},
{
"table_doi": f"10.17182/hepdata.{test['recid']}.v2/t2",
"related_doi": f"10.17182/hepdata.{test['recid']}.v2/t1"
}
]
test["related_table_data"] = related_table_data

for related in related_table_data:
datasub = DataSubmission.query.filter_by(
doi=related["table_doi"]
).first()

related_datasub = RelatedTable(
table_doi=related["table_doi"],
related_doi=related["related_doi"]
)
datasub.related_tables.append(related_datasub)
db.session.add_all([related_datasub, datasub])

# Finally, we commit all the new data
db.session.commit()

# Test case checking
for test in test_data:
latest_submission = test["submissions"][-1]
# Get the HEPSubmission and DataSubmission objects for the test
test_submission = latest_submission["submission"]
test_datasubmissions = latest_submission["data_submissions"]

# Run the HEPSubmission functions to test
forward_sub_relations = get_related_hepsubmissions(test_submission)
backward_sub_relations = get_related_to_this_hepsubmissions(test_submission)

# This record should be referenced by the OTHER record,
# and this record should reference the OTHER record
assert [sub.publication_recid for sub in forward_sub_relations] == [test["other_recid"]]

expected_backward_sub_relations = []

# Finished records will have other record references appear
if test["overall_status"] is not "todo":
expected_backward_sub_relations.append(test["other_recid"])

assert [sub.publication_recid for sub in backward_sub_relations] == expected_backward_sub_relations

for test_datasub in test_datasubmissions:
table_number = test_datasub["number"]
submission = test_datasub["submission"]

# Execute the DataSubmission functions to test
forward_dt_relations = [sub.doi for sub in get_related_datasubmissions(submission)]
backward_dt_relations = [sub.doi for sub in get_related_to_this_datasubmissions(submission)]

# The number of entries happens to match the table number
assert len(forward_dt_relations) == table_number

# This record should be referenced by the OTHER table,
# and this table should reference the OTHER table
# (matching the same table number)
expected_forward_dt_relations = [f"10.17182/hepdata.{test['other_recid']}.v2/t{table_number}"]
expected_backward_dt_relations = []

# We expect unfinished records to NOT have `other_recid` tables
if test["overall_status"] is not "todo":
expected_backward_dt_relations.append(f"10.17182/hepdata.{test['other_recid']}.v2/t{table_number}")

# Here we expect the second table to reference ITS OWN table one
if table_number == 2:
expected_forward_dt_relations.append(f"10.17182/hepdata.{test['recid']}.v2/t1")
else:
# For table 1, we expect it to be referenced by the table 2
expected_backward_dt_relations.append(f"10.17182/hepdata.{test['recid']}.v2/t2")

# Test that the forward/backward datatable relations work as expected
assert set(forward_dt_relations) == set(expected_forward_dt_relations)
assert set(backward_dt_relations) == set(expected_backward_dt_relations)
Loading

0 comments on commit a57210d

Please sign in to comment.