Add analyses test #849

Merged
merged 6 commits into from Dec 10, 2024
86 changes: 85 additions & 1 deletion tests/search_test.py
@@ -21,14 +21,16 @@
from opensearch_dsl import Search, Index
from datetime import datetime
import pytest
import os
from invenio_db import db
from unittest.mock import call

from hepdata.ext.opensearch.config.os_config import \
add_default_aggregations, sort_fields_mapping
from hepdata.ext.opensearch import api as os_api
from hepdata.ext.opensearch.config.os_config import get_filter_field
from hepdata.ext.opensearch.document_enhancers import add_data_keywords, process_cmenergies
from hepdata.ext.opensearch.document_enhancers import add_data_keywords, process_cmenergies, add_analyses
from hepdata.modules.records.utils.submission import process_submission_directory
from hepdata.utils.miscellaneous import get_resource_data
from hepdata.ext.opensearch.process_results import merge_results, match_tables_to_papers, \
get_basic_record_information, is_datatable
@@ -753,6 +755,88 @@ def test_add_data_keywords():
assert 'NOTAREALKEYWORD' not in doc['data_keywords']


def test_add_analyses(app):
"""
    Tests the add_analyses function to ensure that analysis resources attached
    to a submission are added to the doc object during document enhancement.

    Currently tests the NUISANCE, HistFactory and MadAnalysis analysis types.
"""
    # test_data must match the analysis resources defined in test_folder
test_folder = "test_data/test_analysis_submission"
test_data = [
{ # ProSelecta/NUISANCE
"type": "NUISANCE",
"filename": "test.ProSelecta"
},
{ # HistFactory entry
"type": "HistFactory",
"filename": "test.tar.gz"
},
]
    # TODO: build this from the SITE_URL config value rather than hard-coding it
analysis_url = "http://localhost:5000/record/resource/%s?landing_page=true"

with app.app_context():
        # Create and submit a test submission containing the analysis resources
        base_dir = os.path.dirname(os.path.realpath(__file__))

hepsubmission = HEPSubmission(publication_recid=123456,
overall_status="finished",
version=1,
doi="10.17182/hepdata.123456")
db.session.add(hepsubmission)
db.session.commit()

        # Process the test submission directory
        directory = os.path.join(base_dir, test_folder)
errors = process_submission_directory(
directory,
            os.path.join(directory, "submission.yaml"),
hepsubmission.publication_recid
)

        # Submission processing should not report any errors
assert not errors

        # Add a MadAnalysis DataResource separately, outside submission.yaml
        mad_analysis_resource = DataResource(
            file_location="placeholder",
            file_type="MadAnalysis",
            file_description="placeholder"
        )

        # Attach the resource to the submission and persist it
hepsubmission.resources.append(mad_analysis_resource)
db.session.add(mad_analysis_resource)
db.session.add(hepsubmission)
db.session.commit()

        # Set up a minimal doc object matching what add_analyses expects
        test_doc = {"analyses": [], "recid": hepsubmission.publication_recid}
        # Run the add_analyses document enhancer
add_analyses(test_doc)

        # Sorted IDs of all DataResource objects attached to the submission
data_ids = sorted([r.id for r in hepsubmission.resources])

# There should be 3 analyses and 3 resources
assert len(data_ids) == len(test_doc["analyses"]) == 3

        # Each test_data entry should correspond to one resource ID and one
        # analysis output; zip stops at the shorter test_data list, so the
        # MadAnalysis entry is checked separately below
        for test, d_id, analysis in zip(test_data, data_ids, test_doc["analyses"]):
            # Fill in the expected URL using the sorted resource ID
            test["analysis"] = analysis_url % d_id
            # Confirm the expected data has been added to the doc
assert analysis == test

        # The MadAnalysis resource added after submission processing should be last
mad_analysis = test_doc["analyses"][-1]
assert mad_analysis["type"] == "MadAnalysis"


def test_process_cmenergies():
test_keywords = {
"cmenergies": [
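The assertions in test_add_analyses above imply that add_analyses appends one dict per analysis resource to doc["analyses"], each carrying the analysis type, the resource filename and a landing-page URL built from the resource ID. A minimal sketch of the expected doc state, assuming placeholder resource IDs 1 to 3 (the real IDs are read from hepsubmission.resources and sorted by the test):

# Sketch only: the resource IDs below are placeholders, not values produced by the test.
expected_doc = {
    "recid": 123456,
    "analyses": [
        # The first two entries correspond to the resources declared in submission.yaml;
        # the resource declared with type ProSelecta is expected to surface as NUISANCE.
        {"type": "NUISANCE", "filename": "test.ProSelecta",
         "analysis": "http://localhost:5000/record/resource/1?landing_page=true"},
        {"type": "HistFactory", "filename": "test.tar.gz",
         "analysis": "http://localhost:5000/record/resource/2?landing_page=true"},
        # The MadAnalysis resource is attached after submission processing; the test
        # only asserts its "type", so any further keys depend on the enhancer.
        {"type": "MadAnalysis"},
    ],
}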
10 changes: 10 additions & 0 deletions tests/test_data/test_analysis_submission/data1.yaml
@@ -0,0 +1,10 @@
dependent_variables:
- header: {name: TestData1-dependent-V1, units: NA}
qualifiers:
- {name: TestData1-qualifier-V1, value: 0}
values:
- {value: 0}
independent_variables:
- header: {name: TestData1-independent-V1, units: NA}
values:
- value: 0
12 changes: 12 additions & 0 deletions tests/test_data/test_analysis_submission/submission.yaml
@@ -0,0 +1,12 @@
---
description: "TestSubmission1-V1"
comment: TestComment1-V1
additional_resources:
- {description: Test, location: test.ProSelecta, type: ProSelecta }
- {description: Test, location: test.tar.gz, type: HistFactory }
---
name: "TestTable1-V1"
description: TestTable1-description-V1
keywords:
- {name: cmenergies, values: [0]}
data_file: data1.yaml
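The two additional_resources above become DataResource rows when process_submission_directory runs; together with the MadAnalysis resource appended directly in the test they account for the three IDs that the test sorts and substitutes into the landing-page URLs. A quick way to inspect them, assuming HEPSubmission is imported from hepdata.modules.submission.models (the import is not shown in this hunk):

from hepdata.modules.submission.models import HEPSubmission

# Within an application context, fetch the test submission created above and
# list its attached resources (IDs, types and file locations).
submission = HEPSubmission.query.filter_by(publication_recid=123456).first()
for resource in submission.resources:
    print(resource.id, resource.file_type, resource.file_location)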
Empty file: tests/test_data/test_analysis_submission/test.ProSelecta
Empty file: tests/test_data/test_analysis_submission/test.tar.gz