Skip to content

Commit

Permalink
Merge pull request #810 from HEPData/fix_do_finalise
Browse files Browse the repository at this point in the history
Fix do finalise
  • Loading branch information
GraemeWatt authored Jul 17, 2024
2 parents 32b9fb7 + a57210d commit 0c1b7de
Show file tree
Hide file tree
Showing 5 changed files with 177 additions and 14 deletions.
7 changes: 5 additions & 2 deletions hepdata/modules/records/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -315,16 +315,19 @@ def should_send_json_ld(request):
def get_commit_message(ctx, recid):
"""
Returns a commit message for the current version if present.
Will return the highest ID of a version-recid pairing.
:param ctx:
:param recid:
"""
try:
# Select the most recent commit (greatest ID)
commit_message_query = RecordVersionCommitMessage.query \
.filter_by(version=ctx["version"], recid=recid)
.filter_by(version=ctx["version"], recid=recid) \
.order_by(RecordVersionCommitMessage.id.desc())

if commit_message_query.count() > 0:
commit_message = commit_message_query.one()
commit_message = commit_message_query.first()
ctx["revision_message"] = {
'version': commit_message.version,
'message': commit_message.message}
Expand Down
21 changes: 17 additions & 4 deletions hepdata/modules/records/utils/submission.py
Original file line number Diff line number Diff line change
Expand Up @@ -715,6 +715,7 @@ def do_finalise(recid, publication_record=None, force_finalise=False,
print('Finalising record {}'.format(recid))

hep_submission = get_latest_hepsubmission(publication_recid=recid)
error_message = None

generated_record_ids = []
if hep_submission \
Expand Down Expand Up @@ -809,14 +810,26 @@ def do_finalise(recid, publication_record=None, force_finalise=False,
site_url = current_app.config.get('SITE_URL', 'https://www.hepdata.net')
tweet(record.get('title'), record.get('collaborations'),
site_url + '/record/ins{0}'.format(record.get('inspire_id')), version)

return json.dumps({"success": True, "recid": recid,
"data_count": len(submissions),
"generated_records": generated_record_ids})

except NoResultFound:
print('No record found to update. Which is super strange.')

error_message = 'No record found to update. Which is super strange.'

# If we have not returned, then we set an error message
if error_message is None:
error_message = "An error occurred, please try again"

# If we get to here, we have not returned
# Do some cleanup (rollback and return error message)
if error_message:
print(error_message)
db.session.rollback()
return json.dumps({
"success": False,
"recid": recid,
"errors": [error_message]
})
else:
return json.dumps(
{"success": False, "recid": recid,
Expand Down
5 changes: 3 additions & 2 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -187,10 +187,11 @@ def create_blank_test_record():
return submission


def create_test_record(file_location):
def create_test_record(file_location, overall_status='finished'):
"""
Helper function to create a dummy record with data.
:param file_location: Path to the data directory.
:param overall_status: Allows setting of custom overall status. Defaults to 'finished'.
:returns test_submission: The newly created submission object
"""
record = {'title': 'HEPData Testing',
Expand All @@ -201,7 +202,7 @@ def create_test_record(file_location):
# Set up a new test submission
test_submission = process_submission_payload(**record)
# Ensure the status is set to `finished` so the related data can be accessed.
test_submission.overall_status = 'finished'
test_submission.overall_status = overall_status
record_dir = get_data_path_for_record(test_submission.publication_recid, str(int(round(time.time()))))
shutil.copytree(file_location, record_dir)
process_submission_directory(record_dir, os.path.join(record_dir, 'submission.yaml'),
Expand Down
53 changes: 50 additions & 3 deletions tests/records_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
from flask_login import login_user
from invenio_accounts.models import User
from invenio_db import db
from sqlalchemy.exc import MultipleResultsFound
import pytest
from werkzeug.datastructures import FileStorage
import requests_mock
Expand All @@ -45,8 +46,8 @@
move_files, get_all_ids, has_upload_permissions, \
has_coordinator_permissions, create_new_version, \
get_resource_mimetype, create_breadcrumb_text, format_submission, \
format_resource, get_related_to_this_hepsubmissions, get_related_hepsubmissions, get_related_datasubmissions, \
get_related_to_this_datasubmissions
format_resource, get_commit_message, get_related_to_this_hepsubmissions, \
get_related_hepsubmissions, get_related_datasubmissions, get_related_to_this_datasubmissions
from hepdata.modules.records.importer.api import import_records
from hepdata.modules.records.utils.analyses import update_analyses
from hepdata.modules.records.utils.submission import get_or_create_hepsubmission, process_submission_directory, \
Expand All @@ -59,7 +60,7 @@
from hepdata.modules.records.utils.workflow import update_record, create_record
from hepdata.modules.records.views import set_data_review_status
from hepdata.modules.submission.models import HEPSubmission, DataReview, \
DataSubmission, DataResource, License, RelatedRecid, RelatedTable
DataSubmission, DataResource, License, RecordVersionCommitMessage, RelatedRecid, RelatedTable
from hepdata.modules.submission.views import process_submission_payload
from hepdata.modules.submission.api import get_latest_hepsubmission
from tests.conftest import TEST_EMAIL
Expand Down Expand Up @@ -1120,6 +1121,52 @@ def test_generate_license_data_by_id(app):
}


def test_get_commit_message(app):
"""
Tests functionality of the get_commit_message function.
Ensures no instances of None, that duplicate commit messages
are handled correctly, and only the most recent
RecordVersionCommitMessage is returned.
"""
# We want to ensure duplicate entries
test_version, test_recid = 1, 1
# How many records we want to insert
insert_amount = 5

# First we check no insertion, then we check insertion
for should_insert in [False, True]:
# Only insert on the second go
if should_insert:
# Insert a bunch of duplicate entries
for i in range(0, insert_amount):
new_record = RecordVersionCommitMessage(
recid=test_recid,
version=test_version,
# Setting message to a unique value
message=str(insert_amount)
)
db.session.add(new_record)
db.session.commit()

# Result of get_commit_message is added to ctx as revision_message
ctx = {"version": test_version}

# We always want to check that duplicates are not returned
try:
get_commit_message(ctx, test_recid)
except MultipleResultsFound as e:
raise AssertionError(e)

# revision_message only exists if we should insert
assert ("revision_message" in ctx) == should_insert

# Check the most recent has been retrieved
if should_insert:
# We know it's the most recent as message
# is set to the highest inserted range, equal to insert_amount.
ctx["revision_message"]["message"] = str(insert_amount)


def test_version_related_functions(app):
"""
Attempts to bulk test the related functions for both data tables and submissions (records/api):
Expand Down
105 changes: 102 additions & 3 deletions tests/submission_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,11 +22,15 @@
# waive the privileges and immunities granted to it by virtue of its status
# as an Intergovernmental Organization or submit itself to any jurisdiction.

import json
import logging
import os
import shutil
import time
from datetime import datetime
from time import sleep
from unittest.mock import patch, MagicMock
from sqlalchemy.exc import NoResultFound

from invenio_db import db
import pytest
Expand All @@ -42,15 +46,15 @@
get_related_to_this_datasubmissions,
get_related_to_this_hepsubmissions,
get_table_data_list,
process_saved_file
process_saved_file, get_commit_message
)
from hepdata.modules.records.utils.common import infer_file_type, contains_accepted_url, allowed_file, record_exists, \
get_record_contents, is_histfactory
get_record_contents, is_histfactory, get_record_by_id
from hepdata.modules.records.utils.data_files import get_data_path_for_record
from hepdata.modules.records.utils.submission import process_submission_directory, do_finalise, unload_submission, \
cleanup_data_related_recid
from hepdata.modules.submission.api import get_latest_hepsubmission, get_submission_participants_for_record
from hepdata.modules.submission.models import DataSubmission, HEPSubmission, RelatedRecid
from hepdata.modules.submission.models import DataSubmission, HEPSubmission, RelatedRecid, RecordVersionCommitMessage
from hepdata.modules.submission.views import process_submission_payload
from hepdata.config import HEPDATA_DOI_PREFIX
from tests.conftest import create_test_record
Expand Down Expand Up @@ -713,3 +717,98 @@ def test_status_reset_error(app, mocker, caplog):
assert(caplog.records[0].levelname == "ERROR")
assert(caplog.records[0].msg
== "Exception while cleaning up: Could not clean up the submission")


# Patching to force no-op on function: process_last_updates()
@patch("hepdata.ext.opensearch.document_enhancers.process_last_updates")
def test_do_finalise_commit_message(app, admin_idx):
"""
Tests the do_finalise function.
Here we are testing the commit message functionality, ensuring proper rollback in the event of an error.
"""

# Insert the testing record data
with app.app_context():
admin_idx.recreate_index()
# Create test submission/record
hepdata_submission = create_test_record(
os.path.abspath('tests/test_data/test_submission'),
overall_status='todo'
)

# We're going to add an extra commit message
conflicting_record = RecordVersionCommitMessage(
recid=hepdata_submission.publication_recid,
version=hepdata_submission.version,
message="OldMessage"
)

db.session.add(conflicting_record)
db.session.commit()

# Create record data and prepare
record = get_record_by_id(hepdata_submission.publication_recid)
record["creation_date"] = str(datetime.today().strftime('%Y-%m-%d'))

# Now we mock the get_record_by_id function to cause an error.
with patch("hepdata.modules.records.utils.submission.get_record_by_id") as mock_get_record:
class MockRecord(MagicMock, dict):
# Mocking the invenio-records Record class
# Used to create a mock also of type dict
pass

# Create mock record object, set its value
# and set it to cause an error.
mock_record = MockRecord()
mock_get_record.return_value = mock_record
# Set the record to raise exception when commit() is called
mock_record.commit.side_effect = NoResultFound()
# Injecting specific dictionary keys to avoid error.
mock_record.__getitem__.side_effect = lambda key: 1 if key == 'item_doi' else None

# Now we run the do_finalise function (PATCHED)
result = do_finalise(
hepdata_submission.publication_recid,
publication_record=record,
commit_message="NewMessage",
force_finalise=True,
convert=False
)
# Convert str(json)->dict
result = json.loads(result)

# Get all commit messages
commit_messages = RecordVersionCommitMessage.query.filter_by(
recid=hepdata_submission.publication_recid
).all()

# Ensure that no new messages have inserted
assert len(commit_messages) == 1
assert commit_messages[0].message == "OldMessage"

# Confirm that the result response exists and is correct
assert "errors" in result
assert len(result["errors"]) == 1
assert result["errors"][0] == "No record found to update. Which is super strange."

# Run do_finalise again, but UNPATCHED
result = do_finalise(
hepdata_submission.publication_recid,
publication_record=record,
commit_message="NewMessage",
force_finalise=True,
convert=False
)
# Convert str(json)->dict
result = json.loads(result)

# Get the commit messages
commit_messages = RecordVersionCommitMessage.query.filter_by(
recid=hepdata_submission.publication_recid
).all()

# `NewMessage` should have inserted, and no error found.
assert len(commit_messages) == 2
assert commit_messages[-1].message == "NewMessage"
assert "errors" not in result

0 comments on commit 0c1b7de

Please sign in to comment.