Skip to content

Commit

Permalink
Update get_table_details and add size check
Browse files Browse the repository at this point in the history
Updates the get_table_details function and adds a file size loading threshold check
  • Loading branch information
ItIsJordan committed Nov 8, 2023
1 parent d245ca0 commit 382e5c8
Show file tree
Hide file tree
Showing 3 changed files with 44 additions and 17 deletions.
17 changes: 16 additions & 1 deletion hepdata/modules/records/utils/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
import os
from sqlalchemy.orm.exc import NoResultFound

from hepdata.config import CFG_PUB_TYPE, HISTFACTORY_FILE_TYPE
from hepdata.config import CFG_PUB_TYPE, HISTFACTORY_FILE_TYPE, SIZE_LOAD_CHECK_THRESHOLD
from hepdata.ext.opensearch.api import get_record
from hepdata.modules.submission.models import HEPSubmission, License

Expand Down Expand Up @@ -251,3 +251,18 @@ def get_record_by_id(recid):
def record_exists(*args, **kwargs):
    """
    Check whether at least one HEPSubmission matches the given filters.

    :param args: Accepted for call compatibility; not used by the query.
    :param kwargs: Keyword filters forwarded unchanged to ``filter_by``.
    :return: ``True`` if one or more rows match, otherwise ``False``.
    """
    matching = HEPSubmission.query.filter_by(**kwargs)
    return matching.count() > 0


def file_size_check(file_location, load_all):
    """
    Decide whether a data file is small enough to be loaded
    immediately on the records page.

    :param file_location: Location of the data file on disk.
    :param load_all: Size-check override. The file size is compared
        against ``SIZE_LOAD_CHECK_THRESHOLD`` only when this equals 0;
        any other value skips the check, so the file always passes.
    :return: ``True`` if the file may be loaded (check skipped, or size
        at or below the threshold), ``False`` if the file is too large.
    :raises OSError: If the check runs and ``file_location`` does not
        exist or is inaccessible (raised by ``os.path.getsize``).
    """
    # A non-zero load_all is an explicit request to skip the size check.
    if load_all != 0:
        return True

    # The threshold is inclusive: a file exactly at the limit still passes.
    return os.path.getsize(file_location) <= SIZE_LOAD_CHECK_THRESHOLD
3 changes: 2 additions & 1 deletion hepdata/modules/records/utils/data_processing_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -216,7 +216,8 @@ def generate_table_structure(table_contents):
"review": table_contents["review"],
"associated_files": table_contents["associated_files"],
"keywords": {},
"values": []}
"values": [],
"load_fail": table_contents["load_fail"]}

record["description"] = sanitize_html(table_contents["title"])

Expand Down
41 changes: 26 additions & 15 deletions hepdata/modules/records/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@
from hepdata.modules.submission.models import HEPSubmission, DataSubmission, \
DataResource, DataReview, Message, Question
from hepdata.modules.records.utils.common import get_record_by_id, \
default_time, IMAGE_TYPES, decode_string
default_time, IMAGE_TYPES, decode_string, file_size_check
from hepdata.modules.records.utils.data_processing_utils import \
generate_table_structure, process_ctx
from hepdata.modules.records.utils.submission import create_data_review, \
Expand Down Expand Up @@ -289,14 +289,15 @@ def get_latest():
return jsonify(result)


@blueprint.route('/data/<int:recid>/<int:data_recid>/<int:version>', methods=['GET', ])
def get_table_details(recid, data_recid, version):
@blueprint.route('/data/<int:recid>/<int:data_recid>/<int:version>/<int:load_all>', methods=['GET'])
def get_table_details(recid, data_recid, version, load_all):
"""
Get the table details.
:param recid:
:param data_recid:
:param version:
:param load_all:
:return:
"""
# joinedload allows query of data in another table without a second database access.
Expand All @@ -311,18 +312,28 @@ def get_table_details(recid, data_recid, version):
if data_query.count() > 0:
data_record = data_query.one()
file_location = data_record.file_location

attempts = 0
while True:
try:
with open(file_location, 'r') as table_file:
table_contents = yaml.load(table_file, Loader=Loader)
except:
attempts += 1
# allow multiple attempts to read file in case of temporary disk problems
if (table_contents and table_contents is not None) or attempts > 5:
break

load_fail = True

if file_size_check(file_location, load_all):
attempts = 0
while True:
try:
with open(file_location, 'r') as table_file:
table_contents = yaml.load(table_file, Loader=Loader)
if table_contents:
load_fail = False

except (FileNotFoundError, PermissionError) as e:
attempts += 1
# allow multiple attempts to read file in case of temporary disk problems
if (table_contents and table_contents is not None) or attempts > 5:
break
if load_fail:
# TODO - Needs to be initialised for later
table_contents["dependent_variables"] = []
table_contents["independent_variables"] = []

table_contents["load_fail"] = load_fail
table_contents["name"] = datasub_record.name
table_contents["title"] = datasub_record.description
table_contents["keywords"] = datasub_record.keywords
Expand Down

0 comments on commit 382e5c8

Please sign in to comment.