Skip to content

Commit

Permalink
Merge pull request #85 from LSSTDESC/u/stuart/api_config
Browse files Browse the repository at this point in the history
Add `access_API_configuration` to `register_dataset`
  • Loading branch information
stuartmcalpine authored Dec 6, 2023
2 parents ebbfa5c + a5932f6 commit 1b62bc8
Show file tree
Hide file tree
Showing 5 changed files with 123 additions and 16 deletions.
2 changes: 2 additions & 0 deletions scripts/create_registry_db.py
Original file line number Diff line number Diff line change
Expand Up @@ -177,6 +177,8 @@ def _Dataset(schema):
# a foreign key into another table. Possible values for the column
# might include "gcr-catalogs", "skyCatalogs"
"access_API": Column("access_API", String(20)),
# Optional configuration file associated with access API
"access_API_configuration": Column("configuration", String),
# A way to associate a dataset with a program execution or "run"
"execution_id": Column(
Integer,
Expand Down
19 changes: 11 additions & 8 deletions src/dataregistry/registrar.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
from dataregistry.db_basic import add_table_row
from dataregistry.registrar_util import _form_dataset_path, get_directory_info
from dataregistry.registrar_util import _parse_version_string, _bump_version
from dataregistry.registrar_util import _name_from_relpath
from dataregistry.registrar_util import _name_from_relpath, _read_configuration_file
from dataregistry.db_basic import TableMetadata

# from dataregistry.exceptions import *
Expand Down Expand Up @@ -270,12 +270,7 @@ def register_execution(

# Read configuration file. Enter contents as a raw string.
if configuration:
# Maybe first check that file size isn't outrageous?
with open(configuration) as f:
contents = f.read(max_config_length)
# if len(contents) == _MAX_CONFIG:
# issue truncation warning?
values["configuration"] = contents
values["configuration"] = _read_configuration_file(configuration, max_config_length)

# Enter row into data registry database
with self._engine.connect() as conn:
Expand Down Expand Up @@ -308,6 +303,7 @@ def register_dataset(
description=None,
execution_id=None,
access_API=None,
access_API_configuration=None,
is_overwritable=False,
old_location=None,
copy=True,
Expand All @@ -321,7 +317,8 @@ def register_dataset(
execution_locale=None,
execution_configuration=None,
input_datasets=[],
input_production_datasets=[]
input_production_datasets=[],
max_config_length=_DEFAULT_MAX_CONFIG,
):
"""
Register a new dataset in the DESC data registry.
Expand Down Expand Up @@ -353,6 +350,8 @@ def register_dataset(
Used to associate dataset with a particular execution
access_API : str, optional
Hint as to how to read the data
access_API_configuration : str, optional
Path to configuration file for `access_API`
is_overwritable : bool, optional
True if dataset may be overwritten (defaults to False).
Expand Down Expand Up @@ -392,6 +391,8 @@ def register_dataset(
List of dataset ids that were the input to this execution
input_production_datasets : list, optional
List of production dataset ids that were the input to this execution
max_config_length : int, optional
Maximum number of characters to read from a configuration file
Returns
-------
Expand Down Expand Up @@ -515,6 +516,8 @@ def register_dataset(
values["execution_id"] = execution_id
if access_API:
values["access_API"] = access_API
if access_API_configuration:
values["access_API_configuration"] = _read_configuration_file(access_API_configuration, max_config_length)
values["is_overwritable"] = is_overwritable
values["is_overwritten"] = False
values["register_date"] = datetime.now()
Expand Down
35 changes: 35 additions & 0 deletions src/dataregistry/registrar_util.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import os
import re
import warnings
from sqlalchemy import MetaData, Table, Column, text, select

__all__ = [
Expand Down Expand Up @@ -207,3 +208,37 @@ def _name_from_relpath(relative_path):
name = base

return name


def _read_configuration_file(configuration_file, max_config_length):
    """
    Read a text, YAML, TOML, etc, configuration file.

    The file contents are returned as a raw string; no parsing is done.

    Parameters
    ----------
    configuration_file : str
        Path to configuration file
    max_config_length : int
        Maximum number of characters to read from file. Files beyond this
        limit will be truncated (with a warning message).

    Returns
    -------
    contents : str
        At most `max_config_length` characters from the start of the file.

    Raises
    ------
    FileNotFoundError
        If `configuration_file` is not an existing regular file.
    """

    # Make sure file exists
    if not os.path.isfile(configuration_file):
        raise FileNotFoundError(f"{configuration_file} not found")

    # Open configuration file and read up to max_config_length characters
    with open(configuration_file) as f:
        contents = f.read(max_config_length)

        # Hitting the limit does not by itself mean data was dropped: a file
        # that is exactly `max_config_length` characters long also fills the
        # read. Probe for one more character so we only warn when something
        # really was left unread.
        truncated = len(contents) == max_config_length and bool(f.read(1))

    if truncated:
        warnings.warn(
            "Configuration file is longer than `max_config_length`, truncated",
            UserWarning,
        )

    return contents
5 changes: 5 additions & 0 deletions tests/end_to_end_tests/create_test_entries.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,7 @@ def _insert_dataset_entry(
execution_configuration=None,
input_datasets=[],
input_production_datasets=[],
access_API_configuration=None,
):
"""
Wrapper to create dataset entry
Expand Down Expand Up @@ -163,6 +164,8 @@ def _insert_dataset_entry(
List of dataset ids that were the input to this execution
input_production_datasets : list, optional
List of production dataset ids that were the input to this execution
access_API_configuration : str, optional
Configuration file for access API
Returns
-------
Expand Down Expand Up @@ -203,6 +206,7 @@ def _insert_dataset_entry(
execution_configuration=execution_configuration,
input_datasets=input_datasets,
input_production_datasets=input_production_datasets,
access_API_configuration=access_API_configuration,
)

assert dataset_id is not None, "Trying to create a dataset that already exists"
Expand All @@ -220,6 +224,7 @@ def _insert_dataset_entry(
"user",
None,
"This is my first DESC dataset",
access_API_configuration="dummy_configuration_file.yaml",
)

# Test set 2
Expand Down
78 changes: 70 additions & 8 deletions tests/unit_tests/test_registrar_util.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,16 @@
from dataregistry.registrar_util import _parse_version_string, _name_from_relpath, _form_dataset_path, get_directory_info
from dataregistry.registrar_util import (
_parse_version_string,
_name_from_relpath,
_form_dataset_path,
get_directory_info,
_read_configuration_file,
)
import os
import pytest


def test_parse_version_string():
""" Make sure version strings are parsed correctly """
"""Make sure version strings are parsed correctly"""

# Test case with no version suffix
tmp = _parse_version_string("1.2.3")
Expand All @@ -29,14 +37,15 @@ def test_parse_version_string():
assert tmp["minor"] == "8"
assert tmp["patch"] == "9"


def test_form_dataset_path():
"""
Test dataset path construction
Datasets should come back with the format:
<root_dir>/<owner_type>/<owner>/<relative_path>
"""

tmp = _form_dataset_path("production", "desc", "my/path", root_dir=None)
assert tmp == "production/production/my/path"

Expand All @@ -49,6 +58,7 @@ def test_form_dataset_path():
tmp = _form_dataset_path("user", "desc", "my/path", root_dir="/root/")
assert tmp == "/root/user/desc/my/path"


def test_directory_info():
"""
Test getting number of files and disk space usage from a directory.
Expand All @@ -60,10 +70,62 @@ def test_directory_info():
assert num_files > 0
assert total_size > 0


def test_name_from_relpath():
""" Make sure names are extracted from paths correctly """
"""Make sure names are extracted from paths correctly"""

# A leading "/" or "./", or a trailing "/", must not change the extracted name
assert _name_from_relpath("/testing/test") == "test"
assert _name_from_relpath("./testing/test") == "test"
assert _name_from_relpath("/testing/test/") == "test"
assert _name_from_relpath("test") == "test"


def _make_dummy_config(tmpdir, nchars):
    """
    Create a dummy config file in temp directory

    The file is always named ``dummy_config.txt`` and is overwritten if it
    already exists in `tmpdir`.

    Parameters
    ----------
    tmpdir : py.path.local object (pytest @fixture)
        Temporary directory we can store test config files to
    nchars : int
        Number of characters to put in temp config file

    Returns
    -------
    file_path : str
        Path to temporary config file we can read
    """

    file_path = os.path.join(tmpdir, "dummy_config.txt")

    # Write all nchars "X" characters into the dummy file in a single call
    with open(file_path, "w") as file:
        file.write("X" * nchars)

    return file_path


@pytest.mark.parametrize("nchars,max_config_length,ans", [(10, 10, 10), (100, 10, 10)])
def test_read_file(tmpdir, nchars, max_config_length, ans):
"""
Test reading in configuration file, and check truncation warning

Parameters
----------
tmpdir : py.path.local object (pytest @fixture)
Temporary directory the dummy configuration file is written to
nchars : int
Number of characters written to the dummy configuration file
max_config_length : int
Character limit passed to `_read_configuration_file`
ans : int
Expected length of the string read back
"""

# File is longer than the limit: a UserWarning must be raised and the
# returned contents are expected to have length `ans`
if nchars > max_config_length:
with pytest.warns(UserWarning, match="Configuration file is longer"):
content = _read_configuration_file(
_make_dummy_config(tmpdir, nchars), max_config_length
)
assert len(content) == ans

# File does not exceed the limit (nchars <= max_config_length): only the
# length of the returned contents is checked here, not the absence of a warning
else:
content = _read_configuration_file(
_make_dummy_config(tmpdir, nchars), max_config_length
)
assert len(content) == ans

# NOTE(review): these four assertions duplicate test_name_from_relpath and
# look like leftover lines from the diff view; confirm whether they belong here
assert _name_from_relpath("/testing/test") == "test"
assert _name_from_relpath("./testing/test") == "test"
assert _name_from_relpath("/testing/test/") == "test"
assert _name_from_relpath("test") == "test"
# Make sure we raise an exception when the file doesn't exist
with pytest.raises(FileNotFoundError, match="not found"):
_read_configuration_file("i_dont_exist.txt", 10)

0 comments on commit 1b62bc8

Please sign in to comment.