diff --git a/src/access_nri_intake/data/utils.py b/src/access_nri_intake/data/utils.py index 04a6c168..6b2d5e1d 100644 --- a/src/access_nri_intake/data/utils.py +++ b/src/access_nri_intake/data/utils.py @@ -1,8 +1,8 @@ # Copyright 2024 ACCESS-NRI and contributors. See the top-level COPYRIGHT file for details. # SPDX-License-Identifier: Apache-2.0 -import os import re +from pathlib import Path import yaml @@ -12,7 +12,7 @@ CATALOG_PATH_REGEX = r"^(?P.*?)\{\{version\}\}.*?$" -def _get_catalog_rp(): +def _get_catalog_root(): """ Get the catalog root path. """ @@ -28,14 +28,14 @@ def _get_catalog_rp(): match = re.match(CATALOG_PATH_REGEX, catalog_fp) try: - return match.group("rootpath") + return Path(match.group("rootpath")) except AttributeError: # Match failed raise RuntimeError( f"Catalog metadata {get_catalog_fp()} contains unexpected catalog filepath: {catalog_fp}" ) -def available_versions(pretty: bool = True): +def available_versions(pretty: bool = True) -> list[str] | None: """ Report the available versions of the `intake.cat.access_nri` catalog. @@ -46,24 +46,47 @@ def available_versions(pretty: bool = True): (True, default), or to provide a list of version numbers only (False). """ # Work out where the catalogs are stored - base_path = _get_catalog_rp() + base_path = _get_catalog_root() + + # Grab the extant catalog and work out its min and max versions + try: + with open(get_catalog_fp()) as cat_file: + cat_yaml = yaml.safe_load(cat_file) + vers_min = cat_yaml["sources"]["access_nri"]["parameters"]["version"]["min"] + vers_max = cat_yaml["sources"]["access_nri"]["parameters"]["version"]["max"] + vers_def = cat_yaml["sources"]["access_nri"]["parameters"]["version"][ + "default" + ] + except FileNotFoundError: + raise FileNotFoundError(f"Unable to find catalog at {get_catalog_fp()}") + except KeyError: + raise RuntimeError(f"Catalog at {get_catalog_fp()} not correctly formatted") # Grab all the catalog names - cats = [d for d in os.listdir(base_path) if re.search(CATALOG_NAME_FORMAT, d)] + cats = [ + dir_path.name + for dir_path in base_path.iterdir() + if re.search(CATALOG_NAME_FORMAT, dir_path.name) + and dir_path.is_dir() + and ( + (dir_path.name >= vers_min and dir_path.name <= vers_max) + or dir_path.name == vers_def + ) + ] cats.sort(reverse=True) # Find all the symlinked versions - symlinks = [s for s in cats if os.path.islink(os.path.join(base_path, s))] + symlinks = [s for s in cats if (Path(base_path) / s).is_symlink()] - symlink_targets = { - s: os.path.basename(os.readlink(os.path.join(base_path, s))) for s in symlinks - } + symlink_targets = {s: (base_path / s).readlink().name for s in symlinks} if pretty: - for i, c in enumerate(cats): + for c in cats: if c in symlink_targets.keys(): c += f"(-->{symlink_targets[c]})" + if c == vers_def: + c += "*" print(c) - return + return None return cats diff --git a/tests/data/catalog/catalog-dirs/v2023-01-01 b/tests/data/catalog/catalog-dirs/v2023-01-01 new file mode 100644 index 00000000..7b02fc2c --- /dev/null +++ b/tests/data/catalog/catalog-dirs/v2023-01-01 @@ -0,0 +1,3 @@ +# This is a catalog 'file', not directory, +# to make sure the system doesn't detect such things +# as a real catalog \ No newline at end of file diff --git a/tests/data/catalog/catalog-versions.yaml b/tests/data/catalog/catalog-versions.yaml new file mode 100644 index 00000000..2c6d98c2 --- /dev/null +++ b/tests/data/catalog/catalog-versions.yaml @@ -0,0 +1,24 @@ +sources: + access_nri: + args: + columns_with_iterables: + - model + - realm + - frequency + - variable + mode: r + name_column: name + path: /g/data/xp65/public/apps/access-nri-intake-catalog/{{version}}/metacatalog.csv + yaml_column: yaml + description: ACCESS-NRI intake catalog + driver: intake_dataframe_catalog.core.DfFileCatalog + metadata: + storage: gdata/al33+gdata/cj50+gdata/dk92+gdata/fs38+gdata/ik11+gdata/oi10+gdata/p73+gdata/rr3+gdata/xp65 + version: '{{version}}' + parameters: + version: + min: v2019-02-02 + max: v2024-06-19 + default: v2025-02-28 # Check default outside range is returned + description: Catalog version + type: str \ No newline at end of file diff --git a/tests/test_data.py b/tests/test_data.py index 314fdd8c..d66b3f4c 100644 --- a/tests/test_data.py +++ b/tests/test_data.py @@ -2,17 +2,18 @@ # SPDX-License-Identifier: Apache-2.0 import re +from pathlib import Path from unittest import mock import pytest import access_nri_intake from access_nri_intake.data import CATALOG_NAME_FORMAT -from access_nri_intake.data.utils import _get_catalog_rp, available_versions +from access_nri_intake.data.utils import _get_catalog_root, available_versions @mock.patch("access_nri_intake.data.utils.get_catalog_fp") -def test__get_catalog_rp(mock_get_catalog_fp, test_data): +def test__get_catalog_root(mock_get_catalog_fp, test_data): """ Check that we correctly decipher to rootpath (rp) to the catalogs """ @@ -22,9 +23,9 @@ def test__get_catalog_rp(mock_get_catalog_fp, test_data): == test_data / "catalog/catalog-good.yaml" ), "Mock failed" - rp = _get_catalog_rp() - assert ( - rp == "/this/is/root/path/" + rp = _get_catalog_root() + assert rp == Path( + "/this/is/root/path/" ), f"Computed root path {rp} != expected value /this/is/root/path/" @@ -32,7 +33,7 @@ def test__get_catalog_rp(mock_get_catalog_fp, test_data): @pytest.mark.parametrize( "cat", ["catalog/catalog-bad-path.yaml", "catalog/catalog-bad-structure.yaml"] ) -def test__get_catalog_rp_runtime_errors(mock_get_catalog_fp, test_data, cat): +def test__get_catalog_root_runtime_errors(mock_get_catalog_fp, test_data, cat): """ Check that we correctly decipher to rootpath (rp) to the catalogs """ @@ -42,12 +43,14 @@ def test__get_catalog_rp_runtime_errors(mock_get_catalog_fp, test_data, cat): ), "Mock failed" with pytest.raises(RuntimeError): - _get_catalog_rp() + _get_catalog_root() -@mock.patch("access_nri_intake.data.utils._get_catalog_rp") -def test_available_versions(mock__get_catalog_rp, test_data): - mock__get_catalog_rp.return_value = test_data / "catalog/catalog-dirs" +@mock.patch("access_nri_intake.data.utils._get_catalog_root") +@mock.patch("access_nri_intake.data.utils.get_catalog_fp") +def test_available_versions(mock_get_catalog_fp, mock__get_catalog_root, test_data): + mock__get_catalog_root.return_value = test_data / "catalog/catalog-dirs" + mock_get_catalog_fp.return_value = test_data / "catalog/catalog-versions.yaml" cats = available_versions(pretty=False) assert cats == [ "v2025-02-28", @@ -57,16 +60,35 @@ def test_available_versions(mock__get_catalog_rp, test_data): ], "Did not get expected catalog list" -@mock.patch("access_nri_intake.data.utils._get_catalog_rp") -def test_available_versions_pretty(mock__get_catalog_rp, test_data, capfd): - mock__get_catalog_rp.return_value = test_data / "catalog/catalog-dirs" +@mock.patch("access_nri_intake.data.utils._get_catalog_root") +@mock.patch("access_nri_intake.data.utils.get_catalog_fp") +def test_available_versions_pretty( + mock_get_catalog_fp, mock__get_catalog_root, test_data, capfd +): + mock__get_catalog_root.return_value = test_data / "catalog/catalog-dirs" + mock_get_catalog_fp.return_value = test_data / "catalog/catalog-versions.yaml" available_versions(pretty=True) captured, _ = capfd.readouterr() assert ( - captured == "v2025-02-28\nv2024-06-19\nv2024-01-01\nv2019-02-02(-->vN.N.N)\n" + captured == "v2025-02-28*\nv2024-06-19\nv2024-01-01\nv2019-02-02(-->vN.N.N)\n" ), "Did not get expected catalog printout" +@mock.patch( + "access_nri_intake.data.utils.get_catalog_fp", return_value="/this/is/not/real.yaml" +) +def test_available_versions_no_catalog(mock_get_catalog_fp): + with pytest.raises(FileNotFoundError): + available_versions() + + +@mock.patch("access_nri_intake.data.utils.get_catalog_fp") +def test_available_versions_bad_catalog(mock_get_catalog_fp, test_data): + mock_get_catalog_fp.return_value = test_data / "catalog/catalog-bad-structure.yaml" + with pytest.raises(RuntimeError): + available_versions() + + @pytest.mark.parametrize( "name", [