Skip to content

Commit

Permalink
Progress on catalog version reporting (#295)
Browse files Browse the repository at this point in the history
* WIP, commit for safekeeping

* Eliminate some redundant dependencies

* Revert "Eliminate some redundant dependencies"

This reverts commit 4b574b3.

* Revert "WIP, commit for safekeeping"

This reverts commit 1ce9631.

* Switch _get_catalog_rp to use Path

* Switch current available_versions to use Path, not os.path

* Protect against finding a non-directory catalog version

* Restrict basic list of available versions to those in the current catalog.yaml

* Mark 'default' version in pretty print

* Remove debug pdb

* Cover missing exception lines with tests

* @charles-turner-1 suggestions
  • Loading branch information
marc-white authored Dec 10, 2024
1 parent 44ad1bf commit 293d484
Show file tree
Hide file tree
Showing 4 changed files with 98 additions and 26 deletions.
47 changes: 35 additions & 12 deletions src/access_nri_intake/data/utils.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
# Copyright 2024 ACCESS-NRI and contributors. See the top-level COPYRIGHT file for details.
# SPDX-License-Identifier: Apache-2.0

import os
import re
from pathlib import Path

import yaml

Expand All @@ -12,7 +12,7 @@
CATALOG_PATH_REGEX = r"^(?P<rootpath>.*?)\{\{version\}\}.*?$"


def _get_catalog_rp():
def _get_catalog_root():
"""
Get the catalog root path.
"""
Expand All @@ -28,14 +28,14 @@ def _get_catalog_rp():

match = re.match(CATALOG_PATH_REGEX, catalog_fp)
try:
return match.group("rootpath")
return Path(match.group("rootpath"))
except AttributeError: # Match failed
raise RuntimeError(
f"Catalog metadata {get_catalog_fp()} contains unexpected catalog filepath: {catalog_fp}"
)


def available_versions(pretty: bool = True):
def available_versions(pretty: bool = True) -> list[str] | None:
"""
Report the available versions of the `intake.cat.access_nri` catalog.
Expand All @@ -46,24 +46,47 @@ def available_versions(pretty: bool = True):
(True, default), or to provide a list of version numbers only (False).
"""
# Work out where the catalogs are stored
base_path = _get_catalog_rp()
base_path = _get_catalog_root()

# Grab the extant catalog and work out its min and max versions
try:
with open(get_catalog_fp()) as cat_file:
cat_yaml = yaml.safe_load(cat_file)
vers_min = cat_yaml["sources"]["access_nri"]["parameters"]["version"]["min"]
vers_max = cat_yaml["sources"]["access_nri"]["parameters"]["version"]["max"]
vers_def = cat_yaml["sources"]["access_nri"]["parameters"]["version"][
"default"
]
except FileNotFoundError:
raise FileNotFoundError(f"Unable to find catalog at {get_catalog_fp()}")
except KeyError:
raise RuntimeError(f"Catalog at {get_catalog_fp()} not correctly formatted")

# Grab all the catalog names
cats = [d for d in os.listdir(base_path) if re.search(CATALOG_NAME_FORMAT, d)]
cats = [
dir_path.name
for dir_path in base_path.iterdir()
if re.search(CATALOG_NAME_FORMAT, dir_path.name)
and dir_path.is_dir()
and (
(dir_path.name >= vers_min and dir_path.name <= vers_max)
or dir_path.name == vers_def
)
]
cats.sort(reverse=True)

# Find all the symlinked versions
symlinks = [s for s in cats if os.path.islink(os.path.join(base_path, s))]
symlinks = [s for s in cats if (Path(base_path) / s).is_symlink()]

symlink_targets = {
s: os.path.basename(os.readlink(os.path.join(base_path, s))) for s in symlinks
}
symlink_targets = {s: (base_path / s).readlink().name for s in symlinks}

if pretty:
for i, c in enumerate(cats):
for c in cats:
if c in symlink_targets.keys():
c += f"(-->{symlink_targets[c]})"
if c == vers_def:
c += "*"
print(c)
return
return None

return cats
3 changes: 3 additions & 0 deletions tests/data/catalog/catalog-dirs/v2023-01-01
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# This is a catalog 'file', not directory,
# to make sure the system doesn't detect such things
# as a real catalog
24 changes: 24 additions & 0 deletions tests/data/catalog/catalog-versions.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
sources:
access_nri:
args:
columns_with_iterables:
- model
- realm
- frequency
- variable
mode: r
name_column: name
path: /g/data/xp65/public/apps/access-nri-intake-catalog/{{version}}/metacatalog.csv
yaml_column: yaml
description: ACCESS-NRI intake catalog
driver: intake_dataframe_catalog.core.DfFileCatalog
metadata:
storage: gdata/al33+gdata/cj50+gdata/dk92+gdata/fs38+gdata/ik11+gdata/oi10+gdata/p73+gdata/rr3+gdata/xp65
version: '{{version}}'
parameters:
version:
min: v2019-02-02
max: v2024-06-19
default: v2025-02-28 # Check default outside range is returned
description: Catalog version
type: str
50 changes: 36 additions & 14 deletions tests/test_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,17 +2,18 @@
# SPDX-License-Identifier: Apache-2.0

import re
from pathlib import Path
from unittest import mock

import pytest

import access_nri_intake
from access_nri_intake.data import CATALOG_NAME_FORMAT
from access_nri_intake.data.utils import _get_catalog_rp, available_versions
from access_nri_intake.data.utils import _get_catalog_root, available_versions


@mock.patch("access_nri_intake.data.utils.get_catalog_fp")
def test__get_catalog_rp(mock_get_catalog_fp, test_data):
def test__get_catalog_root(mock_get_catalog_fp, test_data):
"""
Check that we correctly decipher the root path (rp) to the catalogs
"""
Expand All @@ -22,17 +23,17 @@ def test__get_catalog_rp(mock_get_catalog_fp, test_data):
== test_data / "catalog/catalog-good.yaml"
), "Mock failed"

rp = _get_catalog_rp()
assert (
rp == "/this/is/root/path/"
rp = _get_catalog_root()
assert rp == Path(
"/this/is/root/path/"
), f"Computed root path {rp} != expected value /this/is/root/path/"


@mock.patch("access_nri_intake.data.utils.get_catalog_fp")
@pytest.mark.parametrize(
"cat", ["catalog/catalog-bad-path.yaml", "catalog/catalog-bad-structure.yaml"]
)
def test__get_catalog_rp_runtime_errors(mock_get_catalog_fp, test_data, cat):
def test__get_catalog_root_runtime_errors(mock_get_catalog_fp, test_data, cat):
"""
Check that a RuntimeError is raised when the root path cannot be deciphered from the catalog file
"""
Expand All @@ -42,12 +43,14 @@ def test__get_catalog_rp_runtime_errors(mock_get_catalog_fp, test_data, cat):
), "Mock failed"

with pytest.raises(RuntimeError):
_get_catalog_rp()
_get_catalog_root()


@mock.patch("access_nri_intake.data.utils._get_catalog_rp")
def test_available_versions(mock__get_catalog_rp, test_data):
mock__get_catalog_rp.return_value = test_data / "catalog/catalog-dirs"
@mock.patch("access_nri_intake.data.utils._get_catalog_root")
@mock.patch("access_nri_intake.data.utils.get_catalog_fp")
def test_available_versions(mock_get_catalog_fp, mock__get_catalog_root, test_data):
mock__get_catalog_root.return_value = test_data / "catalog/catalog-dirs"
mock_get_catalog_fp.return_value = test_data / "catalog/catalog-versions.yaml"
cats = available_versions(pretty=False)
assert cats == [
"v2025-02-28",
Expand All @@ -57,16 +60,35 @@ def test_available_versions(mock__get_catalog_rp, test_data):
], "Did not get expected catalog list"


@mock.patch("access_nri_intake.data.utils._get_catalog_rp")
def test_available_versions_pretty(mock__get_catalog_rp, test_data, capfd):
mock__get_catalog_rp.return_value = test_data / "catalog/catalog-dirs"
@mock.patch("access_nri_intake.data.utils._get_catalog_root")
@mock.patch("access_nri_intake.data.utils.get_catalog_fp")
def test_available_versions_pretty(
mock_get_catalog_fp, mock__get_catalog_root, test_data, capfd
):
mock__get_catalog_root.return_value = test_data / "catalog/catalog-dirs"
mock_get_catalog_fp.return_value = test_data / "catalog/catalog-versions.yaml"
available_versions(pretty=True)
captured, _ = capfd.readouterr()
assert (
captured == "v2025-02-28\nv2024-06-19\nv2024-01-01\nv2019-02-02(-->vN.N.N)\n"
captured == "v2025-02-28*\nv2024-06-19\nv2024-01-01\nv2019-02-02(-->vN.N.N)\n"
), "Did not get expected catalog printout"


@mock.patch(
"access_nri_intake.data.utils.get_catalog_fp", return_value="/this/is/not/real.yaml"
)
def test_available_versions_no_catalog(mock_get_catalog_fp):
with pytest.raises(FileNotFoundError):
available_versions()


@mock.patch("access_nri_intake.data.utils.get_catalog_fp")
def test_available_versions_bad_catalog(mock_get_catalog_fp, test_data):
mock_get_catalog_fp.return_value = test_data / "catalog/catalog-bad-structure.yaml"
with pytest.raises(RuntimeError):
available_versions()


@pytest.mark.parametrize(
"name",
[
Expand Down

0 comments on commit 293d484

Please sign in to comment.