improve test coverage for dimension cleanup #248

1 change: 1 addition & 0 deletions CHANGELOG.md
@@ -9,6 +9,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 ### Changed

 - update pre-commit: to autoupdate and with gitleaks ([#247](https://github.com/nasa/stitchee/pull/247))([**@danielfromearth**](https://github.com/danielfromearth))
+- improved test coverage ([#248](https://github.com/nasa/stitchee/pull/248))([**@danielfromearth**](https://github.com/danielfromearth))

 ## [1.5.0] - 2024-11-08
5 changes: 5 additions & 0 deletions concatenator/dataset_and_group_handling.py
@@ -268,6 +268,11 @@ def regroup_flattened_dataset(


 def _get_nested_group(dataset: nc.Dataset, group_path: str) -> nc.Group:
+    """Get the group object that is represented by the group_path string.
+
+    If the 'group_path' string represents a dimension in the root group,
+    then this returns the root group.
+    """
     nested_group = dataset
     for group in group_path.strip(concatenator.group_delim).split(concatenator.group_delim)[:-1]:
         nested_group = nested_group.groups[group]
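For readers skimming the diff, a self-contained sketch of the traversal being documented here may help. It assumes the flattened-path delimiter (`concatenator.group_delim`) is `"__"`, inferred from paths like `"__Group1__level"` in the new tests below; the function and file name are illustrative only, not stitchee's actual module.

```python
# Standalone sketch of _get_nested_group's traversal.
# GROUP_DELIM = "__" is an assumption inferred from the tests below.
import netCDF4 as nc

GROUP_DELIM = "__"


def get_nested_group(dataset: nc.Dataset, group_path: str) -> nc.Group:
    # Walk every path component except the last (a variable or dimension name).
    nested_group = dataset
    for group in group_path.strip(GROUP_DELIM).split(GROUP_DELIM)[:-1]:
        nested_group = nested_group.groups[group]
    return nested_group


# "__track" strips and splits to a single component, so the loop body never
# runs and the root group itself is returned -- the case the new docstring
# documents and the new tests exercise.
with nc.Dataset("demo.nc", mode="w", diskless=True) as ds:
    ds.createGroup("Group1")
    assert get_nested_group(ds, "__Group1__level").path == "/Group1"
    assert get_nested_group(ds, "__track") is ds
```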
12 changes: 6 additions & 6 deletions concatenator/dimension_cleanup.py
@@ -31,7 +31,7 @@ def remove_duplicate_dims(nc_dataset: nc.Dataset) -> nc.Dataset:
     for dup_var_name, dup_var in dup_vars.items():
         dim_list = list(
             dup_var.dimensions
-        )  # original dimensions of the variable with duplicated dims
+        )  # original dimensions of the variable with duplicated dimensions

         # Dimension(s) that are duplicated are retrieved.
         # Note: this is not yet tested for more than one duplicated dimension.
@@ -95,30 +95,30 @@ def remove_duplicate_dims(nc_dataset: nc.Dataset) -> nc.Dataset:
         del nc_dataset.variables[dup_var_name]

         # Replace original *Variable* with new variable with no duplicated dimensions.
-        new_dup_var[dup_var_name] = nc_dataset.createVariable(
+        nc_dataset.variables[dup_var_name] = nc_dataset.createVariable(
             dup_var_name,
             str(dup_var[:].dtype),
             tuple(new_dim_list),
             fill_value=fill_value,
         )
         for attr_name, contents in attrs_contents.items():
-            new_dup_var[dup_var_name].setncattr(attr_name, contents)
-        new_dup_var[dup_var_name][:] = dup_var[:]
+            nc_dataset[dup_var_name].setncattr(attr_name, contents)
+        nc_dataset[dup_var_name][:] = dup_var[:]

     return nc_dataset


 def get_attributes_minus_fillvalue_and_renamed_coords(
     original_var_name: str, new_var_name: str, original_dataset: nc.Dataset
 ) -> dict:
-    """Variable attributes are retrieved."""
+    """Variable attributes (other than FillValue) are retrieved."""
     attrs_contents = {}

     for ncattr in original_dataset.variables[original_var_name].ncattrs():
         if ncattr != "_FillValue":
             contents: str = original_dataset.variables[original_var_name].getncattr(ncattr)
             if ncattr == "coordinates":
-                contents.replace(original_var_name, new_var_name)
+                contents = contents.replace(original_var_name, new_var_name)
             attrs_contents[ncattr] = contents

     return attrs_contents
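The last change above is subtle enough to deserve a note: `str.replace` returns a new string and never mutates its input, so discarding the return value makes the call a no-op. (The other change routes the rebuilt variable and its copied attributes through `nc_dataset` directly rather than the leftover `new_dup_var` mapping.) A minimal illustration of the string fix:

```python
# Why `contents = contents.replace(...)` is required: Python strings are
# immutable, so str.replace returns a new string rather than editing in place.
contents = "var0 track"

contents.replace("var0", "new_dim")  # return value discarded -- a no-op
assert contents == "var0 track"

contents = contents.replace("var0", "new_dim")  # the fix: rebind the name
assert contents == "new_dim track"
```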
92 changes: 82 additions & 10 deletions tests/conftest.py
@@ -96,7 +96,57 @@ def toy_empty_dataset(temp_toy_data_dir):
     return filepath


-def add_to_ds_3dims_3vars_4coords_1group_with_step_values(open_ds: nc.Dataset, step_values: list):
+def add_to_ds_3dims_3vars_2coords_nogroup(open_ds: nc.Dataset, step_values: list):
+    """Creates dimensions and variables, and uses chosen step values in an open dataset"""
+    # Root-level Dimensions/Variables
+    open_ds.createDimension("step", 3)
+    open_ds.createDimension("track", 7)
+    open_ds.createVariable("step", "i2", ("step",), fill_value=False)
+    open_ds.createVariable("track", "i2", ("track",), fill_value=False)
+    open_ds.createVariable("var0", "f4", ("step", "track"))
+    #
+    open_ds["step"][:] = step_values
+    open_ds["track"][:] = [1, 2, 3, 4, 5, 6, 7]
+    open_ds["var0"][:] = [
+        [33, 78, 65, 12, 85, 35, 44],
+        [64, 24, 87, 12, 54, 82, 24],
+        [66, 18, 99, 52, 77, 88, 59],
+    ]
+
+    open_ds["var0"].coordinates = "var0 track"
+
+    return open_ds
+
+
+def add_to_ds_3dims_3vars_2coords_nogroup_duplicate_dimensions(
+    open_ds: nc.Dataset, step_values: list
+):
+    """Creates dimensions and variables (with a duplicated dimension), and uses chosen step values"""
+    # Root-level Dimensions/Variables
+    open_ds.createDimension("step", 3)
+    open_ds.createDimension("track", 7)
+    open_ds.createVariable("step", "i2", ("step",), fill_value=False)
+    open_ds.createVariable("track", "i2", ("track",), fill_value=False)
+    open_ds.createVariable("var0", "f4", ("track", "step", "step"), fill_value=-99)
+    #
+    open_ds["step"][:] = step_values
+    open_ds["track"][:] = [1, 2, 3, 4, 5, 6, 7]
+    open_ds["var0"][:] = [
+        [[33, 78, 65], [33, 78, 65], [33, 78, 65]],
+        [[64, 24, 87], [64, 24, 87], [64, 24, 87]],
+        [[66, 18, 99], [66, 18, 99], [66, 18, 99]],
+        [[77, 88, 59], [77, 88, 59], [77, 88, 59]],
+        [[52, 77, 88], [52, 77, 88], [52, 77, 88]],
+        [[66, 18, 99], [66, 18, 99], [66, 18, 99]],
+        [[18, 99, 52], [18, 99, 52], [18, 99, 52]],
+    ]
+
+    open_ds["var0"].coordinates = "track step step"
+
+    return open_ds
+
+
+def add_to_ds_3dims_3vars_3coords_1group_with_step_values(open_ds: nc.Dataset, step_values: list):
     """Creates groups, dimensions, variables; and uses chosen step values in an open dataset"""
     grp1 = open_ds.createGroup("Group1")

@@ -159,33 +209,55 @@ def add_to_ds_3dims_3vars_4coords_1group_with_step_values(open_ds: nc.Dataset, s


 @pytest.fixture(scope="function")
-def ds_3dims_3vars_4coords_1group_part1(temp_toy_data_dir) -> Path:
-    filepath = temp_toy_data_dir / "test_3dims_3vars_4coords_1group_part1.nc"
+def ds_3dims_3vars_2coords_nogroup(temp_toy_data_dir) -> Path:
+    filepath = temp_toy_data_dir / "test_3dims_3vars_2coords_nogroup.nc"

     f = nc.Dataset(filename=filepath, mode="w")
+    f = add_to_ds_3dims_3vars_2coords_nogroup(f, step_values=[9, 10, 11])
+    f.close()
+
+    return filepath
+
+
+@pytest.fixture(scope="function")
+def ds_3dims_3vars_2coords_nogroup_duplicate_dimensions(temp_toy_data_dir) -> Path:
+    filepath = temp_toy_data_dir / "test_3dims_3vars_2coords_nogroup_duplicate_dimensions.nc"
+
+    f = nc.Dataset(filename=filepath, mode="w")
+    f = add_to_ds_3dims_3vars_2coords_nogroup_duplicate_dimensions(f, step_values=[9, 10, 11])
+    f.close()
+
+    return filepath
+
+
+@pytest.fixture(scope="function")
+def ds_3dims_3vars_3coords_1group_part1(temp_toy_data_dir) -> Path:
+    filepath = temp_toy_data_dir / "test_3dims_3vars_3coords_1group_part1.nc"
+
+    f = nc.Dataset(filename=filepath, mode="w")
-    f = add_to_ds_3dims_3vars_4coords_1group_with_step_values(f, step_values=[9, 10, 11])
+    f = add_to_ds_3dims_3vars_3coords_1group_with_step_values(f, step_values=[9, 10, 11])
     f.close()

     return filepath


 @pytest.fixture(scope="function")
-def ds_3dims_3vars_4coords_1group_part2(temp_toy_data_dir):
-    filepath = temp_toy_data_dir / "test_3dims_3vars_4coords_1group_part2.nc"
+def ds_3dims_3vars_3coords_1group_part2(temp_toy_data_dir):
+    filepath = temp_toy_data_dir / "test_3dims_3vars_3coords_1group_part2.nc"

     f = nc.Dataset(filename=filepath, mode="w")
-    f = add_to_ds_3dims_3vars_4coords_1group_with_step_values(f, step_values=[12, 13, 14])
+    f = add_to_ds_3dims_3vars_3coords_1group_with_step_values(f, step_values=[12, 13, 14])
     f.close()

     return filepath


 @pytest.fixture(scope="function")
-def ds_3dims_3vars_4coords_1group_part3(temp_toy_data_dir):
-    filepath = temp_toy_data_dir / "test_3dims_3vars_4coords_1group_part3.nc"
+def ds_3dims_3vars_3coords_1group_part3(temp_toy_data_dir):
+    filepath = temp_toy_data_dir / "test_3dims_3vars_3coords_1group_part3.nc"

     f = nc.Dataset(filename=filepath, mode="w")
-    f = add_to_ds_3dims_3vars_4coords_1group_with_step_values(f, step_values=[6, 7, 8])
+    f = add_to_ds_3dims_3vars_3coords_1group_with_step_values(f, step_values=[6, 7, 8])
     f.close()

     return filepath
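The new duplicate-dimensions fixture builds exactly the case `remove_duplicate_dims` targets: a variable whose dimension tuple repeats a name, which the netCDF data model itself permits. A minimal sketch of such a variable and of detecting the repeat (the file name is hypothetical, and this is an illustration rather than a stitchee API):

```python
# Build a variable shaped like the fixture's ("track", "step", "step") case
# and detect the duplicated dimension name in its dimensions tuple.
import netCDF4 as nc

with nc.Dataset("dup_demo.nc", mode="w", diskless=True) as ds:
    ds.createDimension("step", 3)
    ds.createDimension("track", 7)
    var0 = ds.createVariable("var0", "f4", ("track", "step", "step"), fill_value=-99)

    dims = var0.dimensions  # ("track", "step", "step")
    duplicated = {d for d in dims if dims.count(d) > 1}
    assert duplicated == {"step"}
```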
4 changes: 2 additions & 2 deletions tests/integration/test_history_construction.py
@@ -10,8 +10,8 @@
 def test_construct_and_append_history_for_sample_concatenation(
     temp_toy_data_dir,
     temp_output_dir,
-    ds_3dims_3vars_4coords_1group_part1,
-    ds_3dims_3vars_4coords_1group_part2,
+    ds_3dims_3vars_3coords_1group_part1,
+    ds_3dims_3vars_3coords_1group_part2,
 ):
     output_path = str(temp_output_dir.joinpath("simple_sample_concatenated.nc"))  # type: ignore
     prepared_input_files = prep_input_files(temp_toy_data_dir, temp_output_dir)
26 changes: 24 additions & 2 deletions tests/unit/test_dataset_and_group_handling.py
@@ -5,6 +5,7 @@
 import netCDF4 as nc

 from concatenator.dataset_and_group_handling import (
+    _get_nested_group,
     _is_file_empty,
     validate_workable_files,
 )
@@ -41,8 +42,29 @@ def test_toy_dataset_with_singleton_null_values_is_identified_as_empty(


 def test_dataset_with_values_is_identified_as_not_empty(
-    ds_3dims_3vars_4coords_1group_part1,
+    ds_3dims_3vars_3coords_1group_part1,
 ):
     """Ensure that a dataset with non-null arrays is identified as NOT empty."""
-    with nc.Dataset(ds_3dims_3vars_4coords_1group_part1) as ds:
+    with nc.Dataset(ds_3dims_3vars_3coords_1group_part1) as ds:
         assert _is_file_empty(ds) is False
+
+
+def test_get_nested_group(ds_3dims_3vars_3coords_1group_part1):
+    """Ensure that the retrieved group is correct."""
+    with nc.Dataset(ds_3dims_3vars_3coords_1group_part1) as ds:
+        group_obj = _get_nested_group(ds, "__Group1__level")
+        assert isinstance(group_obj, nc.Group)
+
+
+def test_get_root_group(ds_3dims_3vars_3coords_1group_part1):
+    """Ensure that the root group is returned for a root-level dimension path."""
+    with nc.Dataset(ds_3dims_3vars_3coords_1group_part1) as ds:
+        group_obj = _get_nested_group(ds, "__track")
+        assert group_obj == ds
+
+
+def test_get_root_group_when_no_delimiter_present(ds_3dims_3vars_3coords_1group_part1):
+    """Ensure that the root group is returned when the path has no group delimiter."""
+    with nc.Dataset(ds_3dims_3vars_3coords_1group_part1) as ds:
+        group_obj = _get_nested_group(ds, "track")
+        assert group_obj == ds
29 changes: 29 additions & 0 deletions tests/unit/test_dimension_cleanup.py
@@ -0,0 +1,29 @@
+"""Tests for netCDF dimension clean up operations."""
+
+# pylint: disable=C0116, C0301
+
+import netCDF4 as nc
+
+from concatenator.dimension_cleanup import (
+    get_attributes_minus_fillvalue_and_renamed_coords,
+)
+
+
+def test_get_attributes_minus_fillvalue_and_renamed_coords(ds_3dims_3vars_2coords_nogroup):
+    with nc.Dataset(ds_3dims_3vars_2coords_nogroup, "r+") as ds:
+        attr_contents_dict = get_attributes_minus_fillvalue_and_renamed_coords(
+            original_var_name="var0", new_var_name="new_dim", original_dataset=ds
+        )
+
+        assert attr_contents_dict["coordinates"] == "new_dim track"
+
+
+# TODO: this next test is still failing.
+#   Should go away once using xarray's DataTree instead of flattening group structure.
+# def test_remove_duplicate_dims(ds_3dims_3vars_2coords_nogroup_duplicate_dimensions):
+#     with nc.Dataset(ds_3dims_3vars_2coords_nogroup_duplicate_dimensions, "r+") as ds:
+#         ds_with_replaced_dims = remove_duplicate_dims(ds)
+#
+#         ds_with_replaced_dims["var0"].coordinates = "var0 track track"
+#
+#         assert ds_with_replaced_dims["var0"].coordinates == "var0 track track_1"
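Until that TODO is resolved, the expected end state can still be sketched. Judging from the `track_1` in the commented-out assertion, repeat occurrences of a name are renamed with a numeric suffix; the helper below illustrates that convention and is an assumption, not stitchee's actual code:

```python
# Assumed renaming convention for duplicated dimensions: the second and later
# occurrences of a name gain "_1", "_2", ... suffixes.
def dedupe_dims(dims: tuple[str, ...]) -> tuple[str, ...]:
    seen: dict[str, int] = {}
    renamed = []
    for dim in dims:
        count = seen.get(dim, 0)
        renamed.append(dim if count == 0 else f"{dim}_{count}")
        seen[dim] = count + 1
    return tuple(renamed)


assert dedupe_dims(("track", "step", "step")) == ("track", "step", "step_1")
```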
30 changes: 29 additions & 1 deletion tests/unit/test_file_ops.py
@@ -1,6 +1,10 @@
 from pathlib import Path

-from concatenator.file_ops import add_label_to_path
+import pytest
+
+from concatenator.file_ops import add_label_to_path, validate_input_path, validate_output_path
+
+from .. import data_for_tests_dir


 def test_add_label_to_path():
@@ -10,3 +14,27 @@ def test_add_label_to_path():
     new_path = str((this_module_dir / "tests_file_new-suffix.nc").resolve())

     assert add_label_to_path(origin_path, label="_new-suffix") == new_path
+
+
+def test_validate_bad_output_paths():
+    path_to_file_that_exists = str(
+        data_for_tests_dir / "unit-test-data" / "TEMPO_NO2_L2_V03_20240328T154353Z_S008G01.nc4"
+    )
+
+    with pytest.raises(FileExistsError):
+        validate_output_path(path_to_file_that_exists, overwrite=False)
+
+    with pytest.raises(TypeError):
+        validate_output_path(str(data_for_tests_dir), overwrite=False)
+
+
+def test_validate_bad_non_existent_input_path():
+    path_to_file_that_does_not_exist = str(
+        data_for_tests_dir / "unit-test-data" / "non-existent.nc4"
+    )
+
+    with pytest.raises(TypeError):
+        validate_input_path([path_to_file_that_does_not_exist])
+
+    with pytest.raises(TypeError):
+        validate_input_path([])
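For context on the contract these tests pin down, here is a minimal sketch of validators consistent with the asserted behaviors: `FileExistsError` for an existing output without `overwrite`, and `TypeError` for a directory output, a missing input file, or an empty input list. This is an illustration of the contract, not stitchee's actual implementation.

```python
# Hypothetical validators matching the behaviors asserted in the tests above.
from pathlib import Path


def validate_output_path(output_path: str, overwrite: bool = False) -> None:
    path = Path(output_path)
    if path.is_file() and not overwrite:
        raise FileExistsError(f"{output_path} already exists; pass overwrite=True to replace it.")
    if path.is_dir():
        raise TypeError(f"{output_path} is a directory, not a file path.")


def validate_input_path(input_paths: list[str]) -> None:
    if not input_paths:
        raise TypeError("No input files were provided.")
    for p in input_paths:
        if not Path(p).is_file():
            raise TypeError(f"Input file {p} does not exist.")
```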