diff --git a/CHANGELOG.md b/CHANGELOG.md
index fd48205..8eba1ea 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -9,6 +9,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 ### Changed
 
 - update pre-commit: to autoupdate and with gitleaks ([#247](https://github.com/nasa/stitchee/pull/247))([**@danielfromearth**](https://github.com/danielfromearth))
+- improved test coverage ([#248](https://github.com/nasa/stitchee/pull/248))([**@danielfromearth**](https://github.com/danielfromearth))
 
 ## [1.5.0] - 2024-11-08
 
diff --git a/concatenator/dataset_and_group_handling.py b/concatenator/dataset_and_group_handling.py
index 281c09c..1ee067e 100644
--- a/concatenator/dataset_and_group_handling.py
+++ b/concatenator/dataset_and_group_handling.py
@@ -268,6 +268,11 @@ def regroup_flattened_dataset(
 
 
 def _get_nested_group(dataset: nc.Dataset, group_path: str) -> nc.Group:
+    """Get the group object that is represented by the group_path string.
+
+    If the 'group_path' string represents a dimension in the root group,
+    then this returns the root group.
+    """
     nested_group = dataset
     for group in group_path.strip(concatenator.group_delim).split(concatenator.group_delim)[:-1]:
         nested_group = nested_group.groups[group]
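The new docstring spells out `_get_nested_group`'s root-group fallback. A minimal usage sketch of the convention the new unit tests pin down — assuming `concatenator.group_delim` is `"__"` (as test paths such as `"__Group1__level"` suggest) and a file like the one produced by the `ds_3dims_3vars_3coords_1group_part1` fixture:

```python
import netCDF4 as nc

from concatenator.dataset_and_group_handling import _get_nested_group

# Path is an assumption; the fixture writes this file to a pytest temp dir.
with nc.Dataset("test_3dims_3vars_3coords_1group_part1.nc") as ds:
    # The last path component is the variable/dimension name, so only the
    # components before it are walked as nested groups.
    assert _get_nested_group(ds, "__Group1__level") == ds.groups["Group1"]

    # A root-level name, with or without the leading delimiter, leaves no
    # group components after splitting, so the root dataset is returned.
    assert _get_nested_group(ds, "__track") == ds
    assert _get_nested_group(ds, "track") == ds
```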
diff --git a/concatenator/dimension_cleanup.py b/concatenator/dimension_cleanup.py
index 1d5ef10..0220eca 100644
--- a/concatenator/dimension_cleanup.py
+++ b/concatenator/dimension_cleanup.py
@@ -31,7 +31,7 @@ def remove_duplicate_dims(nc_dataset: nc.Dataset) -> nc.Dataset:
     for dup_var_name, dup_var in dup_vars.items():
         dim_list = list(
             dup_var.dimensions
-        )  # original dimensions of the variable with duplicated dims
+        )  # original dimensions of the variable with duplicated dimensions
 
         # Dimension(s) that are duplicated are retrieved.
         # Note: this is not yet tested for more than one duplicated dimension.
@@ -95,15 +95,15 @@ def remove_duplicate_dims(nc_dataset: nc.Dataset) -> nc.Dataset:
 
         del nc_dataset.variables[dup_var_name]
 
         # Replace original *Variable* with new variable with no duplicated dimensions.
-        new_dup_var[dup_var_name] = nc_dataset.createVariable(
+        nc_dataset.variables[dup_var_name] = nc_dataset.createVariable(
             dup_var_name,
             str(dup_var[:].dtype),
             tuple(new_dim_list),
             fill_value=fill_value,
         )
         for attr_name, contents in attrs_contents.items():
-            new_dup_var[dup_var_name].setncattr(attr_name, contents)
-            new_dup_var[dup_var_name][:] = dup_var[:]
+            nc_dataset[dup_var_name].setncattr(attr_name, contents)
+            nc_dataset[dup_var_name][:] = dup_var[:]
 
     return nc_dataset
 
@@ -111,14 +111,14 @@ def remove_duplicate_dims(nc_dataset: nc.Dataset) -> nc.Dataset:
 def get_attributes_minus_fillvalue_and_renamed_coords(
     original_var_name: str, new_var_name: str, original_dataset: nc.Dataset
 ) -> dict:
-    """Variable attributes are retrieved."""
+    """Variable attributes (other than _FillValue) are retrieved."""
     attrs_contents = {}
 
     for ncattr in original_dataset.variables[original_var_name].ncattrs():
         if ncattr != "_FillValue":
             contents: str = original_dataset.variables[original_var_name].getncattr(ncattr)
             if ncattr == "coordinates":
-                contents.replace(original_var_name, new_var_name)
+                contents = contents.replace(original_var_name, new_var_name)
             attrs_contents[ncattr] = contents
 
     return attrs_contents
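Two real bugs are fixed in this file: the recreated variable was previously assigned into a scratch dict (`new_dup_var`) instead of back onto the dataset, and `str.replace` was called without binding its return value. A minimal sketch of why the second was a silent no-op — Python strings are immutable:

```python
# str.replace returns a new string; it never mutates the original in place.
contents = "var0 track"
contents.replace("var0", "new_dim")  # return value discarded -- a no-op
assert contents == "var0 track"

contents = contents.replace("var0", "new_dim")  # the fix: rebind the name
assert contents == "new_dim track"
```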
diff --git a/tests/conftest.py b/tests/conftest.py
index b3399c2..be1c071 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -96,7 +96,57 @@ def toy_empty_dataset(temp_toy_data_dir):
     return filepath
 
 
-def add_to_ds_3dims_3vars_4coords_1group_with_step_values(open_ds: nc.Dataset, step_values: list):
+def add_to_ds_3dims_3vars_2coords_nogroup(open_ds: nc.Dataset, step_values: list):
+    """Creates dimensions and variables (no groups); and uses chosen step values in an open dataset"""
+    # Root-level Dimensions/Variables
+    open_ds.createDimension("step", 3)
+    open_ds.createDimension("track", 7)
+    open_ds.createVariable("step", "i2", ("step",), fill_value=False)
+    open_ds.createVariable("track", "i2", ("track",), fill_value=False)
+    open_ds.createVariable("var0", "f4", ("step", "track"))
+    #
+    open_ds["step"][:] = step_values
+    open_ds["track"][:] = [1, 2, 3, 4, 5, 6, 7]
+    open_ds["var0"][:] = [
+        [33, 78, 65, 12, 85, 35, 44],
+        [64, 24, 87, 12, 54, 82, 24],
+        [66, 18, 99, 52, 77, 88, 59],
+    ]
+
+    open_ds["var0"].coordinates = "var0 track"
+
+    return open_ds
+
+
+def add_to_ds_3dims_3vars_2coords_nogroup_duplicate_dimensions(
+    open_ds: nc.Dataset, step_values: list
+):
+    """Creates dimensions and variables (no groups, one duplicated dimension); and uses chosen step values in an open dataset"""
+    # Root-level Dimensions/Variables
+    open_ds.createDimension("step", 3)
+    open_ds.createDimension("track", 7)
+    open_ds.createVariable("step", "i2", ("step",), fill_value=False)
+    open_ds.createVariable("track", "i2", ("track",), fill_value=False)
+    open_ds.createVariable("var0", "f4", ("track", "step", "step"), fill_value=-99)
+    #
+    open_ds["step"][:] = step_values
+    open_ds["track"][:] = [1, 2, 3, 4, 5, 6, 7]
+    open_ds["var0"][:] = [
+        [[33, 78, 65], [33, 78, 65], [33, 78, 65]],
+        [[64, 24, 87], [64, 24, 87], [64, 24, 87]],
+        [[66, 18, 99], [66, 18, 99], [66, 18, 99]],
+        [[77, 88, 59], [77, 88, 59], [77, 88, 59]],
+        [[52, 77, 88], [52, 77, 88], [52, 77, 88]],
+        [[66, 18, 99], [66, 18, 99], [66, 18, 99]],
+        [[18, 99, 52], [18, 99, 52], [18, 99, 52]],
+    ]
+
+    open_ds["var0"].coordinates = "track step step"
+
+    return open_ds
+
+
+def add_to_ds_3dims_3vars_3coords_1group_with_step_values(open_ds: nc.Dataset, step_values: list):
     """Creates groups, dimensions, variables; and uses chosen step values in an open dataset"""
     grp1 = open_ds.createGroup("Group1")
@@ -159,33 +209,55 @@ def add_to_ds_3dims_3vars_4coords_1group_with_step_values(open_ds: nc.Dataset, s
 
 
 @pytest.fixture(scope="function")
-def ds_3dims_3vars_4coords_1group_part1(temp_toy_data_dir) -> Path:
-    filepath = temp_toy_data_dir / "test_3dims_3vars_4coords_1group_part1.nc"
+def ds_3dims_3vars_2coords_nogroup(temp_toy_data_dir) -> Path:
+    filepath = temp_toy_data_dir / "test_3dims_3vars_2coords_nogroup.nc"
+
+    f = nc.Dataset(filename=filepath, mode="w")
+    f = add_to_ds_3dims_3vars_2coords_nogroup(f, step_values=[9, 10, 11])
+    f.close()
+
+    return filepath
+
+
+@pytest.fixture(scope="function")
+def ds_3dims_3vars_2coords_nogroup_duplicate_dimensions(temp_toy_data_dir) -> Path:
+    filepath = temp_toy_data_dir / "test_3dims_3vars_2coords_nogroup_duplicate_dimensions.nc"
+
+    f = nc.Dataset(filename=filepath, mode="w")
+    f = add_to_ds_3dims_3vars_2coords_nogroup_duplicate_dimensions(f, step_values=[9, 10, 11])
+    f.close()
+
+    return filepath
+
+
+@pytest.fixture(scope="function")
+def ds_3dims_3vars_3coords_1group_part1(temp_toy_data_dir) -> Path:
+    filepath = temp_toy_data_dir / "test_3dims_3vars_3coords_1group_part1.nc"
 
     f = nc.Dataset(filename=filepath, mode="w")
-    f = add_to_ds_3dims_3vars_4coords_1group_with_step_values(f, step_values=[9, 10, 11])
+    f = add_to_ds_3dims_3vars_3coords_1group_with_step_values(f, step_values=[9, 10, 11])
     f.close()
 
     return filepath
 
 
 @pytest.fixture(scope="function")
-def ds_3dims_3vars_4coords_1group_part2(temp_toy_data_dir):
-    filepath = temp_toy_data_dir / "test_3dims_3vars_4coords_1group_part2.nc"
+def ds_3dims_3vars_3coords_1group_part2(temp_toy_data_dir):
+    filepath = temp_toy_data_dir / "test_3dims_3vars_3coords_1group_part2.nc"
 
     f = nc.Dataset(filename=filepath, mode="w")
-    f = add_to_ds_3dims_3vars_4coords_1group_with_step_values(f, step_values=[12, 13, 14])
+    f = add_to_ds_3dims_3vars_3coords_1group_with_step_values(f, step_values=[12, 13, 14])
     f.close()
 
     return filepath
 
 
 @pytest.fixture(scope="function")
-def ds_3dims_3vars_4coords_1group_part3(temp_toy_data_dir):
-    filepath = temp_toy_data_dir / "test_3dims_3vars_4coords_1group_part3.nc"
+def ds_3dims_3vars_3coords_1group_part3(temp_toy_data_dir):
+    filepath = temp_toy_data_dir / "test_3dims_3vars_3coords_1group_part3.nc"
 
     f = nc.Dataset(filename=filepath, mode="w")
-    f = add_to_ds_3dims_3vars_4coords_1group_with_step_values(f, step_values=[6, 7, 8])
+    f = add_to_ds_3dims_3vars_3coords_1group_with_step_values(f, step_values=[6, 7, 8])
     f.close()
 
     return filepath
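The two new "nogroup" helpers differ in one deliberate respect: the second declares `var0` on `("track", "step", "step")`, giving the dimension-cleanup tests a variable with a duplicated dimension. A sketch of what the resulting files contain — the filenames are assumptions, standing in for the temp-directory paths the fixtures return:

```python
import netCDF4 as nc

# Flat file with no groups and a simple 2-D variable.
with nc.Dataset("test_3dims_3vars_2coords_nogroup.nc") as ds:
    assert ds["var0"].dimensions == ("step", "track")
    assert ds["var0"].shape == (3, 7)
    assert ds["var0"].coordinates == "var0 track"

# Same layout, but "step" appears twice on var0 -- the exact case
# that remove_duplicate_dims is meant to repair.
with nc.Dataset("test_3dims_3vars_2coords_nogroup_duplicate_dimensions.nc") as ds:
    assert ds["var0"].dimensions == ("track", "step", "step")
    assert ds["var0"].shape == (7, 3, 3)
```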
diff --git a/tests/integration/test_history_construction.py b/tests/integration/test_history_construction.py
index 04e5f88..36dae30 100644
--- a/tests/integration/test_history_construction.py
+++ b/tests/integration/test_history_construction.py
@@ -10,8 +10,8 @@
 def test_construct_and_append_history_for_sample_concatenation(
     temp_toy_data_dir,
     temp_output_dir,
-    ds_3dims_3vars_4coords_1group_part1,
-    ds_3dims_3vars_4coords_1group_part2,
+    ds_3dims_3vars_3coords_1group_part1,
+    ds_3dims_3vars_3coords_1group_part2,
 ):
     output_path = str(temp_output_dir.joinpath("simple_sample_concatenated.nc"))  # type: ignore
     prepared_input_files = prep_input_files(temp_toy_data_dir, temp_output_dir)
diff --git a/tests/unit/test_dataset_and_group_handling.py b/tests/unit/test_dataset_and_group_handling.py
index 34abb0b..70a98b3 100644
--- a/tests/unit/test_dataset_and_group_handling.py
+++ b/tests/unit/test_dataset_and_group_handling.py
@@ -5,6 +5,7 @@
 import netCDF4 as nc
 
 from concatenator.dataset_and_group_handling import (
+    _get_nested_group,
     _is_file_empty,
     validate_workable_files,
 )
@@ -41,8 +42,29 @@
 
 
 def test_dataset_with_values_is_identified_as_not_empty(
-    ds_3dims_3vars_4coords_1group_part1,
+    ds_3dims_3vars_3coords_1group_part1,
 ):
     """Ensure that a dataset with non-null arrays is identified as NOT empty."""
-    with nc.Dataset(ds_3dims_3vars_4coords_1group_part1) as ds:
+    with nc.Dataset(ds_3dims_3vars_3coords_1group_part1) as ds:
         assert _is_file_empty(ds) is False
+
+
+def test_get_nested_group(ds_3dims_3vars_3coords_1group_part1):
+    """Ensure that a nested group object is correctly retrieved."""
+    with nc.Dataset(ds_3dims_3vars_3coords_1group_part1) as ds:
+        group_obj = _get_nested_group(ds, "__Group1__level")
+        assert isinstance(group_obj, nc.Group)
+
+
+def test_get_root_group(ds_3dims_3vars_3coords_1group_part1):
+    """Ensure that the root group is returned for a root-level dimension path."""
+    with nc.Dataset(ds_3dims_3vars_3coords_1group_part1) as ds:
+        group_obj = _get_nested_group(ds, "__track")
+        assert group_obj == ds
+
+
+def test_get_root_group_when_no_delimiter_present(ds_3dims_3vars_3coords_1group_part1):
+    """Ensure that the root group is returned when the path contains no delimiter."""
+    with nc.Dataset(ds_3dims_3vars_3coords_1group_part1) as ds:
+        group_obj = _get_nested_group(ds, "track")
+        assert group_obj == ds
diff --git a/tests/unit/test_dimension_cleanup.py b/tests/unit/test_dimension_cleanup.py
new file mode 100644
index 0000000..c05c1cc
--- /dev/null
+++ b/tests/unit/test_dimension_cleanup.py
@@ -0,0 +1,29 @@
+"""Tests for netCDF dimension cleanup operations."""
+
+# pylint: disable=C0116, C0301
+
+import netCDF4 as nc
+
+from concatenator.dimension_cleanup import (
+    get_attributes_minus_fillvalue_and_renamed_coords,
+)
+
+
+def test_get_attributes_minus_fillvalue_and_renamed_coords(ds_3dims_3vars_2coords_nogroup):
+    with nc.Dataset(ds_3dims_3vars_2coords_nogroup, "r+") as ds:
+        attr_contents_dict = get_attributes_minus_fillvalue_and_renamed_coords(
+            original_var_name="var0", new_var_name="new_dim", original_dataset=ds
+        )
+
+        assert attr_contents_dict["coordinates"] == "new_dim track"
+
+
+# TODO: this next test is still failing. It should go away once
+#   xarray's DataTree is used instead of flattening the group structure.
+# def test_remove_duplicate_dims(ds_3dims_3vars_2coords_nogroup_duplicate_dimensions):
+#     with nc.Dataset(ds_3dims_3vars_2coords_nogroup_duplicate_dimensions, "r+") as ds:
+#         ds_with_replaced_dims = remove_duplicate_dims(ds)
+#
+#         ds_with_replaced_dims["var0"].coordinates = "var0 track track"
+#
+#         assert ds_with_replaced_dims["var0"].coordinates == "var0 track track_1"
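The new unit test only checks the `coordinates` rewrite, so it is worth noting the helper's other behavior: `_FillValue` is never copied. A complementary sketch using the duplicate-dimensions fixture file (filename assumed; its `var0` carries `_FillValue=-99` and `coordinates="track step step"`), with `"var0_renamed"` as a hypothetical new name:

```python
import netCDF4 as nc

from concatenator.dimension_cleanup import (
    get_attributes_minus_fillvalue_and_renamed_coords,
)

with nc.Dataset("test_3dims_3vars_2coords_nogroup_duplicate_dimensions.nc") as ds:
    attrs = get_attributes_minus_fillvalue_and_renamed_coords(
        original_var_name="var0", new_var_name="var0_renamed", original_dataset=ds
    )
    # _FillValue exists on var0 but is deliberately excluded from the copy...
    assert "_FillValue" in ds["var0"].ncattrs()
    assert "_FillValue" not in attrs
    # ...and since "var0" does not appear in this coordinates string,
    # the rename leaves it unchanged.
    assert attrs["coordinates"] == "track step step"
```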
diff --git a/tests/unit/test_file_ops.py b/tests/unit/test_file_ops.py
index 8044e0f..020975d 100644
--- a/tests/unit/test_file_ops.py
+++ b/tests/unit/test_file_ops.py
@@ -1,6 +1,10 @@
 from pathlib import Path
 
-from concatenator.file_ops import add_label_to_path
+import pytest
+
+from concatenator.file_ops import add_label_to_path, validate_input_path, validate_output_path
+
+from .. import data_for_tests_dir
 
 
 def test_add_label_to_path():
@@ -10,3 +14,27 @@
     new_path = str((this_module_dir / "tests_file_new-suffix.nc").resolve())
 
     assert add_label_to_path(origin_path, label="_new-suffix") == new_path
+
+
+def test_validate_bad_output_paths():
+    path_to_file_that_exists = str(
+        data_for_tests_dir / "unit-test-data" / "TEMPO_NO2_L2_V03_20240328T154353Z_S008G01.nc4"
+    )
+
+    with pytest.raises(FileExistsError):
+        validate_output_path(path_to_file_that_exists, overwrite=False)
+
+    with pytest.raises(TypeError):
+        validate_output_path(str(data_for_tests_dir), overwrite=False)
+
+
+def test_validate_bad_non_existent_input_path():
+    path_to_file_that_does_not_exist = str(
+        data_for_tests_dir / "unit-test-data" / "non-existent.nc4"
+    )
+
+    with pytest.raises(TypeError):
+        validate_input_path([path_to_file_that_does_not_exist])
+
+    with pytest.raises(TypeError):
+        validate_input_path([])
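These two tests lock in the validators' failure modes: an existing output file without `overwrite=True` raises `FileExistsError`, while a directory given as the output path, a non-existent input file, or an empty input list raises `TypeError`. For the pre-existing `test_add_label_to_path`, a minimal sketch of the behavior it asserts — assuming the label is simply spliced in before the file extension:

```python
from pathlib import Path

from concatenator.file_ops import add_label_to_path

# Mirrors test_add_label_to_path: the label lands between stem and suffix.
some_dir = Path(".").resolve()  # any directory; stands in for the test module dir
origin_path = str(some_dir / "tests_file.nc")
expected = str(some_dir / "tests_file_new-suffix.nc")
assert add_label_to_path(origin_path, label="_new-suffix") == expected
```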