improve test coverage for dimension cleanup #248

1 change: 1 addition & 0 deletions CHANGELOG.md
@@ -9,6 +9,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 ### Changed

 - update pre-commit: to autoupdate and with gitleaks ([#247](https://github.com/nasa/stitchee/pull/247))([**@danielfromearth**](https://github.com/danielfromearth))
+- improved test coverage ([#248](https://github.com/nasa/stitchee/pull/248))([**@danielfromearth**](https://github.com/danielfromearth))

 ## [1.5.0] - 2024-11-08
5 changes: 5 additions & 0 deletions concatenator/dataset_and_group_handling.py
@@ -268,6 +268,11 @@ def regroup_flattened_dataset(


 def _get_nested_group(dataset: nc.Dataset, group_path: str) -> nc.Group:
+    """Get the group object that is represented by the group_path string.
+
+    If the 'group_path' string represents a dimension in the root group,
+    then this returns the root group.
+    """
     nested_group = dataset
     for group in group_path.strip(concatenator.group_delim).split(concatenator.group_delim)[:-1]:
         nested_group = nested_group.groups[group]
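For readers skimming the diff, a self-contained sketch of the traversal being documented here may help. It assumes the flattened-path delimiter (`concatenator.group_delim`) is `"__"`, inferred from paths like `"__Group1__level"` in the new tests below; the function and file name are illustrative only, not stitchee's actual module.

```python
# Standalone sketch of _get_nested_group's traversal.
# GROUP_DELIM = "__" is an assumption inferred from the tests below.
import netCDF4 as nc

GROUP_DELIM = "__"


def get_nested_group(dataset: nc.Dataset, group_path: str) -> nc.Group:
    # Walk every path component except the last (a variable or dimension name).
    nested_group = dataset
    for group in group_path.strip(GROUP_DELIM).split(GROUP_DELIM)[:-1]:
        nested_group = nested_group.groups[group]
    return nested_group


# "__track" strips and splits to a single component, so the loop body never
# runs and the root group itself is returned -- the case the new docstring
# documents and the new tests exercise.
with nc.Dataset("demo.nc", mode="w", diskless=True) as ds:
    ds.createGroup("Group1")
    assert get_nested_group(ds, "__Group1__level").path == "/Group1"
    assert get_nested_group(ds, "__track") is ds
```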
12 changes: 6 additions & 6 deletions concatenator/dimension_cleanup.py
@@ -31,7 +31,7 @@ def remove_duplicate_dims(nc_dataset: nc.Dataset) -> nc.Dataset:
     for dup_var_name, dup_var in dup_vars.items():
         dim_list = list(
             dup_var.dimensions
-        )  # original dimensions of the variable with duplicated dims
+        )  # original dimensions of the variable with duplicated dimensions

         # Dimension(s) that are duplicated are retrieved.
         # Note: this is not yet tested for more than one duplicated dimension.
@@ -95,30 +95,30 @@ def remove_duplicate_dims(nc_dataset: nc.Dataset) -> nc.Dataset:
         del nc_dataset.variables[dup_var_name]

         # Replace original *Variable* with new variable with no duplicated dimensions.
-        new_dup_var[dup_var_name] = nc_dataset.createVariable(
+        nc_dataset.variables[dup_var_name] = nc_dataset.createVariable(
             dup_var_name,
             str(dup_var[:].dtype),
             tuple(new_dim_list),
             fill_value=fill_value,
         )
         for attr_name, contents in attrs_contents.items():
-            new_dup_var[dup_var_name].setncattr(attr_name, contents)
-        new_dup_var[dup_var_name][:] = dup_var[:]
+            nc_dataset[dup_var_name].setncattr(attr_name, contents)
+        nc_dataset[dup_var_name][:] = dup_var[:]

     return nc_dataset


 def get_attributes_minus_fillvalue_and_renamed_coords(
     original_var_name: str, new_var_name: str, original_dataset: nc.Dataset
 ) -> dict:
-    """Variable attributes are retrieved."""
+    """Variable attributes (other than FillValue) are retrieved."""
     attrs_contents = {}

     for ncattr in original_dataset.variables[original_var_name].ncattrs():
         if ncattr != "_FillValue":
             contents: str = original_dataset.variables[original_var_name].getncattr(ncattr)
             if ncattr == "coordinates":
-                contents.replace(original_var_name, new_var_name)
+                contents = contents.replace(original_var_name, new_var_name)
             attrs_contents[ncattr] = contents

     return attrs_contents
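The last change above is subtle enough to deserve a note: `str.replace` returns a new string and never mutates its input, so discarding the return value makes the call a no-op. (The other change routes the rebuilt variable and its copied attributes through `nc_dataset` directly rather than the leftover `new_dup_var` mapping.) A minimal illustration of the string fix:

```python
# Why `contents = contents.replace(...)` is required: Python strings are
# immutable, so str.replace returns a new string rather than editing in place.
contents = "var0 track"

contents.replace("var0", "new_dim")  # return value discarded -- a no-op
assert contents == "var0 track"

contents = contents.replace("var0", "new_dim")  # the fix: rebind the name
assert contents == "new_dim track"
```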
92 changes: 82 additions & 10 deletions tests/conftest.py
@@ -96,7 +96,57 @@ def toy_empty_dataset(temp_toy_data_dir):
     return filepath


-def add_to_ds_3dims_3vars_4coords_1group_with_step_values(open_ds: nc.Dataset, step_values: list):
+def add_to_ds_3dims_3vars_2coords_nogroup(open_ds: nc.Dataset, step_values: list):
+    """Creates dimensions and variables, and uses chosen step values in an open dataset"""
+    # Root-level Dimensions/Variables
+    open_ds.createDimension("step", 3)
+    open_ds.createDimension("track", 7)
+    open_ds.createVariable("step", "i2", ("step",), fill_value=False)
+    open_ds.createVariable("track", "i2", ("track",), fill_value=False)
+    open_ds.createVariable("var0", "f4", ("step", "track"))
+    #
+    open_ds["step"][:] = step_values
+    open_ds["track"][:] = [1, 2, 3, 4, 5, 6, 7]
+    open_ds["var0"][:] = [
+        [33, 78, 65, 12, 85, 35, 44],
+        [64, 24, 87, 12, 54, 82, 24],
+        [66, 18, 99, 52, 77, 88, 59],
+    ]
+
+    open_ds["var0"].coordinates = "var0 track"
+
+    return open_ds
+
+
+def add_to_ds_3dims_3vars_2coords_nogroup_duplicate_dimensions(
+    open_ds: nc.Dataset, step_values: list
+):
+    """Creates dimensions and variables (with a duplicated dimension), and uses chosen step values"""
+    # Root-level Dimensions/Variables
+    open_ds.createDimension("step", 3)
+    open_ds.createDimension("track", 7)
+    open_ds.createVariable("step", "i2", ("step",), fill_value=False)
+    open_ds.createVariable("track", "i2", ("track",), fill_value=False)
+    open_ds.createVariable("var0", "f4", ("track", "step", "step"), fill_value=-99)
+    #
+    open_ds["step"][:] = step_values
+    open_ds["track"][:] = [1, 2, 3, 4, 5, 6, 7]
+    open_ds["var0"][:] = [
+        [[33, 78, 65], [33, 78, 65], [33, 78, 65]],
+        [[64, 24, 87], [64, 24, 87], [64, 24, 87]],
+        [[66, 18, 99], [66, 18, 99], [66, 18, 99]],
+        [[77, 88, 59], [77, 88, 59], [77, 88, 59]],
+        [[52, 77, 88], [52, 77, 88], [52, 77, 88]],
+        [[66, 18, 99], [66, 18, 99], [66, 18, 99]],
+        [[18, 99, 52], [18, 99, 52], [18, 99, 52]],
+    ]
+
+    open_ds["var0"].coordinates = "track step step"
+
+    return open_ds
+
+
+def add_to_ds_3dims_3vars_3coords_1group_with_step_values(open_ds: nc.Dataset, step_values: list):
     """Creates groups, dimensions, variables; and uses chosen step values in an open dataset"""
     grp1 = open_ds.createGroup("Group1")

@@ -159,33 +209,55 @@ def add_to_ds_3dims_3vars_4coords_1group_with_step_values(open_ds: nc.Dataset, s


 @pytest.fixture(scope="function")
-def ds_3dims_3vars_4coords_1group_part1(temp_toy_data_dir) -> Path:
-    filepath = temp_toy_data_dir / "test_3dims_3vars_4coords_1group_part1.nc"
+def ds_3dims_3vars_2coords_nogroup(temp_toy_data_dir) -> Path:
+    filepath = temp_toy_data_dir / "test_3dims_3vars_2coords_nogroup.nc"

     f = nc.Dataset(filename=filepath, mode="w")
+    f = add_to_ds_3dims_3vars_2coords_nogroup(f, step_values=[9, 10, 11])
+    f.close()
+
+    return filepath
+
+
+@pytest.fixture(scope="function")
+def ds_3dims_3vars_2coords_nogroup_duplicate_dimensions(temp_toy_data_dir) -> Path:
+    filepath = temp_toy_data_dir / "test_3dims_3vars_2coords_nogroup_duplicate_dimensions.nc"
+
+    f = nc.Dataset(filename=filepath, mode="w")
+    f = add_to_ds_3dims_3vars_2coords_nogroup_duplicate_dimensions(f, step_values=[9, 10, 11])
+    f.close()
+
+    return filepath
+
+
+@pytest.fixture(scope="function")
+def ds_3dims_3vars_3coords_1group_part1(temp_toy_data_dir) -> Path:
+    filepath = temp_toy_data_dir / "test_3dims_3vars_3coords_1group_part1.nc"
+
+    f = nc.Dataset(filename=filepath, mode="w")
-    f = add_to_ds_3dims_3vars_4coords_1group_with_step_values(f, step_values=[9, 10, 11])
+    f = add_to_ds_3dims_3vars_3coords_1group_with_step_values(f, step_values=[9, 10, 11])
     f.close()

     return filepath


 @pytest.fixture(scope="function")
-def ds_3dims_3vars_4coords_1group_part2(temp_toy_data_dir):
-    filepath = temp_toy_data_dir / "test_3dims_3vars_4coords_1group_part2.nc"
+def ds_3dims_3vars_3coords_1group_part2(temp_toy_data_dir):
+    filepath = temp_toy_data_dir / "test_3dims_3vars_3coords_1group_part2.nc"

     f = nc.Dataset(filename=filepath, mode="w")
-    f = add_to_ds_3dims_3vars_4coords_1group_with_step_values(f, step_values=[12, 13, 14])
+    f = add_to_ds_3dims_3vars_3coords_1group_with_step_values(f, step_values=[12, 13, 14])
     f.close()

     return filepath


 @pytest.fixture(scope="function")
-def ds_3dims_3vars_4coords_1group_part3(temp_toy_data_dir):
-    filepath = temp_toy_data_dir / "test_3dims_3vars_4coords_1group_part3.nc"
+def ds_3dims_3vars_3coords_1group_part3(temp_toy_data_dir):
+    filepath = temp_toy_data_dir / "test_3dims_3vars_3coords_1group_part3.nc"

     f = nc.Dataset(filename=filepath, mode="w")
-    f = add_to_ds_3dims_3vars_4coords_1group_with_step_values(f, step_values=[6, 7, 8])
+    f = add_to_ds_3dims_3vars_3coords_1group_with_step_values(f, step_values=[6, 7, 8])
     f.close()

     return filepath
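The new duplicate-dimensions fixture builds exactly the case `remove_duplicate_dims` targets: a variable whose dimension tuple repeats a name, which the netCDF data model itself permits. A minimal sketch of such a variable and of detecting the repeat (the file name is hypothetical, and this is an illustration rather than a stitchee API):

```python
# Build a variable shaped like the fixture's ("track", "step", "step") case
# and detect the duplicated dimension name in its dimensions tuple.
import netCDF4 as nc

with nc.Dataset("dup_demo.nc", mode="w", diskless=True) as ds:
    ds.createDimension("step", 3)
    ds.createDimension("track", 7)
    var0 = ds.createVariable("var0", "f4", ("track", "step", "step"), fill_value=-99)

    dims = var0.dimensions  # ("track", "step", "step")
    duplicated = {d for d in dims if dims.count(d) > 1}
    assert duplicated == {"step"}
```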
4 changes: 2 additions & 2 deletions tests/integration/test_history_construction.py
@@ -10,8 +10,8 @@
 def test_construct_and_append_history_for_sample_concatenation(
     temp_toy_data_dir,
     temp_output_dir,
-    ds_3dims_3vars_4coords_1group_part1,
-    ds_3dims_3vars_4coords_1group_part2,
+    ds_3dims_3vars_3coords_1group_part1,
+    ds_3dims_3vars_3coords_1group_part2,
 ):
     output_path = str(temp_output_dir.joinpath("simple_sample_concatenated.nc"))  # type: ignore
     prepared_input_files = prep_input_files(temp_toy_data_dir, temp_output_dir)
26 changes: 24 additions & 2 deletions tests/unit/test_dataset_and_group_handling.py
@@ -5,6 +5,7 @@
 import netCDF4 as nc

 from concatenator.dataset_and_group_handling import (
+    _get_nested_group,
     _is_file_empty,
     validate_workable_files,
 )
@@ -41,8 +42,29 @@ def test_toy_dataset_with_singleton_null_values_is_identified_as_empty(


 def test_dataset_with_values_is_identified_as_not_empty(
-    ds_3dims_3vars_4coords_1group_part1,
+    ds_3dims_3vars_3coords_1group_part1,
 ):
     """Ensure that a dataset with non-null arrays is identified as NOT empty."""
-    with nc.Dataset(ds_3dims_3vars_4coords_1group_part1) as ds:
+    with nc.Dataset(ds_3dims_3vars_3coords_1group_part1) as ds:
         assert _is_file_empty(ds) is False
+
+
+def test_get_nested_group(ds_3dims_3vars_3coords_1group_part1):
+    """Ensure that the retrieved group is correct."""
+    with nc.Dataset(ds_3dims_3vars_3coords_1group_part1) as ds:
+        group_obj = _get_nested_group(ds, "__Group1__level")
+        assert isinstance(group_obj, nc.Group)
+
+
+def test_get_root_group(ds_3dims_3vars_3coords_1group_part1):
+    """Ensure that the root group is returned for a root-level dimension path."""
+    with nc.Dataset(ds_3dims_3vars_3coords_1group_part1) as ds:
+        group_obj = _get_nested_group(ds, "__track")
+        assert group_obj == ds
+
+
+def test_get_root_group_when_no_delimiter_present(ds_3dims_3vars_3coords_1group_part1):
+    """Ensure that the root group is returned when the path has no group delimiter."""
+    with nc.Dataset(ds_3dims_3vars_3coords_1group_part1) as ds:
+        group_obj = _get_nested_group(ds, "track")
+        assert group_obj == ds
29 changes: 29 additions & 0 deletions tests/unit/test_dimension_cleanup.py
@@ -0,0 +1,29 @@
+"""Tests for netCDF dimension clean up operations."""
+
+# pylint: disable=C0116, C0301
+
+import netCDF4 as nc
+
+from concatenator.dimension_cleanup import (
+    get_attributes_minus_fillvalue_and_renamed_coords,
+)
+
+
+def test_get_attributes_minus_fillvalue_and_renamed_coords(ds_3dims_3vars_2coords_nogroup):
+    with nc.Dataset(ds_3dims_3vars_2coords_nogroup, "r+") as ds:
+        attr_contents_dict = get_attributes_minus_fillvalue_and_renamed_coords(
+            original_var_name="var0", new_var_name="new_dim", original_dataset=ds
+        )
+
+        assert attr_contents_dict["coordinates"] == "new_dim track"
+
+
+# TODO: this next test is still failing.
+#   Should go away once using xarray's DataTree instead of flattening group structure.
+# def test_remove_duplicate_dims(ds_3dims_3vars_2coords_nogroup_duplicate_dimensions):
+#     with nc.Dataset(ds_3dims_3vars_2coords_nogroup_duplicate_dimensions, "r+") as ds:
+#         ds_with_replaced_dims = remove_duplicate_dims(ds)
+#
+#         ds_with_replaced_dims["var0"].coordinates = "var0 track track"
+#
+#         assert ds_with_replaced_dims["var0"].coordinates == "var0 track track_1"
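Until that TODO is resolved, the expected end state can still be sketched. Judging from the `track_1` in the commented-out assertion, repeat occurrences of a name are renamed with a numeric suffix; the helper below illustrates that convention and is an assumption, not stitchee's actual code:

```python
# Assumed renaming convention for duplicated dimensions: the second and later
# occurrences of a name gain "_1", "_2", ... suffixes.
def dedupe_dims(dims: tuple[str, ...]) -> tuple[str, ...]:
    seen: dict[str, int] = {}
    renamed = []
    for dim in dims:
        count = seen.get(dim, 0)
        renamed.append(dim if count == 0 else f"{dim}_{count}")
        seen[dim] = count + 1
    return tuple(renamed)


assert dedupe_dims(("track", "step", "step")) == ("track", "step", "step_1")
```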
30 changes: 29 additions & 1 deletion tests/unit/test_file_ops.py
@@ -1,6 +1,10 @@
 from pathlib import Path

-from concatenator.file_ops import add_label_to_path
+import pytest
+
+from concatenator.file_ops import add_label_to_path, validate_input_path, validate_output_path
+
+from .. import data_for_tests_dir


 def test_add_label_to_path():
@@ -10,3 +14,27 @@ def test_add_label_to_path():
     new_path = str((this_module_dir / "tests_file_new-suffix.nc").resolve())

     assert add_label_to_path(origin_path, label="_new-suffix") == new_path
+
+
+def test_validate_bad_output_paths():
+    path_to_file_that_exists = str(
+        data_for_tests_dir / "unit-test-data" / "TEMPO_NO2_L2_V03_20240328T154353Z_S008G01.nc4"
+    )
+
+    with pytest.raises(FileExistsError):
+        validate_output_path(path_to_file_that_exists, overwrite=False)
+
+    with pytest.raises(TypeError):
+        validate_output_path(str(data_for_tests_dir), overwrite=False)
+
+
+def test_validate_bad_non_existent_input_path():
+    path_to_file_that_does_not_exist = str(
+        data_for_tests_dir / "unit-test-data" / "non-existent.nc4"
+    )
+
+    with pytest.raises(TypeError):
+        validate_input_path([path_to_file_that_does_not_exist])
+
+    with pytest.raises(TypeError):
+        validate_input_path([])
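For context on the contract these tests pin down, here is a minimal sketch of validators consistent with the asserted behaviors: `FileExistsError` for an existing output without `overwrite`, and `TypeError` for a directory output, a missing input file, or an empty input list. This is an illustration of the contract, not stitchee's actual implementation.

```python
# Hypothetical validators matching the behaviors asserted in the tests above.
from pathlib import Path


def validate_output_path(output_path: str, overwrite: bool = False) -> None:
    path = Path(output_path)
    if path.is_file() and not overwrite:
        raise FileExistsError(f"{output_path} already exists; pass overwrite=True to replace it.")
    if path.is_dir():
        raise TypeError(f"{output_path} is a directory, not a file path.")


def validate_input_path(input_paths: list[str]) -> None:
    if not input_paths:
        raise TypeError("No input files were provided.")
    for p in input_paths:
        if not Path(p).is_file():
            raise TypeError(f"Input file {p} does not exist.")
```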