Merge branch 'develop' into feature/exposures_crs
# Conflicts:
#	CHANGELOG.md
#	climada/util/coordinates.py
emanuel-schmid committed Aug 23, 2024
2 parents 5e5584d + b0f3a14 commit a4df7cf
Showing 25 changed files with 1,034 additions and 247 deletions.
63 changes: 61 additions & 2 deletions CHANGELOG.md
@@ -12,6 +12,65 @@ Code freeze date: YYYY-MM-DD

### Added

- `climada.util.interpolation` module for inter- and extrapolation util functions used in local exceedance intensity and return period functions [#930](https://github.com/CLIMADA-project/climada_python/pull/930)
- Method `Hazard.check_matrices` for bringing the stored CSR matrices into "canonical format" [#893](https://github.com/CLIMADA-project/climada_python/pull/893)
- Generic s-shaped impact function via `ImpactFunc.from_poly_s_shape` [#878](https://github.com/CLIMADA-project/climada_python/pull/878)
- climada.hazard.centroids.centr.Centroids.get_area_pixel
- climada.hazard.centroids.centr.Centroids.get_dist_coast
- climada.hazard.centroids.centr.Centroids.get_elevation
- climada.hazard.centroids.centr.Centroids.get_meta
- climada.hazard.centroids.centr.Centroids.get_pixel_shapes
- climada.hazard.centroids.centr.Centroids.to_crs
- climada.hazard.centroids.centr.Centroids.to_default_crs
- climada.hazard.centroids.centr.Centroids.write_csv
- climada.hazard.centroids.centr.Centroids.write_excel
- climada.hazard.local_return_period [#898](https://github.com/CLIMADA-project/climada_python/pull/898)
- climada.util.plot.subplots_from_gdf [#898](https://github.com/CLIMADA-project/climada_python/pull/898)

### Changed

- In `climada.util.plot.geo_im_from_array`, NaNs are plotted in gray while cells with no centroid are not plotted [#929](https://github.com/CLIMADA-project/climada_python/pull/929)
- Renamed `climada.util.plot.subplots_from_gdf` to `climada.util.plot.plot_from_gdf` [#929](https://github.com/CLIMADA-project/climada_python/pull/929)

### Fixed

### Deprecated

- climada.entity.exposures.Exposures.set_lat_lon
- climada.entity.exposures.Exposures.set_geometry_points

### Removed

## 5.0.0

Release date: 2024-07-19

### Dependency Changes

Added:

- `bayesian-optimization`
- `seaborn` >=0.13

Updated:

- `bottleneck` >=1.3 → >=1.4
- `cartopy` >=0.22 → >=0.23
- `contextily` >=1.5 → >=1.6
- `dask` >=2024.1,<2024.3 → >=2024.2,<2024.3
- `matplotlib-base` >=3.8 → >=3.9
- `numba` >=0.59 → >=0.60
- `numexpr` >=2.9 → >=2.10
- `pint` >=0.23 → >=0.24
- `pycountry` >=22.3 → >=24.6
- `requests` >=2.31 → >=2.32
- `salib` >=1.4 → >=1.5
- `scikit-learn` >=1.4 → >=1.5
- `scipy` >=1.12 → >=1.13
- `xarray` >=2024.2 → >=2024.6

### Added

- GitHub actions workflow for CLIMADA Petals compatibility tests [#855](https://github.com/CLIMADA-project/climada_python/pull/855)
- Generic s-shaped impact function via `ImpactFunc.from_poly_s_shape` [#878](https://github.com/CLIMADA-project/climada_python/pull/878)
- climada.hazard.centroids.centr.Centroids.get_area_pixel
@@ -42,6 +101,7 @@ latitude and longitude column are no longer present there (the according arrays
- Improved error messages produced by `ImpactCalc.impact()` in case impact function in the exposures is not found in impf_set [#863](https://github.com/CLIMADA-project/climada_python/pull/863)
- Update the Holland et al. 2010 TC windfield model and introduce `model_kwargs` parameter to adjust model parameters [#846](https://github.com/CLIMADA-project/climada_python/pull/846)
- Changed module structure: `climada.hazard.Hazard` has been split into the modules `base`, `io` and `plot` [#871](https://github.com/CLIMADA-project/climada_python/pull/871)
- Ensure `csr_matrix` stored in `climada.hazard.Hazard` have consistent data format and store no explicit zeros when initializing `ImpactCalc` [#893](https://github.com/CLIMADA-project/climada_python/pull/893)
- `Impact.from_hdf5` now calls `str` on `event_name` data that is not strings, and issues a warning in that case [#894](https://github.com/CLIMADA-project/climada_python/pull/894)
- `Impact.write_hdf5` now throws an error if `event_name` does not contain strings exclusively [#894](https://github.com/CLIMADA-project/climada_python/pull/894)
- Split `climada.hazard.trop_cyclone` module into smaller submodules without affecting module usage [#911](https://github.com/CLIMADA-project/climada_python/pull/911)
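The `event_name` handling described in the `Impact.from_hdf5`/`Impact.write_hdf5` entries above can be illustrated with a hypothetical stdlib-only helper (this is a sketch of the described behavior, not the actual CLIMADA implementation):

```python
import warnings


def coerce_event_names(event_names):
    """Return all event names as strings, warning when conversion was needed."""
    if all(isinstance(name, str) for name in event_names):
        return list(event_names)
    warnings.warn("Not all event names are strings; converting with str()")
    return [str(name) for name in event_names]


names = coerce_event_names([2000, "storm-a"])
assert names == ["2000", "storm-a"]
```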
@@ -62,8 +122,7 @@ latitude and longitude column are no longer present there (the according arrays
- climada.hazard.centroids.centr.Centroids.empty_geometry_points
- climada.hazard.centroids.centr.Centroids.set_meta_to_lat_lon
- climada.hazard.centroids.centr.Centroids.set_lat_lon_to_meta
- climada.entity.exposures.Exposures.set_lat_lon
- climada.entity.exposures.Exposures.set_geometry_points
- `scheduler` parameter in `climada.util.coordinates.set_df_geometry_points`, as dask is not used anymore, leaving all calculation to shapely [#912](https://github.com/CLIMADA-project/climada_python/pull/912)

### Removed

2 changes: 1 addition & 1 deletion climada/_version.py
@@ -1 +1 @@
__version__ = '4.1.2-dev'
__version__ = '5.0.1-dev'
4 changes: 4 additions & 0 deletions climada/engine/impact_calc.py
@@ -47,6 +47,8 @@ def __init__(self,
The dimension of the imp_mat variable must be compatible with the
exposures and hazard objects.
This will call :py:meth:`climada.hazard.base.Hazard.check_matrices`.
Parameters
----------
exposures : climada.entity.Exposures
@@ -61,6 +63,8 @@ def __init__(self,
self.exposures = exposures
self.impfset = impfset
self.hazard = hazard
self.hazard.check_matrices()

# exposures index to use for matrix reconstruction
self._orig_exp_idx = np.arange(self.exposures.gdf.shape[0])

10 changes: 10 additions & 0 deletions climada/engine/test/test_impact_calc.py
@@ -70,6 +70,16 @@ def test_init(self):
np.testing.assert_array_equal(HAZ.event_id, icalc.hazard.event_id)
np.testing.assert_array_equal(HAZ.event_name, icalc.hazard.event_name)

# Test check matrices
hazard = deepcopy(HAZ)
hazard.intensity[0, hazard.intensity.indices[0]] = 0
hazard.fraction = sparse.csr_matrix(np.ones((1, 1)))
with self.assertRaisesRegex(
ValueError, "Intensity and fraction matrices must have the same shape"
):
ImpactCalc(ENT.exposures, ENT.impact_funcs, hazard)
self.assertEqual(hazard.intensity.nnz, HAZ.intensity.nnz - 1) # was pruned

def test_metrics(self):
"""Test methods to get impact metrics"""
mat = sparse.csr_matrix(np.array(
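The pruning asserted at the end of this test (`# was pruned`) rests on a scipy behavior worth spelling out: assigning zero to a stored entry of a `csr_matrix` keeps the entry stored explicitly, and only `eliminate_zeros` removes it. A minimal sketch using scipy directly, without any CLIMADA objects:

```python
import numpy as np
from scipy import sparse

m = sparse.csr_matrix(np.array([[1.0, 2.0], [0.0, 3.0]]))
assert m.nnz == 3  # three stored (nonzero) entries

# Zeroing a stored entry does NOT remove it from storage:
m[0, 0] = 0.0
assert m.nnz == 3  # the zero is still stored explicitly

# Pruning removes explicit zeros, as Hazard.check_matrices does:
m.eliminate_zeros()
assert m.nnz == 2
```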
42 changes: 42 additions & 0 deletions climada/hazard/base.py
@@ -50,6 +50,21 @@ class Hazard(HazardIO, HazardPlot):
Contains events of some hazard type defined at centroids. Loads from
files with format defined in FILE_EXT.
Attention
---------
This class uses instances of
`scipy.sparse.csr_matrix
<https://docs.scipy.org/doc/scipy/reference/generated/scipy.sparse.csr_matrix.html>`_
to store :py:attr:`intensity` and :py:attr:`fraction`. These data types come with
their own pitfalls. Depending on how the objects are instantiated and modified,
a matrix might end up in a "non-canonical" state. In this state, its ``.data``
attribute does not necessarily represent the values apparent in the final matrix.
In particular, a "non-canonical" matrix may store "duplicates", i.e. multiple values
that map to the same matrix position. This is supported, and the default behavior is
to sum up these values. To avoid any inconsistencies, call :py:meth:`check_matrices`
before accessing the ``data`` attribute of either matrix. This will explicitly sum
all values at the same matrix position and eliminate explicit zeros.
Attributes
----------
haz_type : str
@@ -192,6 +207,33 @@ def __init__(self,
if self.pool:
LOGGER.info('Using %s CPUs.', self.pool.ncpus)

def check_matrices(self):
"""Ensure that matrices are consistently shaped and stored
It is good practice to call this method before accessing the ``data`` attribute
of either :py:attr:`intensity` or :py:attr:`fraction`.
See Also
--------
:py:func:`climada.util.checker.prune_csr_matrix`
Todo
-----
* Check consistency with centroids
Raises
------
ValueError
If matrices are ill-formed or ill-shaped in relation to each other
"""
u_check.prune_csr_matrix(self.intensity)
u_check.prune_csr_matrix(self.fraction)
if self.fraction.nnz > 0:
if self.intensity.shape != self.fraction.shape:
raise ValueError(
"Intensity and fraction matrices must have the same shape"
)

@classmethod
def get_default(cls, attribute):
"""Get the Hazard type default for a given attribute.
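The "duplicates" pitfall described in the new Attention note can be reproduced with scipy alone. A sketch (plain scipy, no CLIMADA types) of a non-canonical matrix whose `.data` attribute does not match the apparent matrix values:

```python
from scipy import sparse

# Build a CSR matrix directly from (data, indices, indptr) with two entries
# that map to the same position (row 0, column 0):
m = sparse.csr_matrix(([1.0, 2.0], [0, 0], [0, 2, 2]), shape=(2, 2))

assert not m.has_canonical_format
assert m.nnz == 2                # two stored values...
assert m.toarray()[0, 0] == 3.0  # ...one apparent value: duplicates sum up

# Essentially what Hazard.check_matrices does to resolve this:
m.sum_duplicates()
assert m.has_canonical_format
assert m.nnz == 1
assert m.data.tolist() == [3.0]
```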
2 changes: 1 addition & 1 deletion climada/hazard/io.py
@@ -1008,7 +1008,7 @@ def write_hdf5(self, file_name, todense=False):
if var_name == 'centroids':
# Centroids have their own write_hdf5 method,
# which is invoked at the end of this method (s.b.)
pass
continue
elif isinstance(var_val, sparse.csr_matrix):
if todense:
hf_data.create_dataset(var_name, data=var_val.toarray())
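The one-word change above (`pass` → `continue`) is easy to gloss over: `pass` only terminates the `if` branch, so any code later in the loop body still executes for `'centroids'`, while `continue` jumps straight to the next iteration. A hypothetical minimal loop (not the actual `write_hdf5` code) showing the difference:

```python
def write_vars_buggy(variables):
    """Mimics the old control flow: `pass` only ends the if/elif chain,
    so code later in the loop body still runs for 'centroids'."""
    written = []
    for var_name, var_val in variables.items():
        if var_name == "centroids":
            pass  # intended to skip, but execution falls through to the append
        elif isinstance(var_val, list):
            var_val = sorted(var_val)
        written.append(var_name)  # still reached for 'centroids' -- the bug
    return written


def write_vars_fixed(variables):
    """With `continue`, 'centroids' never reaches the rest of the loop body."""
    written = []
    for var_name, var_val in variables.items():
        if var_name == "centroids":
            continue
        elif isinstance(var_val, list):
            var_val = sorted(var_val)
        written.append(var_name)
    return written


data = {"centroids": None, "intensity": [3, 1, 2]}
assert write_vars_buggy(data) == ["centroids", "intensity"]
assert write_vars_fixed(data) == ["intensity"]
```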
15 changes: 7 additions & 8 deletions climada/hazard/tc_tracks.py
@@ -331,11 +331,11 @@ def from_ibtracs_netcdf(cls, provider=None, rescale_windspeeds=True, storm_id=No
When using data from IBTrACS, make sure to be familiar with the scope and limitations of
IBTrACS, e.g. by reading the official documentation
(https://www.ncdc.noaa.gov/ibtracs/pdf/IBTrACS_version4_Technical_Details.pdf). Reading the
CLIMADA documentation can't replace a thorough understanding of the underlying data. This
function only provides a (hopefully useful) interface for the data input, but cannot
provide any guidance or make recommendations about if and how to use IBTrACS data for your
particular project.
(https://www.ncei.noaa.gov/sites/default/files/2021-07/IBTrACS_version4_Technical_Details.pdf).
Reading the CLIMADA documentation can't replace a thorough understanding of the underlying
data. This function only provides a (hopefully useful) interface for the data input, but
cannot provide any guidance or make recommendations about if and how to use IBTrACS data
for your particular project.
Resulting tracks are required to have both pressure and wind speed information at all time
steps. Therefore, all track positions where one of wind speed or pressure are missing are
@@ -374,8 +374,8 @@ def from_ibtracs_netcdf(cls, provider=None, rescale_windspeeds=True, storm_id=No
rescale_windspeeds : bool, optional
If True, all wind speeds are linearly rescaled to 1-minute sustained winds.
Note however that the IBTrACS documentation (Section 5.2,
https://www.ncdc.noaa.gov/ibtracs/pdf/IBTrACS_version4_Technical_Details.pdf) includes
a warning about this kind of conversion: "While a multiplicative factor can describe
https://www.ncei.noaa.gov/sites/default/files/2021-07/IBTrACS_version4_Technical_Details.pdf)
includes a warning about this kind of conversion: "While a multiplicative factor can describe
the numerical differences, there are procedural and observational differences between
agencies that can change through time, which confounds the simple multiplicative
factor." Default: True
@@ -1509,7 +1509,6 @@ def to_geodataframe(self, as_points=False, split_lines_antimeridian=True):
return gdf

@staticmethod
@numba.jit(forceobj=True)
def _one_interp_data(track, time_step_h, land_geom=None):
"""Interpolate values of one track.
38 changes: 32 additions & 6 deletions climada/hazard/test/test_base.py
@@ -124,18 +124,18 @@ def test_check_wrongFreq_fail(self):
def test_check_wrongInten_fail(self):
"""Wrong hazard definition"""
self.hazard.intensity = sparse.csr_matrix([[1, 2], [1, 2]])

with self.assertRaises(ValueError) as cm:
with self.assertRaisesRegex(
ValueError, "Invalid Hazard.intensity row size: 3 != 2."
):
self.hazard.check()
self.assertIn('Invalid Hazard.intensity row size: 3 != 2.', str(cm.exception))

def test_check_wrongFrac_fail(self):
"""Wrong hazard definition"""
self.hazard.fraction = sparse.csr_matrix([[1], [1], [1]])

with self.assertRaises(ValueError) as cm:
with self.assertRaisesRegex(
ValueError, "Invalid Hazard.fraction column size: 2 != 1."
):
self.hazard.check()
self.assertIn('Invalid Hazard.fraction column size: 2 != 1.', str(cm.exception))

def test_check_wrongEvName_fail(self):
"""Wrong hazard definition"""
@@ -212,6 +212,32 @@ def test_get_date_strings_pass(self):
self.assertEqual(haz.get_event_date()[560],
u_dt.date_to_str(haz.date[560]))

def test_check_matrices(self):
"""Test the check_matrices method"""
hazard = Hazard("TC")
hazard.fraction = sparse.csr_matrix(np.zeros((2, 2)))
hazard.check_matrices() # No error, fraction.nnz = 0
hazard.fraction = sparse.csr_matrix(np.ones((2, 2)))
with self.assertRaisesRegex(
ValueError, "Intensity and fraction matrices must have the same shape"
):
hazard.check_matrices()
hazard.intensity = sparse.csr_matrix(np.ones((2, 3)))
with self.assertRaisesRegex(
ValueError, "Intensity and fraction matrices must have the same shape"
):
hazard.check_matrices()

# Check that matrices are pruned
hazard.intensity[:] = 0
hazard.fraction = sparse.csr_matrix(([0], [0], [0, 1, 1]), shape=(2, 3))
hazard.check_matrices()
for attr in ("intensity", "fraction"):
with self.subTest(matrix=attr):
matrix = getattr(hazard, attr)
self.assertEqual(matrix.nnz, 0)
self.assertTrue(matrix.has_canonical_format)

class TestRemoveDupl(unittest.TestCase):
"""Test remove_duplicates method."""

56 changes: 0 additions & 56 deletions climada/test/test_multi_processing.py

This file was deleted.

36 changes: 35 additions & 1 deletion climada/util/checker.py
@@ -23,7 +23,8 @@
'size',
'shape',
'array_optional',
'array_default'
'array_default',
'prune_csr_matrix',
]

import logging
@@ -180,3 +181,36 @@ def array_default(exp_len, var, var_name, def_val):
else:
size(exp_len, var, var_name)
return res

def prune_csr_matrix(matrix: sparse.csr_matrix):
"""Ensure that the matrix is in the "canonical format".
Depending on how the matrix was instantiated or modified, it might be in a
"non-canonical" state. This only relates to its internal storage. In this state,
multiple values might be stored for a single "apparent" value in the matrix.
Also, the matrix might store zeros explicitly, which could be removed.
Calling this function brings the matrix into the "canonical format", if possible.
See Also
--------
`csr_matrix.has_canonical_format
<https://docs.scipy.org/doc/scipy/reference/generated/scipy.sparse.csr_matrix.has_canonical_format.html#scipy.sparse.csr_matrix.has_canonical_format>`_
Parameters
----------
matrix : csr_matrix
The matrix to check. It will be modified *inplace*. Its ``.data`` attribute
might change, but apparent matrix values will stay the same.
Raises
------
ValueError
If
`csr_matrix.check_format
<https://docs.scipy.org/doc/scipy/reference/generated/scipy.sparse.csr_matrix.check_format.html#scipy.sparse.csr_matrix.check_format>`_
fails
"""
matrix.check_format()
matrix.eliminate_zeros()
matrix.sum_duplicates()
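The three calls above compose into a single clean-up pass: validate, drop explicit zeros, merge duplicates. A usage sketch with a matrix that is valid but messy; the helper body is copied locally so the snippet runs standalone:

```python
from scipy import sparse


def prune_csr_matrix(matrix: sparse.csr_matrix):
    """Local copy of the helper above, for illustration only."""
    matrix.check_format()       # raises ValueError on an ill-formed matrix
    matrix.eliminate_zeros()    # drop explicitly stored zeros
    matrix.sum_duplicates()     # merge entries at identical positions


# Row 0 stores three entries: an explicit zero at (0, 0), another value
# at (0, 0), and a regular value at (0, 1). Row 1 stores nothing.
matrix = sparse.csr_matrix(([0.0, 1.0, 1.0], [0, 0, 1], [0, 3, 3]), shape=(2, 2))
assert matrix.nnz == 3

prune_csr_matrix(matrix)

assert matrix.nnz == 2  # explicit zero dropped
assert matrix.has_canonical_format
assert matrix.toarray().tolist() == [[1.0, 1.0], [0.0, 0.0]]
```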