Merge branch 'develop' into feature/exposures_crs
# Conflicts:
#	CHANGELOG.md
emanuel-schmid committed Jul 16, 2024
2 parents 1379eac + d67c804 commit 5e5584d
Showing 34 changed files with 9,478 additions and 2,100 deletions.
2 changes: 2 additions & 0 deletions AUTHORS.md
@@ -29,8 +29,10 @@
* Raphael Portmann
* Nicolas Colombi
* Leonie Villiger
* Timo Schmid
* Kam Lam Yeung
* Sarah Hülsen
* Timo Schmid
* Luca Severino
* Samuel Juhel
* Valentin Gebhart
3 changes: 3 additions & 0 deletions CHANGELOG.md
@@ -27,6 +27,7 @@ Code freeze date: YYYY-MM-DD
- climada.exposures.exposures.Exposures.latitude
- climada.exposures.exposures.Exposures.longitude
- climada.exposures.exposures.Exposures.value
- `climada.util.calibrate` module for calibrating impact functions [#692](https://github.com/CLIMADA-project/climada_python/pull/692)

### Changed

@@ -43,12 +44,14 @@ latitude and longitude columns are no longer present there (the corresponding arrays
- Changed module structure: `climada.hazard.Hazard` has been split into the modules `base`, `io` and `plot` [#871](https://github.com/CLIMADA-project/climada_python/pull/871)
- `Impact.from_hdf5` now calls `str` on `event_name` data that is not strings, and issues a warning when doing so [#894](https://github.com/CLIMADA-project/climada_python/pull/894)
- `Impact.write_hdf5` now throws an error if `event_name` does not exclusively contain strings [#894](https://github.com/CLIMADA-project/climada_python/pull/894)
- Split `climada.hazard.trop_cyclone` module into smaller submodules without affecting module usage [#911](https://github.com/CLIMADA-project/climada_python/pull/911)

### Fixed

- Raise an error when a Hazard subselection would produce a fraction matrix with only zeros as entries [#866](https://github.com/CLIMADA-project/climada_python/pull/866)
- Allow downgrading the Python bugfix version to improve environment compatibility [#900](https://github.com/CLIMADA-project/climada_python/pull/900)
- Fix broken links in `CONTRIBUTING.md` [#900](https://github.com/CLIMADA-project/climada_python/pull/900)
- When writing `TCTracks` to NetCDF, only apply compression to `float` or `int` data types. This fixes a downstream issue, see [climada_petals#135](https://github.com/CLIMADA-project/climada_petals/issues/135) [#911](https://github.com/CLIMADA-project/climada_python/pull/911)

### Deprecated

116 changes: 116 additions & 0 deletions climada/hazard/base.py
@@ -27,6 +27,7 @@
from typing import Optional, List
import warnings

import geopandas as gpd
import numpy as np
from pathos.pools import ProcessPool as Pool
from scipy import sparse
@@ -450,6 +451,75 @@ def sanitize_event_ids(self):
LOGGER.debug('Resetting event_id.')
self.event_id = np.arange(1, self.event_id.size + 1)

def local_return_period(self, threshold_intensities=(5., 10., 20.)):
"""Compute local return periods for given hazard intensities. The used method
is fitting the ordered intensitites per centroid to the corresponding cummulated
frequency with a step function.
Parameters
----------
threshold_intensities : np.array
User-specified hazard intensities for which the return period should be calculated
locally (at each centroid). Defaults to (5, 10, 20).

Returns
-------
gdf : gpd.GeoDataFrame
GeoDataFrame containing return periods for given threshold intensities. Each column
corresponds to a threshold_intensity value, each row corresponds to a centroid. Values
in the gdf correspond to the return period for the given centroid and
threshold_intensity value
label : str
GeoDataFrame label, for reporting and plotting
column_label : function
Column-label-generating function, for reporting and plotting
"""
# check frequency unit
if self.frequency_unit in ['1/year', 'annual', '1/y', '1/a']:
rp_unit = 'Years'
elif self.frequency_unit in ['1/month', 'monthly', '1/m']:
rp_unit = 'Months'
elif self.frequency_unit in ['1/week', 'weekly', '1/w']:
rp_unit = 'Weeks'
else:
LOGGER.warning("Hazard's frequency unit %s is not known, "
"years will be used as return period unit.", self.frequency_unit)
rp_unit = 'Years'

# Ensure threshold_intensities is a numpy array
threshold_intensities = np.array(threshold_intensities)

num_cen = self.intensity.shape[1]
return_periods = np.zeros((len(threshold_intensities), num_cen))

# batch_centroids = number of centroids that are handled in parallel:
# batch_centroids = maximal matrix size // number of events
batch_centroids = CONFIG.max_matrix_size.int() // self.intensity.shape[0]
if batch_centroids < 1:
raise ValueError('Increase max_matrix_size configuration parameter to >'
f'{self.intensity.shape[0]}')

# Process the intensities in chunks of centroids
for start_col in range(0, num_cen, batch_centroids):
end_col = min(start_col + batch_centroids, num_cen)
return_periods[:, start_col:end_col] = self._loc_return_period(
threshold_intensities,
self.intensity[:, start_col:end_col].toarray()
)

# create the output GeoDataFrame
gdf = gpd.GeoDataFrame(geometry=self.centroids.gdf['geometry'],
crs=self.centroids.gdf.crs)
col_names = [f'{thresh_inten}' for thresh_inten in threshold_intensities]
gdf[col_names] = return_periods.T

# create label and column_label
label = f'Return Periods ({rp_unit})'
column_label = lambda column_names: [f'Threshold Intensity: {col} {self.units}'
for col in column_names]

return gdf, label, column_label

def get_event_id(self, event_name):
"""Get an event id from its name. Several events might have the same
name.
@@ -614,6 +684,52 @@ def _loc_return_inten(self, return_periods, inten, exc_inten):
inten_sort[:, cen_idx], freq_sort[:, cen_idx],
self.intensity_thres, return_periods)

def _loc_return_period(self, threshold_intensities, inten):
"""Compute local return periods for user-specified threshold intensities
for a subset of hazard centroids
Parameters
----------
threshold_intensities: np.array
User-specified hazard intensities for which the return period should be calculated
locally (at each centroid).
inten: np.array
Subarray of the full hazard intensities corresponding to a subset of the
centroids (rows correspond to events, columns correspond to centroids).

Returns
-------
np.array
(rows correspond to threshold_intensities, columns correspond to centroids)
"""
# Sort the intensities per centroid in descending order and accumulate the frequencies
sort_pos = np.argsort(inten, axis=0)[::-1, :]
inten_sort = inten[sort_pos, np.arange(inten.shape[1])]
freq_sort = self.frequency[sort_pos]
freq_sort = np.cumsum(freq_sort, axis=0)
return_periods = np.zeros((len(threshold_intensities), inten.shape[1]))

for cen_idx in range(inten.shape[1]):
sorted_inten_cen = inten_sort[:, cen_idx]
cum_freq_cen = freq_sort[:, cen_idx]

for i, intensity in enumerate(threshold_intensities):
# Count the events whose intensity falls below the threshold (ascending search on the reversed array)
exceedance_index = np.searchsorted(sorted_inten_cen[::-1], intensity, side='left')

# Cumulative frequency of all events reaching the threshold intensity
if exceedance_index < len(cum_freq_cen):
exceedance_probability = cum_freq_cen[-exceedance_index - 1]
else:
exceedance_probability = 0  # no event reaches the threshold

# The return period is the reciprocal of the exceedance frequency (NaN if never exceeded)
if exceedance_probability > 0:
return_periods[i, cen_idx] = 1 / exceedance_probability
else:
return_periods[i, cen_idx] = np.nan
return return_periods

def _check_events(self):
"""Check that all attributes but centroids contain consistent data.
Put default date, event_name and orig if not provided. Check not
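For context, a minimal usage sketch of the new Hazard.local_return_period method (the file name and threshold values below are illustrative, not part of this commit):

import numpy as np
from climada.hazard import Hazard

# any Hazard instance works; 'my_hazard.h5' is a hypothetical file
haz = Hazard.from_hdf5('my_hazard.h5')

# local return periods for three illustrative threshold intensities
gdf, label, column_label = haz.local_return_period(np.array([10., 20., 30.]))

print(label)                          # e.g. 'Return Periods (Years)'
print(column_label(gdf.columns[1:]))  # one label per threshold column
print(gdf.head())                     # geometry plus one return-period column per threshold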
20 changes: 19 additions & 1 deletion climada/hazard/tc_tracks.py
Original file line number Diff line number Diff line change
@@ -1383,7 +1383,10 @@ def write_hdf5(self, file_name, complevel=5):
df_attrs = pd.DataFrame([t.attrs for t in data], index=ds_combined["storm"].to_series())
ds_combined = xr.merge([ds_combined, df_attrs.to_xarray()])

encoding = {v: dict(zlib=True, complevel=complevel) for v in ds_combined.data_vars}
encoding = {
v: dict(zlib=_zlib_from_dataarray(ds_combined[v]), complevel=complevel)
for v in ds_combined.data_vars
}
LOGGER.info('Writing %d tracks to %s', self.size, file_name)
ds_combined.to_netcdf(file_name, encoding=encoding)

@@ -2435,3 +2438,18 @@ def set_category(max_sus_wind, wind_unit='kn', saffir_scale=None):
return (np.argwhere(max_wind < saffir_scale) - 1)[0][0]
except IndexError:
return -1

def _zlib_from_dataarray(data_var: xr.DataArray) -> bool:
"""Return true if data_var is of numerical type, return False otherwise
Parameters
----------
data_var : xr.DataArray
Returns
-------
bool
"""
return np.issubdtype(data_var.dtype, float) or np.issubdtype(data_var.dtype, int)
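To illustrate the fix in isolation, a standalone sketch of the dtype-gated encoding (the dataset, variable names, and file name are invented for this example):

import numpy as np
import xarray as xr

ds = xr.Dataset({
    'max_sustained_wind': ('time', np.array([45., 60., 80.])),  # float: gets zlib compression
    'basin': ('time', np.array(['NA', 'NA', 'SP'])),            # str: compression skipped
})

# the same gate as _zlib_from_dataarray: compress only numerical variables
encoding = {
    name: dict(zlib=np.issubdtype(var.dtype, float) or np.issubdtype(var.dtype, int),
               complevel=5)
    for name, var in ds.data_vars.items()
}
ds.to_netcdf('tracks_example.nc', encoding=encoding)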
19 changes: 19 additions & 0 deletions climada/hazard/test/test_base.py
@@ -1022,6 +1022,25 @@ def test_ref_all_pass(self):
self.assertAlmostEqual(inten_stats[1][66], 70.608592953031405)
self.assertAlmostEqual(inten_stats[3][33], 88.510983305123631)
self.assertAlmostEqual(inten_stats[2][99], 79.717518054203623)

def test_local_return_period(self):
"""Compare local return periods against reference."""
haz = dummy_hazard()
haz.intensity = sparse.csr_matrix([
[1., 5., 1.],
[2., 2., 0.]
])
haz.frequency = np.full(4, 1.)
threshold_intensities = np.array([1., 2., 3.])
return_stats, _, _ = haz.local_return_period(threshold_intensities)
np.testing.assert_allclose(
return_stats[return_stats.columns[1:]].values.T,
np.array([
[0.5, 0.5, 1.],
[1., 0.5, np.nan],
[np.nan, 1., np.nan]
])
)


class TestYearset(unittest.TestCase):
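As a sanity check on test_local_return_period, the expected values can be derived by hand: with unit frequencies, the exceedance frequency at a threshold is simply the number of events reaching it, and the return period is its reciprocal. For the first centroid, with intensities [1., 2.]:

threshold 1.: two events reach it  -> exceedance frequency 2 -> return period 0.5
threshold 2.: one event reaches it -> exceedance frequency 1 -> return period 1.0
threshold 3.: no event reaches it  -> exceedance frequency 0 -> return period NaN

This matches the first column of the reference array above.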
