Merge branch 'develop' into feature/exposures_crs
# Conflicts:
#	CHANGELOG.md
emanuel-schmid committed Jul 16, 2024
2 parents 1379eac + d67c804 commit 5e5584d
Showing 34 changed files with 9,478 additions and 2,100 deletions.
2 changes: 2 additions & 0 deletions AUTHORS.md
@@ -29,8 +29,10 @@
* Raphael Portmann
* Nicolas Colombi
* Leonie Villiger
* Timo Schmid
* Kam Lam Yeung
* Sarah Hülsen
* Timo Schmid
* Luca Severino
* Samuel Juhel
* Valentin Gebhart
3 changes: 3 additions & 0 deletions CHANGELOG.md
@@ -27,6 +27,7 @@ Code freeze date: YYYY-MM-DD
- climada.exposures.exposures.Exposures.latitude
- climada.exposures.exposures.Exposures.longitude
- climada.exposures.exposures.Exposures.value
- `climada.util.calibrate` module for calibrating impact functions [#692](https://github.com/CLIMADA-project/climada_python/pull/692)

### Changed

@@ -43,12 +44,14 @@ latitude and longitude columns are no longer present there (the corresponding arrays
- Changed module structure: `climada.hazard.Hazard` has been split into the modules `base`, `io` and `plot` [#871](https://github.com/CLIMADA-project/climada_python/pull/871)
- `Impact.from_hdf5` now calls `str` on `event_name` data that is not strings, and issues a warning when doing so [#894](https://github.com/CLIMADA-project/climada_python/pull/894)
- `Impact.write_hdf5` now throws an error if `event_name` does not exclusively contain strings [#894](https://github.com/CLIMADA-project/climada_python/pull/894)
- Split `climada.hazard.trop_cyclone` module into smaller submodules without affecting module usage [#911](https://github.com/CLIMADA-project/climada_python/pull/911)

### Fixed

- Raise an error when a Hazard subselection would produce a fraction matrix with only zeros as entries [#866](https://github.com/CLIMADA-project/climada_python/pull/866)
- Allow downgrading the Python bugfix version to improve environment compatibility [#900](https://github.com/CLIMADA-project/climada_python/pull/900)
- Fix broken links in `CONTRIBUTING.md` [#900](https://github.com/CLIMADA-project/climada_python/pull/900)
- When writing `TCTracks` to NetCDF, only apply compression to `float` or `int` data types. This fixes a downstream issue, see [climada_petals#135](https://github.com/CLIMADA-project/climada_petals/issues/135) [#911](https://github.com/CLIMADA-project/climada_python/pull/911)

### Deprecated

116 changes: 116 additions & 0 deletions climada/hazard/base.py
@@ -27,6 +27,7 @@
from typing import Optional, List
import warnings

import geopandas as gpd
import numpy as np
from pathos.pools import ProcessPool as Pool
from scipy import sparse
@@ -450,6 +451,75 @@ def sanitize_event_ids(self):
LOGGER.debug('Resetting event_id.')
self.event_id = np.arange(1, self.event_id.size + 1)

def local_return_period(self, threshold_intensities=(5., 10., 20.)):
"""Compute local return periods for given hazard intensities. The used method
is fitting the ordered intensitites per centroid to the corresponding cummulated
frequency with a step function.
Parameters
----------
threshold_intensities : np.array
User-specified hazard intensities for which the return period should be calculated
locally (at each centroid). Defaults to (5, 10, 20).

Returns
-------
gdf : gpd.GeoDataFrame
GeoDataFrame containing return periods for given threshold intensities. Each column
corresponds to a threshold_intensity value, each row corresponds to a centroid. Values
in the gdf correspond to the return period for the given centroid and
threshold_intensity value
label : str
GeoDataFrame label, for reporting and plotting
column_label : function
Column-label-generating function, for reporting and plotting
"""
# check frequency unit
if self.frequency_unit in ['1/year', 'annual', '1/y', '1/a']:
rp_unit = 'Years'
elif self.frequency_unit in ['1/month', 'monthly', '1/m']:
rp_unit = 'Months'
elif self.frequency_unit in ['1/week', 'weekly', '1/w']:
rp_unit = 'Weeks'
else:
LOGGER.warning("Hazard's frequency unit %s is not known, "
"years will be used as return period unit.", self.frequency_unit)
rp_unit = 'Years'

# Ensure threshold_intensities is a numpy array
threshold_intensities = np.array(threshold_intensities)

num_cen = self.intensity.shape[1]
return_periods = np.zeros((len(threshold_intensities), num_cen))

# batch_centroids = number of centroids that are handled in parallel:
# batch_centroids = maximal matrix size // number of events
batch_centroids = CONFIG.max_matrix_size.int() // self.intensity.shape[0]
if batch_centroids < 1:
raise ValueError('Increase max_matrix_size configuration parameter to >'
f'{self.intensity.shape[0]}')

# Process the intensities in chunks of centroids
for start_col in range(0, num_cen, batch_centroids):
end_col = min(start_col + batch_centroids, num_cen)
return_periods[:, start_col:end_col] = self._loc_return_period(
threshold_intensities,
self.intensity[:, start_col:end_col].toarray()
)

# create the output GeoDataFrame
gdf = gpd.GeoDataFrame(geometry=self.centroids.gdf['geometry'],
crs=self.centroids.gdf.crs)
col_names = [f'{thresh_inten}' for thresh_inten in threshold_intensities]
gdf[col_names] = return_periods.T

# create label and column_label
label = f'Return Periods ({rp_unit})'
column_label = lambda column_names: [f'Threshold Intensity: {col} {self.units}'
for col in column_names]

return gdf, label, column_label

def get_event_id(self, event_name):
"""Get an event id from its name. Several events might have the same
name.
@@ -614,6 +684,52 @@ def _loc_return_inten(self, return_periods, inten, exc_inten):
inten_sort[:, cen_idx], freq_sort[:, cen_idx],
self.intensity_thres, return_periods)

def _loc_return_period(self, threshold_intensities, inten):
"""Compute local return periods for user-specified threshold intensities
for a subset of hazard centroids
Parameters
----------
threshold_intensities: np.array
User-specified hazard intensities for which the return period should be calculated
locally (at each centroid).
inten: np.array
Subarray of the full hazard intensities corresponding to a subset of the
centroids (rows correspond to events, columns correspond to centroids).

Returns
-------
np.array
(rows correspond to threshold_intensities, columns correspond to centroids)
"""
# Sort the intensities per centroid in descending order and accumulate the frequencies
sort_pos = np.argsort(inten, axis=0)[::-1, :]
inten_sort = inten[sort_pos, np.arange(inten.shape[1])]
freq_sort = self.frequency[sort_pos]
freq_sort = np.cumsum(freq_sort, axis=0)
return_periods = np.zeros((len(threshold_intensities), inten.shape[1]))

for cen_idx in range(inten.shape[1]):
sorted_inten_cen = inten_sort[:, cen_idx]
cum_freq_cen = freq_sort[:, cen_idx]

for i, intensity in enumerate(threshold_intensities):
# Count the events whose intensity falls below the threshold (ascending search on the reversed array)
exceedance_index = np.searchsorted(sorted_inten_cen[::-1], intensity, side='left')

# Cumulative frequency of all events reaching the threshold intensity
if exceedance_index < len(cum_freq_cen):
exceedance_probability = cum_freq_cen[-exceedance_index - 1]
else:
exceedance_probability = 0  # no event reaches the threshold

# The return period is the reciprocal of the exceedance frequency (NaN if never exceeded)
if exceedance_probability > 0:
return_periods[i, cen_idx] = 1 / exceedance_probability
else:
return_periods[i, cen_idx] = np.nan
return return_periods

def _check_events(self):
"""Check that all attributes but centroids contain consistent data.
Put default date, event_name and orig if not provided. Check not
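For context, a minimal usage sketch of the new Hazard.local_return_period method (the file name and threshold values below are illustrative, not part of this commit):

import numpy as np
from climada.hazard import Hazard

# any Hazard instance works; 'my_hazard.h5' is a hypothetical file
haz = Hazard.from_hdf5('my_hazard.h5')

# local return periods for three illustrative threshold intensities
gdf, label, column_label = haz.local_return_period(np.array([10., 20., 30.]))

print(label)                          # e.g. 'Return Periods (Years)'
print(column_label(gdf.columns[1:]))  # one label per threshold column
print(gdf.head())                     # geometry plus one return-period column per threshold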
20 changes: 19 additions & 1 deletion climada/hazard/tc_tracks.py
Original file line number Diff line number Diff line change
@@ -1383,7 +1383,10 @@ def write_hdf5(self, file_name, complevel=5):
df_attrs = pd.DataFrame([t.attrs for t in data], index=ds_combined["storm"].to_series())
ds_combined = xr.merge([ds_combined, df_attrs.to_xarray()])

encoding = {v: dict(zlib=True, complevel=complevel) for v in ds_combined.data_vars}
encoding = {
v: dict(zlib=_zlib_from_dataarray(ds_combined[v]), complevel=complevel)
for v in ds_combined.data_vars
}
LOGGER.info('Writing %d tracks to %s', self.size, file_name)
ds_combined.to_netcdf(file_name, encoding=encoding)

@@ -2435,3 +2438,18 @@ def set_category(max_sus_wind, wind_unit='kn', saffir_scale=None):
return (np.argwhere(max_wind < saffir_scale) - 1)[0][0]
except IndexError:
return -1

def _zlib_from_dataarray(data_var: xr.DataArray) -> bool:
"""Return true if data_var is of numerical type, return False otherwise
Parameters
----------
data_var : xr.DataArray
Returns
-------
bool
"""
return np.issubdtype(data_var.dtype, float) or np.issubdtype(data_var.dtype, int)
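To illustrate the fix in isolation, a standalone sketch of the dtype-gated encoding (the dataset, variable names, and file name are invented for this example):

import numpy as np
import xarray as xr

ds = xr.Dataset({
    'max_sustained_wind': ('time', np.array([45., 60., 80.])),  # float: gets zlib compression
    'basin': ('time', np.array(['NA', 'NA', 'SP'])),            # str: compression skipped
})

# the same gate as _zlib_from_dataarray: compress only numerical variables
encoding = {
    name: dict(zlib=np.issubdtype(var.dtype, float) or np.issubdtype(var.dtype, int),
               complevel=5)
    for name, var in ds.data_vars.items()
}
ds.to_netcdf('tracks_example.nc', encoding=encoding)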
19 changes: 19 additions & 0 deletions climada/hazard/test/test_base.py
@@ -1022,6 +1022,25 @@ def test_ref_all_pass(self):
self.assertAlmostEqual(inten_stats[1][66], 70.608592953031405)
self.assertAlmostEqual(inten_stats[3][33], 88.510983305123631)
self.assertAlmostEqual(inten_stats[2][99], 79.717518054203623)

def test_local_return_period(self):
"""Compare local return periods against reference."""
haz = dummy_hazard()
haz.intensity = sparse.csr_matrix([
[1., 5., 1.],
[2., 2., 0.]
])
haz.frequency = np.full(4, 1.)
threshold_intensities = np.array([1., 2., 3.])
return_stats, _, _ = haz.local_return_period(threshold_intensities)
np.testing.assert_allclose(
return_stats[return_stats.columns[1:]].values.T,
np.array([
[0.5, 0.5, 1.],
[1., 0.5, np.nan],
[np.nan, 1., np.nan]
])
)


class TestYearset(unittest.TestCase):
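As a sanity check on test_local_return_period, the expected values can be derived by hand: with unit frequencies, the exceedance frequency at a threshold is simply the number of events reaching it, and the return period is its reciprocal. For the first centroid, with intensities [1., 2.]:

threshold 1.: two events reach it  -> exceedance frequency 2 -> return period 0.5
threshold 2.: one event reaches it -> exceedance frequency 1 -> return period 1.0
threshold 3.: no event reaches it  -> exceedance frequency 0 -> return period NaN

This matches the first column of the reference array above.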
