-
Notifications
You must be signed in to change notification settings - Fork 26
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Improve Handling of Fill-Values in RAD_MAX Estimator #282
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
Add basic combinatoric fill-value handling for RAD_MAX estimation. |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -3,7 +3,6 @@ | |
|
||
import astropy.units as u | ||
import numpy as np | ||
from pyirf.utils import cone_solid_angle | ||
from scipy.spatial import Delaunay | ||
|
||
from .base_extrapolators import DiscretePDFExtrapolator, ParametrizedExtrapolator | ||
|
@@ -13,7 +12,9 @@ | |
PDFNormalization, | ||
) | ||
from .griddata_interpolator import GridDataInterpolator | ||
from .nearest_neighbor_searcher import BaseNearestNeighborSearcher | ||
from .quantile_interpolator import QuantileInterpolator | ||
from .utils import find_nearest_simplex | ||
|
||
__all__ = [ | ||
"BaseComponentEstimator", | ||
|
@@ -477,6 +478,7 @@ def __init__( | |
self, | ||
grid_points, | ||
rad_max, | ||
fill_value=None, | ||
interpolator_cls=GridDataInterpolator, | ||
interpolator_kwargs=None, | ||
extrapolator_cls=None, | ||
|
@@ -495,6 +497,13 @@ def __init__( | |
Grid of theta cuts. Dimensions but the first can in principle be freely | ||
chosen. Class is RAD_MAX_2D compatible, which would require | ||
shape=(n_points, n_energy_bins, n_fov_offset_bins). | ||
fill_val: | ||
Indicator of fill-value handling. If None, fill values are regarded as | ||
normal values and no special handeling is applied. If "infer", fill values | ||
will be infered as max of all values, if a value is provided, | ||
it is used to flag fill values. Flagged fill-values are | ||
not used for interpolation. Fill-value handling is only supported in | ||
up to two grid dimensions. | ||
interpolator_cls: | ||
pyirf interpolator class, defaults to GridDataInterpolator. | ||
interpolator_kwargs: dict | ||
|
@@ -523,6 +532,26 @@ def __init__( | |
extrapolator_kwargs=extrapolator_kwargs, | ||
) | ||
|
||
self.params = rad_max | ||
|
||
if fill_value is None: | ||
self.fill_val = None | ||
elif fill_value == "infer": | ||
self.fill_val = np.max(self.params) | ||
else: | ||
self.fill_val = fill_value | ||
|
||
# Raise error if fill-values should be handled in >=3 dims | ||
if self.fill_val and self.grid_dim >= 3: | ||
raise ValueError( | ||
"Fill-value handling only supported in up to two grid dimensions." | ||
) | ||
|
||
# If fill-values should be handled in 2D, construct a trinangulation | ||
# to later determine in which simplex the target values is | ||
if self.fill_val and (self.grid_dim == 2): | ||
self.triangulation = Delaunay(self.grid_points) | ||
|
||
def __call__(self, target_point): | ||
""" | ||
Estimating rad max table at target_point, inter-/extrapolates as needed and | ||
|
@@ -540,8 +569,99 @@ def __call__(self, target_point): | |
effective areas. For RAD_MAX_2D of shape (n_energy_bins, n_fov_offset_bins) | ||
|
||
""" | ||
# First, construct estimation without handling fill-values | ||
full_estimation = super().__call__(target_point) | ||
# Safeguard against extreme extrapolation cases | ||
np.clip(full_estimation, 0, None, out=full_estimation) | ||
|
||
# Early exit if fill_values should not be handled | ||
if not self.fill_val: | ||
return full_estimation | ||
|
||
# Early exit if a nearest neighbor estimation would be overwritten | ||
# Complex setup is needed to catch settings where the user mixes approaches and | ||
# e.g. uses nearest neighbors for extrapolation and an actual interpolation otherwise | ||
if self.grid_dim == 1: | ||
if ( | ||
(target_point < self.grid_points.min()) | ||
or (target_point > self.grid_points.max()) | ||
) and issubclass(self.extrapolator.__class__, BaseNearestNeighborSearcher): | ||
return full_estimation | ||
elif issubclass(self.interpolator.__class__, BaseNearestNeighborSearcher): | ||
return full_estimation | ||
elif self.grid_dim == 2: | ||
target_simplex = self.triangulation.find_simplex(target_point) | ||
|
||
if (target_simplex == -1) and issubclass( | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. You are checking for So just checking for There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Basically, this accounts for the possibility that the user mixes and uses NearestNeighbor for extrapolation and actual interpolation otherwise (and vice-versa) at the same time. |
||
self.extrapolator.__class__, BaseNearestNeighborSearcher | ||
): | ||
return full_estimation | ||
elif issubclass(self.interpolator.__class__, BaseNearestNeighborSearcher): | ||
return full_estimation | ||
|
||
# Actual fill-value handling | ||
if self.grid_dim == 1: | ||
# Locate target in grid | ||
if target_point < self.grid_points.min(): | ||
segment_inds = np.array([0, 1], "int") | ||
elif target_point > self.grid_points.max(): | ||
segment_inds = np.array([-2, -1], "int") | ||
else: | ||
target_bin = np.digitize( | ||
target_point.squeeze(), self.grid_points.squeeze() | ||
) | ||
segment_inds = np.array([target_bin - 1, target_bin], "int") | ||
|
||
mask_left = self.params[segment_inds[0]] >= self.fill_val | ||
mask_right = self.params[segment_inds[1]] >= self.fill_val | ||
# Indicate, wether one of the neighboring entries is a fill-value | ||
mask = np.logical_or(mask_left, mask_right) | ||
elif self.grid_dim == 2: | ||
# Locate target | ||
target_simplex = self.triangulation.find_simplex(target_point) | ||
|
||
if target_simplex == -1: | ||
target_simplex = find_nearest_simplex(self.triangulation, target_point) | ||
|
||
simplex_nodes_indices = self.triangulation.simplices[ | ||
target_simplex | ||
].squeeze() | ||
|
||
mask0 = self.params[simplex_nodes_indices[0]] >= self.fill_val | ||
mask1 = self.params[simplex_nodes_indices[1]] >= self.fill_val | ||
mask2 = self.params[simplex_nodes_indices[2]] >= self.fill_val | ||
|
||
# This collected mask now counts for each entry in the estimation how many | ||
# of the entries used for extrapolation contained fill-values | ||
intermediate_mask = ( | ||
mask0.astype("int") + mask1.astype("int") + mask2.astype("int") | ||
) | ||
mask = np.full_like(intermediate_mask, True, dtype=bool) | ||
|
||
# Simplest cases: All or none entries were fill-values, so either return | ||
# a fill-value or the actual estimation | ||
mask[intermediate_mask == 0] = False | ||
mask[intermediate_mask == 3] = True | ||
|
||
# If two out of three values were fill-values return a fill-value as estimate | ||
mask[intermediate_mask == 2] = True | ||
|
||
# If only one out of three values was a fill-value use the smallest value of the | ||
# remaining two | ||
mask[intermediate_mask == 1] = False | ||
full_estimation = np.where( | ||
intermediate_mask[np.newaxis, :] == 1, | ||
np.min(self.params[simplex_nodes_indices], axis=0), | ||
full_estimation, | ||
) | ||
|
||
# Set all flagged values to fill-value | ||
full_estimation[mask[np.newaxis, :]] = self.fill_val | ||
|
||
# Safeguard against extreme extrapolation cases | ||
full_estimation[full_estimation > self.fill_val] = self.fill_val | ||
|
||
return super().__call__(target_point) | ||
return full_estimation | ||
|
||
|
||
class EnergyDispersionEstimator(DiscretePDFComponentEstimator): | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Why can't we just check for nearest neighbor? Why this complex setup?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
That setup is to catch cases where the user decided to mix approaches and use e.g. NearestNeighbor for interpolation but an actual extrapolator for extrapolation or vice-versa. I'll extend the comment in code to make this more clear for the future.