Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature/dei 99 formula based rule between different axis #98

Merged
54 changes: 48 additions & 6 deletions decoimpact/business/entities/rule_processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,7 @@ def process_rules(
Raises:
RuntimeError: if initialization is not correctly done
"""

if len(self._processing_list) < 1:
message = "Processor is not properly initialized, please initialize."
raise RuntimeError(message)
Expand Down Expand Up @@ -293,19 +294,60 @@ def _process_by_multi_cell(
)

value_arrays = list(input_variables.values())
np_array = value_arrays[0].to_numpy()
result_variable = _np.zeros_like(np_array)

# Check the amount of dimensions of all variables
len_dims = _np.array([len(vals.dims) for vals in value_arrays])

# Use the variable with the most dimensions. Broadcast all other
# variables to these dimensions
most_dims_bool = len_dims == max(len_dims)

ref_var = value_arrays[_np.argmax(len_dims)]
for ind_vars, enough_dims in enumerate(most_dims_bool):
if not enough_dims:
# Let the user know which variables will be broadcast to all dimensions
var_orig = value_arrays[ind_vars]
dims_orig = var_orig.dims
dims_result = ref_var.dims
dims_diff = list(str(x) for x in dims_result if x not in dims_orig)
str_dims_broadcasted = ",".join(dims_diff)
logger.log_info(
f"""Variable {var_orig.name} will be expanded to the following \
dimensions: {str_dims_broadcasted} """
)
# perform the broadcast

var_broadcasted = _xr.broadcast(var_orig, ref_var)[0]
# Make sure the dimensions are in the same order
value_arrays[ind_vars] = var_broadcasted.transpose(*ref_var.dims)

# Check if all variables now have the same dimensions
for val_index in range(len(value_arrays) - 1):
var1 = value_arrays[val_index]
var2 = value_arrays[val_index + 1]
diff = set(var1.dims) ^ set(var2.dims)

# If the variables with the most dimensions have different dimensions,
# stop the calculation
if len(diff) != 0:
raise NotImplementedError(
f"Can not execute rule {rule.name} with variables with different \
dimensions. Variable {var1.name} with dimensions:{var1.dims} is \
different than {var2.name} with dimensions:{var2.dims}"
)

result_variable = _np.zeros_like(ref_var.to_numpy())
cell_values = {}

for indices, _ in _np.ndenumerate(np_array):
for name, value_array in input_variables.items():
cell_values[name] = value_array.data[indices]
for indices, _ in _np.ndenumerate(ref_var.to_numpy()):
for value in value_arrays:
cell_values[value.name] = value.data[indices]

result_variable[indices] = rule.execute(cell_values, logger)

# use copy to get the same dimensions as the
# original input variable
return value_arrays[0].copy(data=result_variable)
return ref_var.copy(data=result_variable)

def _get_rule_input_variables(
self, rule: IRule, output_dataset: _xr.Dataset
Expand Down
106 changes: 106 additions & 0 deletions tests/business/entities/test_rule_processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -334,6 +334,112 @@ def test_process_rules_calls_multi_cell_based_rule_execute_correctly():
assert rule.execute.call_count == 6


@pytest.mark.parametrize(
    "input_array1, input_array2, dims",
    [
        (
            _xr.DataArray(
                _np.array([1, 2], _np.int32),
                dims=["x"],
                coords={"x": [0, 1]},
            ),
            _xr.DataArray(
                _np.array([[1, 2], [3, 4]], _np.int32),
                dims=["x", "y"],
                coords={"x": [0, 1], "y": [0, 1]},
            ),
            {"x": 2, "y": 2},
        ),
        (
            _xr.DataArray(
                _np.array([1, 2], _np.int32),
                dims=["x"],
                coords={"x": [0, 1]},
            ),
            _xr.DataArray(
                _np.array([[[1, 2], [3, 4]], [[1, 2], [3, 4]]], _np.int32),
                dims=["x", "y", "z"],
                coords={"x": [0, 1], "y": [0, 1], "z": [0, 1]},
            ),
            {"x": 2, "y": 2, "z": 2},
        ),
    ],
)
def test_process_rules_calls_multi_cell_based_rule_special_cases(
    input_array1, input_array2, dims
):
    """Test the multi_cell rule with input variables of different rank.

    Parametrized cases:
    1. variables with different dimensions (1D vs 2D)
    2. variables with different dimensions (1D vs 3D)

    In each case the processed dataset is expected to have the dimension
    sizes given by ``dims``.
    """

    # Arrange
    dataset = _xr.Dataset()

    dataset["test1"] = input_array1
    dataset["test2"] = input_array2

    logger = Mock(ILogger)
    rule = Mock(IMultiCellBasedRule)

    rule.input_variable_names = ["test1", "test2"]
    rule.output_variable_name = "output"

    rule.execute.return_value = 1

    processor = RuleProcessor([rule], dataset)

    # Act
    assert processor.initialize(logger)
    output_dataset = processor.process_rules(dataset, logger)

    # Assert
    # Compare via Dataset.sizes (the name -> length mapping) rather than
    # Dataset.dims, whose mapping behaviour is deprecated in xarray.
    assert dict(output_dataset.sizes) == dims


def test_process_rules_calls_multi_cell_based_fails_with_different_dims():
    """A MultiCellBasedRule accepts inputs that have fewer dimensions, but
    processing must fail when the inputs have different dimensions."""

    # Arrange
    variable_on_x = _xr.DataArray(
        _np.array([1, 2], _np.int32),
        dims=["x"],
        coords={"x": [0, 1]},
    )
    variable_on_y = _xr.DataArray(
        _np.array([1, 2], _np.int32),
        dims=["y"],
        coords={"y": [0, 1]},
    )

    dataset = _xr.Dataset()
    dataset["test1"] = variable_on_x
    dataset["test2"] = variable_on_y

    rule = Mock(IMultiCellBasedRule)
    rule.name = "test_rule"
    rule.input_variable_names = ["test1", "test2"]
    rule.output_variable_name = "output"
    rule.execute.return_value = 1

    logger = Mock(ILogger)
    processor = RuleProcessor([rule], dataset)
    processor.initialize(logger)

    # The continuation lines are deliberately flush-left: any leading
    # whitespace would become part of the expected message.
    expected_message = f"Can not execute rule {rule.name} with variables with different \
dimensions. Variable test1 with dimensions:('x',) is \
different than test2 with dimensions:('y',)"

    # Act
    with pytest.raises(NotImplementedError) as exc_info:
        processor.process_rules(dataset, logger)

    # Assert
    assert exc_info.value.args[0] == expected_message


def test_process_rules_calls_array_based_rule_execute_correctly():
"""Tests if during processing the rule its execute method of
an IArrayBasedRule is called with the right parameter."""
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
version: 0.0.0

input-data:
- dataset:
filename: ./tests_acceptance/input_nc_files/small_subset_FM-VZM_0000_map.nc
variable_mapping:
water_level_mNAP: "water_level"
bathymetry_mNAP: "bathymetry"

rules:
- formula_rule:
name: Get water_depth by calculation
description: Get water_depth by calculation
formula: bathymetry + water_level
input_variables: [water_level, bathymetry]
output_variable: water_depth_calc

output-data:
filename: ./tests_acceptance/output_nc_files/test10d_formula_based_timeaxes_calculation.nc
Binary file not shown.
Loading