Skip to content

Commit

Permalink
Merge branch 'master' into feat/strided_dataset
Browse files Browse the repository at this point in the history
  • Loading branch information
madtoinou authored Dec 31, 2024
2 parents e8ac91d + b441192 commit c375832
Show file tree
Hide file tree
Showing 3 changed files with 119 additions and 56 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ but cannot always guarantee backwards compatibility. Changes that may **break co
- Added a `stride` argument to the `Dataset` classes (torch-based models) and the fitting methods of the `RegressionModels` to reduce the size of the training set or apply elaborate training approaches. [#2624](https://github.com/unit8co/darts/pull/2529) by [Antoine Madrona](https://github.com/madtoinou)

**Fixed**
- Fixed a bug when performing optimized historical forecasts with `stride=1` using a `RegressionModel` with `output_chunk_shift>=1` and `output_chunk_length=1`, where the forecast time index was not properly shifted. [#2634](https://github.com/unit8co/darts/pull/2634) by [Mattias De Charleroy](https://github.com/MattiasDC).

**Dependencies**

Expand Down
147 changes: 102 additions & 45 deletions darts/tests/utils/historical_forecasts/test_historical_forecasts.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,20 @@
from darts.utils.likelihood_models import GaussianLikelihood, QuantileRegression

models = [LinearRegressionModel, NaiveDrift]
models_reg_no_cov_cls_kwargs = [(LinearRegressionModel, {"lags": 8}, {}, (8, 1))]
models_reg_no_cov_cls_kwargs = [
(LinearRegressionModel, {"lags": 8}, {}, (8, 1)),
# output_chunk_length only
(LinearRegressionModel, {"lags": 5, "output_chunk_length": 2}, {}, (5, 2)),
# output_chunk_shift only
(LinearRegressionModel, {"lags": 5, "output_chunk_shift": 1}, {}, (5, 2)),
# output_chunk_shift + output_chunk_length only
(
LinearRegressionModel,
{"lags": 5, "output_chunk_shift": 1, "output_chunk_length": 2},
{},
(5, 3),
),
]
if not isinstance(CatBoostModel, NotImportedModule):
models_reg_no_cov_cls_kwargs.append((
CatBoostModel,
Expand Down Expand Up @@ -650,13 +663,42 @@ def test_historical_forecasts_negative_rangeindex(self):

@pytest.mark.parametrize("config", models_reg_no_cov_cls_kwargs)
def test_historical_forecasts(self, config):
train_length = 10
"""Tests historical forecasts with retraining for expected forecast lengths and times"""
forecast_horizon = 8
# if no fit and retrain=false, should fit at fist iteration
model_cls, kwargs, model_kwarg, bounds = config
model = model_cls(**kwargs, **model_kwarg)
# set train length to be the minimum required training length
# +1 as sklearn models require min 2 train samples
train_length = bounds[0] + bounds[1] + 1

if model.output_chunk_shift > 0:
with pytest.raises(ValueError) as err:
forecasts = model.historical_forecasts(
series=self.ts_pass_val,
forecast_horizon=forecast_horizon,
stride=1,
train_length=train_length,
retrain=True,
overlap_end=False,
)
assert str(err.value).startswith(
"Cannot perform auto-regression `(n > output_chunk_length)`"
)
# continue the test without auto-regression if we are using shifts
forecast_horizon = model.output_chunk_length

# time index
# time index without train length
forecasts_no_train_length = model.historical_forecasts(
series=self.ts_pass_val,
forecast_horizon=forecast_horizon,
stride=1,
train_length=None,
retrain=True,
overlap_end=False,
)

# time index with minimum train length
forecasts = model.historical_forecasts(
series=self.ts_pass_val,
forecast_horizon=forecast_horizon,
Expand All @@ -666,23 +708,21 @@ def test_historical_forecasts(self, config):
overlap_end=False,
)

assert len(forecasts_no_train_length) == len(forecasts)
theorical_forecast_length = (
self.ts_val_length
- max([
(
bounds[0] + bounds[1] + 1
), # +1 as sklearn models require min 2 train samples
train_length,
]) # because we train
- train_length # because we train
- forecast_horizon # because we have overlap_end = False
+ 1 # because we include the first element
)

assert len(forecasts) == theorical_forecast_length, (
f"Model {model_cls.__name__} does not return the right number of historical forecasts in the case "
f"of retrain=True and overlap_end=False, and a time index of type DateTimeIndex. "
f"Expected {theorical_forecast_length}, got {len(forecasts)}"
)
assert forecasts.time_index.equals(
self.ts_pass_val.time_index[-theorical_forecast_length:]
)

# range index
forecasts = model.historical_forecasts(
Expand All @@ -699,6 +739,10 @@ def test_historical_forecasts(self, config):
f"of retrain=True, overlap_end=False, and a time index of type RangeIndex."
f"Expected {theorical_forecast_length}, got {len(forecasts)}"
)
assert forecasts.time_index.equals(
self.ts_pass_val_range.time_index[-theorical_forecast_length:]
)
start_idx = self.ts_pass_val_range.get_index_at_point(forecasts.start_time())

# stride 2
forecasts = model.historical_forecasts(
Expand All @@ -710,30 +754,30 @@ def test_historical_forecasts(self, config):
overlap_end=False,
)

theorical_forecast_length = np.floor(
(
theorical_forecast_length = int(
np.floor(
(
self.ts_val_length
- max([
(
bounds[0] + bounds[1] + 1
), # +1 as sklearn models require min 2 train samples
train_length,
]) # because we train
- forecast_horizon # because we have overlap_end = False
+ 1 # because we include the first element
(
self.ts_val_length
- train_length # because we train
- forecast_horizon # because we have overlap_end = False
+ 1 # because we include the first element
)
- 1
)
- 1
)
/ 2
+ 1 # because of stride
) # if odd number of elements, we keep the floor
/ 2
+ 1 # because of stride
) # if odd number of elements, we keep the floor
)

assert len(forecasts) == theorical_forecast_length, (
f"Model {model_cls.__name__} does not return the right number of historical forecasts in the case "
f"of retrain=True and overlap_end=False and stride=2. "
f"Expected {theorical_forecast_length}, got {len(forecasts)}"
)
assert forecasts.time_index.equals(
self.ts_pass_val_range.time_index[start_idx::2]
)

# stride 3
forecasts = model.historical_forecasts(
Expand All @@ -749,12 +793,7 @@ def test_historical_forecasts(self, config):
(
(
self.ts_val_length
- max([
(
bounds[0] + bounds[1] + 1
), # +1 as sklearn models require min 2 train samples
train_length,
]) # because we train
- train_length # because we train
- forecast_horizon # because we have overlap_end = False
+ 1 # because we include the first element
)
Expand All @@ -770,6 +809,9 @@ def test_historical_forecasts(self, config):
f"of retrain=True and overlap_end=False and stride=3. "
f"Expected {theorical_forecast_length}, got {len(forecasts)}"
)
assert forecasts.time_index.equals(
self.ts_pass_val_range.time_index[start_idx::3]
)

# last points only False
forecasts = model.historical_forecasts(
Expand All @@ -784,12 +826,7 @@ def test_historical_forecasts(self, config):

theorical_forecast_length = (
self.ts_val_length
- max([
(
bounds[0] + bounds[1] + 1
), # +1 as sklearn models require min 2 train samples
train_length,
]) # because we train
- train_length # because we train
- forecast_horizon # because we have overlap_end = False
+ 1 # because we include the first element
)
Expand All @@ -804,6 +841,11 @@ def test_historical_forecasts(self, config):
f"Model {model_cls} does not return forecast_horizon points per historical forecast in the case of "
f"retrain=True and overlap_end=False, and last_points_only=False"
)
last_points_times = np.array([fc.end_time() for fc in forecasts])
np.testing.assert_equal(
last_points_times,
self.ts_pass_val_range.time_index[-theorical_forecast_length:].values,
)

if not model.supports_past_covariates:
with pytest.raises(ValueError) as msg:
Expand Down Expand Up @@ -1144,15 +1186,32 @@ def test_historical_forecasts_start_too_early(self, caplog, config):

@pytest.mark.parametrize("config", models_reg_no_cov_cls_kwargs)
def test_regression_auto_start_multiple_no_cov(self, config):
train_length = 15
# minimum required train length (+1 since sklearn models require 2 samples)
forecast_horizon = 10
model_cls, kwargs, model_kwargs, bounds = config
train_length = bounds[0] + bounds[1] + 1
model = model_cls(
**kwargs,
**model_kwargs,
)
model.fit(self.ts_pass_train)

if model.output_chunk_shift > 0:
with pytest.raises(ValueError) as err:
forecasts = model.historical_forecasts(
series=[self.ts_pass_val, self.ts_pass_val],
forecast_horizon=forecast_horizon,
train_length=train_length,
stride=1,
retrain=True,
overlap_end=False,
)
assert str(err.value).startswith(
"Cannot perform auto-regression `(n > output_chunk_length)`"
)
# continue the test without autogregression if we are using shifts
forecast_horizon = model.output_chunk_length

forecasts = model.historical_forecasts(
series=[self.ts_pass_val, self.ts_pass_val],
forecast_horizon=forecast_horizon,
Expand All @@ -1168,12 +1227,7 @@ def test_regression_auto_start_multiple_no_cov(self, config):

theorical_forecast_length = (
self.ts_val_length
- max([
(
bounds[0] + bounds[1] + 1
), # +1 as sklearn models require min 2 train samples
train_length,
]) # because we train
- train_length
- forecast_horizon # because we have overlap_end = False
+ 1 # because we include the first element
)
Expand All @@ -1183,6 +1237,9 @@ def test_regression_auto_start_multiple_no_cov(self, config):
f"of retrain=True and overlap_end=False, and a time index of type DateTimeIndex. "
f"Expected {theorical_forecast_length}, got {len(forecasts[0])} and {len(forecasts[1])}"
)
assert forecasts[0].time_index.equals(forecasts[1].time_index) and forecasts[
0
].time_index.equals(self.ts_pass_val.time_index[-theorical_forecast_length:])

@pytest.mark.slow
@pytest.mark.parametrize(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -162,19 +162,24 @@ def _optimized_historical_forecasts_last_points_only(
else:
forecast = forecast[:, 0]

if (
stride == 1
and model.output_chunk_length == 1
and model.output_chunk_shift == 0
):
times = times[0]
else:
times = generate_index(
start=hist_fct_start
+ (forecast_horizon + model.output_chunk_shift - 1) * freq,
length=forecast.shape[0],
freq=freq * stride,
name=series_.time_index.name,
)

forecasts_list.append(
TimeSeries.from_times_and_values(
times=(
times[0]
if stride == 1 and model.output_chunk_length == 1
else generate_index(
start=hist_fct_start
+ (forecast_horizon + model.output_chunk_shift - 1) * freq,
length=forecast.shape[0],
freq=freq * stride,
name=series_.time_index.name,
)
),
times=times,
values=forecast,
columns=forecast_components,
static_covariates=series_.static_covariates,
Expand Down

0 comments on commit c375832

Please sign in to comment.