diff --git a/CHANGELOG.md b/CHANGELOG.md index 7dd876b32d..8fa74e021d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -24,6 +24,7 @@ but cannot always guarantee backwards compatibility. Changes that may **break co **Fixed** +- Fixed a bug when performing probabilistic optimized historical forecasts (`num_samples>1, retrain=False, enable_optimization=True`) with regression models, where reshaping the array resulted in a wrong order of samples across components and forecasts. [#2534](https://github.com/unit8co/darts/pull/2534) by [Dennis Bader](https://github.com/dennisbader). - Fixed bug when plotting a probabilistic multivariate series, where all confidence intervals (starting from 2nd component) had the same color as the median line. [#2532](https://github.com/unit8co/darts/pull/2532) by [Dennis Bader](https://github.com/dennisbader). - Fixed a bug when passing an empty array to `TimeSeries.prepend/append_values()` raised an error. [#2522](https://github.com/unit8co/darts/pull/2522) by [Alessio Pellegrini](https://github.com/AlessiopSymplectic) - Fixed a bug with `TimeSeries.prepend/append_values()`, where the name of the (time) index was lost. [#2522](https://github.com/unit8co/darts/pull/2522) by [Alessio Pellegrini](https://github.com/AlessiopSymplectic) diff --git a/darts/tests/models/forecasting/test_historical_forecasts.py b/darts/tests/models/forecasting/test_historical_forecasts.py index 5281261ad2..5a4d201cb3 100644 --- a/darts/tests/models/forecasting/test_historical_forecasts.py +++ b/darts/tests/models/forecasting/test_historical_forecasts.py @@ -2314,6 +2314,93 @@ def test_predict_likelihood_parameters(self, model_type): ) assert len(hist_fc) == n + 1 + @pytest.mark.parametrize( + "config", + product( + [False, True], # last_points_only + [True, False], # multi_models + [1, 2, 3], # horizon + ), + ) + def test_probabilistic_optimized_hist_fc_regression(self, config): + """Tests optimized probilistic historical forecasts for regression models.""" + np.random.seed(42) + lpo, multi_models, n = config + ocl = 2 + q = [0.05, 0.50, 0.95] + + y = tg.linear_timeseries(length=20) + y = y.stack(y + 1.0) + y = [y, y] + + icl = 3 + model = LinearRegressionModel( + lags=icl, + output_chunk_length=ocl, + likelihood="quantile", + quantiles=q, + multi_models=multi_models, + ) + model.fit(y) + # probabilistic forecasts non-optimized + hfcs_no_opt = model.historical_forecasts( + series=y, + forecast_horizon=n, + last_points_only=lpo, + retrain=False, + enable_optimization=False, + num_samples=1000, + stride=n, + ) + # probabilistic forecasts optimized + hfcs_opt = model.historical_forecasts( + series=y, + forecast_horizon=n, + last_points_only=lpo, + retrain=False, + enable_optimization=True, + num_samples=1000, + stride=n, + ) + if n <= ocl: + # quantile forecasts optimized + hfcs_opt_q = model.historical_forecasts( + series=y, + forecast_horizon=n, + last_points_only=lpo, + retrain=False, + enable_optimization=True, + predict_likelihood_parameters=True, + stride=n, + ) + if lpo: + q_med = hfcs_opt_q[0].components[1::3].tolist() + else: + q_med = hfcs_opt_q[0][0].components[1::3].tolist() + hfcs_opt_q = ( + [concatenate(hfc) for hfc in hfcs_opt_q] + if hfcs_opt_q is not None + else hfcs_opt_q + ) + hfcs_opt_q = ( + [hfc[q_med] for hfc in hfcs_opt_q] + if hfcs_opt_q is not None + else hfcs_opt_q + ) + else: + hfcs_opt_q = [None] * len(hfcs_opt) + + if not lpo: + hfcs_opt = [concatenate(hfc) for hfc in hfcs_opt] + hfcs_no_opt = [concatenate(hfc) for hfc in hfcs_no_opt] + + for hfc_opt, mean_opt_q, hfc_no_opt in zip(hfcs_opt, hfcs_opt_q, hfcs_no_opt): + mean_opt = hfc_opt.all_values().mean(axis=2) + mean_no_opt = hfc_no_opt.all_values().mean(axis=2) + assert np.abs(mean_opt - mean_no_opt).max() < 0.1 + if mean_opt_q is not None: + assert np.abs(mean_opt - mean_opt_q.values()).max() < 0.1 + @pytest.mark.parametrize( "model_type,enable_optimization", product(["regression", "torch"], [True, False]), diff --git a/darts/utils/historical_forecasts/optimized_historical_forecasts_regression.py b/darts/utils/historical_forecasts/optimized_historical_forecasts_regression.py index c480d69c57..eeef59f04b 100644 --- a/darts/utils/historical_forecasts/optimized_historical_forecasts_regression.py +++ b/darts/utils/historical_forecasts/optimized_historical_forecasts_regression.py @@ -141,18 +141,23 @@ def _optimized_historical_forecasts_last_points_only( ) # forecast has shape ((forecastable_index_length-1)*num_samples, k, n_component) # where k = output_chunk length if multi_models, 1 otherwise - - # reshape into (forecasted indexes, n_components, n_samples), components are interleaved - forecast = forecast.reshape(X.shape[0], -1, num_samples) + # reshape into (forecasted indexes, output_chunk_length, n_components, n_samples) + forecast = np.moveaxis( + forecast.reshape( + X.shape[0], + num_samples, + model.output_chunk_length if model.multi_models else 1, + -1, + ), + 1, + -1, + ) # extract the last sub-model forecast for each component if model.multi_models: - forecast = forecast[ - :, - (forecast_horizon - 1) * len(forecast_components) : (forecast_horizon) - * len(forecast_components), - :, - ] + forecast = forecast[:, forecast_horizon - 1] + else: + forecast = forecast[:, 0] forecasts_list.append( TimeSeries.from_times_and_values( @@ -237,7 +242,6 @@ def _optimized_historical_forecasts_all_points( # Additional shift, to account for the model output_chunk_length shift_start = 0 - # shift_end = 0 if model.output_chunk_length > 1: # used to convert the shift into the appropriate unit unit = freq if series_.has_datetime_index else 1 @@ -296,26 +300,27 @@ def _optimized_historical_forecasts_all_points( predict_likelihood_parameters=predict_likelihood_parameters, **kwargs, ) - - # reshape and stride the forecast into (forecastable_index, forecast_horizon, n_components, num_samples) - if model.multi_models: - # forecast has shape ((forecastable_index_length-1)*num_samples, output_chunk_length, n_component) - # and the components are interleaved - forecast = forecast.reshape( + # forecast has shape ((forecastable_index_length-1)*num_samples, k, n_component) + # where k = output_chunk length if multi_models, 1 otherwise + # reshape into (forecasted indexes, output_chunk_length, n_components, n_samples) + forecast = np.moveaxis( + forecast.reshape( X.shape[0], - model.output_chunk_length, - len(forecast_components), num_samples, - ) + model.output_chunk_length if model.multi_models else 1, + -1, + ), + 1, + -1, + ) + + if model.multi_models: forecast = forecast[::stride, :forecast_horizon] else: - # forecast has shape ((forecastable_index_length-1)*num_samples, 1, n_component) - # and the components are interleaved - forecast = forecast.reshape(X.shape[0], -1, num_samples) - - # forecasts depend on lagged data only, output_chunk_length is reconstitued by applying a sliding window + # entire forecast horizon is given by multiple (previous) forecasts -> apply sliding window forecast = sliding_window_view( - forecast, (forecast_horizon, len(forecast_components), num_samples) + forecast[:, 0], + (forecast_horizon, len(forecast_components), num_samples), ) # apply stride, remove the last windows, slice output_chunk_length to keep forecast_horizon values