diff --git a/CHANGELOG.md b/CHANGELOG.md index 5d4bb741b8..10812c7abd 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -20,9 +20,10 @@ but cannot always guarantee backwards compatibility. Changes that may **break co - Improvements to Regression Models: - `XGBModel` now leverages XGBoost's native Quantile Regression support that was released in version 2.0.0 for improved probabilistic forecasts. [#2051](https://github.com/unit8co/darts/pull/2051) by [Dennis Bader](https://github.com/dennisbader). - Other improvements: - - Added support for time index time zone conversion with parameter `tz` before generating/computing holidays and datetime attributes. Support was added to all Time Axis Encoders (standalone encoders and forecasting models' `add_encoders`, time series generation utils functions `holidays_timeseries()` and `datetime_attribute_timeseries()`, and `TimeSeries` methods `add_datetime_attribute()` and `add_holidays()`. [#2054](https://github.com/unit8co/darts/pull/2054) by [Dennis Bader](https://github.com/dennisbader). + - Added support for time index time zone conversion with parameter `tz` before generating/computing holidays and datetime attributes. Support was added to all Time Axis Encoders, standalone encoders and forecasting models' `add_encoders`, time series generation utils functions `holidays_timeseries()` and `datetime_attribute_timeseries()`, and `TimeSeries` methods `add_datetime_attribute()` and `add_holidays()`. [#2054](https://github.com/unit8co/darts/pull/2054) by [Dennis Bader](https://github.com/dennisbader). - Added optional keyword arguments dict `kwargs` to `ExponentialSmoothing` that will be passed to the constructor of the underlying `statsmodels.tsa.holtwinters.ExponentialSmoothing` model. [#2059](https://github.com/unit8co/darts/pull/2059) by [Antoine Madrona](https://github.com/madtoinou). 
- Added new dataset `ElectricityConsumptionZurichDataset`: The dataset contains the electricity consumption of households in Zurich, Switzerland from 2015-2022 on different grid levels. We also added weather measurements for Zurich which can be used as covariates for modelling. [#2039](https://github.com/unit8co/darts/pull/2039) by [Antoine Madrona](https://github.com/madtoinou) and [Dennis Bader](https://github.com/dennisbader). + - Added new arguments `fit_kwargs` and `predict_kwargs` to `historical_forecasts()`, `backtest()` and `gridsearch()` that will be passed to the model's `fit()` and/or `predict()` methods. E.g., you can now set a batch size, static validation series, ... depending on the model support. [#2050](https://github.com/unit8co/darts/pull/2050) by [Antoine Madrona](https://github.com/madtoinou). **Fixed** - Fixed a bug when calling optimized `historical_forecasts()` for a `RegressionModel` trained with unequal component-specific lags. [#2040](https://github.com/unit8co/darts/pull/2040) by [Antoine Madrona](https://github.com/madtoinou). diff --git a/darts/datasets/__init__.py b/darts/datasets/__init__.py index 9b8262e128..5f2954727e 100644 --- a/darts/datasets/__init__.py +++ b/darts/datasets/__init__.py @@ -829,7 +829,8 @@ class ElectricityConsumptionZurichDataset(DatasetLoaderCSV): To simplify the dataset, the measurements from the Zch_Schimmelstrasse and Zch_Rosengartenstrasse weather stations are discarded to keep only the data recorded in the Zch_Stampfenbachstrasse station. - Both dataset sources are updated continuously, but this dataset only retrains values between 2015 and 2022. + Both dataset sources are updated continuously, but this dataset only retains values between 2015-01-01 and + 2022-08-31. The time index was converted from CET time zone to UTC. 
Components Descriptions: @@ -864,7 +865,7 @@ def pre_process_dataset(dataset_path): # extract pre-determined period df = df.loc[ (pd.Timestamp("2015-01-01") <= df.index) - & (df.index <= pd.Timestamp("2022-12-31")) + & (df.index <= pd.Timestamp("2022-08-31")) ] # download and preprocess the weather information df_weather = self._download_weather_data() @@ -894,7 +895,7 @@ def pre_process_dataset(dataset_path): "ewz_stromabgabe_netzebenen_stadt_zuerich/" "download/ewz_stromabgabe_netzebenen_stadt_zuerich.csv" ), - hash="c2fea1a0974611ff1c276abcc1d34619", + hash="a019125b7f9c1afeacb0ae60ce7455ef", header_time="Timestamp", freq="15min", pre_process_csv_fn=pre_process_dataset, @@ -919,6 +920,6 @@ def _download_weather_data(): ) df = df.loc[ (pd.Timestamp("2015-01-01") <= df.index) - & (df.index <= pd.Timestamp("2022-12-31")) + & (df.index <= pd.Timestamp("2022-08-31")) ] return df diff --git a/darts/models/forecasting/baselines.py b/darts/models/forecasting/baselines.py index 93309f24d1..bee9d23cd0 100644 --- a/darts/models/forecasting/baselines.py +++ b/darts/models/forecasting/baselines.py @@ -332,8 +332,12 @@ def fit( for model in self.forecasting_models: model._fit_wrapper( series=series, - past_covariates=past_covariates, - future_covariates=future_covariates, + past_covariates=past_covariates + if model.supports_past_covariates + else None, + future_covariates=future_covariates + if model.supports_future_covariates + else None, ) return self diff --git a/darts/models/forecasting/forecasting_model.py b/darts/models/forecasting/forecasting_model.py index 1d3bf14eac..2c2bd4909c 100644 --- a/darts/models/forecasting/forecasting_model.py +++ b/darts/models/forecasting/forecasting_model.py @@ -42,6 +42,7 @@ _get_historical_forecast_predict_index, _get_historical_forecast_train_index, _historical_forecasts_general_checks, + _historical_forecasts_sanitize_kwargs, _reconciliate_historical_time_indices, ) from darts.utils.timeseries_generation import ( @@ -316,23 +317,47 
@@ def _fit_wrapper( series: TimeSeries, past_covariates: Optional[TimeSeries], future_covariates: Optional[TimeSeries], + **kwargs, ): - self.fit(series) + supported_params = inspect.signature(self.fit).parameters + kwargs_ = {k: v for k, v in kwargs.items() if k in supported_params} + + # handle past and future covariates based on model support + for covs, name in zip([past_covariates, future_covariates], ["past", "future"]): + covs_name = f"{name}_covariates" + if getattr(self, f"supports_{covs_name}"): + kwargs_[covs_name] = covs + elif covs is not None: + raise_log( + ValueError(f"Model cannot be fit/trained with `{covs_name}`."), + logger, + ) + self.fit(series, **kwargs_) def _predict_wrapper( self, n: int, - series: TimeSeries, - past_covariates: Optional[TimeSeries], - future_covariates: Optional[TimeSeries], - num_samples: int, - verbose: bool = False, - predict_likelihood_parameters: bool = False, - ) -> TimeSeries: - kwargs = dict() - if self.supports_likelihood_parameter_prediction: - kwargs["predict_likelihood_parameters"] = predict_likelihood_parameters - return self.predict(n, num_samples=num_samples, verbose=verbose, **kwargs) + **kwargs, + ) -> Union[TimeSeries, Sequence[TimeSeries]]: + supported_params = set(inspect.signature(self.predict).parameters) + + # if predict() accepts covariates, the model might not support them at inference + for covs_name in ["past_covariates", "future_covariates"]: + if covs_name in kwargs and not getattr(self, f"supports_{covs_name}"): + if kwargs[covs_name] is None: + supported_params = supported_params - {covs_name} + else: + raise_log( + ValueError( + f"Model prediction does not support `{covs_name}`, either because it " + f"does not support `{covs_name}` in general, or because it was fit/trained " + f"without using `{covs_name}`." 
+ ), + logger, + ) + + kwargs_ = {k: v for k, v in kwargs.items() if k in supported_params} + return self.predict(n, **kwargs_) @property def min_train_series_length(self) -> int: @@ -586,6 +611,8 @@ def historical_forecasts( show_warnings: bool = True, predict_likelihood_parameters: bool = False, enable_optimization: bool = True, + fit_kwargs: Optional[Dict[str, Any]] = None, + predict_kwargs: Optional[Dict[str, Any]] = None, ) -> Union[ TimeSeries, List[TimeSeries], Sequence[TimeSeries], Sequence[List[TimeSeries]] ]: @@ -692,6 +719,10 @@ def historical_forecasts( Default: ``False`` enable_optimization Whether to use the optimized version of historical_forecasts when supported and available. + fit_kwargs + Additional arguments passed to the model `fit()` method. + predict_kwargs + Additional arguments passed to the model `predict()` method. Returns ------- @@ -802,6 +833,15 @@ def retrain_func( logger, ) + # remove unsupported arguments, raise exception if interference with historical forecasts logic + fit_kwargs, predict_kwargs = _historical_forecasts_sanitize_kwargs( + model=model, + fit_kwargs=fit_kwargs, + predict_kwargs=predict_kwargs, + retrain=retrain is not False and retrain != 0, + show_warnings=show_warnings, + ) + series = series2seq(series) past_covariates = series2seq(past_covariates) future_covariates = series2seq(future_covariates) @@ -829,6 +869,7 @@ def retrain_func( verbose=verbose, show_warnings=show_warnings, predict_likelihood_parameters=predict_likelihood_parameters, + **predict_kwargs, ) if len(series) == 1: @@ -969,6 +1010,7 @@ def retrain_func( series=train_series, past_covariates=past_covariates_, future_covariates=future_covariates_, + **fit_kwargs, ) else: # untrained model was not trained on the first trainable timestamp @@ -1019,6 +1061,7 @@ def retrain_func( num_samples=num_samples, verbose=verbose, predict_likelihood_parameters=predict_likelihood_parameters, + **predict_kwargs, ) if forecast_components is None: forecast_components = 
forecast.columns @@ -1076,6 +1119,8 @@ def backtest( reduction: Union[Callable[[np.ndarray], float], None] = np.mean, verbose: bool = False, show_warnings: bool = True, + fit_kwargs: Optional[Dict[str, Any]] = None, + predict_kwargs: Optional[Dict[str, Any]] = None, ) -> Union[float, List[float], Sequence[float], List[Sequence[float]]]: """Compute error values that the model would have produced when used on (potentially multiple) `series`. @@ -1185,6 +1230,10 @@ def backtest( Whether to print progress. show_warnings Whether to show warnings related to parameters `start`, and `train_length`. + fit_kwargs + Additional arguments passed to the model `fit()` method. + predict_kwargs + Additional arguments passed to the model `predict()` method. Returns ------- @@ -1208,6 +1257,8 @@ def backtest( last_points_only=last_points_only, verbose=verbose, show_warnings=show_warnings, + fit_kwargs=fit_kwargs, + predict_kwargs=predict_kwargs, ) else: forecasts = historical_forecasts @@ -1261,6 +1312,8 @@ def gridsearch( verbose=False, n_jobs: int = 1, n_random_samples: Optional[Union[int, float]] = None, + fit_kwargs: Optional[Dict[str, Any]] = None, + predict_kwargs: Optional[Dict[str, Any]] = None, ) -> Tuple["ForecastingModel", Dict[str, Any], float]: """ Find the best hyper-parameters among a given set using a grid search. @@ -1374,6 +1427,10 @@ def gridsearch( must be between `0` and the total number of parameter combinations. If a float, `n_random_samples` is the ratio of parameter combinations selected from the full grid and must be between `0` and `1`. Defaults to `None`, for which random selection will be ignored. + fit_kwargs + Additional arguments passed to the model `fit()` method. + predict_kwargs + Additional arguments passed to the model `predict()` method. 
Returns ------- @@ -1406,10 +1463,10 @@ def gridsearch( logger, ) - # TODO: here too I'd say we can leave these checks to the models - # if covariates is not None: - # raise_if_not(series.has_same_time_as(covariates), 'The provided series and covariates must have the ' - # 'same time axes.') + if fit_kwargs is None: + fit_kwargs = dict() + if predict_kwargs is None: + predict_kwargs = dict() # compute all hyperparameter combinations from selection params_cross_product = list(product(*parameters.values())) @@ -1437,7 +1494,12 @@ def _evaluate_combination(param_combination) -> float: model = model_class(**param_combination_dict) if use_fitted_values: # fitted value mode - model._fit_wrapper(series, past_covariates, future_covariates) + model._fit_wrapper( + series=series, + past_covariates=past_covariates, + future_covariates=future_covariates, + **fit_kwargs, + ) fitted_values = TimeSeries.from_times_and_values( series.time_index, model.fitted_values ) @@ -1457,16 +1519,24 @@ def _evaluate_combination(param_combination) -> float: last_points_only=last_points_only, verbose=verbose, show_warnings=show_warnings, + fit_kwargs=fit_kwargs, + predict_kwargs=predict_kwargs, ) else: # split mode - model._fit_wrapper(series, past_covariates, future_covariates) + model._fit_wrapper( + series=series, + past_covariates=past_covariates, + future_covariates=future_covariates, + **fit_kwargs, + ) pred = model._predict_wrapper( - len(val_series), - series, - past_covariates, - future_covariates, + n=len(val_series), + series=series, + past_covariates=past_covariates, + future_covariates=future_covariates, num_samples=1, verbose=verbose, + **predict_kwargs, ) error = metric(val_series, pred) @@ -2211,43 +2281,6 @@ def predict( ) ) - def _predict_wrapper( - self, - n: int, - series: Union[TimeSeries, Sequence[TimeSeries]], - past_covariates: Optional[Union[TimeSeries, Sequence[TimeSeries]]], - future_covariates: Optional[Union[TimeSeries, Sequence[TimeSeries]]], - num_samples: int, - 
verbose: bool = False, - predict_likelihood_parameters: bool = False, - ) -> Union[TimeSeries, Sequence[TimeSeries]]: - kwargs = dict() - if self.supports_likelihood_parameter_prediction: - kwargs["predict_likelihood_parameters"] = predict_likelihood_parameters - return self.predict( - n, - series, - past_covariates=past_covariates, - future_covariates=future_covariates, - num_samples=num_samples, - verbose=verbose, - **kwargs, - ) - - def _fit_wrapper( - self, - series: Union[TimeSeries, Sequence[TimeSeries]], - past_covariates: Optional[Union[TimeSeries, Sequence[TimeSeries]]], - future_covariates: Optional[Union[TimeSeries, Sequence[TimeSeries]]], - ): - self.fit( - series=series, - past_covariates=past_covariates if self.supports_past_covariates else None, - future_covariates=future_covariates - if self.supports_future_covariates - else None, - ) - @property def _supports_non_retrainable_historical_forecasts(self) -> bool: """GlobalForecastingModel supports historical forecasts without retraining the model""" @@ -2340,6 +2373,7 @@ def fit(self, series: TimeSeries, future_covariates: Optional[TimeSeries] = None logger=logger, ) self._expect_future_covariates = True + self._uses_future_covariates = True self.encoders = self.initialize_encoders() if self.encoders.encoding_available: @@ -2448,35 +2482,6 @@ def _predict( """ pass - def _fit_wrapper( - self, - series: TimeSeries, - past_covariates: Optional[TimeSeries], - future_covariates: Optional[TimeSeries], - ): - self.fit(series, future_covariates=future_covariates) - - def _predict_wrapper( - self, - n: int, - series: TimeSeries, - past_covariates: Optional[TimeSeries], - future_covariates: Optional[TimeSeries], - num_samples: int, - verbose: bool = False, - predict_likelihood_parameters: bool = False, - ) -> TimeSeries: - kwargs = dict() - if self.supports_likelihood_parameter_prediction: - kwargs["predict_likelihood_parameters"] = predict_likelihood_parameters - return self.predict( - n, - 
future_covariates=future_covariates, - num_samples=num_samples, - verbose=verbose, - **kwargs, - ) - @property def _model_encoder_settings( self, @@ -2673,28 +2678,6 @@ def _predict( """ pass - def _predict_wrapper( - self, - n: int, - series: TimeSeries, - past_covariates: Optional[TimeSeries], - future_covariates: Optional[TimeSeries], - num_samples: int, - verbose: bool = False, - predict_likelihood_parameters: bool = False, - ) -> TimeSeries: - kwargs = dict() - if self.supports_likelihood_parameter_prediction: - kwargs["predict_likelihood_parameters"] = predict_likelihood_parameters - return self.predict( - n=n, - series=series, - future_covariates=future_covariates, - num_samples=num_samples, - verbose=verbose, - **kwargs, - ) - @property def _supports_non_retrainable_historical_forecasts(self) -> bool: return True diff --git a/darts/models/forecasting/regression_ensemble_model.py b/darts/models/forecasting/regression_ensemble_model.py index afe9e0b9f6..5d665917ef 100644 --- a/darts/models/forecasting/regression_ensemble_model.py +++ b/darts/models/forecasting/regression_ensemble_model.py @@ -369,8 +369,12 @@ def fit( # maximize covariate usage model._fit_wrapper( series=forecast_training, - past_covariates=past_covariates, - future_covariates=future_covariates, + past_covariates=past_covariates + if model.supports_past_covariates + else None, + future_covariates=future_covariates + if model.supports_future_covariates + else None, ) # we can call direct prediction in any case. 
Even if we overwrite with historical @@ -407,8 +411,12 @@ def fit( for model in self.forecasting_models: model._fit_wrapper( series=series, - past_covariates=past_covariates, - future_covariates=future_covariates, + past_covariates=past_covariates + if model.supports_past_covariates + else None, + future_covariates=future_covariates + if model.supports_future_covariates + else None, ) return self diff --git a/darts/models/forecasting/regression_model.py b/darts/models/forecasting/regression_model.py index 3f4d77f33c..cc1dc61205 100644 --- a/darts/models/forecasting/regression_model.py +++ b/darts/models/forecasting/regression_model.py @@ -1104,6 +1104,7 @@ def _optimized_historical_forecasts( verbose: bool = False, show_warnings: bool = True, predict_likelihood_parameters: bool = False, + **kwargs, ) -> Union[ TimeSeries, List[TimeSeries], Sequence[TimeSeries], Sequence[List[TimeSeries]] ]: @@ -1139,6 +1140,7 @@ def _optimized_historical_forecasts( overlap_end=overlap_end, show_warnings=show_warnings, predict_likelihood_parameters=predict_likelihood_parameters, + **kwargs, ) else: return _optimized_historical_forecasts_all_points( @@ -1154,6 +1156,7 @@ def _optimized_historical_forecasts( overlap_end=overlap_end, show_warnings=show_warnings, predict_likelihood_parameters=predict_likelihood_parameters, + **kwargs, ) diff --git a/darts/models/forecasting/torch_forecasting_model.py b/darts/models/forecasting/torch_forecasting_model.py index 28cc5c8d0f..b8348af5a7 100644 --- a/darts/models/forecasting/torch_forecasting_model.py +++ b/darts/models/forecasting/torch_forecasting_model.py @@ -2045,6 +2045,7 @@ def _optimized_historical_forecasts( verbose: bool = False, show_warnings: bool = True, predict_likelihood_parameters: bool = False, + **kwargs, ) -> Union[ TimeSeries, List[TimeSeries], Sequence[TimeSeries], Sequence[List[TimeSeries]] ]: @@ -2073,8 +2074,9 @@ def _optimized_historical_forecasts( overlap_end=overlap_end, last_points_only=last_points_only, 
show_warnings=show_warnings, - predict_likelihood_parameters=predict_likelihood_parameters, verbose=verbose, + predict_likelihood_parameters=predict_likelihood_parameters, + **kwargs, ) return forecasts_list diff --git a/darts/tests/models/forecasting/test_backtesting.py b/darts/tests/models/forecasting/test_backtesting.py index 8f87381c2b..e54ca70d5d 100644 --- a/darts/tests/models/forecasting/test_backtesting.py +++ b/darts/tests/models/forecasting/test_backtesting.py @@ -465,6 +465,26 @@ def test_backtest_regression(self): ) assert score > 0.9 + @pytest.mark.parametrize("model_cls", [Theta, ARIMA]) + def test_backtest_bad_covariates(self, model_cls): + """Passing unsupported covariate should raise an exception""" + series = lt(start_value=1, end_value=10, length=31, dtype="float32") + model = model_cls() + bt_kwargs = {"start": -1, "start_format": "position", "show_warnings": False} + model.backtest(series=series, **bt_kwargs) + + with pytest.raises(ValueError) as msg: + model.backtest(series=series, past_covariates=series, **bt_kwargs) + assert str(msg.value).startswith( + "Model cannot be fit/trained with `past_covariates`." + ) + if not model.supports_future_covariates: + with pytest.raises(ValueError) as msg: + model.backtest(series=series, future_covariates=series, **bt_kwargs) + assert str(msg.value).startswith( + "Model cannot be fit/trained with `future_covariates`." 
+ ) + def test_gridsearch(self): np.random.seed(1) @@ -631,3 +651,45 @@ def test_gridsearch_multi(self): "pl_trainer_kwargs": [tfm_kwargs["pl_trainer_kwargs"]], } TCNModel.gridsearch(tcn_params, dummy_series, forecast_horizon=3, metric=mape) + + @pytest.mark.parametrize( + "model_cls,parameters", + zip([Theta, ARIMA], [{"theta": [3, 4]}, {"p": [18, 4]}]), + ) + def test_gridsearch_bad_covariates(self, model_cls, parameters): + """Passing unsupported covariate should raise an exception""" + dummy_series = get_dummy_series( + ts_length=100, lt_end_value=1, st_value_offset=0 + ).astype(np.float32) + ts_train, ts_val = dummy_series.split_before(split_point=0.8) + + bt_kwargs = {"start": -1, "start_format": "position", "show_warnings": False} + + model = model_cls() + model_cls.gridsearch( + parameters=parameters, series=ts_train, val_series=ts_val, **bt_kwargs + ) + + with pytest.raises(ValueError) as msg: + model_cls.gridsearch( + parameters=parameters, + series=ts_train, + past_covariates=dummy_series, + val_series=ts_val, + **bt_kwargs + ) + assert str(msg.value).startswith( + "Model cannot be fit/trained with `past_covariates`." + ) + if not model.supports_future_covariates: + with pytest.raises(ValueError) as msg: + model_cls.gridsearch( + parameters=parameters, + series=ts_train, + future_covariates=dummy_series, + val_series=ts_val, + **bt_kwargs + ) + assert str(msg.value).startswith( + "Model cannot be fit/trained with `future_covariates`." 
+ ) diff --git a/darts/tests/models/forecasting/test_historical_forecasts.py b/darts/tests/models/forecasting/test_historical_forecasts.py index 93ff1e18d6..77907f3f1a 100644 --- a/darts/tests/models/forecasting/test_historical_forecasts.py +++ b/darts/tests/models/forecasting/test_historical_forecasts.py @@ -1,4 +1,5 @@ import itertools +from itertools import product import numpy as np import pandas as pd @@ -333,6 +334,29 @@ class TestHistoricalforecast: # slightly longer to not affect the last predictable timestamp ts_covs = tg.gaussian_timeseries(length=30, start=start_ts) + @staticmethod + def create_model(ocl, use_ll=True, model_type="regression"): + if model_type == "regression": + return LinearRegressionModel( + lags=3, + likelihood="quantile" if use_ll else None, + quantiles=[0.05, 0.4, 0.5, 0.6, 0.95] if use_ll else None, + output_chunk_length=ocl, + ) + else: # model_type == "torch" + if not TORCH_AVAILABLE: + return None + return NLinearModel( + input_chunk_length=3, + likelihood=QuantileRegression([0.05, 0.4, 0.5, 0.6, 0.95]) + if use_ll + else None, + output_chunk_length=ocl, + n_epochs=1, + random_state=42, + **tfm_kwargs, + ) + def test_historical_forecasts_transferrable_future_cov_local_models(self): model = ARIMA() assert model.min_train_series_length == 30 @@ -356,6 +380,17 @@ def test_historical_forecasts_transferrable_future_cov_local_models(self): assert len(res) == 1 assert series.end_time() == res.time_index[0] + # passing non-supported covariates + with pytest.raises(ValueError) as msg: + model.historical_forecasts( + series, + past_covariates=series, + retrain=False, + ) + assert str(msg.value).startswith( + "Model prediction does not support `past_covariates`" + ) + def test_historical_forecasts_future_cov_local_models(self): model = AutoARIMA() assert model.min_train_series_length == 10 @@ -378,6 +413,17 @@ def test_historical_forecasts_future_cov_local_models(self): "with `retrain` set to `False`" ) + # passing non-supported covariates 
+ with pytest.raises(ValueError) as msg: + model.historical_forecasts( + series, + past_covariates=series, + retrain=True, + ) + assert str(msg.value).startswith( + "Model cannot be fit/trained with `past_covariates`." + ) + def test_historical_forecasts_local_models(self): model = NaiveSeasonal() assert model.min_train_series_length == 3 @@ -601,6 +647,28 @@ def test_historical_forecasts(self, config): f"retrain=True and overlap_end=False, and last_points_only=False" ) + if not model.supports_past_covariates: + with pytest.raises(ValueError) as msg: + model.historical_forecasts( + series=self.ts_pass_val_range, + past_covariates=self.ts_passengers, + retrain=True, + ) + assert str(msg.value).startswith( + "Model cannot be fit/trained with `past_covariates`." + ) + + if not model.supports_future_covariates: + with pytest.raises(ValueError) as msg: + model.historical_forecasts( + series=self.ts_pass_val_range, + future_covariates=self.ts_passengers, + last_points_only=False, + ) + assert str(msg.value).startswith( + "Model cannot be fit/trained with `future_covariates`." 
+ ) + def test_sanity_check_invalid_start(self): timeidx_ = tg.linear_timeseries(length=10) rangeidx_step1 = tg.linear_timeseries(start=0, length=10, freq=1) @@ -1827,29 +1895,7 @@ def test_predict_likelihood_parameters(self, model_type): """standard checks that historical forecasts work with direct likelihood parameter predictions with regression and torch models.""" - def create_model(ocl, use_ll=True, model_type="regression"): - if model_type == "regression": - return LinearRegressionModel( - lags=3, - likelihood="quantile" if use_ll else None, - quantiles=[0.05, 0.4, 0.5, 0.6, 0.95] if use_ll else None, - output_chunk_length=ocl, - ) - else: # model_type == "torch" - if not TORCH_AVAILABLE: - return None - return NLinearModel( - input_chunk_length=3, - likelihood=QuantileRegression([0.05, 0.4, 0.5, 0.6, 0.95]) - if use_ll - else None, - output_chunk_length=ocl, - n_epochs=1, - random_state=42, - **tfm_kwargs, - ) - - model = create_model(1, False, model_type=model_type) + model = self.create_model(1, False, model_type=model_type) # skip torch models if not installed if model is None: return @@ -1860,7 +1906,7 @@ def create_model(ocl, use_ll=True, model_type="regression"): predict_likelihood_parameters=True, ) - model = create_model(1, model_type=model_type) + model = self.create_model(1, model_type=model_type) # forecast_horizon > output_chunk_length doesn't work with pytest.raises(ValueError): model.historical_forecasts( @@ -1869,7 +1915,7 @@ def create_model(ocl, use_ll=True, model_type="regression"): forecast_horizon=2, ) - model = create_model(1, model_type=model_type) + model = self.create_model(1, model_type=model_type) # num_samples != 1 doesn't work with pytest.raises(ValueError): model.historical_forecasts( @@ -1884,7 +1930,7 @@ def create_model(ocl, use_ll=True, model_type="regression"): qs_expected = ["q0.05", "q0.40", "q0.50", "q0.60", "q0.95"] qs_expected = pd.Index([target_name + "_" + q for q in qs_expected]) # check that it works with retrain - 
model = create_model(1, model_type=model_type) + model = self.create_model(1, model_type=model_type) hist_fc = model.historical_forecasts( self.ts_pass_train, predict_likelihood_parameters=True, @@ -1897,7 +1943,7 @@ def create_model(ocl, use_ll=True, model_type="regression"): assert len(hist_fc) == n # check for equal results between predict and hist fc without retraining - model = create_model(1, model_type=model_type) + model = self.create_model(1, model_type=model_type) model.fit(series=self.ts_pass_train[:-n]) hist_fc = model.historical_forecasts( self.ts_pass_train, @@ -1926,7 +1972,7 @@ def create_model(ocl, use_ll=True, model_type="regression"): # check equal results between predict and hist fc with higher output_chunk_length and horizon, # and last_points_only=False - model = create_model(2, model_type=model_type) + model = self.create_model(2, model_type=model_type) # we take one more training step so that model trained on ocl=1 has the same training samples # as model above model.fit(series=self.ts_pass_train[: -(n - 1)]) @@ -1959,3 +2005,142 @@ def create_model(ocl, use_ll=True, model_type="regression"): p.all_values(copy=False), hfc.all_values(copy=False) ) assert len(hist_fc) == n + 1 + + @pytest.mark.parametrize( + "model_type,enable_optimization", + product(["regression", "torch"], [True, False]), + ) + def test_fit_kwargs(self, model_type, enable_optimization): + """check that the parameters provided in fit_kwargs are correctly processed""" + valid_fit_kwargs = {"max_samples_per_ts": 3} + invalid_fit_kwargs = {"series": self.ts_pass_train} + if model_type == "regression": + unsupported_fit_kwargs = {"trainer": None} + else: + unsupported_fit_kwargs = {"n_jobs_multioutput_wrapper": False} + + n = 2 + model = self.create_model(1, use_ll=False, model_type=model_type) + + # torch not available + if model is None: + return + + model.fit(series=self.ts_pass_train[:-n]) + + # supported argument + hist_fc = model.historical_forecasts( + self.ts_pass_train, 
+ forecast_horizon=1, + num_samples=1, + start=len(self.ts_pass_train) - n, + retrain=True, + enable_optimization=enable_optimization, + fit_kwargs=valid_fit_kwargs, + ) + + assert hist_fc.components.equals(self.ts_pass_train.components) + assert len(hist_fc) == n + + # passing unsupported argument + hist_fc = model.historical_forecasts( + self.ts_pass_train, + forecast_horizon=1, + start=len(self.ts_pass_train) - n, + retrain=True, + enable_optimization=enable_optimization, + fit_kwargs=unsupported_fit_kwargs, + ) + + assert hist_fc.components.equals(self.ts_pass_train.components) + assert len(hist_fc) == n + + # passing hist_fc parameters in fit_kwargs, with retrain=False + hist_fc = model.historical_forecasts( + self.ts_pass_train, + forecast_horizon=1, + start=len(self.ts_pass_train) - n, + retrain=False, + enable_optimization=enable_optimization, + fit_kwargs=invalid_fit_kwargs, + ) + + assert hist_fc.components.equals(self.ts_pass_train.components) + assert len(hist_fc) == n + + # passing hist_fc parameters in fit_kwargs, interfering with the logic + with pytest.raises(ValueError) as msg: + model.historical_forecasts( + self.ts_pass_train, + forecast_horizon=1, + start=len(self.ts_pass_train) - n, + retrain=True, + enable_optimization=enable_optimization, + fit_kwargs=invalid_fit_kwargs, + ) + assert str(msg.value).startswith( + "The following parameters cannot be passed in `fit_kwargs`" + ) + + @pytest.mark.parametrize( + "model_type,enable_optimization", + product(["regression", "torch"], [True, False]), + ) + def test_predict_kwargs(self, model_type, enable_optimization): + """check that the parameters provided in predict_kwargs are correctly processed""" + invalid_predict_kwargs = {"predict_likelihood_parameters": False} + if model_type == "regression": + valid_predict_kwargs = {} + unsupported_predict_kwargs = {"batch_size": 10} + else: + valid_predict_kwargs = {"batch_size": 10} + unsupported_predict_kwargs = {"unsupported": "unsupported"} + + n = 2 + 
model = self.create_model(1, use_ll=False, model_type=model_type) + + # torch not available + if model is None: + return + + model.fit(series=self.ts_pass_train[:-n]) + + # supported argument + hist_fc = model.historical_forecasts( + self.ts_pass_train, + forecast_horizon=1, + start=len(self.ts_pass_train) - n, + retrain=False, + enable_optimization=enable_optimization, + predict_kwargs=valid_predict_kwargs, + ) + + assert hist_fc.components.equals(self.ts_pass_train.components) + assert len(hist_fc) == n + + # passing unsupported argument + hist_fc = model.historical_forecasts( + self.ts_pass_train, + forecast_horizon=1, + start=len(self.ts_pass_train) - n, + retrain=False, + enable_optimization=enable_optimization, + predict_kwargs=unsupported_predict_kwargs, + ) + + assert hist_fc.components.equals(self.ts_pass_train.components) + assert len(hist_fc) == n + + # passing hist_fc parameters in predict_kwargs, interfering with the logic + with pytest.raises(ValueError) as msg: + hist_fc = model.historical_forecasts( + self.ts_pass_train, + forecast_horizon=1, + start=len(self.ts_pass_train) - n, + retrain=False, + enable_optimization=enable_optimization, + predict_kwargs=invalid_predict_kwargs, + ) + assert str(msg.value).startswith( + "The following parameters cannot be passed in `predict_kwargs`" + ) diff --git a/darts/utils/historical_forecasts/optimized_historical_forecasts_regression.py b/darts/utils/historical_forecasts/optimized_historical_forecasts_regression.py index 8d64bbbd45..2876d716eb 100644 --- a/darts/utils/historical_forecasts/optimized_historical_forecasts_regression.py +++ b/darts/utils/historical_forecasts/optimized_historical_forecasts_regression.py @@ -31,6 +31,7 @@ def _optimized_historical_forecasts_last_points_only( overlap_end: bool = False, show_warnings: bool = True, predict_likelihood_parameters: bool = False, + **kwargs, ) -> Union[ TimeSeries, List[TimeSeries], Sequence[TimeSeries], Sequence[List[TimeSeries]] ]: @@ -128,6 +129,7 @@ def 
_optimized_historical_forecasts_last_points_only( x=np.repeat(X, num_samples, axis=0), num_samples=num_samples, predict_likelihood_parameters=predict_likelihood_parameters, + **kwargs, ) # forecast has shape ((forecastable_index_length-1)*num_samples, k, n_component) # where k = output_chunk length if multi_models, 1 otherwise @@ -177,6 +179,7 @@ def _optimized_historical_forecasts_all_points( overlap_end: bool = False, show_warnings: bool = True, predict_likelihood_parameters: bool = False, + **kwargs, ) -> Union[ TimeSeries, List[TimeSeries], Sequence[TimeSeries], Sequence[List[TimeSeries]] ]: @@ -273,6 +276,7 @@ def _optimized_historical_forecasts_all_points( x=np.repeat(X, num_samples, axis=0), num_samples=num_samples, predict_likelihood_parameters=predict_likelihood_parameters, + **kwargs, ) # reshape and stride the forecast into (forecastable_index, forecast_horizon, n_components, num_samples) diff --git a/darts/utils/historical_forecasts/optimized_historical_forecasts_torch.py b/darts/utils/historical_forecasts/optimized_historical_forecasts_torch.py index f41cbbfdfb..2079850e2b 100644 --- a/darts/utils/historical_forecasts/optimized_historical_forecasts_torch.py +++ b/darts/utils/historical_forecasts/optimized_historical_forecasts_torch.py @@ -5,6 +5,8 @@ except ImportError: from typing_extensions import Literal +import inspect + import numpy as np import pandas as pd @@ -32,8 +34,9 @@ def _optimized_historical_forecasts( overlap_end: bool = False, last_points_only: bool = True, show_warnings: bool = True, - predict_likelihood_parameters: bool = False, verbose: bool = False, + predict_likelihood_parameters: bool = False, + **kwargs, ) -> Union[ TimeSeries, List[TimeSeries], Sequence[TimeSeries], Sequence[List[TimeSeries]] ]: @@ -92,6 +95,7 @@ def _optimized_historical_forecasts( for cls in model.__class__.__mro__ if cls.__name__ == "TorchForecastingModel" ][0] + super_predict_params = inspect.signature(super(tfm_cls, model).predict).parameters 
super(tfm_cls, model).predict( forecast_horizon, series, @@ -99,6 +103,7 @@ def _optimized_historical_forecasts( future_covariates, num_samples=num_samples, predict_likelihood_parameters=predict_likelihood_parameters, + **{k: v for k, v in kwargs.items() if k in super_predict_params}, ) dataset = model._build_inference_dataset( @@ -113,10 +118,10 @@ def _optimized_historical_forecasts( predictions = model.predict_from_dataset( forecast_horizon, dataset, - trainer=None, verbose=verbose, num_samples=num_samples, predict_likelihood_parameters=predict_likelihood_parameters, + **kwargs, ) # torch models return list of time series in order of historical forecasts: we reorder per time series diff --git a/darts/utils/historical_forecasts/utils.py b/darts/utils/historical_forecasts/utils.py index fac3d4029e..7ed179ca15 100644 --- a/darts/utils/historical_forecasts/utils.py +++ b/darts/utils/historical_forecasts/utils.py @@ -1,11 +1,13 @@ from types import SimpleNamespace -from typing import Any, Callable, Optional, Sequence, Tuple, Union +from typing import Any, Callable, Dict, Optional, Sequence, Set, Tuple, Union try: from typing import Literal except ImportError: from typing_extensions import Literal +import inspect + import numpy as np import pandas as pd from numpy.typing import ArrayLike @@ -209,6 +211,86 @@ def _historical_forecasts_general_checks(model, series, kwargs): ) +def _historical_forecasts_sanitize_kwargs( + model, + fit_kwargs: Optional[Dict[str, Any]], + predict_kwargs: Optional[Dict[str, Any]], + retrain: bool, + show_warnings: bool, +) -> Tuple[Dict[str, Any], Dict[str, Any]]: + """Convert kwargs to dictionary, check that their content is compatible with called methods.""" + hfc_args = set(inspect.signature(model.historical_forecasts).parameters) + # replace `forecast_horizon` with `n` + hfc_args = hfc_args - {"forecast_horizon"} + hfc_args = hfc_args.union({"n"}) + + if fit_kwargs is None: + fit_kwargs = dict() + elif retrain: + fit_args = 
set(inspect.signature(model.fit).parameters) + fit_kwargs = _historical_forecasts_check_kwargs( + hfc_args=hfc_args, + name_kwargs="fit_kwargs", + dict_kwargs=fit_kwargs, + method_args=fit_args, + show_warnings=show_warnings, + ) + elif show_warnings: + logger.warning( + "`fit_kwargs` was provided with `retrain=False`, the argument will be ignored." + ) + + if predict_kwargs is None: + predict_kwargs = dict() + else: + predict_args = set(inspect.signature(model.predict).parameters) + predict_kwargs = _historical_forecasts_check_kwargs( + hfc_args=hfc_args, + name_kwargs="predict_kwargs", + dict_kwargs=predict_kwargs, + method_args=predict_args, + show_warnings=show_warnings, + ) + + return fit_kwargs, predict_kwargs + + +def _historical_forecasts_check_kwargs( + hfc_args: Set[str], + name_kwargs: str, + dict_kwargs: Dict[str, Any], + method_args: Set[str], + show_warnings: bool, +) -> Dict[str, Any]: + """ + Return the kwargs dict without the arguments unsupported by the model method. + + Log a warning if some arguments are not supported and raise an exception if some arguments interfere with + historical_forecasts logic. + """ + invalid_args = set(dict_kwargs).intersection(hfc_args) + if len(invalid_args) > 0: + raise_log( + ValueError( + f"The following parameters cannot be passed in `{name_kwargs}`: {invalid_args}. " + f"Make sure to pass them explicitly to the function/method." + ), + logger, + ) + + ignored_args = set(dict_kwargs) - method_args + if len(ignored_args) > 0: + # remove unsupported arguments to avoid an exception thrown by Python + dict_kwargs = {k: v for k, v in dict_kwargs.items() if k not in ignored_args} + if show_warnings: + logger.warning( + f"The following parameters in `{name_kwargs}` will be ignored as they are not supported by " + f"model method: {ignored_args}." + ) + + return dict_kwargs + + +def _historical_forecasts_start_warnings( idx: int, start: Union[pd.Timestamp, int],