diff --git a/CHANGELOG.md b/CHANGELOG.md
index 5d4bb741b8..10812c7abd 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -20,9 +20,10 @@ but cannot always guarantee backwards compatibility. Changes that may **break co
 - Improvements to Regression Models:
   - `XGBModel` now leverages XGBoost's native Quantile Regression support that was released in version 2.0.0 for improved probabilistic forecasts. [#2051](https://github.com/unit8co/darts/pull/2051) by [Dennis Bader](https://github.com/dennisbader).
 - Other improvements:
-  - Added support for time index time zone conversion with parameter `tz` before generating/computing holidays and datetime attributes. Support was added to all Time Axis Encoders (standalone encoders and forecasting models' `add_encoders`, time series generation utils functions `holidays_timeseries()` and `datetime_attribute_timeseries()`, and `TimeSeries` methods `add_datetime_attribute()` and `add_holidays()`. [#2054](https://github.com/unit8co/darts/pull/2054) by [Dennis Bader](https://github.com/dennisbader).
+  - Added support for time index time zone conversion with parameter `tz` before generating/computing holidays and datetime attributes. Support was added to all Time Axis Encoders, standalone encoders and forecasting models' `add_encoders`, time series generation utils functions `holidays_timeseries()` and `datetime_attribute_timeseries()`, and `TimeSeries` methods `add_datetime_attribute()` and `add_holidays()`. [#2054](https://github.com/unit8co/darts/pull/2054) by [Dennis Bader](https://github.com/dennisbader).
   - Added optional keyword arguments dict `kwargs` to `ExponentialSmoothing` that will be passed to the constructor of the underlying `statsmodels.tsa.holtwinters.ExponentialSmoothing` model. [#2059](https://github.com/unit8co/darts/pull/2059) by [Antoine Madrona](https://github.com/madtoinou).
   - Added new dataset `ElectricityConsumptionZurichDataset`: The dataset contains the electricity consumption of households in Zurich, Switzerland from 2015-2022 on different grid levels. We also added weather measurements for Zurich which can be used as covariates for modelling. [#2039](https://github.com/unit8co/darts/pull/2039) by [Antoine Madrona](https://github.com/madtoinou) and [Dennis Bader](https://github.com/dennisbader).
+  - Added new arguments `fit_kwargs` and `predict_kwargs` to `historical_forecasts()`, `backtest()` and `gridsearch()` that will be passed to the model's `fit()` and / or `predict()` methods. E.g., you can now set a batch size, static validation series, ... depending on the model support. [#2050](https://github.com/unit8co/darts/pull/2050) by [Antoine Madrona](https://github.com/madtoinou).
 
 **Fixed**
 - Fixed a bug when calling optimized `historical_forecasts()` for a `RegressionModel` trained with unequal component-specific lags. [#2040](https://github.com/unit8co/darts/pull/2040) by [Antoine Madrona](https://github.com/madtoinou).
diff --git a/darts/datasets/__init__.py b/darts/datasets/__init__.py
index 9b8262e128..5f2954727e 100644
--- a/darts/datasets/__init__.py
+++ b/darts/datasets/__init__.py
@@ -829,7 +829,8 @@ class ElectricityConsumptionZurichDataset(DatasetLoaderCSV):
     To simplify the dataset, the measurements from the Zch_Schimmelstrasse and Zch_Rosengartenstrasse weather
     stations are discarded to keep only the data recorded in the Zch_Stampfenbachstrasse station.
 
-    Both dataset sources are updated continuously, but this dataset only retrains values between 2015 and 2022.
+    Both dataset sources are updated continuously, but this dataset only retains values between 2015-01-01 and
+    2022-08-31.
     The time index was converted from CET time zone to UTC.
 
     Components Descriptions:
@@ -864,7 +865,7 @@ def pre_process_dataset(dataset_path):
             # extract pre-determined period
             df = df.loc[
                 (pd.Timestamp("2015-01-01") <= df.index)
-                & (df.index <= pd.Timestamp("2022-12-31"))
+                & (df.index <= pd.Timestamp("2022-08-31"))
             ]
             # download and preprocess the weather information
             df_weather = self._download_weather_data()
@@ -894,7 +895,7 @@ def pre_process_dataset(dataset_path):
                     "ewz_stromabgabe_netzebenen_stadt_zuerich/"
                     "download/ewz_stromabgabe_netzebenen_stadt_zuerich.csv"
                 ),
-                hash="c2fea1a0974611ff1c276abcc1d34619",
+                hash="a019125b7f9c1afeacb0ae60ce7455ef",
                 header_time="Timestamp",
                 freq="15min",
                 pre_process_csv_fn=pre_process_dataset,
@@ -919,6 +920,6 @@ def _download_weather_data():
         )
         df = df.loc[
             (pd.Timestamp("2015-01-01") <= df.index)
-            & (df.index <= pd.Timestamp("2022-12-31"))
+            & (df.index <= pd.Timestamp("2022-08-31"))
         ]
         return df
diff --git a/darts/models/forecasting/baselines.py b/darts/models/forecasting/baselines.py
index 93309f24d1..bee9d23cd0 100644
--- a/darts/models/forecasting/baselines.py
+++ b/darts/models/forecasting/baselines.py
@@ -332,8 +332,12 @@ def fit(
             for model in self.forecasting_models:
                 model._fit_wrapper(
                     series=series,
-                    past_covariates=past_covariates,
-                    future_covariates=future_covariates,
+                    past_covariates=past_covariates
+                    if model.supports_past_covariates
+                    else None,
+                    future_covariates=future_covariates
+                    if model.supports_future_covariates
+                    else None,
                 )
 
         return self
diff --git a/darts/models/forecasting/forecasting_model.py b/darts/models/forecasting/forecasting_model.py
index 1d3bf14eac..2c2bd4909c 100644
--- a/darts/models/forecasting/forecasting_model.py
+++ b/darts/models/forecasting/forecasting_model.py
@@ -42,6 +42,7 @@
     _get_historical_forecast_predict_index,
     _get_historical_forecast_train_index,
     _historical_forecasts_general_checks,
+    _historical_forecasts_sanitize_kwargs,
     _reconciliate_historical_time_indices,
 )
 from darts.utils.timeseries_generation import (
@@ -316,23 +317,47 @@ def _fit_wrapper(
         series: TimeSeries,
         past_covariates: Optional[TimeSeries],
         future_covariates: Optional[TimeSeries],
+        **kwargs,
     ):
-        self.fit(series)
+        supported_params = inspect.signature(self.fit).parameters
+        kwargs_ = {k: v for k, v in kwargs.items() if k in supported_params}
+
+        # handle past and future covariates based on model support
+        for covs, name in zip([past_covariates, future_covariates], ["past", "future"]):
+            covs_name = f"{name}_covariates"
+            if getattr(self, f"supports_{covs_name}"):
+                kwargs_[covs_name] = covs
+            elif covs is not None:
+                raise_log(
+                    ValueError(f"Model cannot be fit/trained with `{covs_name}`."),
+                    logger,
+                )
+        self.fit(series, **kwargs_)
 
     def _predict_wrapper(
         self,
         n: int,
-        series: TimeSeries,
-        past_covariates: Optional[TimeSeries],
-        future_covariates: Optional[TimeSeries],
-        num_samples: int,
-        verbose: bool = False,
-        predict_likelihood_parameters: bool = False,
-    ) -> TimeSeries:
-        kwargs = dict()
-        if self.supports_likelihood_parameter_prediction:
-            kwargs["predict_likelihood_parameters"] = predict_likelihood_parameters
-        return self.predict(n, num_samples=num_samples, verbose=verbose, **kwargs)
+        **kwargs,
+    ) -> Union[TimeSeries, Sequence[TimeSeries]]:
+        supported_params = set(inspect.signature(self.predict).parameters)
+
+        # if predict() accepts covariates, the model might not support them at inference
+        for covs_name in ["past_covariates", "future_covariates"]:
+            if covs_name in kwargs and not getattr(self, f"supports_{covs_name}"):
+                if kwargs[covs_name] is None:
+                    supported_params = supported_params - {covs_name}
+                else:
+                    raise_log(
+                        ValueError(
+                            f"Model prediction does not support `{covs_name}`, either because it "
+                            f"does not support `{covs_name}` in general, or because it was fit/trained "
+                            f"without using `{covs_name}`."
+                        ),
+                        logger,
+                    )
+
+        kwargs_ = {k: v for k, v in kwargs.items() if k in supported_params}
+        return self.predict(n, **kwargs_)
 
     @property
     def min_train_series_length(self) -> int:
@@ -586,6 +611,8 @@ def historical_forecasts(
         show_warnings: bool = True,
         predict_likelihood_parameters: bool = False,
         enable_optimization: bool = True,
+        fit_kwargs: Optional[Dict[str, Any]] = None,
+        predict_kwargs: Optional[Dict[str, Any]] = None,
     ) -> Union[
         TimeSeries, List[TimeSeries], Sequence[TimeSeries], Sequence[List[TimeSeries]]
     ]:
@@ -692,6 +719,10 @@ def historical_forecasts(
             Default: ``False``
         enable_optimization
             Whether to use the optimized version of historical_forecasts when supported and available.
+        fit_kwargs
+            Additional arguments passed to the model `fit()` method.
+        predict_kwargs
+            Additional arguments passed to the model `predict()` method.
 
         Returns
         -------
@@ -802,6 +833,15 @@ def retrain_func(
                 logger,
             )
 
+        # remove unsupported arguments, raise an exception if they interfere with the historical forecasts logic
+        fit_kwargs, predict_kwargs = _historical_forecasts_sanitize_kwargs(
+            model=model,
+            fit_kwargs=fit_kwargs,
+            predict_kwargs=predict_kwargs,
+            retrain=retrain is not False and retrain != 0,
+            show_warnings=show_warnings,
+        )
+
         series = series2seq(series)
         past_covariates = series2seq(past_covariates)
         future_covariates = series2seq(future_covariates)
@@ -829,6 +869,7 @@ def retrain_func(
                 verbose=verbose,
                 show_warnings=show_warnings,
                 predict_likelihood_parameters=predict_likelihood_parameters,
+                **predict_kwargs,
             )
 
         if len(series) == 1:
@@ -969,6 +1010,7 @@ def retrain_func(
                             series=train_series,
                             past_covariates=past_covariates_,
                             future_covariates=future_covariates_,
+                            **fit_kwargs,
                         )
                     else:
                         # untrained model was not trained on the first trainable timestamp
@@ -1019,6 +1061,7 @@ def retrain_func(
                     num_samples=num_samples,
                     verbose=verbose,
                     predict_likelihood_parameters=predict_likelihood_parameters,
+                    **predict_kwargs,
                 )
                 if forecast_components is None:
                     forecast_components = forecast.columns
@@ -1076,6 +1119,8 @@ def backtest(
         reduction: Union[Callable[[np.ndarray], float], None] = np.mean,
         verbose: bool = False,
         show_warnings: bool = True,
+        fit_kwargs: Optional[Dict[str, Any]] = None,
+        predict_kwargs: Optional[Dict[str, Any]] = None,
     ) -> Union[float, List[float], Sequence[float], List[Sequence[float]]]:
         """Compute error values that the model would have produced when
         used on (potentially multiple) `series`.
@@ -1185,6 +1230,10 @@ def backtest(
             Whether to print progress.
         show_warnings
             Whether to show warnings related to parameters `start`, and `train_length`.
+        fit_kwargs
+            Additional arguments passed to the model `fit()` method.
+        predict_kwargs
+            Additional arguments passed to the model `predict()` method.
 
         Returns
         -------
@@ -1208,6 +1257,8 @@ def backtest(
                 last_points_only=last_points_only,
                 verbose=verbose,
                 show_warnings=show_warnings,
+                fit_kwargs=fit_kwargs,
+                predict_kwargs=predict_kwargs,
             )
         else:
             forecasts = historical_forecasts
@@ -1261,6 +1312,8 @@ def gridsearch(
         verbose=False,
         n_jobs: int = 1,
         n_random_samples: Optional[Union[int, float]] = None,
+        fit_kwargs: Optional[Dict[str, Any]] = None,
+        predict_kwargs: Optional[Dict[str, Any]] = None,
     ) -> Tuple["ForecastingModel", Dict[str, Any], float]:
         """
         Find the best hyper-parameters among a given set using a grid search.
@@ -1374,6 +1427,10 @@ def gridsearch(
             must be between `0` and the total number of parameter combinations.
             If a float, `n_random_samples` is the ratio of parameter combinations selected from the full grid and must
             be between `0` and `1`. Defaults to `None`, for which random selection will be ignored.
+        fit_kwargs
+            Additional arguments passed to the model `fit()` method.
+        predict_kwargs
+            Additional arguments passed to the model `predict()` method.
 
         Returns
         -------
@@ -1406,10 +1463,10 @@ def gridsearch(
                 logger,
             )
 
-        # TODO: here too I'd say we can leave these checks to the models
-        # if covariates is not None:
-        #     raise_if_not(series.has_same_time_as(covariates), 'The provided series and covariates must have the '
-        #                                                       'same time axes.')
+        if fit_kwargs is None:
+            fit_kwargs = dict()
+        if predict_kwargs is None:
+            predict_kwargs = dict()
 
         # compute all hyperparameter combinations from selection
         params_cross_product = list(product(*parameters.values()))
@@ -1437,7 +1494,12 @@ def _evaluate_combination(param_combination) -> float:
 
             model = model_class(**param_combination_dict)
             if use_fitted_values:  # fitted value mode
-                model._fit_wrapper(series, past_covariates, future_covariates)
+                model._fit_wrapper(
+                    series=series,
+                    past_covariates=past_covariates,
+                    future_covariates=future_covariates,
+                    **fit_kwargs,
+                )
                 fitted_values = TimeSeries.from_times_and_values(
                     series.time_index, model.fitted_values
                 )
@@ -1457,16 +1519,24 @@ def _evaluate_combination(param_combination) -> float:
                     last_points_only=last_points_only,
                     verbose=verbose,
                     show_warnings=show_warnings,
+                    fit_kwargs=fit_kwargs,
+                    predict_kwargs=predict_kwargs,
                 )
             else:  # split mode
-                model._fit_wrapper(series, past_covariates, future_covariates)
+                model._fit_wrapper(
+                    series=series,
+                    past_covariates=past_covariates,
+                    future_covariates=future_covariates,
+                    **fit_kwargs,
+                )
                 pred = model._predict_wrapper(
-                    len(val_series),
-                    series,
-                    past_covariates,
-                    future_covariates,
+                    n=len(val_series),
+                    series=series,
+                    past_covariates=past_covariates,
+                    future_covariates=future_covariates,
                     num_samples=1,
                     verbose=verbose,
+                    **predict_kwargs,
                 )
                 error = metric(val_series, pred)
 
@@ -2211,43 +2281,6 @@ def predict(
                 )
             )
 
-    def _predict_wrapper(
-        self,
-        n: int,
-        series: Union[TimeSeries, Sequence[TimeSeries]],
-        past_covariates: Optional[Union[TimeSeries, Sequence[TimeSeries]]],
-        future_covariates: Optional[Union[TimeSeries, Sequence[TimeSeries]]],
-        num_samples: int,
-        verbose: bool = False,
-        predict_likelihood_parameters: bool = False,
-    ) -> Union[TimeSeries, Sequence[TimeSeries]]:
-        kwargs = dict()
-        if self.supports_likelihood_parameter_prediction:
-            kwargs["predict_likelihood_parameters"] = predict_likelihood_parameters
-        return self.predict(
-            n,
-            series,
-            past_covariates=past_covariates,
-            future_covariates=future_covariates,
-            num_samples=num_samples,
-            verbose=verbose,
-            **kwargs,
-        )
-
-    def _fit_wrapper(
-        self,
-        series: Union[TimeSeries, Sequence[TimeSeries]],
-        past_covariates: Optional[Union[TimeSeries, Sequence[TimeSeries]]],
-        future_covariates: Optional[Union[TimeSeries, Sequence[TimeSeries]]],
-    ):
-        self.fit(
-            series=series,
-            past_covariates=past_covariates if self.supports_past_covariates else None,
-            future_covariates=future_covariates
-            if self.supports_future_covariates
-            else None,
-        )
-
     @property
     def _supports_non_retrainable_historical_forecasts(self) -> bool:
         """GlobalForecastingModel supports historical forecasts without retraining the model"""
@@ -2340,6 +2373,7 @@ def fit(self, series: TimeSeries, future_covariates: Optional[TimeSeries] = None
                 logger=logger,
             )
             self._expect_future_covariates = True
+            self._uses_future_covariates = True
 
         self.encoders = self.initialize_encoders()
         if self.encoders.encoding_available:
@@ -2448,35 +2482,6 @@ def _predict(
         """
         pass
 
-    def _fit_wrapper(
-        self,
-        series: TimeSeries,
-        past_covariates: Optional[TimeSeries],
-        future_covariates: Optional[TimeSeries],
-    ):
-        self.fit(series, future_covariates=future_covariates)
-
-    def _predict_wrapper(
-        self,
-        n: int,
-        series: TimeSeries,
-        past_covariates: Optional[TimeSeries],
-        future_covariates: Optional[TimeSeries],
-        num_samples: int,
-        verbose: bool = False,
-        predict_likelihood_parameters: bool = False,
-    ) -> TimeSeries:
-        kwargs = dict()
-        if self.supports_likelihood_parameter_prediction:
-            kwargs["predict_likelihood_parameters"] = predict_likelihood_parameters
-        return self.predict(
-            n,
-            future_covariates=future_covariates,
-            num_samples=num_samples,
-            verbose=verbose,
-            **kwargs,
-        )
-
     @property
     def _model_encoder_settings(
         self,
@@ -2673,28 +2678,6 @@ def _predict(
         """
         pass
 
-    def _predict_wrapper(
-        self,
-        n: int,
-        series: TimeSeries,
-        past_covariates: Optional[TimeSeries],
-        future_covariates: Optional[TimeSeries],
-        num_samples: int,
-        verbose: bool = False,
-        predict_likelihood_parameters: bool = False,
-    ) -> TimeSeries:
-        kwargs = dict()
-        if self.supports_likelihood_parameter_prediction:
-            kwargs["predict_likelihood_parameters"] = predict_likelihood_parameters
-        return self.predict(
-            n=n,
-            series=series,
-            future_covariates=future_covariates,
-            num_samples=num_samples,
-            verbose=verbose,
-            **kwargs,
-        )
-
     @property
     def _supports_non_retrainable_historical_forecasts(self) -> bool:
         return True
diff --git a/darts/models/forecasting/regression_ensemble_model.py b/darts/models/forecasting/regression_ensemble_model.py
index afe9e0b9f6..5d665917ef 100644
--- a/darts/models/forecasting/regression_ensemble_model.py
+++ b/darts/models/forecasting/regression_ensemble_model.py
@@ -369,8 +369,12 @@ def fit(
                 # maximize covariate usage
                 model._fit_wrapper(
                     series=forecast_training,
-                    past_covariates=past_covariates,
-                    future_covariates=future_covariates,
+                    past_covariates=past_covariates
+                    if model.supports_past_covariates
+                    else None,
+                    future_covariates=future_covariates
+                    if model.supports_future_covariates
+                    else None,
                 )
 
         # we can call direct prediction in any case. Even if we overwrite with historical
@@ -407,8 +411,12 @@ def fit(
             for model in self.forecasting_models:
                 model._fit_wrapper(
                     series=series,
-                    past_covariates=past_covariates,
-                    future_covariates=future_covariates,
+                    past_covariates=past_covariates
+                    if model.supports_past_covariates
+                    else None,
+                    future_covariates=future_covariates
+                    if model.supports_future_covariates
+                    else None,
                 )
         return self
 
diff --git a/darts/models/forecasting/regression_model.py b/darts/models/forecasting/regression_model.py
index 3f4d77f33c..cc1dc61205 100644
--- a/darts/models/forecasting/regression_model.py
+++ b/darts/models/forecasting/regression_model.py
@@ -1104,6 +1104,7 @@ def _optimized_historical_forecasts(
         verbose: bool = False,
         show_warnings: bool = True,
         predict_likelihood_parameters: bool = False,
+        **kwargs,
     ) -> Union[
         TimeSeries, List[TimeSeries], Sequence[TimeSeries], Sequence[List[TimeSeries]]
     ]:
@@ -1139,6 +1140,7 @@ def _optimized_historical_forecasts(
                 overlap_end=overlap_end,
                 show_warnings=show_warnings,
                 predict_likelihood_parameters=predict_likelihood_parameters,
+                **kwargs,
             )
         else:
             return _optimized_historical_forecasts_all_points(
@@ -1154,6 +1156,7 @@ def _optimized_historical_forecasts(
                 overlap_end=overlap_end,
                 show_warnings=show_warnings,
                 predict_likelihood_parameters=predict_likelihood_parameters,
+                **kwargs,
             )
 
 
diff --git a/darts/models/forecasting/torch_forecasting_model.py b/darts/models/forecasting/torch_forecasting_model.py
index 28cc5c8d0f..b8348af5a7 100644
--- a/darts/models/forecasting/torch_forecasting_model.py
+++ b/darts/models/forecasting/torch_forecasting_model.py
@@ -2045,6 +2045,7 @@ def _optimized_historical_forecasts(
         verbose: bool = False,
         show_warnings: bool = True,
         predict_likelihood_parameters: bool = False,
+        **kwargs,
     ) -> Union[
         TimeSeries, List[TimeSeries], Sequence[TimeSeries], Sequence[List[TimeSeries]]
     ]:
@@ -2073,8 +2074,9 @@ def _optimized_historical_forecasts(
             overlap_end=overlap_end,
             last_points_only=last_points_only,
             show_warnings=show_warnings,
-            predict_likelihood_parameters=predict_likelihood_parameters,
             verbose=verbose,
+            predict_likelihood_parameters=predict_likelihood_parameters,
+            **kwargs,
         )
         return forecasts_list
 
diff --git a/darts/tests/models/forecasting/test_backtesting.py b/darts/tests/models/forecasting/test_backtesting.py
index 8f87381c2b..e54ca70d5d 100644
--- a/darts/tests/models/forecasting/test_backtesting.py
+++ b/darts/tests/models/forecasting/test_backtesting.py
@@ -465,6 +465,26 @@ def test_backtest_regression(self):
         )
         assert score > 0.9
 
+    @pytest.mark.parametrize("model_cls", [Theta, ARIMA])
+    def test_backtest_bad_covariates(self, model_cls):
+        """Passing unsupported covariate should raise an exception"""
+        series = lt(start_value=1, end_value=10, length=31, dtype="float32")
+        model = model_cls()
+        bt_kwargs = {"start": -1, "start_format": "position", "show_warnings": False}
+        model.backtest(series=series, **bt_kwargs)
+
+        with pytest.raises(ValueError) as msg:
+            model.backtest(series=series, past_covariates=series, **bt_kwargs)
+        assert str(msg.value).startswith(
+            "Model cannot be fit/trained with `past_covariates`."
+        )
+        if not model.supports_future_covariates:
+            with pytest.raises(ValueError) as msg:
+                model.backtest(series=series, future_covariates=series, **bt_kwargs)
+            assert str(msg.value).startswith(
+                "Model cannot be fit/trained with `future_covariates`."
+            )
+
     def test_gridsearch(self):
         np.random.seed(1)
 
@@ -631,3 +651,45 @@ def test_gridsearch_multi(self):
             "pl_trainer_kwargs": [tfm_kwargs["pl_trainer_kwargs"]],
         }
         TCNModel.gridsearch(tcn_params, dummy_series, forecast_horizon=3, metric=mape)
+
+    @pytest.mark.parametrize(
+        "model_cls,parameters",
+        zip([Theta, ARIMA], [{"theta": [3, 4]}, {"p": [18, 4]}]),
+    )
+    def test_gridsearch_bad_covariates(self, model_cls, parameters):
+        """Passing unsupported covariate should raise an exception"""
+        dummy_series = get_dummy_series(
+            ts_length=100, lt_end_value=1, st_value_offset=0
+        ).astype(np.float32)
+        ts_train, ts_val = dummy_series.split_before(split_point=0.8)
+
+        bt_kwargs = {"start": -1, "start_format": "position", "show_warnings": False}
+
+        model = model_cls()
+        model_cls.gridsearch(
+            parameters=parameters, series=ts_train, val_series=ts_val, **bt_kwargs
+        )
+
+        with pytest.raises(ValueError) as msg:
+            model_cls.gridsearch(
+                parameters=parameters,
+                series=ts_train,
+                past_covariates=dummy_series,
+                val_series=ts_val,
+                **bt_kwargs
+            )
+        assert str(msg.value).startswith(
+            "Model cannot be fit/trained with `past_covariates`."
+        )
+        if not model.supports_future_covariates:
+            with pytest.raises(ValueError) as msg:
+                model_cls.gridsearch(
+                    parameters=parameters,
+                    series=ts_train,
+                    future_covariates=dummy_series,
+                    val_series=ts_val,
+                    **bt_kwargs
+                )
+            assert str(msg.value).startswith(
+                "Model cannot be fit/trained with `future_covariates`."
+            )
diff --git a/darts/tests/models/forecasting/test_historical_forecasts.py b/darts/tests/models/forecasting/test_historical_forecasts.py
index 93ff1e18d6..77907f3f1a 100644
--- a/darts/tests/models/forecasting/test_historical_forecasts.py
+++ b/darts/tests/models/forecasting/test_historical_forecasts.py
@@ -1,4 +1,5 @@
 import itertools
+from itertools import product
 
 import numpy as np
 import pandas as pd
@@ -333,6 +334,29 @@ class TestHistoricalforecast:
     # slightly longer to not affect the last predictable timestamp
     ts_covs = tg.gaussian_timeseries(length=30, start=start_ts)
 
+    @staticmethod
+    def create_model(ocl, use_ll=True, model_type="regression"):
+        if model_type == "regression":
+            return LinearRegressionModel(
+                lags=3,
+                likelihood="quantile" if use_ll else None,
+                quantiles=[0.05, 0.4, 0.5, 0.6, 0.95] if use_ll else None,
+                output_chunk_length=ocl,
+            )
+        else:  # model_type == "torch"
+            if not TORCH_AVAILABLE:
+                return None
+            return NLinearModel(
+                input_chunk_length=3,
+                likelihood=QuantileRegression([0.05, 0.4, 0.5, 0.6, 0.95])
+                if use_ll
+                else None,
+                output_chunk_length=ocl,
+                n_epochs=1,
+                random_state=42,
+                **tfm_kwargs,
+            )
+
     def test_historical_forecasts_transferrable_future_cov_local_models(self):
         model = ARIMA()
         assert model.min_train_series_length == 30
@@ -356,6 +380,17 @@ def test_historical_forecasts_transferrable_future_cov_local_models(self):
         assert len(res) == 1
         assert series.end_time() == res.time_index[0]
 
+        # passing non-supported covariates
+        with pytest.raises(ValueError) as msg:
+            model.historical_forecasts(
+                series,
+                past_covariates=series,
+                retrain=False,
+            )
+        assert str(msg.value).startswith(
+            "Model prediction does not support `past_covariates`"
+        )
+
     def test_historical_forecasts_future_cov_local_models(self):
         model = AutoARIMA()
         assert model.min_train_series_length == 10
@@ -378,6 +413,17 @@ def test_historical_forecasts_future_cov_local_models(self):
             "with `retrain` set to `False`"
         )
 
+        # passing non-supported covariates
+        with pytest.raises(ValueError) as msg:
+            model.historical_forecasts(
+                series,
+                past_covariates=series,
+                retrain=True,
+            )
+        assert str(msg.value).startswith(
+            "Model cannot be fit/trained with `past_covariates`."
+        )
+
     def test_historical_forecasts_local_models(self):
         model = NaiveSeasonal()
         assert model.min_train_series_length == 3
@@ -601,6 +647,28 @@ def test_historical_forecasts(self, config):
             f"retrain=True and overlap_end=False, and last_points_only=False"
         )
 
+        if not model.supports_past_covariates:
+            with pytest.raises(ValueError) as msg:
+                model.historical_forecasts(
+                    series=self.ts_pass_val_range,
+                    past_covariates=self.ts_passengers,
+                    retrain=True,
+                )
+            assert str(msg.value).startswith(
+                "Model cannot be fit/trained with `past_covariates`."
+            )
+
+        if not model.supports_future_covariates:
+            with pytest.raises(ValueError) as msg:
+                model.historical_forecasts(
+                    series=self.ts_pass_val_range,
+                    future_covariates=self.ts_passengers,
+                    last_points_only=False,
+                )
+            assert str(msg.value).startswith(
+                "Model cannot be fit/trained with `future_covariates`."
+            )
+
     def test_sanity_check_invalid_start(self):
         timeidx_ = tg.linear_timeseries(length=10)
         rangeidx_step1 = tg.linear_timeseries(start=0, length=10, freq=1)
@@ -1827,29 +1895,7 @@ def test_predict_likelihood_parameters(self, model_type):
         """standard checks that historical forecasts work with direct likelihood parameter predictions
         with regression and torch models."""
 
-        def create_model(ocl, use_ll=True, model_type="regression"):
-            if model_type == "regression":
-                return LinearRegressionModel(
-                    lags=3,
-                    likelihood="quantile" if use_ll else None,
-                    quantiles=[0.05, 0.4, 0.5, 0.6, 0.95] if use_ll else None,
-                    output_chunk_length=ocl,
-                )
-            else:  # model_type == "torch"
-                if not TORCH_AVAILABLE:
-                    return None
-                return NLinearModel(
-                    input_chunk_length=3,
-                    likelihood=QuantileRegression([0.05, 0.4, 0.5, 0.6, 0.95])
-                    if use_ll
-                    else None,
-                    output_chunk_length=ocl,
-                    n_epochs=1,
-                    random_state=42,
-                    **tfm_kwargs,
-                )
-
-        model = create_model(1, False, model_type=model_type)
+        model = self.create_model(1, False, model_type=model_type)
         # skip torch models if not installed
         if model is None:
             return
@@ -1860,7 +1906,7 @@ def create_model(ocl, use_ll=True, model_type="regression"):
                 predict_likelihood_parameters=True,
             )
 
-        model = create_model(1, model_type=model_type)
+        model = self.create_model(1, model_type=model_type)
         # forecast_horizon > output_chunk_length doesn't work
         with pytest.raises(ValueError):
             model.historical_forecasts(
@@ -1869,7 +1915,7 @@ def create_model(ocl, use_ll=True, model_type="regression"):
                 forecast_horizon=2,
             )
 
-        model = create_model(1, model_type=model_type)
+        model = self.create_model(1, model_type=model_type)
         # num_samples != 1 doesn't work
         with pytest.raises(ValueError):
             model.historical_forecasts(
@@ -1884,7 +1930,7 @@ def create_model(ocl, use_ll=True, model_type="regression"):
         qs_expected = ["q0.05", "q0.40", "q0.50", "q0.60", "q0.95"]
         qs_expected = pd.Index([target_name + "_" + q for q in qs_expected])
         # check that it works with retrain
-        model = create_model(1, model_type=model_type)
+        model = self.create_model(1, model_type=model_type)
         hist_fc = model.historical_forecasts(
             self.ts_pass_train,
             predict_likelihood_parameters=True,
@@ -1897,7 +1943,7 @@ def create_model(ocl, use_ll=True, model_type="regression"):
         assert len(hist_fc) == n
 
         # check for equal results between predict and hist fc without retraining
-        model = create_model(1, model_type=model_type)
+        model = self.create_model(1, model_type=model_type)
         model.fit(series=self.ts_pass_train[:-n])
         hist_fc = model.historical_forecasts(
             self.ts_pass_train,
@@ -1926,7 +1972,7 @@ def create_model(ocl, use_ll=True, model_type="regression"):
 
         # check equal results between predict and hist fc with higher output_chunk_length and horizon,
         # and last_points_only=False
-        model = create_model(2, model_type=model_type)
+        model = self.create_model(2, model_type=model_type)
         # we take one more training step so that model trained on ocl=1 has the same training samples
         # as model above
         model.fit(series=self.ts_pass_train[: -(n - 1)])
@@ -1959,3 +2005,142 @@ def create_model(ocl, use_ll=True, model_type="regression"):
                 p.all_values(copy=False), hfc.all_values(copy=False)
             )
             assert len(hist_fc) == n + 1
+
+    @pytest.mark.parametrize(
+        "model_type,enable_optimization",
+        product(["regression", "torch"], [True, False]),
+    )
+    def test_fit_kwargs(self, model_type, enable_optimization):
+        """check that the parameters provided in fit_kwargs are correctly processed"""
+        valid_fit_kwargs = {"max_samples_per_ts": 3}
+        invalid_fit_kwargs = {"series": self.ts_pass_train}
+        if model_type == "regression":
+            unsupported_fit_kwargs = {"trainer": None}
+        else:
+            unsupported_fit_kwargs = {"n_jobs_multioutput_wrapper": False}
+
+        n = 2
+        model = self.create_model(1, use_ll=False, model_type=model_type)
+
+        # torch not available
+        if model is None:
+            return
+
+        model.fit(series=self.ts_pass_train[:-n])
+
+        # supported argument
+        hist_fc = model.historical_forecasts(
+            self.ts_pass_train,
+            forecast_horizon=1,
+            num_samples=1,
+            start=len(self.ts_pass_train) - n,
+            retrain=True,
+            enable_optimization=enable_optimization,
+            fit_kwargs=valid_fit_kwargs,
+        )
+
+        assert hist_fc.components.equals(self.ts_pass_train.components)
+        assert len(hist_fc) == n
+
+        # passing unsupported argument
+        hist_fc = model.historical_forecasts(
+            self.ts_pass_train,
+            forecast_horizon=1,
+            start=len(self.ts_pass_train) - n,
+            retrain=True,
+            enable_optimization=enable_optimization,
+            fit_kwargs=unsupported_fit_kwargs,
+        )
+
+        assert hist_fc.components.equals(self.ts_pass_train.components)
+        assert len(hist_fc) == n
+
+        # passing hist_fc parameters in fit_kwargs, with retrain=False
+        hist_fc = model.historical_forecasts(
+            self.ts_pass_train,
+            forecast_horizon=1,
+            start=len(self.ts_pass_train) - n,
+            retrain=False,
+            enable_optimization=enable_optimization,
+            fit_kwargs=invalid_fit_kwargs,
+        )
+
+        assert hist_fc.components.equals(self.ts_pass_train.components)
+        assert len(hist_fc) == n
+
+        # passing hist_fc parameters in fit_kwargs, interfering with the logic
+        with pytest.raises(ValueError) as msg:
+            model.historical_forecasts(
+                self.ts_pass_train,
+                forecast_horizon=1,
+                start=len(self.ts_pass_train) - n,
+                retrain=True,
+                enable_optimization=enable_optimization,
+                fit_kwargs=invalid_fit_kwargs,
+            )
+        assert str(msg.value).startswith(
+            "The following parameters cannot be passed in `fit_kwargs`"
+        )
+
+    @pytest.mark.parametrize(
+        "model_type,enable_optimization",
+        product(["regression", "torch"], [True, False]),
+    )
+    def test_predict_kwargs(self, model_type, enable_optimization):
+        """check that the parameters provided in predict_kwargs are correctly processed"""
+        invalid_predict_kwargs = {"predict_likelihood_parameters": False}
+        if model_type == "regression":
+            valid_predict_kwargs = {}
+            unsupported_predict_kwargs = {"batch_size": 10}
+        else:
+            valid_predict_kwargs = {"batch_size": 10}
+            unsupported_predict_kwargs = {"unsupported": "unsupported"}
+
+        n = 2
+        model = self.create_model(1, use_ll=False, model_type=model_type)
+
+        # torch not available
+        if model is None:
+            return
+
+        model.fit(series=self.ts_pass_train[:-n])
+
+        # supported argument
+        hist_fc = model.historical_forecasts(
+            self.ts_pass_train,
+            forecast_horizon=1,
+            start=len(self.ts_pass_train) - n,
+            retrain=False,
+            enable_optimization=enable_optimization,
+            predict_kwargs=valid_predict_kwargs,
+        )
+
+        assert hist_fc.components.equals(self.ts_pass_train.components)
+        assert len(hist_fc) == n
+
+        # passing unsupported argument
+        hist_fc = model.historical_forecasts(
+            self.ts_pass_train,
+            forecast_horizon=1,
+            start=len(self.ts_pass_train) - n,
+            retrain=False,
+            enable_optimization=enable_optimization,
+            predict_kwargs=unsupported_predict_kwargs,
+        )
+
+        assert hist_fc.components.equals(self.ts_pass_train.components)
+        assert len(hist_fc) == n
+
+        # passing hist_fc parameters in predict_kwargs, interfering with the logic
+        with pytest.raises(ValueError) as msg:
+            hist_fc = model.historical_forecasts(
+                self.ts_pass_train,
+                forecast_horizon=1,
+                start=len(self.ts_pass_train) - n,
+                retrain=False,
+                enable_optimization=enable_optimization,
+                predict_kwargs=invalid_predict_kwargs,
+            )
+        assert str(msg.value).startswith(
+            "The following parameters cannot be passed in `predict_kwargs`"
+        )
diff --git a/darts/utils/historical_forecasts/optimized_historical_forecasts_regression.py b/darts/utils/historical_forecasts/optimized_historical_forecasts_regression.py
index 8d64bbbd45..2876d716eb 100644
--- a/darts/utils/historical_forecasts/optimized_historical_forecasts_regression.py
+++ b/darts/utils/historical_forecasts/optimized_historical_forecasts_regression.py
@@ -31,6 +31,7 @@ def _optimized_historical_forecasts_last_points_only(
     overlap_end: bool = False,
     show_warnings: bool = True,
     predict_likelihood_parameters: bool = False,
+    **kwargs,
 ) -> Union[
     TimeSeries, List[TimeSeries], Sequence[TimeSeries], Sequence[List[TimeSeries]]
 ]:
@@ -128,6 +129,7 @@ def _optimized_historical_forecasts_last_points_only(
             x=np.repeat(X, num_samples, axis=0),
             num_samples=num_samples,
             predict_likelihood_parameters=predict_likelihood_parameters,
+            **kwargs,
         )
         # forecast has shape ((forecastable_index_length-1)*num_samples, k, n_component)
         # where k = output_chunk length if multi_models, 1 otherwise
@@ -177,6 +179,7 @@ def _optimized_historical_forecasts_all_points(
     overlap_end: bool = False,
     show_warnings: bool = True,
     predict_likelihood_parameters: bool = False,
+    **kwargs,
 ) -> Union[
     TimeSeries, List[TimeSeries], Sequence[TimeSeries], Sequence[List[TimeSeries]]
 ]:
@@ -273,6 +276,7 @@ def _optimized_historical_forecasts_all_points(
             x=np.repeat(X, num_samples, axis=0),
             num_samples=num_samples,
             predict_likelihood_parameters=predict_likelihood_parameters,
+            **kwargs,
         )
 
         # reshape and stride the forecast into (forecastable_index, forecast_horizon, n_components, num_samples)
diff --git a/darts/utils/historical_forecasts/optimized_historical_forecasts_torch.py b/darts/utils/historical_forecasts/optimized_historical_forecasts_torch.py
index f41cbbfdfb..2079850e2b 100644
--- a/darts/utils/historical_forecasts/optimized_historical_forecasts_torch.py
+++ b/darts/utils/historical_forecasts/optimized_historical_forecasts_torch.py
@@ -5,6 +5,8 @@
 except ImportError:
     from typing_extensions import Literal
 
+import inspect
+
 import numpy as np
 import pandas as pd
 
@@ -32,8 +34,9 @@ def _optimized_historical_forecasts(
     overlap_end: bool = False,
     last_points_only: bool = True,
     show_warnings: bool = True,
-    predict_likelihood_parameters: bool = False,
     verbose: bool = False,
+    predict_likelihood_parameters: bool = False,
+    **kwargs,
 ) -> Union[
     TimeSeries, List[TimeSeries], Sequence[TimeSeries], Sequence[List[TimeSeries]]
 ]:
@@ -92,6 +95,7 @@ def _optimized_historical_forecasts(
         for cls in model.__class__.__mro__
         if cls.__name__ == "TorchForecastingModel"
     ][0]
+    super_predict_params = inspect.signature(super(tfm_cls, model).predict).parameters
     super(tfm_cls, model).predict(
         forecast_horizon,
         series,
@@ -99,6 +103,7 @@ def _optimized_historical_forecasts(
         future_covariates,
         num_samples=num_samples,
         predict_likelihood_parameters=predict_likelihood_parameters,
+        **{k: v for k, v in kwargs.items() if k in super_predict_params},
     )
 
     dataset = model._build_inference_dataset(
@@ -113,10 +118,10 @@ def _optimized_historical_forecasts(
     predictions = model.predict_from_dataset(
         forecast_horizon,
         dataset,
-        trainer=None,
         verbose=verbose,
         num_samples=num_samples,
         predict_likelihood_parameters=predict_likelihood_parameters,
+        **kwargs,
     )
 
     # torch models return list of time series in order of historical forecasts: we reorder per time series
diff --git a/darts/utils/historical_forecasts/utils.py b/darts/utils/historical_forecasts/utils.py
index fac3d4029e..7ed179ca15 100644
--- a/darts/utils/historical_forecasts/utils.py
+++ b/darts/utils/historical_forecasts/utils.py
@@ -1,11 +1,13 @@
 from types import SimpleNamespace
-from typing import Any, Callable, Optional, Sequence, Tuple, Union
+from typing import Any, Callable, Dict, Optional, Sequence, Set, Tuple, Union
 
 try:
     from typing import Literal
 except ImportError:
     from typing_extensions import Literal
 
+import inspect
+
 import numpy as np
 import pandas as pd
 from numpy.typing import ArrayLike
@@ -209,6 +211,86 @@ def _historical_forecasts_general_checks(model, series, kwargs):
             )
 
 
+def _historical_forecasts_sanitize_kwargs(
+    model,
+    fit_kwargs: Optional[Dict[str, Any]],
+    predict_kwargs: Optional[Dict[str, Any]],
+    retrain: bool,
+    show_warnings: bool,
+) -> Tuple[Dict[str, Any], Dict[str, Any]]:
+    """Convert `None` kwargs to empty dicts and check that their content is compatible with the called methods."""
+    hfc_args = set(inspect.signature(model.historical_forecasts).parameters)
+    # replace `forecast_horizon` with `n`
+    hfc_args = hfc_args - {"forecast_horizon"}
+    hfc_args = hfc_args.union({"n"})
+
+    if fit_kwargs is None:
+        fit_kwargs = dict()
+    elif retrain:
+        fit_args = set(inspect.signature(model.fit).parameters)
+        fit_kwargs = _historical_forecasts_check_kwargs(
+            hfc_args=hfc_args,
+            name_kwargs="fit_kwargs",
+            dict_kwargs=fit_kwargs,
+            method_args=fit_args,
+            show_warnings=show_warnings,
+        )
+    elif show_warnings:
+        logger.warning(
+            "`fit_kwargs` was provided with `retrain=False`, the argument will be ignored."
+        )
+
+    if predict_kwargs is None:
+        predict_kwargs = dict()
+    else:
+        predict_args = set(inspect.signature(model.predict).parameters)
+        predict_kwargs = _historical_forecasts_check_kwargs(
+            hfc_args=hfc_args,
+            name_kwargs="predict_kwargs",
+            dict_kwargs=predict_kwargs,
+            method_args=predict_args,
+            show_warnings=show_warnings,
+        )
+
+    return fit_kwargs, predict_kwargs
+
+
+def _historical_forecasts_check_kwargs(
+    hfc_args: Set[str],
+    name_kwargs: str,
+    dict_kwargs: Dict[str, Any],
+    method_args: Set[str],
+    show_warnings: bool,
+) -> Dict[str, Any]:
+    """
+    Return the kwargs dict without the arguments unsupported by the model method.
+
+    Log a warning if some arguments are not supported and raise an exception if some arguments interfere with
+    historical_forecasts logic.
+    """
+    invalid_args = set(dict_kwargs).intersection(hfc_args)
+    if len(invalid_args) > 0:
+        raise_log(
+            ValueError(
+                f"The following parameters cannot be passed in `{name_kwargs}`: {invalid_args}. "
+                f"Make sure to pass them explicitly to the function/method."
+            ),
+            logger,
+        )
+
+    ignored_args = set(dict_kwargs) - method_args
+    if len(ignored_args) > 0:
+        # remove unsupported arguments to avoid an exception raised by Python
+        dict_kwargs = {k: v for k, v in dict_kwargs.items() if k not in ignored_args}
+        if show_warnings:
+            logger.warning(
+                f"The following parameters in `{name_kwargs}` will be ignored as they are not supported by "
+                f"model method: {ignored_args}."
+            )
+
+    return dict_kwargs
+
+
 def _historical_forecasts_start_warnings(
     idx: int,
     start: Union[pd.Timestamp, int],