From 1cd5e5bf9805e1c512c12758c019a74d02160cf1 Mon Sep 17 00:00:00 2001 From: Frank LaNasa Date: Wed, 7 Jun 2023 13:21:19 -0400 Subject: [PATCH] update get_forecast_period to handle gap (#4200) * fix get_forecast_period to properly handle gap * update release notes * fix test * fix docstring test --- docs/source/release_notes.rst | 1 + evalml/pipelines/time_series_regression_pipeline.py | 9 ++++----- .../pipeline_tests/test_time_series_baseline_pipeline.py | 9 ++++----- 3 files changed, 9 insertions(+), 10 deletions(-) diff --git a/docs/source/release_notes.rst b/docs/source/release_notes.rst index c1f11d5640..f171ae482a 100644 --- a/docs/source/release_notes.rst +++ b/docs/source/release_notes.rst @@ -8,6 +8,7 @@ Release Notes * Added option to exclude time index in ``IDColumnsDataCheck`` :pr:`4194` * Fixes * Fixed small errors in ``ARIMARegressor`` implementation :pr:`4186` + * Fixed ``get_forecast_period`` to properly handle ``gap`` parameter :pr:`4200` * Changes * Documentation Changes * Testing Changes diff --git a/evalml/pipelines/time_series_regression_pipeline.py b/evalml/pipelines/time_series_regression_pipeline.py index 5490b2a4ad..93c4ec2a5a 100644 --- a/evalml/pipelines/time_series_regression_pipeline.py +++ b/evalml/pipelines/time_series_regression_pipeline.py @@ -114,7 +114,7 @@ def get_forecast_period(self, X): ValueError: If pipeline is not trained. Returns: - pd.Series: Datetime periods out to `forecast_horizon + gap`. + pd.Series: Datetime periods from `gap` to `forecast_horizon + gap`. 
Example: >>> X = pd.DataFrame({'date': pd.date_range(start='1-1-2022', periods=10, freq='D'), 'feature': range(10, 20)}) @@ -128,7 +128,7 @@ def get_forecast_period(self, X): >>> pipeline.fit(X, y) pipeline = TimeSeriesRegressionPipeline(component_graph={'Linear Regressor': ['Linear Regressor', 'X', 'y']}, parameters={'Linear Regressor':{'fit_intercept': True, 'n_jobs': -1}, 'pipeline':{'gap': 1, 'max_delay': 1, 'forecast_horizon': 2, 'time_index': 'date'}}, random_seed=0) >>> dates = pipeline.get_forecast_period(X) - >>> expected = pd.Series(pd.date_range(start='2022-01-11', periods=(gap + forecast_horizon), freq='D'), name='date', index=[10, 11, 12]) + >>> expected = pd.Series(pd.date_range(start='2022-01-11', periods=forecast_horizon, freq='D').shift(gap), name='date', index=[10, 11]) >>> assert dates.equals(expected) """ if not self._is_fitted: @@ -142,10 +142,9 @@ def get_forecast_period(self, X): pd.date_range( start=first_date, periods=self.forecast_horizon - + self.gap + 1, # Add additional period to account for dropping first date row freq=self.frequency, - ), + ).shift(self.gap), ) # Generate numerical index @@ -165,7 +164,7 @@ def get_forecast_predictions(self, X, y): y (pd.Series, np.ndarray): Targets used to train the pipeline of shape [n_samples_train]. Returns: - Predictions out to `forecast_horizon + gap` periods. + Predictions from `gap` periods out to `forecast_horizon + gap` periods. 
""" X, y = self._convert_to_woodwork(X, y) pred_dates = pd.DataFrame(self.get_forecast_period(X)) diff --git a/evalml/tests/pipeline_tests/test_time_series_baseline_pipeline.py b/evalml/tests/pipeline_tests/test_time_series_baseline_pipeline.py index f0d0c60e25..20a5f4edc5 100644 --- a/evalml/tests/pipeline_tests/test_time_series_baseline_pipeline.py +++ b/evalml/tests/pipeline_tests/test_time_series_baseline_pipeline.py @@ -107,9 +107,9 @@ def test_time_series_get_forecast_period(forecast_horizon, gap, numeric_idx, ts_ clf.fit(X, y) result = clf.get_forecast_period(X) - assert result.size == forecast_horizon + gap - assert all(result.index == range(len(X), len(X) + forecast_horizon + gap)) - assert result.iloc[0] == X.iloc[-1]["date"] + np.timedelta64(1, clf.frequency) + assert result.size == forecast_horizon + assert all(result.index == range(len(X), len(X) + forecast_horizon)) + assert result.iloc[0] == X.iloc[-1]["date"] + np.timedelta64(1 + gap, clf.frequency) assert np.issubdtype(result.dtype, np.datetime64) assert result.name == "date" @@ -119,7 +119,7 @@ def test_time_series_get_forecast_predictions(forecast_horizon, gap, ts_data): X, _, y = ts_data(problem_type=ProblemTypes.TIME_SERIES_REGRESSION) X_train, y_train = X.iloc[:15], y.iloc[:15] - X_validation = X.iloc[15 : (15 + gap + forecast_horizon)] + X_validation = X.iloc[15 + gap : (15 + gap + forecast_horizon)] clf = TimeSeriesRegressionPipeline( component_graph={ @@ -166,5 +166,4 @@ def test_time_series_get_forecast_predictions(forecast_horizon, gap, ts_data): clf.fit(X_train, y_train) forecast_preds = clf.get_forecast_predictions(X=X_train, y=y_train) X_val_preds = clf.predict(X_validation, X_train=X_train, y_train=y_train) - assert_series_equal(forecast_preds, X_val_preds)