Skip to content

Commit

Permalink
update get_forecast_period to handle gap (#4200)
Browse files Browse the repository at this point in the history
* fix get_forecast_period to properly handle gap

* update release notes

* fix test

* fix docstring test
  • Loading branch information
Frank LaNasa authored Jun 7, 2023
1 parent af53c15 commit 1cd5e5b
Show file tree
Hide file tree
Showing 3 changed files with 9 additions and 10 deletions.
1 change: 1 addition & 0 deletions docs/source/release_notes.rst
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ Release Notes
* Added option to exclude time index in ``IDColumnsDataCheck`` :pr:`4194`
* Fixes
* Fixed small errors in ``ARIMARegressor`` implementation :pr:`4186`
* Fixed ``get_forecast_period`` to properly handle ``gap`` parameter :pr:`4200`
* Changes
* Documentation Changes
* Testing Changes
Expand Down
9 changes: 4 additions & 5 deletions evalml/pipelines/time_series_regression_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,7 @@ def get_forecast_period(self, X):
ValueError: If pipeline is not trained.
Returns:
pd.Series: Datetime periods out to `forecast_horizon + gap`.
pd.Series: Datetime periods from `gap` to `forecast_horizon + gap`.
Example:
>>> X = pd.DataFrame({'date': pd.date_range(start='1-1-2022', periods=10, freq='D'), 'feature': range(10, 20)})
Expand All @@ -128,7 +128,7 @@ def get_forecast_period(self, X):
>>> pipeline.fit(X, y)
pipeline = TimeSeriesRegressionPipeline(component_graph={'Linear Regressor': ['Linear Regressor', 'X', 'y']}, parameters={'Linear Regressor':{'fit_intercept': True, 'n_jobs': -1}, 'pipeline':{'gap': 1, 'max_delay': 1, 'forecast_horizon': 2, 'time_index': 'date'}}, random_seed=0)
>>> dates = pipeline.get_forecast_period(X)
>>> expected = pd.Series(pd.date_range(start='2022-01-11', periods=(gap + forecast_horizon), freq='D'), name='date', index=[10, 11, 12])
>>> expected = pd.Series(pd.date_range(start='2022-01-11', periods=forecast_horizon, freq='D').shift(gap), name='date', index=[10, 11])
>>> assert dates.equals(expected)
"""
if not self._is_fitted:
Expand All @@ -142,10 +142,9 @@ def get_forecast_period(self, X):
pd.date_range(
start=first_date,
periods=self.forecast_horizon
+ self.gap
+ 1, # Add additional period to account for dropping first date row
freq=self.frequency,
),
).shift(self.gap),
)

# Generate numerical index
Expand All @@ -165,7 +164,7 @@ def get_forecast_predictions(self, X, y):
y (pd.Series, np.ndarray): Targets used to train the pipeline of shape [n_samples_train].
Returns:
Predictions out to `forecast_horizon + gap` periods.
Predictions from `gap` periods out to `forecast_horizon + gap` periods.
"""
X, y = self._convert_to_woodwork(X, y)
pred_dates = pd.DataFrame(self.get_forecast_period(X))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -107,9 +107,9 @@ def test_time_series_get_forecast_period(forecast_horizon, gap, numeric_idx, ts_
clf.fit(X, y)
result = clf.get_forecast_period(X)

assert result.size == forecast_horizon + gap
assert all(result.index == range(len(X), len(X) + forecast_horizon + gap))
assert result.iloc[0] == X.iloc[-1]["date"] + np.timedelta64(1, clf.frequency)
assert result.size == forecast_horizon
assert all(result.index == range(len(X), len(X) + forecast_horizon))
assert result.iloc[0] == X.iloc[-1]["date"] + np.timedelta64(1 + gap, clf.frequency)
assert np.issubdtype(result.dtype, np.datetime64)
assert result.name == "date"

Expand All @@ -119,7 +119,7 @@ def test_time_series_get_forecast_predictions(forecast_horizon, gap, ts_data):
X, _, y = ts_data(problem_type=ProblemTypes.TIME_SERIES_REGRESSION)

X_train, y_train = X.iloc[:15], y.iloc[:15]
X_validation = X.iloc[15 : (15 + gap + forecast_horizon)]
X_validation = X.iloc[15 + gap : (15 + gap + forecast_horizon)]

clf = TimeSeriesRegressionPipeline(
component_graph={
Expand Down Expand Up @@ -166,5 +166,4 @@ def test_time_series_get_forecast_predictions(forecast_horizon, gap, ts_data):
clf.fit(X_train, y_train)
forecast_preds = clf.get_forecast_predictions(X=X_train, y=y_train)
X_val_preds = clf.predict(X_validation, X_train=X_train, y_train=y_train)

assert_series_equal(forecast_preds, X_val_preds)

0 comments on commit 1cd5e5b

Please sign in to comment.