From d99084efc8717952e915f830a3128743965cc927 Mon Sep 17 00:00:00 2001 From: christopherbunn Date: Tue, 30 Jan 2024 15:52:27 -0500 Subject: [PATCH] Added clarifying comments and updated test --- .../transformers/preprocessing/time_series_featurizer.py | 2 +- evalml/pipelines/utils.py | 2 +- evalml/tests/automl_tests/test_iterative_algorithm.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/evalml/pipelines/components/transformers/preprocessing/time_series_featurizer.py b/evalml/pipelines/components/transformers/preprocessing/time_series_featurizer.py index f093d7b19a..61246a78d2 100644 --- a/evalml/pipelines/components/transformers/preprocessing/time_series_featurizer.py +++ b/evalml/pipelines/components/transformers/preprocessing/time_series_featurizer.py @@ -127,7 +127,7 @@ def fit(self, X, y=None): if self.time_index is None: raise ValueError("time_index cannot be None!") - # For the multiseries case, where we only want the start delay lag for the baseline + # For the multiseries case, each series ID has individualized lag values if isinstance(y, pd.DataFrame): self.statistically_significant_lags = {} for column in y.columns: diff --git a/evalml/pipelines/utils.py b/evalml/pipelines/utils.py index 2649a82c3c..f6b0154468 100644 --- a/evalml/pipelines/utils.py +++ b/evalml/pipelines/utils.py @@ -1522,7 +1522,7 @@ def stack_X(X, series_id_name, time_index, starting_index=None, series_id_values separated_name = col.split(MULTISERIES_SEPARATOR_SYMBOL) original_columns.add(MULTISERIES_SEPARATOR_SYMBOL.join(separated_name[:-1])) series_ids.append(separated_name[-1]) - # Remove duplicates + # Remove duplicates while maintaining insertion order seen = set() series_ids = [val for val in series_ids if not (val in seen or seen.add(val))] diff --git a/evalml/tests/automl_tests/test_iterative_algorithm.py b/evalml/tests/automl_tests/test_iterative_algorithm.py index 7c7aada78c..e7e251f3db 100644 --- a/evalml/tests/automl_tests/test_iterative_algorithm.py +++ b/evalml/tests/automl_tests/test_iterative_algorithm.py @@ -97,7 +97,7 @@ def test_iterative_algorithm_init( assert algo.batch_number == 0 assert algo.default_max_batches == 1 estimators = get_estimators(problem_type) - decomposer = [True, False] if is_regression(problem_type) else [True] + decomposer = [True, False] if is_regression(problem_type) else [False] assert len(algo.allowed_pipelines) == len( [ make_pipeline(