From 0e4c32343569e902788318ead5cba4dfb088f084 Mon Sep 17 00:00:00 2001 From: perib Date: Fri, 3 Jan 2025 12:13:10 -0500 Subject: [PATCH] fix bug where the wrong param was passed into columnonehotencoder --- tpot2/tpot_estimator/estimator.py | 4 ++-- tpot2/tpot_estimator/steady_state_estimator.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/tpot2/tpot_estimator/estimator.py b/tpot2/tpot_estimator/estimator.py index f7848f09..f079556c 100644 --- a/tpot2/tpot_estimator/estimator.py +++ b/tpot2/tpot_estimator/estimator.py @@ -581,7 +581,7 @@ def fit(self, X, y): if self.categorical_features is not None: #if categorical features are specified, use those pipeline_steps.append(("impute_categorical", tpot2.builtin_modules.ColumnSimpleImputer(self.categorical_features, strategy='most_frequent'))) pipeline_steps.append(("impute_numeric", tpot2.builtin_modules.ColumnSimpleImputer("numeric", strategy='mean'))) - pipeline_steps.append(("ColumnOneHotEncoder", tpot2.builtin_modules.ColumnOneHotEncoder(self.categorical_features, strategy='most_frequent'))) + pipeline_steps.append(("ColumnOneHotEncoder", tpot2.builtin_modules.ColumnOneHotEncoder(self.categorical_features, min_frequency=0.0001))) else: if isinstance(X, pd.DataFrame): @@ -589,7 +589,7 @@ def fit(self, X, y): if len(categorical_columns) > 0: pipeline_steps.append(("impute_categorical", tpot2.builtin_modules.ColumnSimpleImputer("categorical", strategy='most_frequent'))) pipeline_steps.append(("impute_numeric", tpot2.builtin_modules.ColumnSimpleImputer("numeric", strategy='mean'))) - pipeline_steps.append(("ColumnOneHotEncoder", tpot2.builtin_modules.ColumnOneHotEncoder("categorical", strategy='most_frequent'))) + pipeline_steps.append(("ColumnOneHotEncoder", tpot2.builtin_modules.ColumnOneHotEncoder("categorical", min_frequency=0.0001))) else: pipeline_steps.append(("impute_numeric", tpot2.builtin_modules.ColumnSimpleImputer("all", strategy='mean'))) else: diff --git a/tpot2/tpot_estimator/steady_state_estimator.py b/tpot2/tpot_estimator/steady_state_estimator.py index 4f5759f1..3d2be370 100644 --- a/tpot2/tpot_estimator/steady_state_estimator.py +++ b/tpot2/tpot_estimator/steady_state_estimator.py @@ -624,7 +624,7 @@ def fit(self, X, y): if self.categorical_features is not None: #if categorical features are specified, use those pipeline_steps.append(("impute_categorical", tpot2.builtin_modules.ColumnSimpleImputer(self.categorical_features, strategy='most_frequent'))) pipeline_steps.append(("impute_numeric", tpot2.builtin_modules.ColumnSimpleImputer("numeric", strategy='mean'))) - pipeline_steps.append(("impute_categorical", tpot2.builtin_modules.ColumnOneHotEncoder(self.categorical_features, strategy='most_frequent'))) + pipeline_steps.append(("impute_categorical", tpot2.builtin_modules.ColumnOneHotEncoder(self.categorical_features, min_frequency=0.0001))) else: if isinstance(X, pd.DataFrame): @@ -632,7 +632,7 @@ def fit(self, X, y): if len(categorical_columns) > 0: pipeline_steps.append(("impute_categorical", tpot2.builtin_modules.ColumnSimpleImputer("categorical", strategy='most_frequent'))) pipeline_steps.append(("impute_numeric", tpot2.builtin_modules.ColumnSimpleImputer("numeric", strategy='mean'))) - pipeline_steps.append(("impute_categorical", tpot2.builtin_modules.ColumnOneHotEncoder("categorical", strategy='most_frequent'))) + pipeline_steps.append(("impute_categorical", tpot2.builtin_modules.ColumnOneHotEncoder("categorical", min_frequency=0.0001))) else: pipeline_steps.append(("impute_numeric", tpot2.builtin_modules.ColumnSimpleImputer("all", strategy='mean'))) else: