diff --git a/.gitignore b/.gitignore
index a9df30b5..bff01e19 100644
--- a/.gitignore
+++ b/.gitignore
@@ -11,4 +11,6 @@ dask-worker-space/
 *.egg-info/
 .coverage
 target/
-.venv/
\ No newline at end of file
+.venv/
+build/*
+*.egg
\ No newline at end of file
diff --git a/tpot2/config/__init__.py b/tpot2/config/__init__.py
index c5c18117..e019b78e 100644
--- a/tpot2/config/__init__.py
+++ b/tpot2/config/__init__.py
@@ -7,6 +7,12 @@
 from .autoqtl_builtins import make_FeatureEncodingFrequencySelector_config_dictionary, make_genetic_encoders_config_dictionary
 from .hyperparametersuggestor import *
 
+try:
+    from .classifiers_sklearnex import make_sklearnex_classifier_config_dictionary
+    from .regressors_sklearnex import make_sklearnex_regressor_config_dictionary
+except ModuleNotFoundError: #if optional packages are not installed
+    pass
+
 try:
     from .mdr_configs import make_skrebate_config_dictionary, make_MDR_config_dictionary, make_ContinuousMDR_config_dictionary
 except: #if optional packages are not installed
diff --git a/tpot2/config/classifiers_sklearnex.py b/tpot2/config/classifiers_sklearnex.py
new file mode 100644
index 00000000..7d4129d0
--- /dev/null
+++ b/tpot2/config/classifiers_sklearnex.py
@@ -0,0 +1,94 @@
+from functools import partial
+
+from sklearnex.ensemble import RandomForestClassifier
+from sklearnex.neighbors import KNeighborsClassifier
+from sklearnex.svm import SVC
+from sklearnex.svm import NuSVC
+from sklearnex.linear_model import LogisticRegression
+
+
+def params_RandomForestClassifier(trial, name=None):
+    """Suggest hyperparameters for the sklearnex RandomForestClassifier."""
+    return {
+        'n_estimators': 100,
+        'bootstrap': trial.suggest_categorical(name=f'bootstrap_{name}', choices=[True, False]),
+        'min_samples_split': trial.suggest_int(f'min_samples_split_{name}', 2, 20),
+        'min_samples_leaf': trial.suggest_int(f'min_samples_leaf_{name}', 1, 20),
+        'n_jobs': 1,
+    }
+
+def params_KNeighborsClassifier(trial, name=None, n_samples=10):
+    """Suggest hyperparameters for the sklearnex KNeighborsClassifier.
+
+    n_neighbors is capped at min(n_samples, 100): a neighbor count larger
+    than the number of samples cannot be satisfied.
+    """
+    n_neighbors_max = max(1, min(n_samples, 100))
+    return {
+        'n_neighbors': trial.suggest_int(f'n_neighbors_{name}', 1, n_neighbors_max, log=True),
+        'weights': trial.suggest_categorical(f'weights_{name}', ['uniform', 'distance']),
+    }
+
+def params_LogisticRegression(trial, name=None):
+    """Suggest hyperparameters for the sklearnex LogisticRegression.
+
+    Only liblinear searches over the penalty (and, for l2, the dual
+    formulation); sag and saga keep the l2 penalty with dual=False.
+    """
+    # No trailing comma here: it would wrap the choice in a 1-tuple, so the
+    # comparison below could never match and 'solver' would be a tuple.
+    solver = trial.suggest_categorical(name=f'solver_{name}', choices=['liblinear', 'sag', 'saga'])
+    penalty = 'l2'
+    dual = False
+    if solver == 'liblinear':
+        penalty = trial.suggest_categorical(name=f'penalty_{name}', choices=['l1', 'l2'])
+        if penalty == 'l2':
+            dual = trial.suggest_categorical(name=f'dual_{name}', choices=[True, False])
+    return {
+        'solver': solver,
+        'penalty': penalty,
+        'dual': dual,
+        'C': trial.suggest_float(f'C_{name}', 1e-4, 1e4, log=True),
+        'max_iter': 1000,
+    }
+
+def params_SVC(trial, name=None):
+    """Suggest hyperparameters for the sklearnex SVC."""
+    return {
+        'kernel': trial.suggest_categorical(name=f'kernel_{name}', choices=['poly', 'rbf', 'linear', 'sigmoid']),
+        'C': trial.suggest_float(f'C_{name}', 1e-4, 25, log=True),
+        'degree': trial.suggest_int(f'degree_{name}', 1, 4),
+        'class_weight': trial.suggest_categorical(name=f'class_weight_{name}', choices=[None, 'balanced']),
+        'max_iter': 3000,
+        'tol': 0.005,
+        'probability': True,
+    }
+
+def params_NuSVC(trial, name=None):
+    """Suggest hyperparameters for the sklearnex NuSVC.
+
+    NuSVC is regularized through nu and has no C parameter, so C is not suggested.
+    """
+    return {
+        'nu': trial.suggest_float(f'subsample_{name}', 0.05, 1.0),
+        'kernel': trial.suggest_categorical(name=f'kernel_{name}', choices=['poly', 'rbf', 'linear', 'sigmoid']),
+        'degree': trial.suggest_int(f'degree_{name}', 1, 4),
+        'class_weight': trial.suggest_categorical(name=f'class_weight_{name}', choices=[None, 'balanced']),
+        'max_iter': 3000,
+        'tol': 0.005,
+        'probability': True,
+    }
+
+def make_sklearnex_classifier_config_dictionary(n_samples=10, n_classes=None):
+    """Map each sklearnex classifier class to its hyperparameter function.
+
+    n_samples bounds the KNeighborsClassifier search; n_classes is accepted
+    for signature parity with the caller and is not used.
+    """
+    return {
+        RandomForestClassifier: params_RandomForestClassifier,
+        KNeighborsClassifier: partial(params_KNeighborsClassifier, n_samples=n_samples),
+        LogisticRegression: params_LogisticRegression,
+        SVC: params_SVC,
+        NuSVC: params_NuSVC,
+    }
diff --git a/tpot2/config/regressors_sklearnex.py b/tpot2/config/regressors_sklearnex.py
new file mode 100644
index 00000000..4eb10f1c
--- /dev/null
+++ b/tpot2/config/regressors_sklearnex.py
@@ -0,0 +1,99 @@
+from functools import partial
+
+from sklearnex.linear_model import LinearRegression
+from sklearnex.linear_model import Ridge
+from sklearnex.linear_model import Lasso
+from sklearnex.linear_model import ElasticNet
+
+from sklearnex.svm import SVR
+from sklearnex.svm import NuSVR
+
+from sklearnex.ensemble import RandomForestRegressor
+from sklearnex.neighbors import KNeighborsRegressor
+
+
+def params_RandomForestRegressor(trial, name=None):
+    """Suggest hyperparameters for the sklearnex RandomForestRegressor."""
+    return {
+        'n_estimators': 100,
+        'max_features': trial.suggest_float(f'max_features_{name}', 0.05, 1.0),
+        'bootstrap': trial.suggest_categorical(name=f'bootstrap_{name}', choices=[True, False]),
+        'min_samples_split': trial.suggest_int(f'min_samples_split_{name}', 2, 21),
+        'min_samples_leaf': trial.suggest_int(f'min_samples_leaf_{name}', 1, 21),
+    }
+
+def params_KNeighborsRegressor(trial, name=None, n_samples=100):
+    """Suggest hyperparameters for the sklearnex KNeighborsRegressor.
+
+    n_neighbors is capped at min(n_samples, 100): a neighbor count larger
+    than the number of samples cannot be satisfied.
+    """
+    n_neighbors_max = max(1, min(n_samples, 100))
+    return {
+        'n_neighbors': trial.suggest_int(f'n_neighbors_{name}', 1, n_neighbors_max),
+        'weights': trial.suggest_categorical(f'weights_{name}', ['uniform', 'distance']),
+    }
+
+def params_LinearRegression(trial, name=None):
+    """Return an empty dict: no LinearRegression hyperparameters are searched."""
+    return {}
+
+def params_Ridge(trial, name=None):
+    """Suggest hyperparameters for the sklearnex Ridge."""
+    return {
+        'alpha': trial.suggest_float(f'alpha_{name}', 0.0, 1.0),
+        'fit_intercept': True,
+        'tol': trial.suggest_float(f'tol_{name}', 1e-5, 1e-1, log=True),
+    }
+
+def params_Lasso(trial, name=None):
+    """Suggest hyperparameters for the sklearnex Lasso."""
+    return {
+        'alpha': trial.suggest_float(f'alpha_{name}', 0.0, 1.0),
+        'fit_intercept': True,
+        'precompute': trial.suggest_categorical(f'precompute_{name}', [True, False, 'auto']),
+        'tol': trial.suggest_float(f'tol_{name}', 1e-5, 1e-1, log=True),
+        'positive': trial.suggest_categorical(f'positive_{name}', [True, False]),
+        'selection': trial.suggest_categorical(f'selection_{name}', ['cyclic', 'random']),
+    }
+
+def params_ElasticNet(trial, name=None):
+    """Suggest alpha and l1_ratio, each as 1 minus a float drawn from [0, 1]."""
+    return {
+        'alpha': 1 - trial.suggest_float(f'alpha_{name}', 0.0, 1.0),
+        'l1_ratio': 1 - trial.suggest_float(f'l1_ratio_{name}', 0.0, 1.0),
+    }
+
+def params_SVR(trial, name=None):
+    """Suggest hyperparameters for the sklearnex SVR."""
+    return {
+        'kernel': trial.suggest_categorical(name=f'kernel_{name}', choices=['poly', 'rbf', 'linear', 'sigmoid']),
+        'C': trial.suggest_float(f'C_{name}', 1e-4, 25, log=True),
+        'degree': trial.suggest_int(f'degree_{name}', 1, 4),
+        'max_iter': 3000,
+        'tol': 0.005,
+    }
+
+def params_NuSVR(trial, name=None):
+    """Suggest hyperparameters for the sklearnex NuSVR."""
+    return {
+        'nu': trial.suggest_float(f'subsample_{name}', 0.05, 1.0),
+        'kernel': trial.suggest_categorical(name=f'kernel_{name}', choices=['poly', 'rbf', 'linear', 'sigmoid']),
+        'C': trial.suggest_float(f'C_{name}', 1e-4, 25, log=True),
+        'degree': trial.suggest_int(f'degree_{name}', 1, 4),
+        'max_iter': 3000,
+        'tol': 0.005,
+    }
+
+def make_sklearnex_regressor_config_dictionary(n_samples=10):
+    """Map each sklearnex regressor class to its hyperparameter function."""
+    return {
+        RandomForestRegressor: params_RandomForestRegressor,
+        KNeighborsRegressor: partial(params_KNeighborsRegressor, n_samples=n_samples),
+        LinearRegression: params_LinearRegression,
+        Ridge: params_Ridge,
+        Lasso: params_Lasso,
+        ElasticNet: params_ElasticNet,
+        SVR: params_SVR,
+        NuSVR: params_NuSVR,
+    }
diff --git a/tpot2/tpot_estimator/estimator_utils.py b/tpot2/tpot_estimator/estimator_utils.py
index fe7a61a7..08d25f1b 100644
--- a/tpot2/tpot_estimator/estimator_utils.py
+++ b/tpot2/tpot_estimator/estimator_utils.py
@@ -41,9 +41,15 @@ def get_configuration_dictionary(options, n_samples, n_features, classification,
         elif option == "classifiers":
             config_dict.update(tpot2.config.make_classifier_config_dictionary(n_samples=n_samples, n_classes=n_classes))
 
+        elif option == "classifiers_sklearnex":
+            config_dict.update(tpot2.config.make_sklearnex_classifier_config_dictionary(n_samples=n_samples, n_classes=n_classes))
+
         elif option == "regressors":
             config_dict.update(tpot2.config.make_regressor_config_dictionary(n_samples=n_samples))
 
+        elif option == "regressors_sklearnex":
+            config_dict.update(tpot2.config.make_sklearnex_regressor_config_dictionary(n_samples=n_samples))
+
         elif option == "transformers":
             config_dict.update(tpot2.config.make_transformer_config_dictionary(n_features=n_features))
 