Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Initial sklearnex support #102

Merged
merged 6 commits into from
Oct 6, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -11,4 +11,6 @@ dask-worker-space/
*.egg-info/
.coverage
target/
.venv/
.venv/
build/*
*.egg
6 changes: 6 additions & 0 deletions tpot2/config/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,12 @@
from .autoqtl_builtins import make_FeatureEncodingFrequencySelector_config_dictionary, make_genetic_encoders_config_dictionary
from .hyperparametersuggestor import *

try:
from .classifiers_sklearnex import make_sklearnex_classifier_config_dictionary
from .regressors_sklearnex import make_sklearnex_regressor_config_dictionary
except ModuleNotFoundError: #if optional packages are not installed
pass

try:
from .mdr_configs import make_skrebate_config_dictionary, make_MDR_config_dictionary, make_ContinuousMDR_config_dictionary
except: #if optional packages are not installed
Expand Down
73 changes: 73 additions & 0 deletions tpot2/config/classifiers_sklearnex.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
from sklearnex.ensemble import RandomForestClassifier
from sklearnex.neighbors import KNeighborsClassifier
from sklearnex.svm import SVC
from sklearnex.svm import NuSVC
from sklearnex.linear_model import LogisticRegression


def params_RandomForestClassifier(trial, name=None):
    """Sample constructor keyword arguments for ``RandomForestClassifier``.

    Args:
        trial: Object exposing ``suggest_categorical`` and ``suggest_int``
            (presumably an Optuna-style trial; confirm against callers).
        name: Suffix appended to every sampled parameter name.

    Returns:
        dict with a fixed ``n_estimators`` of 100 and ``n_jobs`` of 1, plus
        sampled ``bootstrap``, ``min_samples_split`` and ``min_samples_leaf``.
    """
    # Fixed values are written directly; sampled values are requested in the
    # same order as the keys appear in the resulting dictionary.
    space = {'n_estimators': 100}
    space['bootstrap'] = trial.suggest_categorical(
        name=f'bootstrap_{name}', choices=[True, False]
    )
    space['min_samples_split'] = trial.suggest_int(f'min_samples_split_{name}', 2, 20)
    space['min_samples_leaf'] = trial.suggest_int(f'min_samples_leaf_{name}', 1, 20)
    space['n_jobs'] = 1
    return space

def params_KNeighborsClassifier(trial, name=None, n_samples=10):
    """Sample constructor keyword arguments for ``KNeighborsClassifier``.

    Args:
        trial: Object exposing ``suggest_int`` and ``suggest_categorical``
            (presumably an Optuna-style trial; confirm against callers).
        name: Suffix appended to every sampled parameter name.
        n_samples: Number of samples in the data set; bounds ``n_neighbors``.

    Returns:
        dict with sampled ``n_neighbors`` and ``weights``.
    """
    # The neighbour count is capped at 100 and can never exceed the number of
    # available samples (the previous ``max`` produced an upper bound of at
    # least 100 even for tiny data sets). Clamp to 1 so the range stays valid.
    n_neighbors_max = max(1, min(n_samples, 100))
    return {
        'n_neighbors': trial.suggest_int(f'n_neighbors_{name}', 1, n_neighbors_max, log=True),
        'weights': trial.suggest_categorical(f'weights_{name}', ['uniform', 'distance']),
    }

def params_LogisticRegression(trial, name=None):
    """Sample constructor keyword arguments for ``LogisticRegression``.

    Args:
        trial: Object exposing ``suggest_categorical`` and ``suggest_float``
            (presumably an Optuna-style trial; confirm against callers).
        name: Suffix appended to every sampled parameter name.

    Returns:
        dict with ``solver``, ``penalty``, ``dual``, ``C`` and a fixed
        ``max_iter`` of 1000.
    """
    # NOTE: the original assignment ended with a stray comma, which wrapped
    # the solver in a 1-tuple; the liblinear branch below was then never
    # taken and a tuple was returned as the solver.
    solver = trial.suggest_categorical(
        name=f'solver_{name}', choices=['liblinear', 'sag', 'saga']
    )

    # Defaults used by every solver other than liblinear.
    penalty = 'l2'
    dual = False

    if solver == 'liblinear':
        penalty = trial.suggest_categorical(name=f'penalty_{name}', choices=['l1', 'l2'])
        # The dual formulation is only sampled together with the l2 penalty.
        if penalty == 'l2':
            dual = trial.suggest_categorical(name=f'dual_{name}', choices=[True, False])

    return {
        'solver': solver,
        'penalty': penalty,
        'dual': dual,
        'C': trial.suggest_float(f'C_{name}', 1e-4, 1e4, log=True),
        'max_iter': 1000,
    }

def params_SVC(trial, name=None):
    """Sample constructor keyword arguments for ``SVC``.

    Args:
        trial: Object exposing ``suggest_categorical``, ``suggest_float`` and
            ``suggest_int`` (presumably an Optuna-style trial).
        name: Suffix appended to every sampled parameter name.

    Returns:
        dict with sampled ``kernel``, ``C``, ``degree`` and ``class_weight``
        plus fixed ``max_iter``, ``tol`` and ``probability`` values.
    """
    # Sampled values, requested in the same order as the returned keys.
    kernel = trial.suggest_categorical(
        name=f'kernel_{name}', choices=['poly', 'rbf', 'linear', 'sigmoid']
    )
    penalty_strength = trial.suggest_float(f'C_{name}', 1e-4, 25, log=True)
    poly_degree = trial.suggest_int(f'degree_{name}', 1, 4)
    class_weight = trial.suggest_categorical(
        name=f'class_weight_{name}', choices=[None, 'balanced']
    )

    sampled = dict(kernel=kernel, C=penalty_strength, degree=poly_degree, class_weight=class_weight)
    # Fixed settings shared by every sampled configuration.
    sampled.update(max_iter=3000, tol=0.005, probability=True)
    return sampled

def params_NuSVC(trial, name=None):
    """Sample constructor keyword arguments for ``NuSVC``.

    Unlike ``SVC`` and ``NuSVR``, the ``NuSVC`` constructor has no ``C``
    parameter, so none is sampled here; passing one would raise a
    ``TypeError`` when the estimator is instantiated from this dictionary.

    Args:
        trial: Object exposing ``suggest_categorical``, ``suggest_float`` and
            ``suggest_int`` (presumably an Optuna-style trial).
        name: Suffix appended to every sampled parameter name.

    Returns:
        dict with sampled ``nu``, ``kernel``, ``degree`` and ``class_weight``
        plus fixed ``max_iter``, ``tol`` and ``probability`` values.
    """
    return {
        # NOTE(review): the sampled name is 'subsample_' although the value
        # feeds 'nu'; kept unchanged so existing parameter names stay stable.
        'nu': trial.suggest_float(f'subsample_{name}', 0.05, 1.0),
        'kernel': trial.suggest_categorical(name=f'kernel_{name}', choices=['poly', 'rbf', 'linear', 'sigmoid']),
        'degree': trial.suggest_int(f'degree_{name}', 1, 4),
        'class_weight': trial.suggest_categorical(name=f'class_weight_{name}', choices=[None, 'balanced']),
        'max_iter': 3000,
        'tol': 0.005,
        'probability': True,
    }

def make_sklearnex_classifier_config_dictionary(n_samples=10, n_classes=None):
    """Build the estimator-class -> parameter-sampler mapping for classifiers.

    Args:
        n_samples: Number of samples in the data set. Forwarded to the
            k-nearest-neighbours sampler so its ``n_neighbors`` bound reflects
            the data (previously this argument was accepted but ignored).
        n_classes: Accepted for signature parity with callers; not used by
            any sampler in this mapping.

    Returns:
        dict mapping each sklearnex classifier class to a callable that takes
        ``(trial, name=None)`` and returns constructor keyword arguments.
    """
    # Imported locally so this block does not depend on a module-level import.
    from functools import partial

    return {
        RandomForestClassifier: params_RandomForestClassifier,
        KNeighborsClassifier: partial(params_KNeighborsClassifier, n_samples=n_samples),
        LogisticRegression: params_LogisticRegression,
        SVC: params_SVC,
        NuSVC: params_NuSVC,
    }
84 changes: 84 additions & 0 deletions tpot2/config/regressors_sklearnex.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
from sklearnex.linear_model import LinearRegression
from sklearnex.linear_model import Ridge
from sklearnex.linear_model import Lasso
from sklearnex.linear_model import ElasticNet

from sklearnex.svm import SVR
from sklearnex.svm import NuSVR

from sklearnex.ensemble import RandomForestRegressor
from sklearnex.neighbors import KNeighborsRegressor


def params_RandomForestRegressor(trial, name=None):
    """Sample constructor keyword arguments for ``RandomForestRegressor``.

    Args:
        trial: Object exposing ``suggest_float``, ``suggest_categorical`` and
            ``suggest_int`` (presumably an Optuna-style trial).
        name: Suffix appended to every sampled parameter name.

    Returns:
        dict with a fixed ``n_estimators`` of 100 plus sampled
        ``max_features``, ``bootstrap``, ``min_samples_split`` and
        ``min_samples_leaf``.
    """
    # Values are requested in the same order as the keys of the result.
    feature_fraction = trial.suggest_float(f'max_features_{name}', 0.05, 1.0)
    use_bootstrap = trial.suggest_categorical(
        name=f'bootstrap_{name}', choices=[True, False]
    )
    split_min = trial.suggest_int(f'min_samples_split_{name}', 2, 21)
    leaf_min = trial.suggest_int(f'min_samples_leaf_{name}', 1, 21)

    return dict(
        n_estimators=100,
        max_features=feature_fraction,
        bootstrap=use_bootstrap,
        min_samples_split=split_min,
        min_samples_leaf=leaf_min,
    )

def params_KNeighborsRegressor(trial, name=None, n_samples=100):
    """Sample constructor keyword arguments for ``KNeighborsRegressor``.

    Args:
        trial: Object exposing ``suggest_int`` and ``suggest_categorical``
            (presumably an Optuna-style trial; confirm against callers).
        name: Suffix appended to every sampled parameter name.
        n_samples: Number of samples in the data set; bounds ``n_neighbors``.

    Returns:
        dict with sampled ``n_neighbors`` and ``weights``.
    """
    # Cap the neighbour count at 100 and never exceed the number of samples
    # (the previous ``max`` let the bound grow with the data set and never
    # drop below 100). Clamp to 1 so the sampled range stays valid.
    n_neighbors_max = max(1, min(n_samples, 100))
    return {
        'n_neighbors': trial.suggest_int(f'n_neighbors_{name}', 1, n_neighbors_max),
        'weights': trial.suggest_categorical(f'weights_{name}', ['uniform', 'distance']),
    }

def params_LinearRegression(trial, name=None):
    """Return constructor keyword arguments for ``LinearRegression``.

    Nothing is sampled: both ``trial`` and ``name`` are accepted only so the
    signature matches the other parameter samplers.

    Returns:
        A new empty dict.
    """
    no_params = dict()
    return no_params

def params_Ridge(trial, name=None):
    """Sample constructor keyword arguments for ``Ridge``.

    Args:
        trial: Object exposing ``suggest_float`` (presumably an Optuna-style
            trial; confirm against callers).
        name: Suffix appended to every sampled parameter name.

    Returns:
        dict with sampled ``alpha`` and ``tol`` and ``fit_intercept`` fixed
        to True.
    """
    # alpha is requested before tol, matching the key order of the result.
    regularization = trial.suggest_float(f'alpha_{name}', 0.0, 1.0)
    tolerance = trial.suggest_float(f'tol_{name}', 1e-5, 1e-1, log=True)
    return dict(alpha=regularization, fit_intercept=True, tol=tolerance)

def params_Lasso(trial, name=None):
    """Sample constructor keyword arguments for ``Lasso``.

    Args:
        trial: Object exposing ``suggest_float`` and ``suggest_categorical``
            (presumably an Optuna-style trial; confirm against callers).
        name: Suffix appended to every sampled parameter name.

    Returns:
        dict with sampled ``alpha``, ``precompute``, ``tol``, ``positive``
        and ``selection`` and ``fit_intercept`` fixed to True.
    """
    # Built incrementally; insertion order mirrors the sampling order.
    config = {}
    config['alpha'] = trial.suggest_float(f'alpha_{name}', 0.0, 1.0)
    config['fit_intercept'] = True
    config['precompute'] = trial.suggest_categorical(f'precompute_{name}', [True, False, 'auto'])
    config['tol'] = trial.suggest_float(f'tol_{name}', 1e-5, 1e-1, log=True)
    config['positive'] = trial.suggest_categorical(f'positive_{name}', [True, False])
    config['selection'] = trial.suggest_categorical(f'selection_{name}', ['cyclic', 'random'])
    return config

def params_ElasticNet(trial, name=None):
    """Sample constructor keyword arguments for ``ElasticNet``.

    Both values are drawn from [0.0, 1.0] and then subtracted from 1 before
    being returned.

    Args:
        trial: Object exposing ``suggest_float`` (presumably an Optuna-style
            trial; confirm against callers).
        name: Suffix appended to every sampled parameter name.

    Returns:
        dict with ``alpha`` and ``l1_ratio``.
    """
    drawn_alpha = trial.suggest_float(f'alpha_{name}', 0.0, 1.0)
    drawn_ratio = trial.suggest_float(f'l1_ratio_{name}', 0.0, 1.0)
    return dict(alpha=1 - drawn_alpha, l1_ratio=1 - drawn_ratio)

def params_SVR(trial, name=None):
    """Sample constructor keyword arguments for ``SVR``.

    Args:
        trial: Object exposing ``suggest_categorical``, ``suggest_float`` and
            ``suggest_int`` (presumably an Optuna-style trial).
        name: Suffix appended to every sampled parameter name.

    Returns:
        dict with sampled ``kernel``, ``C`` and ``degree`` plus fixed
        ``max_iter`` and ``tol`` values.
    """
    kernel_options = ['poly', 'rbf', 'linear', 'sigmoid']

    # Sampled first, in the same order as the keys of the result.
    config = {
        'kernel': trial.suggest_categorical(name=f'kernel_{name}', choices=kernel_options),
        'C': trial.suggest_float(f'C_{name}', 1e-4, 25, log=True),
        'degree': trial.suggest_int(f'degree_{name}', 1, 4),
    }
    # Fixed solver settings.
    config['max_iter'] = 3000
    config['tol'] = 0.005
    return config

def params_NuSVR(trial, name=None):
    """Sample constructor keyword arguments for ``NuSVR``.

    Args:
        trial: Object exposing ``suggest_float``, ``suggest_categorical`` and
            ``suggest_int`` (presumably an Optuna-style trial).
        name: Suffix appended to every sampled parameter name.

    Returns:
        dict with sampled ``nu``, ``kernel``, ``C`` and ``degree`` plus fixed
        ``max_iter`` and ``tol`` values.
    """
    # The value for 'nu' is sampled under the parameter name 'subsample_<name>'.
    nu_value = trial.suggest_float(f'subsample_{name}', 0.05, 1.0)
    kernel = trial.suggest_categorical(
        name=f'kernel_{name}', choices=['poly', 'rbf', 'linear', 'sigmoid']
    )
    penalty_strength = trial.suggest_float(f'C_{name}', 1e-4, 25, log=True)
    poly_degree = trial.suggest_int(f'degree_{name}', 1, 4)

    return dict(
        nu=nu_value,
        kernel=kernel,
        C=penalty_strength,
        degree=poly_degree,
        max_iter=3000,
        tol=0.005,
    )

def make_sklearnex_regressor_config_dictionary(n_samples=10):
    """Build the estimator-class -> parameter-sampler mapping for regressors.

    Args:
        n_samples: Number of samples in the data set. Forwarded to the
            k-nearest-neighbours sampler so its ``n_neighbors`` bound reflects
            the data (previously this argument was accepted but ignored).

    Returns:
        dict mapping each sklearnex regressor class to a callable that takes
        ``(trial, name=None)`` and returns constructor keyword arguments.
    """
    # Imported locally so this block does not depend on a module-level import.
    from functools import partial

    return {
        RandomForestRegressor: params_RandomForestRegressor,
        KNeighborsRegressor: partial(params_KNeighborsRegressor, n_samples=n_samples),
        LinearRegression: params_LinearRegression,
        Ridge: params_Ridge,
        Lasso: params_Lasso,
        ElasticNet: params_ElasticNet,
        SVR: params_SVR,
        NuSVR: params_NuSVR,
    }
6 changes: 6 additions & 0 deletions tpot2/tpot_estimator/estimator_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,9 +41,15 @@ def get_configuration_dictionary(options, n_samples, n_features, classification,
elif option == "classifiers":
config_dict.update(tpot2.config.make_classifier_config_dictionary(n_samples=n_samples, n_classes=n_classes))

elif option == "classifiers_sklearnex":
config_dict.update(tpot2.config.make_sklearnex_classifier_config_dictionary(n_samples=n_samples, n_classes=n_classes))

elif option == "regressors":
config_dict.update(tpot2.config.make_regressor_config_dictionary(n_samples=n_samples))

elif option == "regressors_sklearnex":
config_dict.update(tpot2.config.make_sklearnex_regressor_config_dictionary(n_samples=n_samples))

elif option == "transformers":
config_dict.update(tpot2.config.make_transformer_config_dictionary(n_features=n_features))

Expand Down