fix num_nodes test, add update checker, setup.py change #130

Merged: 6 commits, Apr 24, 2024
setup.py (2 changes: 0 additions & 2 deletions)
@@ -42,12 +42,10 @@ def calculate_version():
'lightgbm>=3.3.3',
'optuna>=3.0.5',
'baikal>=0.4.2',
'jupyter>=1.0.0',
'networkx>=3.0',
'dask>=2024.4.2',
'distributed>=2024.4.2',
'dask-expr>=1.0.12',
'dask-ml>=2023.4.20',
'dask-jobqueue>=0.8.5',
'func_timeout>=4.3.5',
'configspace>=0.7.1',
tpot2/__init__.py (4 changes: 4 additions & 0 deletions)
@@ -18,3 +18,7 @@


from .tpot_estimator import TPOTClassifier, TPOTRegressor, TPOTEstimator, TPOTEstimatorSteadyState

from update_checker import update_check
from ._version import __version__
update_check("tpot2",__version__)
tpot2/_version.py (2 changes: 1 addition & 1 deletion)
@@ -1 +1 @@
__version__ = '0.1.5-alpha'
__version__ = '0.1.7-alpha'
tpot2/config/classifiers.py (18 changes: 10 additions & 8 deletions)
@@ -406,7 +406,6 @@ def get_HistGradientBoostingClassifier_ConfigurationSpace(n_features, random_state):
validation_fraction_cond = EqualsCondition(validation_fraction, early_stop, "valid")

space = {
'loss': Categorical("loss", ['log_loss', 'exponential']),
'learning_rate': Float("learning_rate", bounds=(1e-3, 1), log=True),
'min_samples_leaf': Integer("min_samples_leaf", bounds=(1, 200)),
'max_features': Float("max_features", bounds=(0.1,1.0)),
@@ -432,7 +431,6 @@ def HistGradientBoostingClassifier_hyperparameter_parser(params):
def HistGradientBoostingClassifier_hyperparameter_parser(params):

final_params = {
'loss': params['loss'],
'learning_rate': params['learning_rate'],
'min_samples_leaf': params['min_samples_leaf'],
'max_features': params['max_features'],
@@ -447,7 +445,7 @@ def HistGradientBoostingClassifier_hyperparameter_parser(params):


if params['early_stop'] == 'off':
final_params['n_iter_no_change'] = None
final_params['n_iter_no_change'] = 0
final_params['validation_fraction'] = None
final_params['early_stopping'] = False
elif params['early_stop'] == 'valid':
@@ -477,12 +475,14 @@ def get_MLPClassifier_ConfigurationSpace(random_state):

n_hidden_layers = Integer("n_hidden_layers", bounds=(1, 3))
n_nodes_per_layer = Integer("n_nodes_per_layer", bounds=(16, 512))
activation = Categorical("activation", ['tanh', 'relu'])
alpha = Float("alpha", bounds=(1e-7, 1e-1), log=True)
learning_rate = Float("learning_rate", bounds=(1e-4, 1e-1), log=True)
activation = Categorical("activation", ["identity", "logistic",'tanh', 'relu'])
alpha = Float("alpha", bounds=(1e-4, 1e-1), log=True)
early_stopping = Categorical("early_stopping", [True,False])

cs.add_hyperparameters([n_hidden_layers, n_nodes_per_layer, activation, alpha, learning_rate, early_stopping])
learning_rate_init = Float("learning_rate_init", bounds=(1e-4, 1e-1), log=True)
learning_rate = Categorical("learning_rate", ['constant', 'invscaling', 'adaptive'])

cs.add_hyperparameters([n_hidden_layers, n_nodes_per_layer, activation, alpha, learning_rate, early_stopping, learning_rate_init])

return cs

@@ -492,8 +492,10 @@ def MLPClassifier_hyperparameter_parser(params):
'hidden_layer_sizes' : [params['n_nodes_per_layer']]*params['n_hidden_layers'],
'activation': params['activation'],
'alpha': params['alpha'],
'learning_rate': params['learning_rate'],
'early_stopping': params['early_stopping'],

'learning_rate_init': params['learning_rate_init'],
'learning_rate': params['learning_rate'],
}

if 'random_state' in params:
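Note on the MLP change above: sklearn's MLPClassifier takes learning_rate as a schedule name ('constant', 'invscaling', 'adaptive') and learning_rate_init as the float step size; the old space sampled a float for learning_rate, which recent sklearn versions reject at fit time. A sketch of how the parsed params now feed the estimator (values illustrative, not from this PR; the same split is mirrored for MLPRegressor further down):

    from sklearn.neural_network import MLPClassifier

    # Illustrative output of MLPClassifier_hyperparameter_parser
    final_params = {
        'hidden_layer_sizes': [64, 64],  # n_nodes_per_layer repeated n_hidden_layers times
        'activation': 'relu',
        'alpha': 1e-3,
        'early_stopping': True,
        'learning_rate_init': 1e-2,      # float step size
        'learning_rate': 'adaptive',     # schedule name, not a float
    }
    clf = MLPClassifier(**final_params)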
tpot2/config/get_configspace.py (14 changes: 10 additions & 4 deletions)
@@ -104,12 +104,15 @@


# not including "PassiveAggressiveClassifier" in classifiers since it is mainly for larger than memory datasets/online use cases

# TODO need to subclass "GaussianProcessClassifier" and 'GaussianProcessRegressor'. These require n_features as a parameter for the kernel, but n_features may be different depending on selection functions or transformations previously in the pipeline.

GROUPNAMES = {
"selectors": ["SelectFwe", "SelectPercentile", "VarianceThreshold",],
"selectors_classification": ["SelectFwe", "SelectPercentile", "VarianceThreshold", "RFE_classification", "SelectFromModel_classification"],
"selectors_regression": ["SelectFwe", "SelectPercentile", "VarianceThreshold", "RFE_regression", "SelectFromModel_regression"],
"classifiers" : ["LGBMRegressor", "BaggingClassifier", "GaussianProcessClassifier", 'AdaBoostClassifier', 'BernoulliNB', 'DecisionTreeClassifier', 'ExtraTreesClassifier', 'GaussianNB', 'HistGradientBoostingClassifier', 'KNeighborsClassifier','LinearDiscriminantAnalysis', 'LogisticRegression', "LinearSVC", "SVC", 'MLPClassifier', 'MultinomialNB', "QuadraticDiscriminantAnalysis", 'RandomForestClassifier', 'SGDClassifier', 'XGBClassifier'],
"regressors" : ['AdaBoostRegressor', "ARDRegression", 'DecisionTreeRegressor', 'ExtraTreesRegressor', 'GaussianProcessRegressor', 'HistGradientBoostingRegressor', 'KNeighborsRegressor', 'LinearSVR', "MLPRegressor", 'RandomForestRegressor', 'SGDRegressor', 'SVR', 'XGBRegressor'],
"classifiers" : ["LGBMRegressor", "BaggingClassifier", 'AdaBoostClassifier', 'BernoulliNB', 'DecisionTreeClassifier', 'ExtraTreesClassifier', 'GaussianNB', 'HistGradientBoostingClassifier', 'KNeighborsClassifier','LinearDiscriminantAnalysis', 'LogisticRegression', "LinearSVC", "SVC", 'MLPClassifier', 'MultinomialNB', "QuadraticDiscriminantAnalysis", 'RandomForestClassifier', 'SGDClassifier', 'XGBClassifier'],
"regressors" : ['AdaBoostRegressor', "ARDRegression", 'DecisionTreeRegressor', 'ExtraTreesRegressor', 'HistGradientBoostingRegressor', 'KNeighborsRegressor', 'LinearSVR', "MLPRegressor", 'RandomForestRegressor', 'SGDRegressor', 'SVR', 'XGBRegressor'],


"transformers": ["Binarizer", "PCA", "ZeroCount", "ColumnOneHotEncoder", "FastICA", "FeatureAgglomeration", "Nystroem", "RBFSampler", "QuantileTransformer", "PowerTransformer"],
@@ -263,7 +266,7 @@ def get_configspace(name, n_classes=3, n_samples=100, n_features=100, random_state=None):
case "FastICA":
return transformers.get_FastICA_configspace(n_features=n_features, random_state=random_state)
case "FeatureAgglomeration":
return transformers.get_FeatureAgglomeration_configspace(n_features=n_features,)
return transformers.get_FeatureAgglomeration_configspace(n_samples=n_samples)
case "Nystroem":
return transformers.get_Nystroem_configspace(n_features=n_features, random_state=random_state)
case "RBFSampler":
@@ -435,9 +438,12 @@ def get_node(name, n_classes=3, n_samples=100, n_features=100, random_state=None):
if name == "HistGradientBoostingClassifier":
configspace = get_configspace(name, n_classes=n_classes, n_samples=n_samples, random_state=random_state)
return EstimatorNode(STRING_TO_CLASS[name], configspace, hyperparameter_parser=classifiers.HistGradientBoostingClassifier_hyperparameter_parser)
if name == "GradientBoostingRegressor" or name == "HistGradientBoostingRegressor":
if name == "GradientBoostingRegressor":
configspace = get_configspace(name, n_classes=n_classes, n_samples=n_samples, random_state=random_state)
return EstimatorNode(STRING_TO_CLASS[name], configspace, hyperparameter_parser=regressors.GradientBoostingRegressor_hyperparameter_parser)
if name == "HistGradientBoostingRegressor":
configspace = get_configspace(name, n_classes=n_classes, n_samples=n_samples, random_state=random_state)
return EstimatorNode(STRING_TO_CLASS[name], configspace, hyperparameter_parser=regressors.HistGradientBoostingRegressor_hyperparameter_parser)
if name == "MLPClassifier":
configspace = get_configspace(name, n_classes=n_classes, n_samples=n_samples, random_state=random_state)
return EstimatorNode(STRING_TO_CLASS[name], configspace, hyperparameter_parser=classifiers.MLPClassifier_hyperparameter_parser)
tpot2/config/regressors.py (53 changes: 44 additions & 9 deletions)
@@ -412,6 +412,36 @@ def get_GradientBoostingRegressor_ConfigurationSpace(n_features, random_state):
cs.add_conditions([validation_fraction_cond, n_iter_no_change_cond])
return cs

def GradientBoostingRegressor_hyperparameter_parser(params):

final_params = {
'loss': params['loss'],
'learning_rate': params['learning_rate'],
'min_samples_leaf': params['min_samples_leaf'],
'min_samples_split': params['min_samples_split'],
'max_features': params['max_features'],
'max_leaf_nodes': params['max_leaf_nodes'],
'max_depth': params['max_depth'],
'tol': params['tol'],
'subsample': params['subsample']
}

if 'random_state' in params:
final_params['random_state'] = params['random_state']

if params['early_stop'] == 'off':
final_params['n_iter_no_change'] = None
final_params['validation_fraction'] = None
elif params['early_stop'] == 'valid':
final_params['n_iter_no_change'] = params['n_iter_no_change']
final_params['validation_fraction'] = params['validation_fraction']
elif params['early_stop'] == 'train':
final_params['n_iter_no_change'] = params['n_iter_no_change']
final_params['validation_fraction'] = None


return final_params
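A quick usage sketch of the new parser (sample values are illustrative, not drawn from the config space; keys match the space defined in get_GradientBoostingRegressor_ConfigurationSpace):

    params = {
        'loss': 'huber', 'learning_rate': 0.1, 'min_samples_leaf': 5,
        'min_samples_split': 4, 'max_features': 0.5, 'max_leaf_nodes': 31,
        'max_depth': 3, 'tol': 1e-4, 'subsample': 0.8,
        'early_stop': 'valid', 'n_iter_no_change': 10, 'validation_fraction': 0.1,
    }
    final = GradientBoostingRegressor_hyperparameter_parser(params)
    # early_stop == 'valid' keeps both n_iter_no_change and validation_fraction
    assert final['n_iter_no_change'] == 10 and final['validation_fraction'] == 0.1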

#only difference is l2_regularization
def get_HistGradientBoostingRegressor_ConfigurationSpace(n_features, random_state):
early_stop = Categorical("early_stop", ["off", "valid", "train"])
@@ -443,39 +473,40 @@ def get_HistGradientBoostingRegressor_ConfigurationSpace(n_features, random_state):

return cs

def GradientBoostingRegressor_hyperparameter_parser(params):

def HistGradientBoostingRegressor_hyperparameter_parser(params):

final_params = {
'loss': params['loss'],
'learning_rate': params['learning_rate'],
'min_samples_leaf': params['min_samples_leaf'],
'max_features': params['max_features'],
'max_leaf_nodes': params['max_leaf_nodes'],
'max_depth': params['max_depth'],
'tol': params['tol'],
'l2_regularization': params['l2_regularization']
}

if "l2_regularization" in params:
final_params['l2_regularization'] = params['l2_regularization']

if 'random_state' in params:
final_params['random_state'] = params['random_state']


if params['early_stop'] == 'off':
final_params['n_iter_no_change'] = None
final_params['n_iter_no_change'] = 0
final_params['validation_fraction'] = None
final_params['early_stopping'] = False
elif params['early_stop'] == 'valid':
final_params['n_iter_no_change'] = params['n_iter_no_change']
final_params['validation_fraction'] = params['validation_fraction']
final_params['early_stopping'] = True
elif params['early_stop'] == 'train':
final_params['n_iter_no_change'] = params['n_iter_no_change']
final_params['validation_fraction'] = None
final_params['early_stopping'] = True


return final_params



###

def get_MLPRegressor_ConfigurationSpace(random_state):
@@ -495,7 +526,10 @@ def get_MLPRegressor_ConfigurationSpace(random_state):
learning_rate = Float("learning_rate", bounds=(1e-4, 1e-1), log=True)
early_stopping = Categorical("early_stopping", [True,False])

cs.add_hyperparameters([n_hidden_layers, n_nodes_per_layer, activation, alpha, learning_rate, early_stopping])
learning_rate_init = Float("learning_rate_init", bounds=(1e-4, 1e-1), log=True)
learning_rate = Categorical("learning_rate", ['constant', 'invscaling', 'adaptive'])

cs.add_hyperparameters([n_hidden_layers, n_nodes_per_layer, activation, alpha, learning_rate, early_stopping, learning_rate_init])

return cs

@@ -505,8 +539,9 @@ def MLPRegressor_hyperparameter_parser(params):
'hidden_layer_sizes' : [params['n_nodes_per_layer']]*params['n_hidden_layers'],
'activation': params['activation'],
'alpha': params['alpha'],
'learning_rate': params['learning_rate'],
'early_stopping': params['early_stopping'],
'learning_rate_init': params['learning_rate_init'],
'learning_rate': params['learning_rate'],
}

if 'random_state' in params:
tpot2/config/transformers.py (4 changes: 2 additions & 2 deletions)
@@ -44,11 +44,11 @@ def get_FastICA_configspace(n_features=100, random_state=None):

)

def get_FeatureAgglomeration_configspace(n_features=100):
def get_FeatureAgglomeration_configspace(n_samples):

linkage = Categorical('linkage', ['ward', 'complete', 'average'])
metric = Categorical('metric', ['euclidean', 'l1', 'l2', 'manhattan', 'cosine'])
n_clusters = Integer('n_clusters', bounds=(2, 400))
n_clusters = Integer('n_clusters', bounds=(2, min(n_samples,400)))
pooling_func = Categorical('pooling_func', ['mean', 'median', 'max'])

metric_condition = NotEqualsCondition(metric, linkage, 'ward')
tpot2/objectives/number_of_nodes.py (2 changes: 1 addition & 1 deletion)
@@ -4,7 +4,7 @@

def number_of_nodes_objective(est):
if isinstance(est, GraphPipeline):
return sum(node["instance"] for node in est.graph.nodes)
return sum(number_of_nodes_objective(est.graph.nodes[node]["instance"]) for node in est.graph.nodes)
if isinstance(est, Pipeline):
return sum(number_of_nodes_objective(estimator) for _,estimator in est.steps)
if isinstance(est, sklearn.pipeline.FeatureUnion):
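This is the fix behind the PR title: the old line tried to sum the node objects themselves, while the fix recurses into each node's "instance" and counts. A sanity check on a plain sklearn Pipeline, assuming the base case (elided below the FeatureUnion branch) returns 1 for a bare estimator:

    from sklearn.pipeline import Pipeline
    from sklearn.preprocessing import StandardScaler
    from sklearn.linear_model import LogisticRegression
    from tpot2.objectives.number_of_nodes import number_of_nodes_objective

    pipe = Pipeline([('scale', StandardScaler()), ('clf', LogisticRegression())])
    # Two leaf estimators -> 2 nodes, counted step by step
    assert number_of_nodes_objective(pipe) == 2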
tpot2/objectives/tests/test_complexity_objective.py (1 change: 0 additions & 1 deletion)
@@ -1 +0,0 @@
from ..complexity import BernoulliNB_Complexity, GaussianNB_Complexity, MultinomialNB_Complexity
tpot2/search_spaces/pipelines/graph.py (6 changes: 5 additions & 1 deletion)
@@ -750,7 +750,11 @@ def generate(self, rng=None):
self.cross_val_predict_cv, self.method, self.memory, self.use_label_encoder, rng=rng)
# if user specified limit, grab a random number between that limit

n_nodes = min(rng.integers(1, self.max_size), 5)
if self.max_size is None or self.max_size == np.inf:
n_nodes = rng.integers(1, 5)
else:
n_nodes = min(rng.integers(1, self.max_size), 5)

starting_ops = []
if self.inner_search_space is not None:
starting_ops.append(ind._mutate_insert_inner_node)
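Why the new branch is needed: numpy's Generator.integers requires finite integer bounds, so the old call raised once max_size was left at None or np.inf. A minimal repro sketch:

    import numpy as np

    rng = np.random.default_rng(0)
    rng.integers(1, 5)         # fine: random int in [1, 5)
    # rng.integers(1, np.inf)  # raises: an infinite high bound cannot be cast to an integer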
tpot2/tpot_estimator/templates/tpottemplates.py (41 changes: 28 additions & 13 deletions)
@@ -30,6 +30,7 @@ def __init__( self,
memory_limit = "4GB",
client = None,
random_state=None,
allow_inner_regressors=True,
**tpotestimator_kwargs,
):
"""
@@ -58,6 +59,7 @@ def __init__( self,
self.memory_limit = memory_limit
self.client = client
self.random_state = random_state
self.allow_inner_regressors = allow_inner_regressors
self.tpotestimator_kwargs = tpotestimator_kwargs

self.initialized = False
@@ -71,13 +73,18 @@ def fit(self, X, y):
"n_features":X.shape[1],
"random_state":self.random_state}

search_space = tpot2.search_spaces.pipelines.GraphPipeline(
root_search_space= tpot2.config.get_search_space("regressors", **get_search_space_params),
leaf_search_space = None,
inner_search_space = tpot2.config.get_search_space(["selectors","transformers","regressors","scalers"],**get_search_space_params),
max_size = 10,
)

if self.allow_inner_regressors:
search_space = tpot2.search_spaces.pipelines.GraphPipeline(
root_search_space= tpot2.config.get_search_space("regressors", **get_search_space_params),
leaf_search_space = None,
inner_search_space = tpot2.config.get_search_space(["selectors","transformers","regressors","scalers"],**get_search_space_params),
)
else:
search_space = tpot2.search_spaces.pipelines.GraphPipeline(
root_search_space= tpot2.config.get_search_space("regressors", **get_search_space_params),
leaf_search_space = None,
inner_search_space = tpot2.config.get_search_space(["selectors","transformers","scalers"],**get_search_space_params),
)

super(TPOTRegressor,self).__init__(
search_space=search_space,
@@ -134,6 +141,7 @@ def __init__( self,
memory_limit = "4GB",
client = None,
random_state=None,
allow_inner_classifiers=True,
**tpotestimator_kwargs,

):
@@ -164,6 +172,7 @@ def __init__( self,
self.client = client
self.random_state = random_state
self.tpotestimator_kwargs = tpotestimator_kwargs
self.allow_inner_classifiers = allow_inner_classifiers

self.initialized = False

Expand All @@ -176,12 +185,18 @@ def fit(self, X, y):
"n_features":X.shape[1],
"random_state":self.random_state}

search_space = tpot2.search_spaces.pipelines.GraphPipeline(
root_search_space= tpot2.config.get_search_space("classifiers", **get_search_space_params),
leaf_search_space = None,
inner_search_space = tpot2.config.get_search_space(["selectors","transformers","classifiers", "scalers"], **get_search_space_params),
max_size = 10,
)
if self.allow_inner_classifiers:
search_space = tpot2.search_spaces.pipelines.GraphPipeline(
root_search_space= tpot2.config.get_search_space("classifiers", **get_search_space_params),
leaf_search_space = None,
inner_search_space = tpot2.config.get_search_space(["selectors","transformers","classifiers", "scalers"], **get_search_space_params),
)
else:
search_space = tpot2.search_spaces.pipelines.GraphPipeline(
root_search_space= tpot2.config.get_search_space("classifiers", **get_search_space_params),
leaf_search_space = None,
inner_search_space = tpot2.config.get_search_space(["selectors","transformers","scalers"], **get_search_space_params),
)


super(TPOTClassifier,self).__init__(
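The new flags let users keep estimators out of the inner graph nodes. Hypothetical usage, with parameter names as added in this PR:

    import tpot2

    # Inner nodes limited to selectors/transformers/scalers; the classifier
    # appears only at the root of the GraphPipeline.
    clf = tpot2.TPOTClassifier(allow_inner_classifiers=False)

    # Mirrored on the regression side.
    reg = tpot2.TPOTRegressor(allow_inner_regressors=False)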