From 1495af6240f35cf4104a1a3a4a7a8f75f25e4287 Mon Sep 17 00:00:00 2001 From: perib Date: Thu, 12 Oct 2023 14:36:13 -0700 Subject: [PATCH 1/7] hyperparameter mutation changes --- tpot2/config/hyperparametersuggestor.py | 261 +++++++++++++----- .../graph_pipeline_individual/individual.py | 64 ++++- .../graph_pipeline_individual/templates.py | 7 + tpot2/tpot_estimator/estimator.py | 10 + 4 files changed, 259 insertions(+), 83 deletions(-) diff --git a/tpot2/config/hyperparametersuggestor.py b/tpot2/config/hyperparametersuggestor.py index 01fa188d..f0f1318c 100644 --- a/tpot2/config/hyperparametersuggestor.py +++ b/tpot2/config/hyperparametersuggestor.py @@ -4,86 +4,203 @@ +class Trial(): -#Replicating the API found in optuna: https://optuna.readthedocs.io/en/stable/reference/generated/optuna.trial.Trial.html -#copy-pasted some code -def suggest_categorical(name, choices): - return random.choice(choices) - -def suggest_float( - name: str, - low: float, - high: float, - *, - step = None, - log = False, - ): - - if log and step is not None: - raise ValueError("The parameter `step` is not supported when `log` is true.") - - if low > high: - raise ValueError( - "The `low` value must be smaller than or equal to the `high` value " - "(low={}, high={}).".format(low, high) - ) - - if log and low <= 0.0: - raise ValueError( - "The `low` value must be larger than 0 for a log distribution " - "(low={}, high={}).".format(low, high) - ) - - if step is not None and step <= 0: - raise ValueError( - "The `step` value must be non-zero positive value, " "but step={}.".format(step) - ) - - #TODO check this produces correct output - if log: - value = np.random.uniform(np.log(low),np.log(high)) - return np.e**value - - else: - if step is not None: - return np.random.choice(np.arange(low,high,step)) + def __init__(self, old_params=None, alpha=1, hyperparameter_probability=1): + self._params = dict() + + self.old_params = old_params + self.alpha = alpha + self.hyperparameter_probability = hyperparameter_probability + + if old_params is not None: + self.params_to_update = set(random.sample(list(old_params.keys()), max(int(len(old_params.keys())*self.hyperparameter_probability),1))) else: - return np.random.uniform(low,high) + self.params_to_update = None + + + #Replicating the API found in optuna: https://optuna.readthedocs.io/en/stable/reference/generated/optuna.trial.Trial.html + #copy-pasted some code + def suggest_categorical(self, name, choices): + if self.params_to_update == None or name in self.params_to_update: #If this parameter is selected to be changed + choice = self.suggest_categorical_(name, choices) + else: #if this parameter is not selected to be changed + if name not in self.old_params: #if this parameter is not in the old params, then we need to choose a value for it + choice = self.suggest_categorical_(name, choices) + else: #if this parameter is in the old params, then we can just use the old value + choice = self.old_params[name] + if choice not in choices: #if the old value is not in the choices, then we need to choose a value for it + choice = self.suggest_categorical_(name, choices) + + self._params[name] = choice + return choice + + def suggest_float(self, + name: str, + low: float, + high: float, + *, + step = None, + log = False, + ): + if self.params_to_update == None or name in self.params_to_update: #If this parameter is selected to be changed + choice = self.suggest_float_(name, low=low, high=high, step=step, log=log) + if self.old_params is not None and name in self.old_params: + 
choice = self.alpha*choice + (1-self.alpha)*self.old_params[name] + else: #if this parameter is not selected to be changed + + if name not in self.old_params: + choice = self.suggest_float_(name, low=low, high=high, step=step, log=log) + else: + choice = self.old_params[name] + + self._params[name] = choice + return choice + + + + def suggest_discrete_uniform(self, name, low, high, q): + if self.params_to_update == None or name in self.params_to_update: + choice = self.suggest_discrete_uniform_(name, low=low, high=high, q=q) + if self.old_params is not None and name in self.old_params: + choice = self.alpha*choice + (1-self.alpha)*self.old_params[name] + else: + if name not in self.old_params: + choice = self.suggest_discrete_uniform_(name, low=low, high=high, q=q) + else: + choice = self.old_params[name] + self._params[name] = choice + return choice -def suggest_discrete_uniform(name, low, high, q): - return suggest_float(name, low, high, step=q) -def suggest_int(name, low, high, step=1, log=False): - if low == high: #TODO check that this matches optuna's behaviour - return low - - if log and step >1: - raise ValueError("The parameter `step`>1 is not supported when `log` is true.") + def suggest_int(self, name, low, high, step=1, log=False): + if self.params_to_update == None or name in self.params_to_update: + choice = self.suggest_int_(name, low=low, high=high, step=step, log=log) + if self.old_params is not None and name in self.old_params: + choice = int(self.alpha*choice + (1-self.alpha)*self.old_params[name]) + else: + if name not in self.old_params: + choice = self.suggest_int_(name, low=low, high=high, step=step, log=log) + else: + choice = self.old_params[name] - if low > high: - raise ValueError( - "The `low` value must be smaller than or equal to the `high` value " - "(low={}, high={}).".format(low, high) - ) + self._params[name] = choice + return choice - if log and low <= 0.0: - raise ValueError( - "The `low` value must be larger than 0 for a log distribution " - "(low={}, high={}).".format(low, high) - ) - if step is not None and step <= 0: - raise ValueError( - "The `step` value must be non-zero positive value, " "but step={}.".format(step) - ) + def suggest_uniform(self, name, low, high): + if self.params_to_update == None or name in self.params_to_update: + choice = self.suggest_uniform_(name, low=low, high=high) + if self.old_params is not None and name in self.old_params: + choice = self.alpha*choice + (1-self.alpha)*self.old_params[name] + else: + if name not in self.old_params: + choice = self.suggest_uniform_(name, low=low, high=high) + else: + choice = self.old_params[name] + + self._params[name] = choice + return choice + + + +#################################### + #Replicating the API found in optuna: https://optuna.readthedocs.io/en/stable/reference/generated/optuna.trial.Trial.html + #copy-pasted some code + def suggest_categorical_(self, name, choices): + + choice = random.choice(choices) + self._params[name] = choice + return choice + + def suggest_float_(self, + name: str, + low: float, + high: float, + *, + step = None, + log = False, + ): + + if log and step is not None: + raise ValueError("The parameter `step` is not supported when `log` is true.") + + if low > high: + raise ValueError( + "The `low` value must be smaller than or equal to the `high` value " + "(low={}, high={}).".format(low, high) + ) + + if log and low <= 0.0: + raise ValueError( + "The `low` value must be larger than 0 for a log distribution " + "(low={}, high={}).".format(low, high) + ) 
+ + if step is not None and step <= 0: + raise ValueError( + "The `step` value must be non-zero positive value, " "but step={}.".format(step) + ) + + #TODO check this produces correct output + if log: + value = np.random.uniform(np.log(low),np.log(high)) + choice = np.e**value + self._params[name] = choice + return choice - if log: - value = np.random.uniform(np.log(low),np.log(high)) - return int(np.e**value) - else: - return np.random.choice(list(range(low,high,step))) + else: + if step is not None: + choice = np.random.choice(np.arange(low,high,step)) + self._params[name] = choice + return choice + else: + choice = np.random.uniform(low,high) + self._params[name] = choice + return choice + + + def suggest_discrete_uniform_(self, name, low, high, q): + choice = self.suggest_float(name, low, high, step=q) + self._params[name] = choice + return choice + + + def suggest_int_(self, name, low, high, step=1, log=False): + if low == high: #TODO check that this matches optuna's behaviour + return low + + if log and step >1: + raise ValueError("The parameter `step`>1 is not supported when `log` is true.") + + if low > high: + raise ValueError( + "The `low` value must be smaller than or equal to the `high` value " + "(low={}, high={}).".format(low, high) + ) + + if log and low <= 0.0: + raise ValueError( + "The `low` value must be larger than 0 for a log distribution " + "(low={}, high={}).".format(low, high) + ) + + if step is not None and step <= 0: + raise ValueError( + "The `step` value must be non-zero positive value, " "but step={}.".format(step) + ) + + if log: + value = np.random.uniform(np.log(low),np.log(high)) + choice = int(np.e**value) + self._params[name] = choice + return choice + else: + choice = np.random.choice(list(range(low,high,step))) + self._params[name] = choice + return choice -def suggest_uniform(name, low, high): - return suggest_float(name, low, high) \ No newline at end of file + def suggest_uniform_(self, name, low, high): + return self.suggest_float(name, low, high) \ No newline at end of file diff --git a/tpot2/individual_representations/graph_pipeline_individual/individual.py b/tpot2/individual_representations/graph_pipeline_individual/individual.py index 89abf09b..d4bed314 100644 --- a/tpot2/individual_representations/graph_pipeline_individual/individual.py +++ b/tpot2/individual_representations/graph_pipeline_individual/individual.py @@ -29,6 +29,9 @@ def __init__(self, *, self.method_class = method_class #transformer or baseestimator self.hyperparameters = hyperparameters self.label = label + self._params = None + + from functools import partial #@https://stackoverflow.com/questions/20530455/isomorphic-comparison-of-networkx-graph-objects-instead-of-the-default-address @@ -114,6 +117,9 @@ def __init__( crossover_same_depth = False, crossover_same_recursive_depth = True, + hyperparameter_probability = 1, + hyper_node_probability = 0, + hyperparameter_alpha = 1, unique_subset_values = None, initial_subset_values = None, @@ -135,6 +141,10 @@ def __init__( self.unique_subset_values = unique_subset_values self.initial_subset_values = initial_subset_values + self.hyperparameter_probability = hyperparameter_probability + self.hyper_node_probability = hyper_node_probability + self.hyperparameter_alpha = hyperparameter_alpha + if self.unique_subset_values is not None: self.row_subset_selector = tpot2.representations.SubsetSelector(values=unique_subset_values, initial_set=initial_subset_values,k=20) @@ -237,7 +247,8 @@ def initialize_all_nodes(self,): if node.method_class 
is None: node.method_class = random.choice(list(self.select_config_dict(node).keys())) if node.hyperparameters is None: - node.hyperparameters = self.select_config_dict(node)[node.method_class](config.hyperparametersuggestor) + get_hyperparameter(self.select_config_dict(node)[node.method_class], nodelabel=node, alpha=self.hyperparameter_alpha, hyperparameter_probability=self.hyperparameter_probability) + def fix_noncompliant_leafs(self): leafs = [node for node in self.graph.nodes if len(list(self.graph.successors(node)))==0] @@ -254,6 +265,7 @@ def fix_noncompliant_leafs(self): first_leaf = NodeLabel(config_dict=self.leaf_config_dict) first_leaf.method_class = random.choice(list(first_leaf.config_dict.keys())) #TODO: check when there is no new method first_leaf.hyperparameters = first_leaf.config_dict[first_leaf.method_class](config.hyperparametersuggestor) + get_hyperparameter(self.select_config_dict(first_leaf)[first_leaf.method_class], nodelabel=first_leaf, alpha=self.hyperparameter_alpha, hyperparameter_probability=self.hyperparameter_probability) compliant_leafs.append(first_leaf) #connect bad leaves to good leaves (making them internal nodes) @@ -547,15 +559,24 @@ def _mutate_hyperparameters(self): ''' sorted_nodes_list = list(self.graph.nodes) random.shuffle(sorted_nodes_list) + completed_one = False for node in sorted_nodes_list: if isinstance(node,GraphIndividual): continue if isinstance(self.select_config_dict(node)[node.method_class], dict): continue - node.hyperparameters = self.select_config_dict(node)[node.method_class](config.hyperparametersuggestor) - + + if not completed_one: + + get_hyperparameter(self.select_config_dict(node)[node.method_class], nodelabel=node, alpha=self.hyperparameter_alpha, hyperparameter_probability=self.hyperparameter_probability) + else: + if self.hyper_node_probability < random.random(): + get_hyperparameter(self.select_config_dict(node)[node.method_class], nodelabel=node, alpha=self.hyperparameter_alpha, hyperparameter_probability=self.hyperparameter_probability) return True return False + + + def _mutate_replace_node(self): ''' @@ -570,9 +591,11 @@ def _mutate_replace_node(self): node.method_class = random.choice(list(self.select_config_dict(node).keys())) if isinstance(self.select_config_dict(node)[node.method_class], dict): hyperparameters = self.select_config_dict(node)[node.method_class] + node.hyperparameters = hyperparameters else: - hyperparameters = self.select_config_dict(node)[node.method_class](config.hyperparametersuggestor) - node.hyperparameters = hyperparameters + #hyperparameters = self.select_config_dict(node)[node.method_class](config.hyperparametersuggestor) + get_hyperparameter(self.select_config_dict(node)[node.method_class], nodelabel=node, alpha=self.hyperparameter_alpha, hyperparameter_probability=self.hyperparameter_probability) + return True return False @@ -1024,6 +1047,7 @@ def _optimize_optuna_single_method_full_pipeline(self, objective_function, steps def objective(trial): params = self.select_config_dict(node)[node.method_class](trial) node.hyperparameters = params + trial.set_user_attr('params', params) try: return objective_function(self) @@ -1115,14 +1139,14 @@ def create_node(config_dict): if method_class == 'Recursive': node = GraphIndividual(**config_dict[method_class]) else: - if isinstance(config_dict[method_class], dict): - hyperparameters = config_dict[method_class] - else: - hyperparameters = config_dict[method_class](config.hyperparametersuggestor) + hyperparameters, params = 
get_hyperparameter(config_dict[method_class], nodelabel=None) node = NodeLabel( method_class=method_class, - hyperparameters=hyperparameters) + hyperparameters=hyperparameters + ) + node._params = params + return node @@ -1137,4 +1161,22 @@ def random_weighted_sort(l,weights): indeces.pop(next_item) sorted_l.append(l[next_item]) - return sorted_l \ No newline at end of file + return sorted_l + + + +def get_hyperparameter(config_func, nodelabel=None, alpha=1, hyperparameter_probability=1): + if isinstance(config_func, dict): + return config_func, None + + if nodelabel is not None: + trial = config.hyperparametersuggestor.Trial(old_params=nodelabel._params, alpha=alpha, hyperparameter_probability=hyperparameter_probability) + new_params = config_func(trial) + + nodelabel._params = trial._params + nodelabel.hyperparameters = new_params + else: + trial = config.hyperparametersuggestor.Trial(old_params=None, alpha=alpha, hyperparameter_probability=hyperparameter_probability) + new_params = config_func(trial) + + return new_params, trial._params, \ No newline at end of file diff --git a/tpot2/individual_representations/graph_pipeline_individual/templates.py b/tpot2/individual_representations/graph_pipeline_individual/templates.py index 6991c042..decc4570 100644 --- a/tpot2/individual_representations/graph_pipeline_individual/templates.py +++ b/tpot2/individual_representations/graph_pipeline_individual/templates.py @@ -14,6 +14,10 @@ def estimator_graph_individual_generator( leaf_config_dict=None, max_size = np.inf, linear_pipeline = False, + + hyperparameter_probability = 1, + hyper_node_probability = 0, + hyperparameter_alpha = 1, **kwargs, ) : @@ -37,6 +41,9 @@ def estimator_graph_individual_generator( max_size = max_size, linear_pipeline = linear_pipeline, + hyperparameter_probability = hyperparameter_probability, + hyper_node_probability = hyper_node_probability, + hyperparameter_alpha = hyperparameter_alpha, **kwargs, ) diff --git a/tpot2/tpot_estimator/estimator.py b/tpot2/tpot_estimator/estimator.py index 351aa12b..8b9af74d 100644 --- a/tpot2/tpot_estimator/estimator.py +++ b/tpot2/tpot_estimator/estimator.py @@ -37,6 +37,10 @@ def __init__(self, scorers, other_objective_functions_weights = [], objective_function_names = None, bigger_is_better = True, + + hyperparameter_probability = 1, + hyper_node_probability = 0, + hyperparameter_alpha = 1, max_size = np.inf, linear_pipeline = False, root_config_dict= 'Auto', @@ -427,6 +431,9 @@ def __init__(self, scorers, self.other_objective_functions_weights = other_objective_functions_weights self.objective_function_names = objective_function_names self.bigger_is_better = bigger_is_better + self.hyperparameter_probability = hyperparameter_probability + self.hyper_node_probability = hyper_node_probability + self.hyperparameter_alpha = hyperparameter_alpha self.max_size = max_size self.linear_pipeline = linear_pipeline self.root_config_dict= root_config_dict @@ -685,6 +692,9 @@ def objective_function(pipeline_individual, leaf_config_dict=leaf_config_dict, max_size = self.max_size, linear_pipeline=self.linear_pipeline, + hyperparameter_probability=self.hyperparameter_probability, + hyper_node_probability=self.hyper_node_probability, + hyperparameter_alpha=self.hyperparameter_alpha, ) if self.threshold_evaluation_early_stop is not None or self.selection_evaluation_early_stop is not None: From 98358196f762b9abc59fb993c1e423a764e7bd0f Mon Sep 17 00:00:00 2001 From: perib Date: Thu, 12 Oct 2023 16:54:14 -0700 Subject: [PATCH 2/7] replace node creates new 
node hyper fix --- .../graph_pipeline_individual/individual.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/tpot2/individual_representations/graph_pipeline_individual/individual.py b/tpot2/individual_representations/graph_pipeline_individual/individual.py index d4bed314..257e2e20 100644 --- a/tpot2/individual_representations/graph_pipeline_individual/individual.py +++ b/tpot2/individual_representations/graph_pipeline_individual/individual.py @@ -594,8 +594,13 @@ def _mutate_replace_node(self): node.hyperparameters = hyperparameters else: #hyperparameters = self.select_config_dict(node)[node.method_class](config.hyperparametersuggestor) - get_hyperparameter(self.select_config_dict(node)[node.method_class], nodelabel=node, alpha=self.hyperparameter_alpha, hyperparameter_probability=self.hyperparameter_probability) - + #get_hyperparameter(self.select_config_dict(node)[node.method_class], nodelabel=None, alpha=self.hyperparameter_alpha, hyperparameter_probability=self.hyperparameter_probability) + new_node = create_node(self.select_config_dict(node)[node.method_class]) + #TODO cleanup + node.hyperparameters = new_node.hyperparameters + node.method_class = new_node.method_class + node.label = new_node.label + return True return False From 10e1c75de044d64a7403644dfdf65e48834b4091 Mon Sep 17 00:00:00 2001 From: perib Date: Mon, 16 Oct 2023 11:22:32 -0700 Subject: [PATCH 3/7] fix --- .../graph_pipeline_individual/individual.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tpot2/individual_representations/graph_pipeline_individual/individual.py b/tpot2/individual_representations/graph_pipeline_individual/individual.py index 257e2e20..d3b5f63b 100644 --- a/tpot2/individual_representations/graph_pipeline_individual/individual.py +++ b/tpot2/individual_representations/graph_pipeline_individual/individual.py @@ -569,6 +569,7 @@ def _mutate_hyperparameters(self): if not completed_one: get_hyperparameter(self.select_config_dict(node)[node.method_class], nodelabel=node, alpha=self.hyperparameter_alpha, hyperparameter_probability=self.hyperparameter_probability) + completed_one = True else: if self.hyper_node_probability < random.random(): get_hyperparameter(self.select_config_dict(node)[node.method_class], nodelabel=node, alpha=self.hyperparameter_alpha, hyperparameter_probability=self.hyperparameter_probability) From 8e53a91c794b09daeb3497de81d6c2238d378f62 Mon Sep 17 00:00:00 2001 From: perib Date: Mon, 16 Oct 2023 14:13:48 -0700 Subject: [PATCH 4/7] fix --- .../graph_pipeline_individual/individual.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/tpot2/individual_representations/graph_pipeline_individual/individual.py b/tpot2/individual_representations/graph_pipeline_individual/individual.py index d3b5f63b..6fc10503 100644 --- a/tpot2/individual_representations/graph_pipeline_individual/individual.py +++ b/tpot2/individual_representations/graph_pipeline_individual/individual.py @@ -568,13 +568,12 @@ def _mutate_hyperparameters(self): if not completed_one: - get_hyperparameter(self.select_config_dict(node)[node.method_class], nodelabel=node, alpha=self.hyperparameter_alpha, hyperparameter_probability=self.hyperparameter_probability) - completed_one = True + _,_, completed_one = get_hyperparameter(self.select_config_dict(node)[node.method_class], nodelabel=node, alpha=self.hyperparameter_alpha, hyperparameter_probability=self.hyperparameter_probability) else: if self.hyper_node_probability < random.random(): 
get_hyperparameter(self.select_config_dict(node)[node.method_class], nodelabel=node, alpha=self.hyperparameter_alpha, hyperparameter_probability=self.hyperparameter_probability) - return True - return False + + return completed_one @@ -1145,7 +1144,7 @@ def create_node(config_dict): if method_class == 'Recursive': node = GraphIndividual(**config_dict[method_class]) else: - hyperparameters, params = get_hyperparameter(config_dict[method_class], nodelabel=None) + hyperparameters, params, _ = get_hyperparameter(config_dict[method_class], nodelabel=None) node = NodeLabel( method_class=method_class, @@ -1172,17 +1171,18 @@ def random_weighted_sort(l,weights): def get_hyperparameter(config_func, nodelabel=None, alpha=1, hyperparameter_probability=1): + changed = False if isinstance(config_func, dict): - return config_func, None + return config_func, None, changed if nodelabel is not None: trial = config.hyperparametersuggestor.Trial(old_params=nodelabel._params, alpha=alpha, hyperparameter_probability=hyperparameter_probability) new_params = config_func(trial) - + changed = trial._params != nodelabel._params nodelabel._params = trial._params nodelabel.hyperparameters = new_params else: trial = config.hyperparametersuggestor.Trial(old_params=None, alpha=alpha, hyperparameter_probability=hyperparameter_probability) new_params = config_func(trial) - return new_params, trial._params, \ No newline at end of file + return new_params, trial._params, changed \ No newline at end of file From 82676cc1b2631bd686e05c9b0452417e1db904a3 Mon Sep 17 00:00:00 2001 From: perib Date: Mon, 16 Oct 2023 14:18:11 -0700 Subject: [PATCH 5/7] fix --- .../graph_pipeline_individual/individual.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tpot2/individual_representations/graph_pipeline_individual/individual.py b/tpot2/individual_representations/graph_pipeline_individual/individual.py index 6fc10503..f994e270 100644 --- a/tpot2/individual_representations/graph_pipeline_individual/individual.py +++ b/tpot2/individual_representations/graph_pipeline_individual/individual.py @@ -567,10 +567,9 @@ def _mutate_hyperparameters(self): continue if not completed_one: - _,_, completed_one = get_hyperparameter(self.select_config_dict(node)[node.method_class], nodelabel=node, alpha=self.hyperparameter_alpha, hyperparameter_probability=self.hyperparameter_probability) else: - if self.hyper_node_probability < random.random(): + if self.hyper_node_probability > random.random(): get_hyperparameter(self.select_config_dict(node)[node.method_class], nodelabel=node, alpha=self.hyperparameter_alpha, hyperparameter_probability=self.hyperparameter_probability) return completed_one From 9aaf019e1ff0acdf30bb32c6243f142ed984f845 Mon Sep 17 00:00:00 2001 From: perib Date: Tue, 17 Oct 2023 12:19:14 -0700 Subject: [PATCH 6/7] fix --- .../graph_pipeline_individual/individual.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tpot2/individual_representations/graph_pipeline_individual/individual.py b/tpot2/individual_representations/graph_pipeline_individual/individual.py index f994e270..a611755c 100644 --- a/tpot2/individual_representations/graph_pipeline_individual/individual.py +++ b/tpot2/individual_representations/graph_pipeline_individual/individual.py @@ -594,7 +594,7 @@ def _mutate_replace_node(self): else: #hyperparameters = self.select_config_dict(node)[node.method_class](config.hyperparametersuggestor) #get_hyperparameter(self.select_config_dict(node)[node.method_class], nodelabel=None, 
alpha=self.hyperparameter_alpha, hyperparameter_probability=self.hyperparameter_probability) - new_node = create_node(self.select_config_dict(node)[node.method_class]) + new_node = create_node(self.select_config_dict(node)) #TODO cleanup node.hyperparameters = new_node.hyperparameters node.method_class = new_node.method_class From 3ea17297c557a295cca08cd517c2e2c5e890f5c8 Mon Sep 17 00:00:00 2001 From: perib Date: Wed, 18 Oct 2023 11:30:08 -0700 Subject: [PATCH 7/7] fix --- tpot2/config/hyperparametersuggestor.py | 54 +++++++++---------------- 1 file changed, 19 insertions(+), 35 deletions(-) diff --git a/tpot2/config/hyperparametersuggestor.py b/tpot2/config/hyperparametersuggestor.py index f0f1318c..73c9c678 100644 --- a/tpot2/config/hyperparametersuggestor.py +++ b/tpot2/config/hyperparametersuggestor.py @@ -2,6 +2,13 @@ from scipy.stats import loguniform, logser #TODO: remove this dependency? import numpy as np #TODO: remove this dependency and use scipy instead? +#function that selects selects items from a list with each having independent probability p of being selected +def select(items, p): + selected = [item for item in items if random.random() < p] + #if selected is empty, select one item at random + if not selected: + return [random.choice(items)] + return selected class Trial(): @@ -13,8 +20,8 @@ def __init__(self, old_params=None, alpha=1, hyperparameter_probability=1): self.alpha = alpha self.hyperparameter_probability = hyperparameter_probability - if old_params is not None: - self.params_to_update = set(random.sample(list(old_params.keys()), max(int(len(old_params.keys())*self.hyperparameter_probability),1))) + if old_params is not None and len(old_params) > 0: + self.params_to_update = select(list(old_params.keys()), self.hyperparameter_probability) else: self.params_to_update = None @@ -22,15 +29,12 @@ def __init__(self, old_params=None, alpha=1, hyperparameter_probability=1): #Replicating the API found in optuna: https://optuna.readthedocs.io/en/stable/reference/generated/optuna.trial.Trial.html #copy-pasted some code def suggest_categorical(self, name, choices): - if self.params_to_update == None or name in self.params_to_update: #If this parameter is selected to be changed + if self.params_to_update == None or name in self.params_to_update or name not in self.old_params: #If this parameter is selected to be changed choice = self.suggest_categorical_(name, choices) else: #if this parameter is not selected to be changed - if name not in self.old_params: #if this parameter is not in the old params, then we need to choose a value for it + choice = self.old_params[name] + if choice not in choices: #if the old value is not in the choices, then we need to choose a value for it choice = self.suggest_categorical_(name, choices) - else: #if this parameter is in the old params, then we can just use the old value - choice = self.old_params[name] - if choice not in choices: #if the old value is not in the choices, then we need to choose a value for it - choice = self.suggest_categorical_(name, choices) self._params[name] = choice return choice @@ -43,15 +47,11 @@ def suggest_float(self, step = None, log = False, ): - if self.params_to_update == None or name in self.params_to_update: #If this parameter is selected to be changed + if self.params_to_update == None or name in self.params_to_update or name not in self.old_params: #If this parameter is selected to be changed choice = self.suggest_float_(name, low=low, high=high, step=step, log=log) if self.old_params is not None and name 
in self.old_params: choice = self.alpha*choice + (1-self.alpha)*self.old_params[name] else: #if this parameter is not selected to be changed - - if name not in self.old_params: - choice = self.suggest_float_(name, low=low, high=high, step=step, log=log) - else: choice = self.old_params[name] self._params[name] = choice @@ -60,15 +60,12 @@ def suggest_float(self, def suggest_discrete_uniform(self, name, low, high, q): - if self.params_to_update == None or name in self.params_to_update: + if self.params_to_update == None or name in self.params_to_update or name not in self.old_params: choice = self.suggest_discrete_uniform_(name, low=low, high=high, q=q) if self.old_params is not None and name in self.old_params: choice = self.alpha*choice + (1-self.alpha)*self.old_params[name] else: - if name not in self.old_params: - choice = self.suggest_discrete_uniform_(name, low=low, high=high, q=q) - else: - choice = self.old_params[name] + choice = self.old_params[name] self._params[name] = choice return choice @@ -76,30 +73,24 @@ def suggest_discrete_uniform(self, name, low, high, q): def suggest_int(self, name, low, high, step=1, log=False): - if self.params_to_update == None or name in self.params_to_update: + if self.params_to_update == None or name in self.params_to_update or name not in self.old_params: choice = self.suggest_int_(name, low=low, high=high, step=step, log=log) if self.old_params is not None and name in self.old_params: choice = int(self.alpha*choice + (1-self.alpha)*self.old_params[name]) else: - if name not in self.old_params: - choice = self.suggest_int_(name, low=low, high=high, step=step, log=log) - else: - choice = self.old_params[name] + choice = self.old_params[name] self._params[name] = choice return choice def suggest_uniform(self, name, low, high): - if self.params_to_update == None or name in self.params_to_update: + if self.params_to_update == None or name in self.params_to_update or name not in self.old_params: choice = self.suggest_uniform_(name, low=low, high=high) if self.old_params is not None and name in self.old_params: choice = self.alpha*choice + (1-self.alpha)*self.old_params[name] else: - if name not in self.old_params: - choice = self.suggest_uniform_(name, low=low, high=high) - else: - choice = self.old_params[name] + choice = self.old_params[name] self._params[name] = choice return choice @@ -112,7 +103,6 @@ def suggest_uniform(self, name, low, high): def suggest_categorical_(self, name, choices): choice = random.choice(choices) - self._params[name] = choice return choice def suggest_float_(self, @@ -148,23 +138,19 @@ def suggest_float_(self, if log: value = np.random.uniform(np.log(low),np.log(high)) choice = np.e**value - self._params[name] = choice return choice else: if step is not None: choice = np.random.choice(np.arange(low,high,step)) - self._params[name] = choice return choice else: choice = np.random.uniform(low,high) - self._params[name] = choice return choice def suggest_discrete_uniform_(self, name, low, high, q): choice = self.suggest_float(name, low, high, step=q) - self._params[name] = choice return choice @@ -195,11 +181,9 @@ def suggest_int_(self, name, low, high, step=1, log=False): if log: value = np.random.uniform(np.log(low),np.log(high)) choice = int(np.e**value) - self._params[name] = choice return choice else: choice = np.random.choice(list(range(low,high,step))) - self._params[name] = choice return choice def suggest_uniform_(self, name, low, high):
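
A usage sketch of the final (PATCH 7/7) Trial semantics, for reviewers. This is not part of the series: the import path is assumed from the file location (tpot2/config/hyperparametersuggestor.py), and space() below is a hypothetical stand-in for a config-dict entry (a callable that takes a trial and returns a params dict):

    import tpot2.config.hyperparametersuggestor as hps  # assumed import path

    def space(trial):  # hypothetical config entry
        return {
            'C': trial.suggest_float('C', 1e-4, 1e4, log=True),
            'penalty': trial.suggest_categorical('penalty', ['l1', 'l2']),
        }

    # Fresh draw: old_params is None, so every hyperparameter is sampled.
    t0 = hps.Trial(old_params=None, alpha=1, hyperparameter_probability=1)
    first = space(t0)

    # Mutation: each old param is independently re-sampled with probability
    # hyperparameter_probability (always at least one, via select()); a
    # re-sampled float/int is blended as alpha*new + (1 - alpha)*old, a
    # re-sampled categorical is simply redrawn, and unselected params keep
    # their old values (a stale categorical no longer in the choices is
    # also redrawn).
    t1 = hps.Trial(old_params=t0._params, alpha=0.5, hyperparameter_probability=0.5)
    mutated = space(t1)

At the graph level, get_hyperparameter() wraps this: it returns (new_params, trial._params, changed) and, when given a nodelabel, stores trial._params on the node so the next mutation can blend against the previous values. The new estimator arguments hyperparameter_probability, hyper_node_probability, and hyperparameter_alpha are threaded through templates.py and estimator.py down to this call.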