Run STBO with mixed Multitask model
marcosfelt committed Jun 16, 2021
1 parent 03a4064 commit 2cd0216
Showing 5 changed files with 168 additions and 40 deletions.
53 changes: 53 additions & 0 deletions dvc.yaml
@@ -231,3 +231,56 @@ stages:
      outs:
        - data/multitask_results/results_${key}


  suzuki_mtbo_mixed_four_cotraining:
    foreach:
      baumgartner_suzuki:
        benchmark: data/baumgartner_suzuki/emulator
        dataset_1: data/reizman_suzuki/ord/reizman_suzuki_case_1.pb
        dataset_2: data/reizman_suzuki/ord/reizman_suzuki_case_2.pb
        dataset_3: data/reizman_suzuki/ord/reizman_suzuki_case_3.pb
        dataset_4: data/reizman_suzuki/ord/reizman_suzuki_case_4.pb
      reizman_suzuki_case_1:
        benchmark: data/reizman_suzuki/emulator_case_1
        dataset_1: data/baumgartner_suzuki/ord/baumgartner_suzuki.pb
        dataset_2: data/reizman_suzuki/ord/reizman_suzuki_case_2.pb
        dataset_3: data/reizman_suzuki/ord/reizman_suzuki_case_3.pb
        dataset_4: data/reizman_suzuki/ord/reizman_suzuki_case_4.pb
      reizman_suzuki_case_2:
        benchmark: data/reizman_suzuki/emulator_case_2
        dataset_1: data/baumgartner_suzuki/ord/baumgartner_suzuki.pb
        dataset_2: data/reizman_suzuki/ord/reizman_suzuki_case_1.pb
        dataset_3: data/reizman_suzuki/ord/reizman_suzuki_case_3.pb
        dataset_4: data/reizman_suzuki/ord/reizman_suzuki_case_4.pb
      reizman_suzuki_case_3:
        benchmark: data/reizman_suzuki/emulator_case_3
        dataset_1: data/baumgartner_suzuki/ord/baumgartner_suzuki.pb
        dataset_2: data/reizman_suzuki/ord/reizman_suzuki_case_1.pb
        dataset_3: data/reizman_suzuki/ord/reizman_suzuki_case_2.pb
        dataset_4: data/reizman_suzuki/ord/reizman_suzuki_case_4.pb
      reizman_suzuki_case_4:
        benchmark: data/reizman_suzuki/emulator_case_4
        dataset_1: data/baumgartner_suzuki/ord/baumgartner_suzuki.pb
        dataset_2: data/reizman_suzuki/ord/reizman_suzuki_case_1.pb
        dataset_3: data/reizman_suzuki/ord/reizman_suzuki_case_2.pb
        dataset_4: data/reizman_suzuki/ord/reizman_suzuki_case_3.pb
    do:
      desc: "Optimization using Multitask Bayesian Optimization"
      cmd: >-
        python multitask/suzuki_optimization.py mtbo
        ${key}
        ${item.benchmark}
        ${item.dataset_1} ${item.dataset_2} ${item.dataset_3} ${item.dataset_4}
        data/multitask_results/results_multitask_four_cotraining_mixed_domain_${key}
        --brute-force-categorical
        --max-experiments ${suzuki_optimization.max-experiments}
        --batch-size ${suzuki_optimization.batch-size}
        --repeats ${suzuki_optimization.repeats}
        --no-print-warnings
      deps:
        - multitask/suzuki_optimization.py
        - multitask/suzuki_emulator.py
        - multitask/suzuki_data_utils.py
        - ${item.benchmark}
      outs:
        - data/multitask_results/results_multitask_four_cotraining_mixed_domain_${key}
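For reference, a minimal sketch of what one expanded foreach item boils down to when run directly instead of through dvc repro. The working directory is assumed to be the repository root, and the --max-experiments/--batch-size/--repeats values are placeholders; in the stage they resolve from the ${suzuki_optimization.*} entries in the DVC params file.

# Sketch only: the expanded command for the baumgartner_suzuki foreach item.
import subprocess

subprocess.run(
    [
        "python", "multitask/suzuki_optimization.py", "mtbo",
        "baumgartner_suzuki",
        "data/baumgartner_suzuki/emulator",
        "data/reizman_suzuki/ord/reizman_suzuki_case_1.pb",
        "data/reizman_suzuki/ord/reizman_suzuki_case_2.pb",
        "data/reizman_suzuki/ord/reizman_suzuki_case_3.pb",
        "data/reizman_suzuki/ord/reizman_suzuki_case_4.pb",
        "data/multitask_results/results_multitask_four_cotraining_mixed_domain_baumgartner_suzuki",
        "--brute-force-categorical",
        "--max-experiments", "20",  # placeholder value
        "--batch-size", "1",        # placeholder value
        "--repeats", "20",          # placeholder value
        "--no-print-warnings",
    ],
    check=True,
)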
File renamed without changes.
86 changes: 64 additions & 22 deletions multitask/mt.py
@@ -1,3 +1,4 @@
from .mixed_gp_regression import MixedMultiTaskGP
from summit import *
from summit.benchmarks.experimental_emulator import numpy_to_tensor
from summit.strategies.base import Strategy, Transform
@@ -80,7 +81,7 @@ def __init__(
        self.pretraining_data = pretraining_data
        self.task = task
        self.categorical_method = categorical_method
        if self.categorical_method not in ["one-hot", "descriptors"]:
        if self.categorical_method not in ["one-hot", "descriptors", None]:
            raise ValueError(
                "categorical_method must be one of 'one-hot', 'descriptors', or None."
            )
@@ -165,6 +166,16 @@ def suggest_experiments(self, num_experiments, prev_res: DataSet = None, **kwarg
            standardize_outputs=True,
        )

        if self.categorical_method is None:
            cat_mappings = {}
            cat_dimensions = []
            for i, v in enumerate(self.domain.input_variables):
                if v.variable_type == "categorical":
                    cat_mapping = {l: i for i, l in enumerate(v.levels)}
                    inputs[v.name] = inputs[v.name].replace(cat_mapping)
                    cat_mappings[v.name] = cat_mapping
                    cat_dimensions.append(i)

        # Add column to inputs indicating task
        task_data = data["task"].dropna().to_numpy()
        if data.shape[0] != task_data.shape[0]:
@@ -175,12 +186,21 @@ def suggest_experiments(self, num_experiments, prev_res: DataSet = None, **kwarg
            )

        # Train model
        self.model = MultiTaskGP(
            torch.tensor(inputs_task).float(),
            torch.tensor(output.data_to_numpy()).float(),
            task_feature=-1,
            output_tasks=[self.task],
        )
        if self.brute_force_categorical:
            self.model = MixedMultiTaskGP(
                torch.tensor(inputs_task).float(),
                torch.tensor(output.data_to_numpy()).float(),
                cat_dims=cat_dimensions,
                task_feature=-1,
                output_tasks=[self.task],
            )
        else:
            self.model = MultiTaskGP(
                torch.tensor(inputs_task).float(),
                torch.tensor(output.data_to_numpy()).float(),
                task_feature=-1,
                output_tasks=[self.task],
            )
        mll = ExactMarginalLogLikelihood(self.model.likelihood, self.model)
        fit_gpytorch_model(mll)

@@ -202,27 +222,18 @@ def suggest_experiments(self, num_experiments, prev_res: DataSet = None, **kwarg
        # Optimize acquisition function
        if self.brute_force_categorical:
            self.ei = EI(self.model, best_f=fbest_scaled, maximize=maximize)
            combos = self.domain.get_categorical_combinations()
            fixed_features_list = []
            k = 0
            for v in self.domain.input_variables:
                # One-hot encoding
                if v.variable_type == "categorical":
                    encoded_combos = self.transform.encoders[v.name].transform(
                        combos[v.name]
                    )
                    for i in range(encoded_combos.shape[1]):
                        fixed_features_list.append({k: encoded_combos[i, :]})
                        k += 1
                else:
                    k += 1
            if self.categorical_method is None:
                combos = np.arange(0, len(cat_mapping))
                fixed_features_list = [{0: float(combo)} for combo in combos]
            else:
                fixed_features_list = self._get_fixed_features()
            results, _ = optimize_acqf_mixed(
                acq_function=self.ei,
                bounds=self._get_bounds(),
                num_restarts=20,
                fixed_features_list=fixed_features_list,
                q=num_experiments,
                raw_samples=100,
                raw_samples=20,
            )
        else:
            self.ei = CategoricalEI(
@@ -243,6 +254,11 @@ def suggest_experiments(self, num_experiments, prev_res: DataSet = None, **kwarg
            )

        # Untransform
        if self.categorical_method is None:
            cat_mapping = {
                i: l for i, l in enumerate(self.domain["catalyst_smiles"].levels)
            }
            result["catalyst_smiles"] = result["catalyst_smiles"].replace(cat_mapping)
        result = self.transform.un_transform(
            result, categorical_method=self.categorical_method, standardize_inputs=True
        )
@@ -252,6 +268,30 @@ def suggest_experiments(self, num_experiments, prev_res: DataSet = None, **kwarg
result[("task", "METADATA")] = self.task
return result

def _get_fixed_features(self):
combos = self.domain.get_categorical_combinations()
encoded_combos = {
v.name: self.transform.encoders[v.name].transform(combos[[v.name]])
for v in self.domain.input_variables
if v.variable_type == "categorical"
}
fixed_features_list = []
for i, combo in enumerate(combos):
fixed_features = {}
k = 0
for v in self.domain.input_variables:
# One-hot encoding
if v.variable_type == "categorical":
for j in range(encoded_combos[v.name].shape[1]):
fixed_features[k] = numpy_to_tensor(
encoded_combos[v.name][i, j]
)
k += 1
else:
k += 1
fixed_features_list.append(fixed_features)
return fixed_features_list

    def _get_bounds(self):
        bounds = []
        for v in self.domain.input_variables:
@@ -266,6 +306,8 @@ def _get_bounds(self):
                and self.categorical_method == "one-hot"
            ):
                bounds += [[0, 1] for _ in v.levels]
            elif isinstance(v, CategoricalVariable) and self.categorical_method is None:
                bounds.append([0, len(v.levels)])
        return torch.tensor(bounds).T.float()

    def reset(self):
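The brute-force path above integer-encodes each categorical level, fits a multitask GP over the encoded inputs plus a task column, and then enumerates the levels as fixed features for BoTorch's optimize_acqf_mixed. Below is a self-contained sketch of that pattern on synthetic data using stock BoTorch only; the commit's own MixedMultiTaskGP (which treats cat_dims with a dedicated kernel) is not reproduced here, so the plain MultiTaskGP simply sees the integer code as another feature.

# Illustrative sketch, not the repository's implementation.
# Columns: 0 = continuous input, 1 = integer-coded catalyst level (0, 1, 2), 2 = task index.
import torch
from botorch.models import MultiTaskGP
from botorch.fit import fit_gpytorch_model
from botorch.acquisition import ExpectedImprovement
from botorch.optim import optimize_acqf_mixed
from gpytorch.mlls import ExactMarginalLogLikelihood

torch.manual_seed(0)
n = 30
x_cont = torch.rand(n, 1, dtype=torch.double)
x_cat = torch.randint(0, 3, (n, 1)).to(torch.double)          # integer-coded levels
task = (torch.arange(n) % 2).to(torch.double).unsqueeze(-1)    # 0 = auxiliary, 1 = target task
train_x = torch.cat([x_cont, x_cat, task], dim=-1)
train_y = torch.sin(6 * x_cont) + 0.3 * x_cat + 0.2 * task     # synthetic objective

# Multitask GP over (continuous, categorical code) with the task in the last column.
model = MultiTaskGP(train_x, train_y, task_feature=-1, output_tasks=[1])
mll = ExactMarginalLogLikelihood(model.likelihood, model)
fit_gpytorch_model(mll)

best_f = train_y[task.squeeze(-1) == 1].max()
ei = ExpectedImprovement(model, best_f=best_f, maximize=True)

# One fixed-features dict per categorical level; dimension 1 is the category column.
fixed_features_list = [{1: float(level)} for level in range(3)]
candidate, _ = optimize_acqf_mixed(
    acq_function=ei,
    bounds=torch.tensor([[0.0, 0.0], [1.0, 2.0]], dtype=torch.double),
    q=1,
    num_restarts=5,
    raw_samples=20,
    fixed_features_list=fixed_features_list,
)
print(candidate)  # proposed [continuous value, catalyst code]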
19 changes: 16 additions & 3 deletions multitask/suzuki_optimization.py
@@ -1,6 +1,6 @@
from multitask.suzuki_emulator import SuzukiEmulator
from multitask.suzuki_data_utils import get_suzuki_dataset
from multitask.mt import NewSTBO
from multitask.mt import NewSTBO, NewMTBO
from summit import *

import typer
@@ -83,6 +83,7 @@ def mtbo(
    batch_size: Optional[int] = 1,
    repeats: Optional[int] = 20,
    print_warnings: Optional[bool] = True,
    brute_force_categorical: bool = False,
):
    """Optimization of a Suzuki benchmark with Multitask Bayesian Optimization"""
    # Load benchmark
@@ -112,6 +113,10 @@ def mtbo(
    output_path = Path(output_path)
    output_path.mkdir(exist_ok=True)
    opt_task = len(ds_list)
    if brute_force_categorical:
        categorical_method = None
    else:
        categorical_method = "one-hot"
    for i in trange(repeats):
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
@@ -121,6 +126,8 @@ def mtbo(
                max_iterations=max_iterations,
                batch_size=batch_size,
                task=opt_task,
                brute_force_categorical=brute_force_categorical,
                categorical_method=categorical_method,
            )
            result.save(output_path / f"repeat_{i}.json")

@@ -156,12 +163,18 @@ def run_mtbo(
    max_iterations: int = 10,
    batch_size=1,
    task: int = 1,
    brute_force_categorical: bool = False,
    categorical_method: str = "one-hot",
):
    """Run Multitask Bayesian optimization"""
    exp.reset()
    assert exp.data.shape[0] == 0
    strategy = MTBO(
        exp.domain, pretraining_data=ct_data, categorical_method="one-hot", task=task
    strategy = NewMTBO(
        exp.domain,
        pretraining_data=ct_data,
        task=task,
        brute_force_categorical=brute_force_categorical,
        categorical_method=categorical_method,
    )
    r = Runner(
        strategy=strategy,
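On the CLI side, typer turns the new boolean parameter into a --brute-force-categorical / --no-brute-force-categorical flag pair, which is what the dvc.yaml stage above passes. A stripped-down sketch of just that wiring (not the real mtbo command, which also loads the emulator and co-training datasets):

# Sketch of the flag handling only, assuming a typer app like the one in suzuki_optimization.py.
import typer

app = typer.Typer()

@app.command()
def mtbo(brute_force_categorical: bool = False):
    # typer exposes this as --brute-force-categorical / --no-brute-force-categorical
    categorical_method = None if brute_force_categorical else "one-hot"
    typer.echo(f"brute force: {brute_force_categorical}, categorical_method: {categorical_method}")

if __name__ == "__main__":
    app()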
50 changes: 35 additions & 15 deletions nbs/evaluation.ipynb

Large diffs are not rendered by default.
