
Sweep: Emulator should not be retrained on initialization #243

Open

marcosfelt opened this issue Mar 19, 2023 · 1 comment
Labels: sweep (Assigns Sweep to an issue or pull request)

Comments

marcosfelt (Collaborator) commented Mar 19, 2023

ExperimentalEmulator should not be retrained on reinitialization. The relevant code is in summit/benchmarks/experimental_emulator.py:

```python
if not initializing:
    self.logger.info("Starting training.")
    res = cross_validate(
        predictor,
        self.X_train,
        y_train,
        scoring=scoring,
        cv=folds,
        return_estimator=True,
    )
```
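At a usage level, the desired behaviour is roughly the following (a sketch only; the constructor signature and argument names here are illustrative, not verbatim Summit API):

```python
# Sketch of the desired behaviour; names are illustrative.
emulator = ExperimentalEmulator("my_model", domain, dataset=ds)
emulator.train()  # cross-validated training runs once, as expected

# Re-creating the emulator later (e.g. from saved results) should restore
# its state without calling cross_validate again; the `initializing` guard
# above is meant to enforce that.
emulator = ExperimentalEmulator("my_model", domain, dataset=ds)
```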

marcosfelt changed the title from "Emulator should not be retrained on initialization" to "Sweep: Emulator should not be retrained on initialization" on Jun 16, 2023
marcosfelt added the sweep label on Jun 21, 2023
sweep-ai bot (Contributor) commented Jun 21, 2023

Hey @marcosfelt,

I've started working on this issue. The plan is to modify the train method in the ExperimentalEmulator class to check if the emulator has already been trained. If it has, we'll skip the retraining process.

Give me a minute!
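A minimal sketch of the check the bot describes (the `_trained` flag and `_fit` helper below are illustrative names, not Summit's actual attributes):

```python
# Illustrative sketch only: attribute and method names are assumptions,
# not Summit's actual API.
class EmulatorSketch:
    def __init__(self):
        self._trained = False  # flipped to True after a successful fit

    def train(self, force_retrain=False, **kwargs):
        # Skip the expensive cross-validated training if the emulator
        # has already been trained (e.g. it was loaded from disk).
        if self._trained and not force_retrain:
            return
        self._fit(**kwargs)
        self._trained = True

    def _fit(self, **kwargs):
        """Placeholder for the cross-validated training step."""
```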

Some code snippets I looked at. If some file is missing from here, you can mention the path in the ticket description.

summit/summit/run.py, lines 98 to 124 at commit 8b987de:

"""
def __init__(
self,
strategy: Strategy,
experiment: Experiment,
num_initial_experiments=None,
max_iterations=100,
batch_size=1,
f_tol=1e-5,
max_same=None,
max_restarts=0,
**kwargs,
):
self.strategy = strategy
self.experiment = experiment
self.n_init = num_initial_experiments
self.max_iterations = max_iterations
self.f_tol = f_tol
self.batch_size = batch_size
self.max_same = max_same
self.max_restarts = max_restarts
# Set up logging
self.logger = logging.getLogger(__name__)

```python
@pytest.mark.parametrize("strategy", [SOBO, SNOBFIT, NelderMead, Random, LHS, TSEMO])
@pytest.mark.parametrize(
    "experiment",
    [
        get_pretrained_reizman_suzuki_emulator(),
        get_pretrained_baumgartner_cc_emulator(include_cost=True),
        DTLZ2,
        VLMOP2,
        SnarBenchmark,
    ],
)
def test_runner_mo_integration(strategy, experiment):
    """Test Runner with multiobjective optimization strategies and benchmarks"""
    if not isinstance(experiment, ExperimentalEmulator):
        exp = experiment()
    else:
        exp = experiment
    if experiment.__class__.__name__ == "ReizmanSuzukiEmulator" and strategy not in [
        SOBO,
        TSEMO,
    ]:
        # only run on strategies that work with categorical variables directly
        return
    elif strategy == TSEMO:
        s = strategy(exp.domain)
        iterations = 3
    else:
        hierarchy = {
            v.name: {"hierarchy": i, "tolerance": 1}
            for i, v in enumerate(exp.domain.output_variables)
        }
        transform = Chimera(exp.domain, hierarchy)
        s = strategy(exp.domain, transform=transform)
        iterations = 3
    r = Runner(
        strategy=s,
        experiment=exp,
        num_initial_experiments=8,
        max_iterations=iterations,
        batch_size=1,
    )
    r.run()
    # Try saving and loading
    # r.save("test_save.json")
    # r.load("test_save.json")
```

```python
# Run experiments
@pytest.mark.parametrize("strategy", [Random])
def test_baselines(strategy, num_repeats=1):
    """Test Multiobjective CN Benchmark with baseline strategies (random, full factorial)"""
    for i in range(num_repeats):
        experiment.reset()
        s = strategy(experiment.domain, transform_descriptors=True)
        name = f"cn_experiment_MO_baselines_{s.__class__.__name__}_repeat_{i}"
        r = SlurmRunner(
            experiment=experiment,
            strategy=s,
            docker_container="marcosfelt/summit:cn_benchmark",
            neptune_project=NEPTUNE_PROJECT,
            neptune_experiment_name=name,
            neptune_tags=["cn_experiment_MO", s.__class__.__name__],
            neptune_files=["slurm_summit_cn_experiment.sh"],
            max_iterations=MAX_EXPERIMENTS // BATCH_SIZE,
            batch_size=BATCH_SIZE,
            hypervolume_ref=HYPERVOLUME_REF,
        )
        r.run(save_at_end=True)
```

```python
warnings.filterwarnings("ignore", category=RuntimeWarning)
for i in range(NUM_REPEATS):
    experiment.reset()
    s = strategy(experiment.domain, transform=transform, sampling_strategies=1)
    # Special considerations for Neldermead
    if strategy == NelderMead:
        f_tol = 1e-5
        s.random_start = True
        max_same = 2
        max_restarts = 10
        s.adaptive = True
    else:
        f_tol = None
        max_same = None
        max_restarts = 0
    container = "marcosfelt/summit:cn_benchmark"
    if strategy == DRO:
        container = "marcosfelt/summit:dro"
        strategy._model_size = "bigger"
    exp_name = f"snar_experiment_{s.__class__.__name__}_{transform.__class__.__name__}_repeat_{i}"
    r = NeptuneRunner(
        experiment=experiment,
        strategy=s,
        docker_container=container,
        neptune_project=NEPTUNE_PROJECT,
        neptune_experiment_name=exp_name,
        neptune_files=["slurm_summit_snar_experiment.sh"],
        neptune_tags=[
            "snar_experiment",
            s.__class__.__name__,
            transform.__class__.__name__,
            "bigger_model",
        ],
        max_iterations=MAX_EXPERIMENTS // BATCH_SIZE,
        batch_size=BATCH_SIZE,
        f_tol=f_tol,
        max_same=max_same,
        max_restarts=max_restarts,
        hypervolume_ref=[-2957, 10.7],
    )
```

```python
def test_runner_unit(max_iterations, batch_size, max_same, max_restarts, runner):
    class MockStrategy(Strategy):
        iterations = 0

        def suggest_experiments(self, num_experiments=1, **kwargs):
            values = 0.5 * np.ones([num_experiments, 2])
            self.iterations += 1
            return DataSet(values, columns=["x_1", "x_2"])

        def reset(self):
            pass

    class MockExperiment(Experiment):
        def __init__(self):
            super().__init__(self.create_domain())

        def create_domain(self):
            domain = Domain()
            domain += ContinuousVariable("x_1", description="", bounds=[0, 1])
            domain += ContinuousVariable("x_2", description="", bounds=[0, 1])
            domain += ContinuousVariable(
                "y_1", description="", bounds=[0, 1], is_objective=True, maximize=True
            )
            return domain

        def _run(self, conditions, **kwargs):
            conditions[("y_1", "DATA")] = 0.5
            return conditions, {}

    class MockNeptuneExperiment:
        def send_metric(self, metric, value):
            pass

        def send_artifact(self, filename):
            pass

        def stop(self):
            pass

    exp = MockExperiment()
    strategy = MockStrategy(exp.domain)
    r = runner(
        strategy=strategy,
        experiment=exp,
        max_iterations=max_iterations,
        batch_size=batch_size,
        max_same=max_same,
        max_restarts=max_restarts,
        neptune_project="sustainable-processes/summit",
        neptune_experiment_name="test_experiment",
        neptune_exp=MockNeptuneExperiment(),
    )
    r.run()
    # Check that correct number of iterations run
    if max_same is not None:
        iterations = (max_restarts + 1) * max_same
        iterations = iterations if iterations < max_iterations else max_iterations
    else:
        iterations = max_iterations
    assert r.strategy.iterations == iterations
    assert r.experiment.data.shape[0] == int(batch_size * iterations)
    # Check that reset works
    r.reset()
    assert r.experiment.data.shape[0] == 0
    # Check that using previous data works
    r.strategy.iterations = 0
    suggestions = r.strategy.suggest_experiments(num_experiments=10)
    results = exp.run_experiments(suggestions)
    r.run(prev_res=results)
    assert r.strategy.iterations == iterations + 1
```


I'm a bot that handles simple bugs and feature requests but I might make mistakes. Please be kind!
