bayes_opt.py
"""Bayesian optimization over an open-loop action sequence for a gym environment,
using skopt's gp_minimize; evaluation returns are logged with Dumper."""
import hydra
import gym
import numpy as np
# from dragonfly import maximise_function
from skopt import gp_minimize
import barl.envs
from functools import partial
from tqdm import trange
from barl.util.misc_util import Dumper
from rlkit.envs.wrappers import NormalizedBoxEnv


def eval_function(action_sequence, env, start_state):
    """Roll out a flat action sequence from start_state and return the total reward."""
    action_dim = env.action_space.low.size
    action_sequence = np.array(action_sequence).reshape(env.horizon, action_dim)
    obs = env.reset(obs=start_state)
    # TODO: maybe reset action sequence
    total_rew = 0.
    for action in action_sequence:
        obs, rew, done, info = env.step(action)
        total_rew += rew
    return total_rew


def neg_eval_function(action_sequence, env, start_state):
    """Negated return, so minimizing this objective maximizes the episode return."""
    return -1 * eval_function(action_sequence, env, start_state)


@hydra.main(config_path="cfg", config_name="bayes_opt")
def main(config):
    import barl.envs  # noqa: F401  # make sure the custom envs are registered
    np.random.seed(config.seed + 15)
    env = NormalizedBoxEnv(gym.make(config.env.name))
    env.seed(config.seed)
    start_state = env.reset()
    action_dim = env.action_space.low.size
    horizon = env.horizon
    # One search dimension per action coordinate per timestep; float bounds so
    # skopt treats each dimension as continuous (Real) rather than Integer.
    domain = [(-1.0, 1.0)] * action_dim * horizon
    print(f"{start_state=}")
    dumper = Dumper(config.name)
    objfn = partial(neg_eval_function, env=env, start_state=start_state)
    for capital in trange(1, config.max_episodes):
        res = gp_minimize(objfn,
                          domain,
                          n_initial_points=1,
                          n_calls=capital,
                          random_state=config.seed)
        # max_val, max_pt, history = maximise_function(objfn, domain, capital)
        # dumper.add("Eval Returns", [max_val])
        dumper.add("Eval Returns", [-1 * res.fun])
        dumper.add("Eval ndata", horizon * capital)
        dumper.save()


if __name__ == '__main__':
    main()
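
# Example invocation (a sketch, assuming a Hydra config at cfg/bayes_opt.yaml that
# defines `name`, `seed`, `max_episodes`, and an `env` group with a `name` field;
# the env id below is a placeholder, not taken from this file):
#
#   python bayes_opt.py name=bayes_opt_run seed=0 max_episodes=50 \
#       env.name=<registered gym env id>
#
# Hydra parses the key=value overrides above; for each budget `capital`,
# gp_minimize then searches the flattened action sequence
# (horizon * action_dim dimensions) and the best return is logged.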