-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathhyper_parameter_experiments.py
78 lines (73 loc) · 2.16 KB
/
hyper_parameter_experiments.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
from dqn import DQNAgent
import itertools
# Hyperparameter configurations consumed by run_experiments().
# Every configuration carries the same seven boolean flags, in the same key
# order; each one switches on a single group of flags and leaves the rest
# False.
experiments = [
    {
        flag: flag in enabled
        for flag in (
            'prioritized_memory',
            'learning_rate_schedule',
            'smart_reward',
            'beta_increment',
            'dueling',
            'double',
            'gradient_clipping',
        )
    }
    for enabled in (
        ('prioritized_memory',),
        ('learning_rate_schedule',),
        ('smart_reward',),
        ('beta_increment',),
        ('dueling', 'double'),  # these two are only ever enabled together
        ('gradient_clipping',),
    )
]
def run_experiments(experiments):
    """Run one DQN agent per hyperparameter configuration, in order.

    For every entry of ``experiments`` a fresh ``DQNAgent`` is constructed
    with that entry's ``'prioritized_memory'`` value. Each of the seven flags
    is then written onto the agent as an attribute of the same name, after
    which ``warm_up_memory_buffer()`` and ``run_dqn_agent()`` are called.

    Args:
        experiments: Sized iterable of mappings; each mapping must provide
            every key listed in ``flag_names`` below.

    Raises:
        KeyError: If a dict configuration is missing one of the flag keys.
    """
    flag_names = (
        'prioritized_memory',
        'learning_rate_schedule',
        'smart_reward',
        'beta_increment',
        'dueling',
        'double',
        'gradient_clipping',
    )

    print(f"running {len(experiments)} hyperparameter experiments")
    for config in experiments:
        print(f"Running experiment with hyperparameters: {config}")
        agent = DQNAgent(config['prioritized_memory'])
        # Flags are applied after construction, in this fixed order. That
        # includes 'prioritized_memory', which the constructor also received.
        for flag in flag_names:
            setattr(agent, flag, config[flag])
        agent.warm_up_memory_buffer()
        agent.run_dqn_agent()
# Run every configuration in `experiments` at module level. There is no
# `if __name__ == "__main__"` guard, so importing this file also starts the runs.
run_experiments(experiments)