# Copyright (C) 2023. Huawei Technologies Co., Ltd. All rights reserved.
# This program is free software; you can redistribute it and/or modify it under
# the terms of the MIT license.
# This program is distributed in the hope that it will be useful, but WITHOUT ANY
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
# PARTICULAR PURPOSE. See the MIT License for more details.
from typing import List
import os
import numpy as np
import yaml
from ray import tune
from ray.tune import run, Experiment
from agents.common.utils import get_global_name, get_global_agent_name
from agents.common.config import process_glob_config, process_config_per_agent
from sac_main_fn import main as sac_main
os.environ["Timer"] = '1'


def trial_name(trial: Experiment, hp_to_write: List[str]) -> str:
    """
    Generate a unique identifier for a trial from selected hyperparameters and trial metadata.

    Parameters
    ----------
    trial : ray.tune.Experiment
        The Ray Tune trial for which to generate the identifier; it must expose
        ``config`` and ``trial_id``.
    hp_to_write : List[str]
        Names of the hyperparameters to include in the identifier.

    Returns
    -------
    str
        The generated trial identifier.
    """
    ti = 'repeat_run'
    identifier = ','.join([f'{hp}={trial.config[hp]}' for hp in hp_to_write]) + \
        f',trial={trial.config[ti]},id={trial.trial_id}'
    return identifier
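
# Illustrative example (hypothetical values, not from an actual run): with
# hp_to_write == ['beta'] and a trial whose config contains
# {'beta': 0.5, 'repeat_run': 2}, trial_name returns a string of the form
# 'beta=0.5,trial=2,id=<trial_id>'.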

if __name__ == '__main__':
    envs = ['ball_in_cup']
    agents = [
        'SAC',
        'SAG',
        'PIG',
        'PAG',
    ]
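    # Note (repository-specific naming, stated here as an assumption): 'SAC' is
    # standard Soft Actor-Critic, while 'SAG', 'PIG' and 'PAG' are the guided
    # variants implemented under agents/.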

    for env in envs:
        # get the global name used to retrieve the configs
        glob_name = get_global_name(env)
        # retrieve the config for this environment
        with open(os.path.join(os.getcwd(), 'ray_config', f'{glob_name}_cfg.yaml')) as f:
            config = yaml.safe_load(f)
        np.random.seed(config['seed'])
        del config['seed']
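        # For orientation, a minimal hypothetical ray_config/<glob_name>_cfg.yaml,
        # inferred only from the keys this script reads; actual files may contain
        # many more entries:
        #   seed: 0
        #   repeat_run: 5           # number of repeats per hyperparameter setting
        #   metric: average_return  # forwarded to ray.tune.run below
        #   mode: max
        #   beta: [0.1, 0.5]        # list-valued entries become grid-search axes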
        # add runtime elements to the config
        config['orig_cwd'] = os.getcwd()
        config['env'] = env
        config['glob_name'] = glob_name
        config['device'] = 'cpu'
        # process the config and retrieve the elements driving the loops below
        expert_names, dict_pos_tol, dict_beta, dict_delta, dict_phi, decay_parameter_list = process_glob_config(config)
        # loop over experts (if multiple experts)
        for expert in expert_names:
            config['expert'] = expert
            # loop over agents (if multiple agents)
            for agent_name in agents:
                # agent name
                glob_agent_name = get_global_agent_name(agent_name)
                config['agent_name'] = agent_name
                # further process hyperparameters to make them agent-dependent
                process_config_per_agent(config, agent_name, dict_beta, dict_delta, dict_phi, dict_pos_tol)
                # decay or not (only relevant for PAG)
                agent_name_to_show = agent_name
                if agent_name in ['SAC', 'SAG', 'NaiveSAG']:
                    # single value only, to avoid unnecessary runs
                    decay_parameter_list = [False]
                for decay_parameter in decay_parameter_list:
                    config['decay_parameter'] = decay_parameter
                    if decay_parameter:
                        agent_name_to_show = 'Decreased' + agent_name_to_show
                    else:
                        # collapse delta to a single value to avoid unnecessary runs
                        config['delta'] = [1]
                    # ray preparation: every list-valued config entry becomes a
                    # hyperparameter axis
                    hps = [k for k, v in config.items() if type(v) is list]
                    config_ray = {k: tune.grid_search(v) if type(v) is list else v for k, v in config.items()}
                    config_ray['repeat_run'] = tune.grid_search(list(range(config['repeat_run'])))
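                    # Sketch of the expansion above (hypothetical values): a config
                    # like {'beta': [0.1, 0.5], 'lr': 3e-4, 'repeat_run': 2} becomes
                    # {'beta': tune.grid_search([0.1, 0.5]), 'lr': 3e-4,
                    #  'repeat_run': tune.grid_search([0, 1])}, so Ray Tune launches
                    # one trial per combination of grid values.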
                    metric_columns = ['epoch', 'average_return', 'mean_avg_return', 'epoch_time']
                    reporter = tune.CLIReporter(parameter_columns=hps, metric_columns=metric_columns)
                    env_name_folder = env
                    if agent_name in ['SAC']:
                        # plain SAC runs are expert-independent, so no expert subfolder
                        save_path = f'./ray_results_test/{env_name_folder}/{agent_name_to_show}'
                    else:
                        save_path = f'./ray_results_test/{env_name_folder}/{agent_name_to_show}/{expert}'
                    analysis = run(
                        sac_main,
                        config=config_ray,
                        metric=config_ray['metric'],
                        mode=config_ray['mode'],
                        resources_per_trial={"cpu": 1, "gpu": 1 if config_ray['device'] == 'cuda' else 0},
                        max_concurrent_trials=15,
                        log_to_file=True,
                        local_dir=save_path,
                        trial_name_creator=lambda t: trial_name(t, hps),
                        trial_dirname_creator=lambda t: trial_name(t, hps),
                        progress_reporter=reporter,
                        verbose=1)  # resume=True,
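                    # Sketch (assumption, not part of the original script): the
                    # ExperimentAnalysis object returned by run() could be queried
                    # afterwards, e.g. to fetch the best configuration found:
                    #
                    # best_config = analysis.get_best_config(
                    #     metric=config_ray['metric'], mode=config_ray['mode'])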