From 1103b5ec6a1be25a4f3a077d6773d2375e8a1a99 Mon Sep 17 00:00:00 2001 From: Neythen Treloar Date: Fri, 28 Apr 2023 07:32:03 +0100 Subject: [PATCH 1/9] added weights and bias experiment tracking to figures 3 and 4 examples --- .../example/Figure_3_RT3D_chemostat.yaml | 2 + .../example/Figure_4_RT3D_chemostat.yaml | 2 + .../Figure_3_RT3D_chemostat/train_RT3D.py | 37 ++++++++++++------- .../Figure_4_RT3D_chemostat/train_RT3D.py | 14 ++++++- 4 files changed, 40 insertions(+), 15 deletions(-) diff --git a/RED/configs/example/Figure_3_RT3D_chemostat.yaml b/RED/configs/example/Figure_3_RT3D_chemostat.yaml index 3164c03..452e6e7 100644 --- a/RED/configs/example/Figure_3_RT3D_chemostat.yaml +++ b/RED/configs/example/Figure_3_RT3D_chemostat.yaml @@ -3,6 +3,8 @@ defaults: - /model: RT3D_agent - _self_ +project_name: figure3-example + policy_delay: 2 initial_explore_rate: 1 explore_rate_mul: 1 diff --git a/RED/configs/example/Figure_4_RT3D_chemostat.yaml b/RED/configs/example/Figure_4_RT3D_chemostat.yaml index 3164c03..a97dcab 100644 --- a/RED/configs/example/Figure_4_RT3D_chemostat.yaml +++ b/RED/configs/example/Figure_4_RT3D_chemostat.yaml @@ -3,6 +3,8 @@ defaults: - /model: RT3D_agent - _self_ +project_name: figure4-example + policy_delay: 2 initial_explore_rate: 1 explore_rate_mul: 1 diff --git a/examples/Figure_3_RT3D_chemostat/train_RT3D.py b/examples/Figure_3_RT3D_chemostat/train_RT3D.py index 6eafea7..0112e39 100644 --- a/examples/Figure_3_RT3D_chemostat/train_RT3D.py +++ b/examples/Figure_3_RT3D_chemostat/train_RT3D.py @@ -1,4 +1,3 @@ - import math import os import sys @@ -14,7 +13,7 @@ from casadi import * from hydra.utils import instantiate from omegaconf import DictConfig, OmegaConf - +import wandb from RED.agents.continuous_agents.rt3d import RT3D_agent from RED.environments.chemostat.xdot_chemostat import xdot from RED.environments.OED_env import OED_env @@ -25,7 +24,7 @@ @hydra.main(version_base=None, config_path="../../RED/configs", config_name="example/Figure_3_RT3D_chemostat") -def train_RT3D(cfg : DictConfig): +def train_RT3D(cfg: DictConfig): ### config setup cfg = cfg.example print( @@ -35,6 +34,9 @@ def train_RT3D(cfg : DictConfig): sep="\n\n" ) + # start a new wandb run to track this script + wandb.init(project=cfg.project_name, config=dict(cfg)) + ### prepare save path os.makedirs(cfg.save_path, exist_ok=True) print("Results will be saved in: ", cfg.save_path) @@ -53,14 +55,14 @@ def train_RT3D(cfg : DictConfig): update_count = 0 ### training loop - for episode in range(total_episodes): + for episode in range(10): actual_params = np.random.uniform( low=cfg.environment.actual_params, high=cfg.environment.actual_params, size=(cfg.environment.n_parallel_experiments, n_params) ) env.param_guesses = DM(actual_params) - + ### episode buffers for agent states = [env.get_initial_RL_state_parallel() for i in range(cfg.environment.n_parallel_experiments)] trajectories = [[] for _ in range(cfg.environment.n_parallel_experiments)] @@ -86,7 +88,8 @@ def train_RT3D(cfg : DictConfig): if episode < skip_first_n_episodes: actions = agent.get_actions(inputs, explore_rate=1, test_episode=cfg.test_episode, recurrent=True) else: - actions = agent.get_actions(inputs, explore_rate=explore_rate, test_episode=cfg.test_episode, recurrent=True) + actions = agent.get_actions(inputs, explore_rate=explore_rate, test_episode=cfg.test_episode, + recurrent=True) e_actions.append(actions) ### step env @@ -94,12 +97,12 @@ def train_RT3D(cfg : DictConfig): next_states = [] for i, obs in enumerate(outputs): state, action = states[i], actions[i] - next_state, reward, done, _, u = obs + next_state, reward, done, _, u = obs ### set done flag if control_interval == cfg.environment.N_control_intervals - 1 \ - or np.all(np.abs(next_state) >= 1) \ - or math.isnan(np.sum(next_state)): + or np.all(np.abs(next_state) >= 1) \ + or math.isnan(np.sum(next_state)): done = True ### memorize transition @@ -110,9 +113,10 @@ def train_RT3D(cfg : DictConfig): ### log episode data e_us[i].append(u) next_states.append(next_state) - if reward != -1: # dont include the unstable trajectories as they override the true return + if reward != -1: # dont include the unstable trajectories as they override the true return e_rewards[i].append(reward) e_returns[i] += reward + states = next_states ### do not memorize the test trajectory (the last one) @@ -123,7 +127,7 @@ def train_RT3D(cfg : DictConfig): for trajectory in trajectories: # check for instability if np.all([np.all(np.abs(trajectory[i][0]) <= 1) for i in range(len(trajectory))]) \ - and not math.isnan(np.sum(trajectory[-1][0])): + and not math.isnan(np.sum(trajectory[-1][0])): agent.memory.append(trajectory) ### train agent @@ -148,6 +152,11 @@ def train_RT3D(cfg : DictConfig): history["us"].extend(e_us) history["explore_rate"].append(explore_rate) + ### log results to w and b + for i in range(len(e_returns)): + wandb.log({"returns": e_returns[i], "actions": np.array(e_actions).transpose(1, 0, 2)[i], + "us": e_us[i], "explore_rate": explore_rate}) + print( f"\nEPISODE: [{episode}/{total_episodes}] ({episode * cfg.environment.n_parallel_experiments} experiments)", f"explore rate:\t{explore_rate:.2f}", @@ -183,6 +192,8 @@ def train_RT3D(cfg : DictConfig): conv_window=25, ) + wandb.finish() + def setup_env(cfg): n_cores = multiprocessing.cpu_count() @@ -191,8 +202,8 @@ def setup_env(cfg): n_params = actual_params.size()[0] param_guesses = actual_params args = cfg.environment.y0, xdot, param_guesses, actual_params, cfg.environment.n_observed_variables, \ - cfg.environment.n_controlled_inputs, cfg.environment.num_inputs, cfg.environment.input_bounds, \ - cfg.environment.dt, cfg.environment.control_interval_time, normaliser + cfg.environment.n_controlled_inputs, cfg.environment.num_inputs, cfg.environment.input_bounds, \ + cfg.environment.dt, cfg.environment.control_interval_time, normaliser env = OED_env(*args) env.mapped_trajectory_solver = env.CI_solver.map(cfg.environment.n_parallel_experiments, "thread", n_cores) return env, n_params diff --git a/examples/Figure_4_RT3D_chemostat/train_RT3D.py b/examples/Figure_4_RT3D_chemostat/train_RT3D.py index 0b23c06..9238de8 100644 --- a/examples/Figure_4_RT3D_chemostat/train_RT3D.py +++ b/examples/Figure_4_RT3D_chemostat/train_RT3D.py @@ -7,7 +7,7 @@ sys.path.append(IMPORT_PATH) import multiprocessing - +import wandb import hydra import numpy as np from casadi import * @@ -34,6 +34,9 @@ def train_RT3D(cfg : DictConfig): sep="\n\n" ) + # start a new wandb run to track this script + wandb.init(project="figure4-example", config=dict(cfg)) + ### prepare save path os.makedirs(cfg.save_path, exist_ok=True) print("Results will be saved in: ", cfg.save_path) @@ -57,7 +60,7 @@ def train_RT3D(cfg : DictConfig): actual_params = np.random.uniform( low=cfg.environment.lb, high=cfg.environment.ub, - size=(cfg.environment.n_parallel_experiments, 3) + size=(cfg.environment.n_parallel_experiments, n_params) ) env.param_guesses = DM(actual_params) @@ -148,6 +151,11 @@ def train_RT3D(cfg : DictConfig): history["us"].extend(e_us) history["explore_rate"].append(explore_rate) + ### log results to w and b + for i in range(len(e_returns)): + wandb.log({"returns": e_returns[i], "actions": np.array(e_actions).transpose(1, 0, 2)[i], + "us": e_us[i], "explore_rate": explore_rate}) + print( f"\nEPISODE: [{episode}/{total_episodes}] ({episode * cfg.environment.n_parallel_experiments} experiments)", f"explore rate:\t{explore_rate:.2f}", @@ -183,6 +191,8 @@ def train_RT3D(cfg : DictConfig): conv_window=25, ) + wandb.finish() + def setup_env(cfg): n_cores = multiprocessing.cpu_count() From d4dc453cdd756220a19583f8407ccbcba62e49ce Mon Sep 17 00:00:00 2001 From: Neythen Treloar Date: Fri, 28 Apr 2023 07:39:42 +0100 Subject: [PATCH 2/9] removed debugging code --- examples/Figure_3_RT3D_chemostat/train_RT3D.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/Figure_3_RT3D_chemostat/train_RT3D.py b/examples/Figure_3_RT3D_chemostat/train_RT3D.py index 0112e39..52a91cc 100644 --- a/examples/Figure_3_RT3D_chemostat/train_RT3D.py +++ b/examples/Figure_3_RT3D_chemostat/train_RT3D.py @@ -55,7 +55,7 @@ def train_RT3D(cfg: DictConfig): update_count = 0 ### training loop - for episode in range(10): + for episode in range(0, cfg.environment.N_control_intervals): actual_params = np.random.uniform( low=cfg.environment.actual_params, high=cfg.environment.actual_params, From fbe8a059ea31127444a30d4d8c1174a8094f7659 Mon Sep 17 00:00:00 2001 From: Neythen Treloar Date: Fri, 28 Apr 2023 17:05:10 +0100 Subject: [PATCH 3/9] bugfix --- examples/Figure_3_RT3D_chemostat/train_RT3D.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/Figure_3_RT3D_chemostat/train_RT3D.py b/examples/Figure_3_RT3D_chemostat/train_RT3D.py index 52a91cc..948586f 100644 --- a/examples/Figure_3_RT3D_chemostat/train_RT3D.py +++ b/examples/Figure_3_RT3D_chemostat/train_RT3D.py @@ -55,7 +55,7 @@ def train_RT3D(cfg: DictConfig): update_count = 0 ### training loop - for episode in range(0, cfg.environment.N_control_intervals): + for episode in range(total_episodes): actual_params = np.random.uniform( low=cfg.environment.actual_params, high=cfg.environment.actual_params, From 2338debb1ab0f72d6d9c3fef7851f0398ac5c855 Mon Sep 17 00:00:00 2001 From: Neythen Treloar Date: Sat, 29 Apr 2023 12:14:08 +0100 Subject: [PATCH 4/9] added w and b login instructions --- WANDB_LOGIN.md | 34 +++++++++++++++++++ .../Figure_3_RT3D_chemostat/train_RT3D.py | 1 + 2 files changed, 35 insertions(+) create mode 100644 WANDB_LOGIN.md diff --git a/WANDB_LOGIN.md b/WANDB_LOGIN.md new file mode 100644 index 0000000..2708a63 --- /dev/null +++ b/WANDB_LOGIN.md @@ -0,0 +1,34 @@ +## 1. Get your w and b api key +The weights and bias api key can be found by logging into the rl-oed team here: https://stability.wandb.io/rl-oed. +Make sure you get the api key associated with the rl-oed team and not your personal one, otherwise experiments will be +logged in the wrong place. You will need access to the stability cluster first, message NeythenT on discord to get help +with this + +## 2. Set the WANDB_API_KEY login variable +Set the WANDB_API_KEY environment variable to your api key by running +``` +$ export WANDB_API_KEY +``` +from the command line or +```python +os.environ["WANDB_API_KEY"] = "" +``` +from Python + +## Login to w and b +To log in from command line +``` +$ wandb login --host=https://stability.wandb.io +``` +or in a python script +```python +wandb.login(host='https://stability.wandb.io', relogin=False) +``` + +## Running automated slurm jobs +I suggest we add the following lines to the job script that gets pushed to the github and people just copy their api +keys in. +``` +$ export WANDB_API_KEY +$ wandb login --host=https://stability.wandb.io +``` diff --git a/examples/Figure_3_RT3D_chemostat/train_RT3D.py b/examples/Figure_3_RT3D_chemostat/train_RT3D.py index 948586f..c808edc 100644 --- a/examples/Figure_3_RT3D_chemostat/train_RT3D.py +++ b/examples/Figure_3_RT3D_chemostat/train_RT3D.py @@ -210,4 +210,5 @@ def setup_env(cfg): if __name__ == '__main__': + train_RT3D() From 35190c9192b45cb33b0518fd902c8d8bdeb2973a Mon Sep 17 00:00:00 2001 From: Neythen Treloar Date: Sat, 29 Apr 2023 13:59:19 +0100 Subject: [PATCH 5/9] README update --- WANDB_LOGIN.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/WANDB_LOGIN.md b/WANDB_LOGIN.md index 2708a63..4c9940c 100644 --- a/WANDB_LOGIN.md +++ b/WANDB_LOGIN.md @@ -9,14 +9,14 @@ Set the WANDB_API_KEY environment variable to your api key by running ``` $ export WANDB_API_KEY ``` -from the command line or +from the command line (RECOMMENDED) or ```python os.environ["WANDB_API_KEY"] = "" ``` from Python ## Login to w and b -To log in from command line +To log in from command line (RECOMMENDED) ``` $ wandb login --host=https://stability.wandb.io ``` From 89ae56de4dc28fa3f954f509ecf048737996831c Mon Sep 17 00:00:00 2001 From: Neythen Treloar Date: Sat, 29 Apr 2023 18:14:54 +0100 Subject: [PATCH 6/9] added weight and bias team to the configuration so experiments are logged in the rl-oed team --- RED/configs/example/Figure_3_RT3D_chemostat.yaml | 3 ++- RED/configs/example/Figure_4_RT3D_chemostat.yaml | 3 ++- examples/Figure_3_RT3D_chemostat/train_RT3D.py | 4 ++-- examples/Figure_4_RT3D_chemostat/train_RT3D.py | 2 +- 4 files changed, 7 insertions(+), 5 deletions(-) diff --git a/RED/configs/example/Figure_3_RT3D_chemostat.yaml b/RED/configs/example/Figure_3_RT3D_chemostat.yaml index 452e6e7..b3e4aaf 100644 --- a/RED/configs/example/Figure_3_RT3D_chemostat.yaml +++ b/RED/configs/example/Figure_3_RT3D_chemostat.yaml @@ -3,7 +3,8 @@ defaults: - /model: RT3D_agent - _self_ -project_name: figure3-example +wandb_project_name: figure3-example +wandb_team: rl-oed policy_delay: 2 initial_explore_rate: 1 diff --git a/RED/configs/example/Figure_4_RT3D_chemostat.yaml b/RED/configs/example/Figure_4_RT3D_chemostat.yaml index a97dcab..7755039 100644 --- a/RED/configs/example/Figure_4_RT3D_chemostat.yaml +++ b/RED/configs/example/Figure_4_RT3D_chemostat.yaml @@ -3,7 +3,8 @@ defaults: - /model: RT3D_agent - _self_ -project_name: figure4-example +wandb_project_name: figure4-example +wandb_team: rl-oed policy_delay: 2 initial_explore_rate: 1 diff --git a/examples/Figure_3_RT3D_chemostat/train_RT3D.py b/examples/Figure_3_RT3D_chemostat/train_RT3D.py index c808edc..57ae26d 100644 --- a/examples/Figure_3_RT3D_chemostat/train_RT3D.py +++ b/examples/Figure_3_RT3D_chemostat/train_RT3D.py @@ -35,7 +35,7 @@ def train_RT3D(cfg: DictConfig): ) # start a new wandb run to track this script - wandb.init(project=cfg.project_name, config=dict(cfg)) + wandb.init(project=cfg.wandb_project_name, entity=cfg.wandb_team, config=dict(cfg)) ### prepare save path os.makedirs(cfg.save_path, exist_ok=True) @@ -55,7 +55,7 @@ def train_RT3D(cfg: DictConfig): update_count = 0 ### training loop - for episode in range(total_episodes): + for episode in range(10): actual_params = np.random.uniform( low=cfg.environment.actual_params, high=cfg.environment.actual_params, diff --git a/examples/Figure_4_RT3D_chemostat/train_RT3D.py b/examples/Figure_4_RT3D_chemostat/train_RT3D.py index 9238de8..7152858 100644 --- a/examples/Figure_4_RT3D_chemostat/train_RT3D.py +++ b/examples/Figure_4_RT3D_chemostat/train_RT3D.py @@ -35,7 +35,7 @@ def train_RT3D(cfg : DictConfig): ) # start a new wandb run to track this script - wandb.init(project="figure4-example", config=dict(cfg)) + wandb.init(project=cfg.wandb_project_name, entity=cfg.wandb_team, config=dict(cfg)) ### prepare save path os.makedirs(cfg.save_path, exist_ok=True) From 75977bd8a4a35ed89aa844b6a780314daf94f5d5 Mon Sep 17 00:00:00 2001 From: Neythen Treloar Date: Sat, 29 Apr 2023 18:19:56 +0100 Subject: [PATCH 7/9] removed debugging code --- examples/Figure_3_RT3D_chemostat/train_RT3D.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/Figure_3_RT3D_chemostat/train_RT3D.py b/examples/Figure_3_RT3D_chemostat/train_RT3D.py index 57ae26d..0dc6a9d 100644 --- a/examples/Figure_3_RT3D_chemostat/train_RT3D.py +++ b/examples/Figure_3_RT3D_chemostat/train_RT3D.py @@ -55,7 +55,7 @@ def train_RT3D(cfg: DictConfig): update_count = 0 ### training loop - for episode in range(10): + for episode in range(total_episodes): actual_params = np.random.uniform( low=cfg.environment.actual_params, high=cfg.environment.actual_params, From 0be4dde4c2cae1305a1f1c0999900b76fcff9eb1 Mon Sep 17 00:00:00 2001 From: Neythen Treloar <33317183+zcqsntr@users.noreply.github.com> Date: Sat, 29 Apr 2023 19:35:50 +0100 Subject: [PATCH 8/9] Update WANDB_LOGIN.md --- WANDB_LOGIN.md | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/WANDB_LOGIN.md b/WANDB_LOGIN.md index 4c9940c..c2fb6a8 100644 --- a/WANDB_LOGIN.md +++ b/WANDB_LOGIN.md @@ -1,7 +1,6 @@ ## 1. Get your w and b api key -The weights and bias api key can be found by logging into the rl-oed team here: https://stability.wandb.io/rl-oed. -Make sure you get the api key associated with the rl-oed team and not your personal one, otherwise experiments will be -logged in the wrong place. You will need access to the stability cluster first, message NeythenT on discord to get help +The weights and bias api key can be found by logging into the rl-oed team here: https://stability.wandb.io/. +You will need access to the stability cluster first, message NeythenT on discord to get help with this ## 2. Set the WANDB_API_KEY login variable From 27ad564dee73cdbc8722d945354f8a6c02998009 Mon Sep 17 00:00:00 2001 From: Neythen Treloar <33317183+zcqsntr@users.noreply.github.com> Date: Sat, 29 Apr 2023 19:37:24 +0100 Subject: [PATCH 9/9] Update WANDB_LOGIN.md --- WANDB_LOGIN.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/WANDB_LOGIN.md b/WANDB_LOGIN.md index c2fb6a8..893007e 100644 --- a/WANDB_LOGIN.md +++ b/WANDB_LOGIN.md @@ -6,7 +6,7 @@ with this ## 2. Set the WANDB_API_KEY login variable Set the WANDB_API_KEY environment variable to your api key by running ``` -$ export WANDB_API_KEY +$ export WANDB_API_KEY= ``` from the command line (RECOMMENDED) or ```python