From 1103b5ec6a1be25a4f3a077d6773d2375e8a1a99 Mon Sep 17 00:00:00 2001
From: Neythen Treloar <neythen.t@gmail.com>
Date: Fri, 28 Apr 2023 07:32:03 +0100
Subject: [PATCH 1/9] added Weights & Biases experiment tracking to figures 3
 and 4 examples

---
 .../example/Figure_3_RT3D_chemostat.yaml      |  2 +
 .../example/Figure_4_RT3D_chemostat.yaml      |  2 +
 .../Figure_3_RT3D_chemostat/train_RT3D.py     | 37 ++++++++++++-------
 .../Figure_4_RT3D_chemostat/train_RT3D.py     | 14 ++++++-
 4 files changed, 40 insertions(+), 15 deletions(-)

diff --git a/RED/configs/example/Figure_3_RT3D_chemostat.yaml b/RED/configs/example/Figure_3_RT3D_chemostat.yaml
index 3164c03..452e6e7 100644
--- a/RED/configs/example/Figure_3_RT3D_chemostat.yaml
+++ b/RED/configs/example/Figure_3_RT3D_chemostat.yaml
@@ -3,6 +3,8 @@ defaults:
   - /model: RT3D_agent
   - _self_
 
+project_name: figure3-example
+
 policy_delay: 2
 initial_explore_rate: 1
 explore_rate_mul: 1
diff --git a/RED/configs/example/Figure_4_RT3D_chemostat.yaml b/RED/configs/example/Figure_4_RT3D_chemostat.yaml
index 3164c03..a97dcab 100644
--- a/RED/configs/example/Figure_4_RT3D_chemostat.yaml
+++ b/RED/configs/example/Figure_4_RT3D_chemostat.yaml
@@ -3,6 +3,8 @@ defaults:
   - /model: RT3D_agent
   - _self_
 
+project_name: figure4-example
+
 policy_delay: 2
 initial_explore_rate: 1
 explore_rate_mul: 1
diff --git a/examples/Figure_3_RT3D_chemostat/train_RT3D.py b/examples/Figure_3_RT3D_chemostat/train_RT3D.py
index 6eafea7..0112e39 100644
--- a/examples/Figure_3_RT3D_chemostat/train_RT3D.py
+++ b/examples/Figure_3_RT3D_chemostat/train_RT3D.py
@@ -1,4 +1,3 @@
-
 import math
 import os
 import sys
@@ -14,7 +13,7 @@
 from casadi import *
 from hydra.utils import instantiate
 from omegaconf import DictConfig, OmegaConf
-
+import wandb
 from RED.agents.continuous_agents.rt3d import RT3D_agent
 from RED.environments.chemostat.xdot_chemostat import xdot
 from RED.environments.OED_env import OED_env
@@ -25,7 +24,7 @@
 
 
 @hydra.main(version_base=None, config_path="../../RED/configs", config_name="example/Figure_3_RT3D_chemostat")
-def train_RT3D(cfg : DictConfig):
+def train_RT3D(cfg: DictConfig):
     ### config setup
     cfg = cfg.example
     print(
@@ -35,6 +34,9 @@ def train_RT3D(cfg : DictConfig):
         sep="\n\n"
     )
 
+    # start a new wandb run to track this script
+    wandb.init(project=cfg.project_name, config=dict(cfg))
+
     ### prepare save path
     os.makedirs(cfg.save_path, exist_ok=True)
     print("Results will be saved in: ", cfg.save_path)
@@ -53,14 +55,14 @@ def train_RT3D(cfg : DictConfig):
     update_count = 0
 
     ### training loop
-    for episode in range(total_episodes):
+    for episode in range(10):
         actual_params = np.random.uniform(
             low=cfg.environment.actual_params,
             high=cfg.environment.actual_params,
             size=(cfg.environment.n_parallel_experiments, n_params)
         )
         env.param_guesses = DM(actual_params)
-        
+
         ### episode buffers for agent
         states = [env.get_initial_RL_state_parallel() for i in range(cfg.environment.n_parallel_experiments)]
         trajectories = [[] for _ in range(cfg.environment.n_parallel_experiments)]
@@ -86,7 +88,8 @@ def train_RT3D(cfg : DictConfig):
             if episode < skip_first_n_episodes:
                 actions = agent.get_actions(inputs, explore_rate=1, test_episode=cfg.test_episode, recurrent=True)
             else:
-                actions = agent.get_actions(inputs, explore_rate=explore_rate, test_episode=cfg.test_episode, recurrent=True)
+                actions = agent.get_actions(inputs, explore_rate=explore_rate, test_episode=cfg.test_episode,
+                                            recurrent=True)
             e_actions.append(actions)
 
             ### step env
@@ -94,12 +97,12 @@ def train_RT3D(cfg : DictConfig):
             next_states = []
             for i, obs in enumerate(outputs):
                 state, action = states[i], actions[i]
-                next_state, reward, done, _, u  = obs
+                next_state, reward, done, _, u = obs
 
                 ### set done flag
                 if control_interval == cfg.environment.N_control_intervals - 1 \
-                    or np.all(np.abs(next_state) >= 1) \
-                    or math.isnan(np.sum(next_state)):
+                        or np.all(np.abs(next_state) >= 1) \
+                        or math.isnan(np.sum(next_state)):
                     done = True
 
                 ### memorize transition
@@ -110,9 +113,10 @@ def train_RT3D(cfg : DictConfig):
                 ### log episode data
                 e_us[i].append(u)
                 next_states.append(next_state)
-                if reward != -1: # dont include the unstable trajectories as they override the true return
+                if reward != -1:  # dont include the unstable trajectories as they override the true return
                     e_rewards[i].append(reward)
                     e_returns[i] += reward
+
             states = next_states
 
         ### do not memorize the test trajectory (the last one)
@@ -123,7 +127,7 @@ def train_RT3D(cfg : DictConfig):
         for trajectory in trajectories:
             # check for instability
             if np.all([np.all(np.abs(trajectory[i][0]) <= 1) for i in range(len(trajectory))]) \
-                and not math.isnan(np.sum(trajectory[-1][0])):
+                    and not math.isnan(np.sum(trajectory[-1][0])):
                 agent.memory.append(trajectory)
 
         ### train agent
@@ -148,6 +152,11 @@ def train_RT3D(cfg : DictConfig):
         history["us"].extend(e_us)
         history["explore_rate"].append(explore_rate)
 
+        ### log results to w and b
+        for i in range(len(e_returns)):
+            wandb.log({"returns": e_returns[i], "actions": np.array(e_actions).transpose(1, 0, 2)[i],
+                       "us": e_us[i], "explore_rate": explore_rate})
+
         print(
             f"\nEPISODE: [{episode}/{total_episodes}] ({episode * cfg.environment.n_parallel_experiments} experiments)",
             f"explore rate:\t{explore_rate:.2f}",
@@ -183,6 +192,8 @@ def train_RT3D(cfg : DictConfig):
         conv_window=25,
     )
 
+    wandb.finish()
+
 
 def setup_env(cfg):
     n_cores = multiprocessing.cpu_count()
@@ -191,8 +202,8 @@ def setup_env(cfg):
     n_params = actual_params.size()[0]
     param_guesses = actual_params
     args = cfg.environment.y0, xdot, param_guesses, actual_params, cfg.environment.n_observed_variables, \
-        cfg.environment.n_controlled_inputs, cfg.environment.num_inputs, cfg.environment.input_bounds, \
-        cfg.environment.dt, cfg.environment.control_interval_time, normaliser
+           cfg.environment.n_controlled_inputs, cfg.environment.num_inputs, cfg.environment.input_bounds, \
+           cfg.environment.dt, cfg.environment.control_interval_time, normaliser
     env = OED_env(*args)
     env.mapped_trajectory_solver = env.CI_solver.map(cfg.environment.n_parallel_experiments, "thread", n_cores)
     return env, n_params
diff --git a/examples/Figure_4_RT3D_chemostat/train_RT3D.py b/examples/Figure_4_RT3D_chemostat/train_RT3D.py
index 0b23c06..9238de8 100644
--- a/examples/Figure_4_RT3D_chemostat/train_RT3D.py
+++ b/examples/Figure_4_RT3D_chemostat/train_RT3D.py
@@ -7,7 +7,7 @@
 sys.path.append(IMPORT_PATH)
 
 import multiprocessing
-
+import wandb
 import hydra
 import numpy as np
 from casadi import *
@@ -34,6 +34,9 @@ def train_RT3D(cfg : DictConfig):
         sep="\n\n"
     )
 
+    # start a new wandb run to track this script
+    wandb.init(project="figure4-example", config=dict(cfg))
+
     ### prepare save path
     os.makedirs(cfg.save_path, exist_ok=True)
     print("Results will be saved in: ", cfg.save_path)
@@ -57,7 +60,7 @@ def train_RT3D(cfg : DictConfig):
         actual_params = np.random.uniform(
             low=cfg.environment.lb,
             high=cfg.environment.ub,
-            size=(cfg.environment.n_parallel_experiments, 3)
+            size=(cfg.environment.n_parallel_experiments, n_params)
         )
         env.param_guesses = DM(actual_params)
 
@@ -148,6 +151,11 @@ def train_RT3D(cfg : DictConfig):
         history["us"].extend(e_us)
         history["explore_rate"].append(explore_rate)
 
+        ### log results to w and b
+        for i in range(len(e_returns)):
+            wandb.log({"returns": e_returns[i], "actions": np.array(e_actions).transpose(1, 0, 2)[i],
+                       "us": e_us[i], "explore_rate": explore_rate})
+
         print(
             f"\nEPISODE: [{episode}/{total_episodes}] ({episode * cfg.environment.n_parallel_experiments} experiments)",
             f"explore rate:\t{explore_rate:.2f}",
@@ -183,6 +191,8 @@ def train_RT3D(cfg : DictConfig):
         conv_window=25,
     )
 
+    wandb.finish()
+
 
 def setup_env(cfg):
     n_cores = multiprocessing.cpu_count()

From d4dc453cdd756220a19583f8407ccbcba62e49ce Mon Sep 17 00:00:00 2001
From: Neythen Treloar <neythen.t@gmail.com>
Date: Fri, 28 Apr 2023 07:39:42 +0100
Subject: [PATCH 2/9] removed debugging code

---
 examples/Figure_3_RT3D_chemostat/train_RT3D.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/Figure_3_RT3D_chemostat/train_RT3D.py b/examples/Figure_3_RT3D_chemostat/train_RT3D.py
index 0112e39..52a91cc 100644
--- a/examples/Figure_3_RT3D_chemostat/train_RT3D.py
+++ b/examples/Figure_3_RT3D_chemostat/train_RT3D.py
@@ -55,7 +55,7 @@ def train_RT3D(cfg: DictConfig):
     update_count = 0
 
     ### training loop
-    for episode in range(10):
+    for episode in range(0, cfg.environment.N_control_intervals):
         actual_params = np.random.uniform(
             low=cfg.environment.actual_params,
             high=cfg.environment.actual_params,

From fbe8a059ea31127444a30d4d8c1174a8094f7659 Mon Sep 17 00:00:00 2001
From: Neythen Treloar <neythen.t@gmail.com>
Date: Fri, 28 Apr 2023 17:05:10 +0100
Subject: [PATCH 3/9] bugfix

---
 examples/Figure_3_RT3D_chemostat/train_RT3D.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/Figure_3_RT3D_chemostat/train_RT3D.py b/examples/Figure_3_RT3D_chemostat/train_RT3D.py
index 52a91cc..948586f 100644
--- a/examples/Figure_3_RT3D_chemostat/train_RT3D.py
+++ b/examples/Figure_3_RT3D_chemostat/train_RT3D.py
@@ -55,7 +55,7 @@ def train_RT3D(cfg: DictConfig):
     update_count = 0
 
     ### training loop
-    for episode in range(0, cfg.environment.N_control_intervals):
+    for episode in range(total_episodes):
         actual_params = np.random.uniform(
             low=cfg.environment.actual_params,
             high=cfg.environment.actual_params,

From 2338debb1ab0f72d6d9c3fef7851f0398ac5c855 Mon Sep 17 00:00:00 2001
From: Neythen Treloar <neythen.t@gmail.com>
Date: Sat, 29 Apr 2023 12:14:08 +0100
Subject: [PATCH 4/9] added w and b login instructions

---
 WANDB_LOGIN.md                                | 34 +++++++++++++++++++
 .../Figure_3_RT3D_chemostat/train_RT3D.py     |  1 +
 2 files changed, 35 insertions(+)
 create mode 100644 WANDB_LOGIN.md

diff --git a/WANDB_LOGIN.md b/WANDB_LOGIN.md
new file mode 100644
index 0000000..2708a63
--- /dev/null
+++ b/WANDB_LOGIN.md
@@ -0,0 +1,34 @@
+## 1. Get your w and b api key
+The weights and bias api key can be found by logging into the rl-oed team here: https://stability.wandb.io/rl-oed. 
+Make sure you get the api key associated with the rl-oed team and not your personal one, otherwise experiments will be 
+logged in the wrong place. You will need access to the stability cluster first, message NeythenT on discord to get help 
+with this
+
+## 2. Set the WANDB_API_KEY login variable
+Set the WANDB_API_KEY environment variable to your api key by running 
+```
+$ export WANDB_API_KEY <YOUR API KEY>
+```
+from the command line or 
+```python
+os.environ["WANDB_API_KEY"] = "<YOUR API KEY>"
+```
+from Python
+
+## Login to w and b 
+To log in from command line
+```
+$ wandb login --host=https://stability.wandb.io
+```
+or in a python script
+```python
+wandb.login(host='https://stability.wandb.io', relogin=False)
+```
+
+## Running automated slurm jobs
+I suggest we add the following lines to the job script that gets pushed to GitHub, so that people only need to copy
+their own API key in (do not commit a real key to the repository).
+```
+$ export WANDB_API_KEY=<YOUR API KEY>
+$ wandb login --host=https://stability.wandb.io
+```
diff --git a/examples/Figure_3_RT3D_chemostat/train_RT3D.py b/examples/Figure_3_RT3D_chemostat/train_RT3D.py
index 948586f..c808edc 100644
--- a/examples/Figure_3_RT3D_chemostat/train_RT3D.py
+++ b/examples/Figure_3_RT3D_chemostat/train_RT3D.py
@@ -210,4 +210,5 @@ def setup_env(cfg):
 
 
 if __name__ == '__main__':
+
     train_RT3D()

From 35190c9192b45cb33b0518fd902c8d8bdeb2973a Mon Sep 17 00:00:00 2001
From: Neythen Treloar <neythen.t@gmail.com>
Date: Sat, 29 Apr 2023 13:59:19 +0100
Subject: [PATCH 5/9] README update

---
 WANDB_LOGIN.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/WANDB_LOGIN.md b/WANDB_LOGIN.md
index 2708a63..4c9940c 100644
--- a/WANDB_LOGIN.md
+++ b/WANDB_LOGIN.md
@@ -9,14 +9,14 @@ Set the WANDB_API_KEY environment variable to your api key by running
 ```
 $ export WANDB_API_KEY <YOUR API KEY>
 ```
-from the command line or 
+from the command line (RECOMMENDED) or 
 ```python
 os.environ["WANDB_API_KEY"] = "<YOUR API KEY>"
 ```
 from Python
 
 ## Login to w and b 
-To log in from command line
+To log in from command line (RECOMMENDED)
 ```
 $ wandb login --host=https://stability.wandb.io
 ```

From 89ae56de4dc28fa3f954f509ecf048737996831c Mon Sep 17 00:00:00 2001
From: Neythen Treloar <neythen.t@gmail.com>
Date: Sat, 29 Apr 2023 18:14:54 +0100
Subject: [PATCH 6/9] added Weights & Biases team to the configuration so
 experiments are logged in the rl-oed team

---
 RED/configs/example/Figure_3_RT3D_chemostat.yaml | 3 ++-
 RED/configs/example/Figure_4_RT3D_chemostat.yaml | 3 ++-
 examples/Figure_3_RT3D_chemostat/train_RT3D.py   | 4 ++--
 examples/Figure_4_RT3D_chemostat/train_RT3D.py   | 2 +-
 4 files changed, 7 insertions(+), 5 deletions(-)

diff --git a/RED/configs/example/Figure_3_RT3D_chemostat.yaml b/RED/configs/example/Figure_3_RT3D_chemostat.yaml
index 452e6e7..b3e4aaf 100644
--- a/RED/configs/example/Figure_3_RT3D_chemostat.yaml
+++ b/RED/configs/example/Figure_3_RT3D_chemostat.yaml
@@ -3,7 +3,8 @@ defaults:
   - /model: RT3D_agent
   - _self_
 
-project_name: figure3-example
+wandb_project_name: figure3-example
+wandb_team: rl-oed
 
 policy_delay: 2
 initial_explore_rate: 1
diff --git a/RED/configs/example/Figure_4_RT3D_chemostat.yaml b/RED/configs/example/Figure_4_RT3D_chemostat.yaml
index a97dcab..7755039 100644
--- a/RED/configs/example/Figure_4_RT3D_chemostat.yaml
+++ b/RED/configs/example/Figure_4_RT3D_chemostat.yaml
@@ -3,7 +3,8 @@ defaults:
   - /model: RT3D_agent
   - _self_
 
-project_name: figure4-example
+wandb_project_name: figure4-example
+wandb_team: rl-oed
 
 policy_delay: 2
 initial_explore_rate: 1
diff --git a/examples/Figure_3_RT3D_chemostat/train_RT3D.py b/examples/Figure_3_RT3D_chemostat/train_RT3D.py
index c808edc..57ae26d 100644
--- a/examples/Figure_3_RT3D_chemostat/train_RT3D.py
+++ b/examples/Figure_3_RT3D_chemostat/train_RT3D.py
@@ -35,7 +35,7 @@ def train_RT3D(cfg: DictConfig):
     )
 
     # start a new wandb run to track this script
-    wandb.init(project=cfg.project_name, config=dict(cfg))
+    wandb.init(project=cfg.wandb_project_name, entity=cfg.wandb_team, config=dict(cfg))
 
     ### prepare save path
     os.makedirs(cfg.save_path, exist_ok=True)
@@ -55,7 +55,7 @@ def train_RT3D(cfg: DictConfig):
     update_count = 0
 
     ### training loop
-    for episode in range(total_episodes):
+    for episode in range(10):
         actual_params = np.random.uniform(
             low=cfg.environment.actual_params,
             high=cfg.environment.actual_params,
diff --git a/examples/Figure_4_RT3D_chemostat/train_RT3D.py b/examples/Figure_4_RT3D_chemostat/train_RT3D.py
index 9238de8..7152858 100644
--- a/examples/Figure_4_RT3D_chemostat/train_RT3D.py
+++ b/examples/Figure_4_RT3D_chemostat/train_RT3D.py
@@ -35,7 +35,7 @@ def train_RT3D(cfg : DictConfig):
     )
 
     # start a new wandb run to track this script
-    wandb.init(project="figure4-example", config=dict(cfg))
+    wandb.init(project=cfg.wandb_project_name, entity=cfg.wandb_team, config=dict(cfg))
 
     ### prepare save path
     os.makedirs(cfg.save_path, exist_ok=True)

From 75977bd8a4a35ed89aa844b6a780314daf94f5d5 Mon Sep 17 00:00:00 2001
From: Neythen Treloar <neythen.t@gmail.com>
Date: Sat, 29 Apr 2023 18:19:56 +0100
Subject: [PATCH 7/9] removed debugging code

---
 examples/Figure_3_RT3D_chemostat/train_RT3D.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/Figure_3_RT3D_chemostat/train_RT3D.py b/examples/Figure_3_RT3D_chemostat/train_RT3D.py
index 57ae26d..0dc6a9d 100644
--- a/examples/Figure_3_RT3D_chemostat/train_RT3D.py
+++ b/examples/Figure_3_RT3D_chemostat/train_RT3D.py
@@ -55,7 +55,7 @@ def train_RT3D(cfg: DictConfig):
     update_count = 0
 
     ### training loop
-    for episode in range(10):
+    for episode in range(total_episodes):
         actual_params = np.random.uniform(
             low=cfg.environment.actual_params,
             high=cfg.environment.actual_params,

From 0be4dde4c2cae1305a1f1c0999900b76fcff9eb1 Mon Sep 17 00:00:00 2001
From: Neythen Treloar <33317183+zcqsntr@users.noreply.github.com>
Date: Sat, 29 Apr 2023 19:35:50 +0100
Subject: [PATCH 8/9] Update WANDB_LOGIN.md

---
 WANDB_LOGIN.md | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/WANDB_LOGIN.md b/WANDB_LOGIN.md
index 4c9940c..c2fb6a8 100644
--- a/WANDB_LOGIN.md
+++ b/WANDB_LOGIN.md
@@ -1,7 +1,6 @@
 ## 1. Get your w and b api key
-The weights and bias api key can be found by logging into the rl-oed team here: https://stability.wandb.io/rl-oed. 
-Make sure you get the api key associated with the rl-oed team and not your personal one, otherwise experiments will be 
-logged in the wrong place. You will need access to the stability cluster first, message NeythenT on discord to get help 
+Your Weights & Biases API key can be found by logging into the rl-oed team here: https://stability.wandb.io/.
+You will need access to the stability cluster first; message NeythenT on Discord to get help
 with this
 
 ## 2. Set the WANDB_API_KEY login variable

From 27ad564dee73cdbc8722d945354f8a6c02998009 Mon Sep 17 00:00:00 2001
From: Neythen Treloar <33317183+zcqsntr@users.noreply.github.com>
Date: Sat, 29 Apr 2023 19:37:24 +0100
Subject: [PATCH 9/9] Update WANDB_LOGIN.md

---
 WANDB_LOGIN.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/WANDB_LOGIN.md b/WANDB_LOGIN.md
index c2fb6a8..893007e 100644
--- a/WANDB_LOGIN.md
+++ b/WANDB_LOGIN.md
@@ -6,7 +6,7 @@ with this
 ## 2. Set the WANDB_API_KEY login variable
 Set the WANDB_API_KEY environment variable to your api key by running 
 ```
-$ export WANDB_API_KEY <YOUR API KEY>
+$ export WANDB_API_KEY=<YOUR API KEY>
 ```
 from the command line (RECOMMENDED) or 
 ```python