Wandb integration #25

Open · wants to merge 10 commits into master

3 changes: 3 additions & 0 deletions RED/configs/example/Figure_3_RT3D_chemostat.yaml
@@ -3,6 +3,9 @@ defaults:
- /model: RT3D_agent
- _self_

wandb_project_name: figure3-example
wandb_team: rl-oed

policy_delay: 2
initial_explore_rate: 1
explore_rate_mul: 1
3 changes: 3 additions & 0 deletions RED/configs/example/Figure_4_RT3D_chemostat.yaml
@@ -3,6 +3,9 @@ defaults:
- /model: RT3D_agent
- _self_

wandb_project_name: figure4-example
wandb_team: rl-oed

policy_delay: 2
initial_explore_rate: 1
explore_rate_mul: 1
33 changes: 33 additions & 0 deletions WANDB_LOGIN.md
@@ -0,0 +1,33 @@
## 1. Get your W&B API key
The Weights & Biases API key can be found by logging into the rl-oed team here: https://stability.wandb.io/.
You will need access to the Stability cluster first; message NeythenT on Discord for help with this.

## 2. Set the WANDB_API_KEY environment variable
Set the WANDB_API_KEY environment variable to your API key by running
```
$ export WANDB_API_KEY=<YOUR API KEY>
```
from the command line (RECOMMENDED), or
```python
os.environ["WANDB_API_KEY"] = "<YOUR API KEY>"
```
from Python.

## 3. Log in to W&B
To log in from the command line (RECOMMENDED):
```
$ wandb login --host=https://stability.wandb.io
```
or from a Python script:
```python
wandb.login(host='https://stability.wandb.io', relogin=False)
```
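
Putting the two steps together, a script can authenticate and start a run roughly the way the training scripts in this PR do. This is a minimal sketch, assuming WANDB_API_KEY is already set; the project and team names mirror the example configs, and the logged metric is a placeholder:
```python
import os

import wandb

# Assumes WANDB_API_KEY is already exported in the shell (recommended);
# otherwise uncomment the next line and paste your key in.
# os.environ["WANDB_API_KEY"] = "<YOUR API KEY>"

wandb.login(host="https://stability.wandb.io")

# Project and team names mirror the example configs in this PR;
# the logged metric is illustrative only.
run = wandb.init(project="figure3-example", entity="rl-oed")
run.log({"example_metric": 1.0})
run.finish()
```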

## 4. Running automated SLURM jobs
I suggest we add the following lines to the job script that gets pushed to GitHub, and people just copy their
API keys in. A full job script is sketched after the snippet.
```
$ export WANDB_API_KEY=<YOUR API KEY>
$ wandb login --host=https://stability.wandb.io
```
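
For reference, a complete job script with those lines added might look like the sketch below. The job name, resource requests, and training script path are illustrative placeholders, not part of this PR:
```
#!/bin/bash
#SBATCH --job-name=rt3d-train
#SBATCH --time=24:00:00
#SBATCH --cpus-per-task=8

# Paste your personal API key here before submitting.
export WANDB_API_KEY=<YOUR API KEY>
wandb login --host=https://stability.wandb.io

python examples/Figure_3_RT3D_chemostat/train_RT3D.py
```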
37 changes: 27 additions & 10 deletions examples/Figure_3_RT3D_chemostat/train_RT3D.py
@@ -1,5 +1,7 @@


import json

import math
import os
import sys
@@ -15,7 +17,7 @@
from casadi import *
from hydra.utils import instantiate
from omegaconf import DictConfig, OmegaConf

import wandb
from RED.agents.continuous_agents.rt3d import RT3D_agent
from RED.environments.chemostat.xdot_chemostat import xdot
from RED.environments.OED_env import OED_env
@@ -26,7 +28,7 @@


@hydra.main(version_base=None, config_path="../../RED/configs", config_name="example/Figure_3_RT3D_chemostat")
def train_RT3D(cfg : DictConfig):
def train_RT3D(cfg: DictConfig):
### config setup
cfg = cfg.example
print(
@@ -36,6 +38,9 @@ def train_RT3D(cfg : DictConfig):
sep="\n\n"
)

# start a new wandb run to track this script
wandb.init(project=cfg.wandb_project_name, entity=cfg.wandb_team, config=dict(cfg))

### prepare save path
os.makedirs(cfg.save_path, exist_ok=True)
print("Results will be saved in: ", cfg.save_path)
@@ -84,7 +89,7 @@ def train_RT3D(cfg : DictConfig):
size=(cfg.environment.n_parallel_experiments, n_params)
)
env.param_guesses = DM(actual_params)

### episode buffers for agent
states = [env.get_initial_RL_state_parallel() for i in range(cfg.environment.n_parallel_experiments)]
trajectories = [[] for _ in range(cfg.environment.n_parallel_experiments)]
@@ -110,20 +115,21 @@
if episode < skip_first_n_episodes:
actions = agent.get_actions(inputs, explore_rate=1, test_episode=cfg.test_episode, recurrent=True)
else:
actions = agent.get_actions(inputs, explore_rate=explore_rate, test_episode=cfg.test_episode, recurrent=True)
actions = agent.get_actions(inputs, explore_rate=explore_rate, test_episode=cfg.test_episode,
recurrent=True)
e_actions.append(actions)

### step env
outputs = env.map_parallel_step(actions.T, actual_params, continuous=True)
next_states = []
for i, obs in enumerate(outputs):
state, action = states[i], actions[i]
next_state, reward, done, _, u = obs
next_state, reward, done, _, u = obs

### set done flag
if control_interval == cfg.environment.N_control_intervals - 1 \
or np.all(np.abs(next_state) >= 1) \
or math.isnan(np.sum(next_state)):
or np.all(np.abs(next_state) >= 1) \
or math.isnan(np.sum(next_state)):
done = True

### memorize transition
@@ -134,8 +140,11 @@ def train_RT3D(cfg : DictConfig):
### log episode data
e_us[i].append(u.tolist())
next_states.append(next_state)


e_rewards[i].append(reward)
e_returns[i] += reward

states = next_states

### do not memorize the test trajectory (the last one)
@@ -146,7 +155,7 @@ def train_RT3D(cfg : DictConfig):
for trajectory in trajectories:
# check for instability
if np.all([np.all(np.abs(trajectory[i][0]) <= 1) for i in range(len(trajectory))]) \
and not math.isnan(np.sum(trajectory[-1][0])):
and not math.isnan(np.sum(trajectory[-1][0])):
agent.memory.append(trajectory)

### train agent
@@ -173,6 +182,11 @@ def train_RT3D(cfg : DictConfig):
history["us"].extend(e_us)
history["explore_rate"].append(explore_rate)

### log results to w and b
for i in range(len(e_returns)):
wandb.log({"returns": e_returns[i], "actions": np.array(e_actions).transpose(1, 0, 2)[i],
"us": e_us[i], "explore_rate": explore_rate})

print(
f"\nEPISODE: [{episode}/{total_episodes}] ({episode * cfg.environment.n_parallel_experiments} experiments)",
f"explore rate:\t{explore_rate:.2f}",
@@ -211,6 +225,8 @@ def train_RT3D(cfg : DictConfig):
conv_window=25,
)

wandb.finish()


def setup_env(cfg):
n_cores = multiprocessing.cpu_count()
@@ -219,12 +235,13 @@ def setup_env(cfg):
n_params = actual_params.size()[0]
param_guesses = actual_params
args = cfg.environment.y0, xdot, param_guesses, actual_params, cfg.environment.n_observed_variables, \
cfg.environment.n_controlled_inputs, cfg.environment.num_inputs, cfg.environment.input_bounds, \
cfg.environment.dt, cfg.environment.control_interval_time, normaliser
cfg.environment.n_controlled_inputs, cfg.environment.num_inputs, cfg.environment.input_bounds, \
cfg.environment.dt, cfg.environment.control_interval_time, normaliser
env = OED_env(*args)
env.mapped_trajectory_solver = env.CI_solver.map(cfg.environment.n_parallel_experiments, "thread", n_cores)
return env, n_params


if __name__ == '__main__':

train_RT3D()
14 changes: 12 additions & 2 deletions examples/Figure_4_RT3D_chemostat/train_RT3D.py
@@ -8,7 +8,7 @@
sys.path.append(IMPORT_PATH)

import multiprocessing

import wandb
import hydra
import numpy as np
from casadi import *
@@ -35,6 +35,9 @@ def train_RT3D(cfg : DictConfig):
sep="\n\n"
)

# start a new wandb run to track this script
wandb.init(project=cfg.wandb_project_name, entity=cfg.wandb_team, config=dict(cfg))

### prepare save path
os.makedirs(cfg.save_path, exist_ok=True)
print("Results will be saved in: ", cfg.save_path)
@@ -81,7 +84,7 @@ def train_RT3D(cfg : DictConfig):
actual_params = np.random.uniform(
low=cfg.environment.lb,
high=cfg.environment.ub,
size=(cfg.environment.n_parallel_experiments, 3)
size=(cfg.environment.n_parallel_experiments, n_params)
)
env.param_guesses = DM(actual_params)

@@ -173,6 +176,11 @@ def train_RT3D(cfg : DictConfig):
history["us"].extend(e_us)
history["explore_rate"].append(explore_rate)

### log results to w and b
for i in range(len(e_returns)):
wandb.log({"returns": e_returns[i], "actions": np.array(e_actions).transpose(1, 0, 2)[i],
"us": e_us[i], "explore_rate": explore_rate})

print(
f"\nEPISODE: [{episode}/{total_episodes}] ({episode * cfg.environment.n_parallel_experiments} experiments)",
f"explore rate:\t{explore_rate:.2f}",
@@ -211,6 +219,8 @@ def train_RT3D(cfg : DictConfig):
conv_window=25,
)

wandb.finish()


def setup_env(cfg):
n_cores = multiprocessing.cpu_count()