From 1edb99126b7a0b889650837d66462237302dc35d Mon Sep 17 00:00:00 2001 From: Ali-Hossam Date: Mon, 1 Apr 2024 14:37:01 +0200 Subject: [PATCH 01/12] updated README --- README.md | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 50c59e4c..815e21c6 100644 --- a/README.md +++ b/README.md @@ -10,10 +10,12 @@ https://lab.mlpack.org/. ### 0. Contents - 1. [Overview](#1-overview) - 2. [Building the examples and usage](#2-Building-the-examples-and-usage) - 3. [List of examples](#3-List-of-examples) - 4. [Datasets](#4-datasets) +- [0. Contents](#0-contents) +- [1. Overview](#1-overview) +- [2. Building the examples and usage](#2-building-the-examples-and-usage) +- [3. List of examples](#3-list-of-examples) +- [4. Datasets](#4-datasets) +- [5. Setup](#5-setup) ### 1. Overview @@ -93,3 +95,9 @@ extract all the necessary dataset in order for examples to work perfectly: cd tools/ ./download_data_set.py ``` + +### 5. Setup +To setup a jupyter local environment that work with C++ using xeus-cling you shall execute the following command: +```sh +./script/jupyter-conda-setup.sh +``` \ No newline at end of file From 5c621a9544f0cb52146f4fb48d6d553c40cfd62d Mon Sep 17 00:00:00 2001 From: Ali-Hossam Date: Sat, 6 Apr 2024 14:39:41 +0200 Subject: [PATCH 02/12] Adapted to work with new DiscreteActionEnv class --- .../acrobot_dqn/acrobot_dqn.cpp | 33 +++++++++++-------- 1 file changed, 19 insertions(+), 14 deletions(-) diff --git a/reinforcement_learning_gym/acrobot_dqn/acrobot_dqn.cpp b/reinforcement_learning_gym/acrobot_dqn/acrobot_dqn.cpp index eb434939..971098bf 100644 --- a/reinforcement_learning_gym/acrobot_dqn/acrobot_dqn.cpp +++ b/reinforcement_learning_gym/acrobot_dqn/acrobot_dqn.cpp @@ -17,6 +17,10 @@ using namespace mlpack; using namespace ens; +// Set up the state and action space. +constexpr size_t stateDimension = 6; +constexpr size_t actionSize = 3; + // Function to train the agent on the Acrobot-v1 gym environment. template::State nextState; nextState.Data() = env.observation; replayMethod.Store( @@ -85,22 +89,22 @@ void Train(gym::Environment& env, int main() { // Initializing the agent. - // Set up the state and action space. - DiscreteActionEnv::State::dimension = 6; - DiscreteActionEnv::Action::size = 3; // Set up the network. FFN module( MeanSquaredError(), GaussianInitialization(0, 1)); - module.Add(DiscreteActionEnv::State::dimension, 64); - module.Add(); - module.Add(64, DiscreteActionEnv::Action::size); + module.Add(64); + module.Add(); + module.Add(actionSize); SimpleDQN<> model(module); // Set up the policy method. - GreedyPolicy policy(1.0, 1000, 0.1, 0.99); + GreedyPolicy> + policy(1.0, 1000, 0.1, 0.99); + // To enable 3-step learning, we set the last parameter of the replay method as 3. - PrioritizedReplay replayMethod(64, 5000, 0.6, 3); + PrioritizedReplay> + replayMethod(64, 5000, 0.6, 3); // Set up training configurations. TrainingConfig config; @@ -111,7 +115,7 @@ int main() config.DoubleQLearning() = true; // Set up DQN agent. - QLearning, decltype(model), AdamUpdate, decltype(policy), @@ -120,7 +124,7 @@ int main() // Preparation for training the agent // Set up the gym training environment. - gym::Environment env("gym.kurg.org", "4040", "Acrobot-v1"); + gym::Environment env("localhost", "4040", "Acrobot-v1"); // Initializing training variables. std::vector returnList; @@ -144,7 +148,7 @@ int main() agent.Deterministic() = true; // Creating and setting up the gym environment for testing. 
- gym::Environment envTest("gym.kurg.org", "4040", "Acrobot-v1"); + gym::Environment envTest("localhost", "4040", "Acrobot-v1"); envTest.monitor.start("./dummy/", true, true); // Resets the environment. @@ -193,8 +197,9 @@ int main() agent.Deterministic() = true; // Creating and setting up the gym environment for testing. - envTest.monitor.start("./dummy/", true, true); - + // envTest.monitor.start("./dummy/", true, true); + envTest.compression(9); + // Resets the environment. envTest.reset(); envTest.render(); From 81448915894c42437bc889f2d5130f7eece2b701 Mon Sep 17 00:00:00 2001 From: Ali-Hossam Date: Sun, 7 Apr 2024 14:05:22 +0200 Subject: [PATCH 03/12] Adapt bidpedal_walker_sac to work with new ContinuousActionEnv class --- .../bipedal_walker_sac/bipedal_walker_sac.cpp | 24 +++++++++---------- 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/reinforcement_learning_gym/bipedal_walker_sac/bipedal_walker_sac.cpp b/reinforcement_learning_gym/bipedal_walker_sac/bipedal_walker_sac.cpp index 3ca5570f..56f15899 100644 --- a/reinforcement_learning_gym/bipedal_walker_sac/bipedal_walker_sac.cpp +++ b/reinforcement_learning_gym/bipedal_walker_sac/bipedal_walker_sac.cpp @@ -19,6 +19,10 @@ using namespace mlpack; using namespace ens; using namespace gym; +// Set up the state and action space. +constexpr size_t stateDimension = 24; +constexpr size_t actionSize = 4; + template::State nextState; nextState.Data() = env.observation; replayMethod.Store( @@ -94,10 +98,6 @@ void Train(gym::Environment& env, int main() { // Initializing the agent. - // Set up the state and action space. - ContinuousActionEnv::State::dimension = 24; - ContinuousActionEnv::Action::size = 4; - bool usePreTrainedModel = true; // Set up the actor and critic networks. @@ -107,9 +107,8 @@ int main() policyNetwork.Add(); policyNetwork.Add(128); policyNetwork.Add(); - policyNetwork.Add(ContinuousActionEnv::Action::size); + policyNetwork.Add(actionSize); policyNetwork.Add(); - policyNetwork.ResetParameters(); FFN qNetwork( EmptyLoss(), GaussianInitialization(0, 0.01)); @@ -118,10 +117,11 @@ int main() qNetwork.Add(128); qNetwork.Add(); qNetwork.Add(1); - qNetwork.ResetParameters(); + // Set up the replay method. - RandomReplay replayMethod(32, 10000); + RandomReplay> + replayMethod(32, 10000); // Set up training configurations. TrainingConfig config; @@ -148,14 +148,14 @@ int main() * To default is to use the usePreTrainedModel. Otherwise you can disable this * by change the usePreTrainedModel to false and then recompile this example. */ - SAC, decltype(qNetwork), decltype(policyNetwork), AdamUpdate> agent(config, qNetwork, policyNetwork, replayMethod); const std::string environment = "BipedalWalker-v3"; - const std::string host = "gym.kurg.org"; + const std::string host = "127.0.0.1"; const std::string port = "4040"; Environment env(host, port, environment); @@ -187,7 +187,7 @@ int main() agent.Deterministic() = true; // Creating and setting up the gym environment for testing. - gym::Environment envTest("gym.kurg.org", "4040", "BipedalWalker-v3"); + gym::Environment envTest(host, port, environment); envTest.monitor.start("./dummy/", true, true); // Resets the environment. 
From bb9e8d3595cafdecce9baa6f80b8fc9db4f65a4e Mon Sep 17 00:00:00 2001 From: Ali-Hossam Date: Sun, 7 Apr 2024 23:29:01 +0200 Subject: [PATCH 04/12] Adapt lunar_lander_dqn to work with new DiscreteActionEnv class --- .../lunar_lander_dqn/lunar_lander_dqn.cpp | 29 +++++++++++-------- 1 file changed, 17 insertions(+), 12 deletions(-) diff --git a/reinforcement_learning_gym/lunar_lander_dqn/lunar_lander_dqn.cpp b/reinforcement_learning_gym/lunar_lander_dqn/lunar_lander_dqn.cpp index d02e2e9c..4a2babed 100644 --- a/reinforcement_learning_gym/lunar_lander_dqn/lunar_lander_dqn.cpp +++ b/reinforcement_learning_gym/lunar_lander_dqn/lunar_lander_dqn.cpp @@ -18,6 +18,10 @@ using namespace mlpack; using namespace ens; +// Set up the state and action space. +constexpr size_t stateDimension = 8; +constexpr size_t actionSize = 4; + template::State nextState; nextState.Data() = env.observation; replayMethod.Store( @@ -82,22 +86,20 @@ void Train( int main() { // Initializing the agent. - // Set up the state and action space. - DiscreteActionEnv::State::dimension = 8; - DiscreteActionEnv::Action::size = 4; - // Set up the network. FFN network( MeanSquaredError(), GaussianInitialization(0, 1)); network.Add(128); - network.Add(); - network.Add(DiscreteActionEnv::Action::size); + network.Add(); + network.Add(actionSize); SimpleDQN<> model(network); // Set up the policy and replay method. - GreedyPolicy policy(1.0, 2000, 0.1, 0.99); - RandomReplay replayMethod(64, 100000); + GreedyPolicy> + policy(1.0, 2000, 0.1, 0.99); + RandomReplay> + replayMethod(64, 100000); // Set up training configurations. TrainingConfig config; @@ -105,12 +107,15 @@ int main() config.DoubleQLearning() = false; // Set up DQN agent. - QLearning + QLearning, + decltype(model), + AdamUpdate, + decltype(policy)> agent(config, model, policy, replayMethod); // Preparation for training the agent. // Set up the gym training environment. - gym::Environment env("gym.kurg.org", "4040", "LunarLander-v2"); + gym::Environment env("localhost", "4040", "LunarLander-v2"); // Initializing training variables. std::vector returnList; @@ -135,7 +140,7 @@ int main() agent.Deterministic() = true; // Creating and setting up the gym environment for testing. - gym::Environment envTest("gym.kurg.org", "4040", "LunarLander-v2"); + gym::Environment envTest("localhost", "4040", "LunarLander-v2"); envTest.monitor.start("./dummy/", true, true); // Resets the environment. From 3bf7dd7b9cdead0316c7971f3baa8b7d6261e606 Mon Sep 17 00:00:00 2001 From: Ali-Hossam Date: Sun, 7 Apr 2024 23:39:23 +0200 Subject: [PATCH 05/12] Adapt cartpole_dqn.cpp to work with new DicreteActionEnv class --- .../mountain_car_dqn/mountain_car_dqn.cpp | 26 ++++++++++--------- 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/reinforcement_learning_gym/mountain_car_dqn/mountain_car_dqn.cpp b/reinforcement_learning_gym/mountain_car_dqn/mountain_car_dqn.cpp index 738923c7..c7f19819 100644 --- a/reinforcement_learning_gym/mountain_car_dqn/mountain_car_dqn.cpp +++ b/reinforcement_learning_gym/mountain_car_dqn/mountain_car_dqn.cpp @@ -16,6 +16,10 @@ using namespace mlpack; using namespace ens; +// Set up the state and action space. +constexpr size_t stateDimension = 2; +constexpr size_t actionSize = 3; + template::State nextState; nextState.Data() = env.observation; // Use an adjusted reward for task completion. @@ -92,22 +96,20 @@ void Train( int main() { // Initializing the agent. - // Set up the state and action space. 
- DiscreteActionEnv::State::dimension = 2; - DiscreteActionEnv::Action::size = 3; - // Set up the network. FFN network( MeanSquaredError(), GaussianInitialization(0, 1)); network.Add(128); - network.Add(); - network.Add(DiscreteActionEnv::Action::size); + network.Add(); + network.Add(actionSize); // Set up the network. SimpleDQN<> model(network); // Set up the policy method. - GreedyPolicy policy(1.0, 1000, 0.1, 0.99); - RandomReplay replayMethod(32, 10000); + GreedyPolicy> + policy(1.0, 1000, 0.1, 0.99); + RandomReplay> + replayMethod(32, 10000); // Set up training configurations. TrainingConfig config; @@ -115,7 +117,7 @@ int main() config.ExplorationSteps() = 400; // Set up DQN agent. - QLearning, decltype(model), AdamUpdate, decltype(policy), @@ -125,7 +127,7 @@ int main() // Preparation for training the agent. // Set up the gym training environment. - gym::Environment env("gym.kurg.org", "4040", "MountainCar-v0"); + gym::Environment env("localhost", "4040", "MountainCar-v0"); // Initializing training variables. std::vector returnList; @@ -164,7 +166,7 @@ int main() agent.Deterministic() = true; // Creating and setting up the gym environment for testing. - gym::Environment envTest("gym.kurg.org", "4040", "MountainCar-v0"); + gym::Environment envTest("localhost", "4040", "MountainCar-v0"); envTest.monitor.start("./dummy/", true, true); // Resets the environment. From ad4f08c85c9c47511899d214c126d76d863c72ff Mon Sep 17 00:00:00 2001 From: Ali-Hossam Date: Sun, 7 Apr 2024 23:49:56 +0200 Subject: [PATCH 06/12] Adapt pendulum_dqn.cpp to work with new DiscreteActionEnv class --- .../pendulum_dqn/pendulum_dqn.cpp | 28 +++++++++++-------- 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/reinforcement_learning_gym/pendulum_dqn/pendulum_dqn.cpp b/reinforcement_learning_gym/pendulum_dqn/pendulum_dqn.cpp index 40868a75..8f4fa77f 100644 --- a/reinforcement_learning_gym/pendulum_dqn/pendulum_dqn.cpp +++ b/reinforcement_learning_gym/pendulum_dqn/pendulum_dqn.cpp @@ -17,6 +17,10 @@ using namespace mlpack; using namespace ens; +// Set up the state and action space. +constexpr size_t stateDimension = 3; +constexpr size_t actionSize = 3; + template::State nextState; nextState.Data() = env.observation; replayMethod.Store( @@ -82,35 +86,35 @@ void Train( int main() { // Initializing the agent. - - // Set up the state and action space. - DiscreteActionEnv::State::dimension = 3; - DiscreteActionEnv::Action::size = 3; - // Set up the network. FFN network( MeanSquaredError(), GaussianInitialization(0, 1)); network.Add(128); network.Add(); - network.Add(DiscreteActionEnv::Action::size); + network.Add(actionSize); SimpleDQN<> model(network); // Set up the policy and replay method. - GreedyPolicy policy(1.0, 1000, 0.1, 0.99); - RandomReplay replayMethod(32, 10000); + GreedyPolicy> + policy(1.0, 1000, 0.1, 0.99); + RandomReplay> + replayMethod(32, 10000); // Set up training configurations. TrainingConfig config; config.ExplorationSteps() = 100; // Set up DQN agent. - QLearning + QLearning, + decltype(model), + AdamUpdate, + decltype(policy)> agent(config, model, policy, replayMethod); // Preparation for training the agent. // Set up the gym training environment. - gym::Environment env("gym.kurg.org", "4040", "Pendulum-v0"); + gym::Environment env("localhost", "4040", "Pendulum-v1"); // Initializing training variables. std::vector returnList; @@ -153,7 +157,7 @@ int main() agent.Deterministic() = true; // Creating and setting up the gym environment for testing. 
- gym::Environment envTest("gym.kurg.org", "4040", "Pendulum-v0"); + gym::Environment envTest("localhost", "4040", "Pendulum-v1"); envTest.monitor.start("./dummy/", true, true); // Resets the environment. From fc108dc6ecb0b00ee2761959d477bae7829475ff Mon Sep 17 00:00:00 2001 From: Ali-Hossam Date: Mon, 8 Apr 2024 12:53:22 +0200 Subject: [PATCH 07/12] Refactor pendulum_sac.cpp to work with the new version of ContinousActionEnv class --- .../pendulum_sac/pendulum_sac.cpp | 22 +++++++++---------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/reinforcement_learning_gym/pendulum_sac/pendulum_sac.cpp b/reinforcement_learning_gym/pendulum_sac/pendulum_sac.cpp index ea5c8e50..78ed6753 100644 --- a/reinforcement_learning_gym/pendulum_sac/pendulum_sac.cpp +++ b/reinforcement_learning_gym/pendulum_sac/pendulum_sac.cpp @@ -17,6 +17,10 @@ using namespace mlpack; using namespace ens; +// Set up the state and action space. +constexpr size_t stateDimension = 3; +constexpr size_t actionSize = 1; + template::State nextState; nextState.Data() = env.observation; replayMethod.Store( @@ -86,17 +90,12 @@ void Train(gym::Environment& env, int main() { // Initializing the agent. - - // Set up the state and action space. - ContinuousActionEnv::State::dimension = 3; - ContinuousActionEnv::Action::size = 1; - // Set up the actor and critic networks. FFN policyNetwork( EmptyLoss(), GaussianInitialization(0, 0.1)); policyNetwork.Add(32); policyNetwork.Add(); - policyNetwork.Add(ContinuousActionEnv::Action::size); + policyNetwork.Add(actionSize); policyNetwork.Add(); FFN qNetwork( @@ -106,7 +105,8 @@ int main() qNetwork.Add(1); // Set up the policy method. - RandomReplay replayMethod(32, 10000); + RandomReplay> + replayMethod(32, 10000); // Set up training configurations. TrainingConfig config; @@ -114,7 +114,7 @@ int main() config.UpdateInterval() = 1; // Set up Soft actor-critic agent. - SAC, decltype(qNetwork), decltype(policyNetwork), AdamUpdate> @@ -122,7 +122,7 @@ int main() // Preparation for training the agent. // Set up the gym training environment. - gym::Environment env("gym.kurg.org", "4040", "Pendulum-v0"); + gym::Environment env("localhost", "4040", "Pendulum-v1"); // Initializing training variables. std::vector returnList; @@ -146,7 +146,7 @@ int main() agent.Deterministic() = true; // Creating and setting up the gym environment for testing. - gym::Environment envTest("gym.kurg.org", "4040", "Pendulum-v0"); + gym::Environment envTest("localhost", "4040", "Pendulum-v1"); envTest.monitor.start("./dummy/", true, true); // Resets the environment. From 89bbe918cb4ae635cb8c86e8fe349a2dca9174a4 Mon Sep 17 00:00:00 2001 From: Ali-Hossam Date: Mon, 8 Apr 2024 13:17:39 +0200 Subject: [PATCH 08/12] Refactor pendulum_sac.cpp to work with the new ContinuousActionEnv class --- reinforcement_learning_gym/pendulum_sac/pendulum_sac.cpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/reinforcement_learning_gym/pendulum_sac/pendulum_sac.cpp b/reinforcement_learning_gym/pendulum_sac/pendulum_sac.cpp index 78ed6753..068e1a03 100644 --- a/reinforcement_learning_gym/pendulum_sac/pendulum_sac.cpp +++ b/reinforcement_learning_gym/pendulum_sac/pendulum_sac.cpp @@ -147,7 +147,6 @@ int main() // Creating and setting up the gym environment for testing. gym::Environment envTest("localhost", "4040", "Pendulum-v1"); - envTest.monitor.start("./dummy/", true, true); // Resets the environment. 
envTest.reset(); @@ -175,7 +174,7 @@ int main() if (envTest.done) { std::cout << " Total steps: " << totalSteps - << "\\t Total reward: " << totalReward << std::endl; + << "\t Total reward: " << totalReward << std::endl; break; } @@ -231,7 +230,7 @@ int main() if (envTest.done) { std::cout << " Total steps: " << totalSteps - << "\\t Total reward: " << totalReward << std::endl; + << "\t Total reward: " << totalReward << std::endl; break; } From a62a1648a7af969cfe7970d2800bb11da1624b38 Mon Sep 17 00:00:00 2001 From: Ali-Hossam Date: Mon, 8 Apr 2024 13:18:27 +0200 Subject: [PATCH 09/12] Replace gym env V0 with V1 --- reinforcement_learning_gym/pendulum_td3/pendulum_td3.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/reinforcement_learning_gym/pendulum_td3/pendulum_td3.cpp b/reinforcement_learning_gym/pendulum_td3/pendulum_td3.cpp index 43d8d6a4..ebb8ff77 100644 --- a/reinforcement_learning_gym/pendulum_td3/pendulum_td3.cpp +++ b/reinforcement_learning_gym/pendulum_td3/pendulum_td3.cpp @@ -146,10 +146,11 @@ int main() agent.Deterministic() = true; // Creating and setting up the gym environment for testing. - gym::Environment envTest("localhost", "4040", "Pendulum-v1-render"); + gym::Environment envTest("localhost", "4040", "Pendulum-v1"); // Resets the environment. envTest.reset(); + envTest.render(); double totalReward = 0; size_t totalSteps = 0; From efc730bd184eddd0cc2804b6b42400a356e7ad90 Mon Sep 17 00:00:00 2001 From: Ali-Hossam Date: Mon, 8 Apr 2024 13:28:38 +0200 Subject: [PATCH 10/12] Update dqn Network --- .../cartpole_dqn/cartpole_dqn.cpp | 46 ++++++++++++------- 1 file changed, 29 insertions(+), 17 deletions(-) diff --git a/reinforcement_learning_gym/cartpole_dqn/cartpole_dqn.cpp b/reinforcement_learning_gym/cartpole_dqn/cartpole_dqn.cpp index 711c6aa0..5953e18f 100644 --- a/reinforcement_learning_gym/cartpole_dqn/cartpole_dqn.cpp +++ b/reinforcement_learning_gym/cartpole_dqn/cartpole_dqn.cpp @@ -17,6 +17,10 @@ using namespace mlpack; using namespace ens; +// Set up the state and action space. +constexpr size_t stateDimension = 4; +constexpr size_t actionSize = 2; + template::State nextState; nextState.Data() = env.observation; replayMethod.Store( @@ -75,8 +79,8 @@ void Train( { std::cout << "Avg return in last " << consecutiveEpisodes << " episodes: " << averageReturn - << "\\t Episode return: " << episodeReturn - << "\\t Total steps: " << agent.TotalSteps() << std::endl; + << "\t Episode return: " << episodeReturn + << "\t Total steps: " << agent.TotalSteps() << std::endl; } } } @@ -84,17 +88,21 @@ void Train( int main() { // Initializing the agent. - // Set up the state and action space. - DiscreteActionEnv::State::dimension = 4; - DiscreteActionEnv::Action::size = 2; // Set up the network. - SimpleDQN<> model(DiscreteActionEnv::State::dimension, - 128, - 32, - DiscreteActionEnv::Action::size); + FFN network( + MeanSquaredError(), GaussianInitialization(0, 1)); + network.Add(128); + network.Add(); + network.Add(actionSize); + + SimpleDQN<> model(network); + // Set up the policy and replay method. - GreedyPolicy policy(1.0, 1000, 0.1, 0.99); - RandomReplay replayMethod(32, 2000); + GreedyPolicy> + policy(1.0, 1000, 0.1, 0.99); + RandomReplay> + replayMethod(32, 2000); + // Set up training configurations. TrainingConfig config; config.StepSize() = 0.001; @@ -103,12 +111,16 @@ int main() config.ExplorationSteps() = 100; config.DoubleQLearning() = false; config.StepLimit() = 200; + // Set up DQN agent. 
- QLearning + QLearning, + decltype(model), + AdamUpdate, decltype(policy)> agent(config, model, policy, replayMethod); + // Preparation for training the agent. // Set up the gym training environment. - gym::Environment env("gym.kurg.org", "4040", "CartPole-v0"); + gym::Environment env("localhost", "4040", "CartPole-v0"); // Initializing training variables. std::vector returnList; @@ -133,7 +145,7 @@ int main() agent.Deterministic() = true; // Creating and setting up the gym environment for testing. - gym::Environment envTest("gym.kurg.org", "4040", "CartPole-v0"); + gym::Environment envTest("localhost", "4040", "CartPole-v0"); envTest.monitor.start("./dummy/", true, true); // Resets the environment. @@ -162,7 +174,7 @@ int main() if (envTest.done) { std::cout << " Total steps: " << totalSteps - << "\\t Total reward: " << totalReward << std::endl; + << "\t Total reward: " << totalReward << std::endl; break; } @@ -215,7 +227,7 @@ int main() if (envTest.done) { std::cout << " Total steps: " << totalSteps - << "\\t Total reward: " << totalReward << std::endl; + << "\t Total reward: " << totalReward << std::endl; break; } From be140e3c65c14879b09054d2e72bce58713dca8a Mon Sep 17 00:00:00 2001 From: Ali-Hossam Date: Mon, 8 Apr 2024 13:47:43 +0200 Subject: [PATCH 11/12] replaced {envname}_render with env.render --- .../mountain_car_ddpg/mountain_car_ddpg.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/reinforcement_learning_gym/mountain_car_ddpg/mountain_car_ddpg.cpp b/reinforcement_learning_gym/mountain_car_ddpg/mountain_car_ddpg.cpp index 348506aa..65fc703c 100644 --- a/reinforcement_learning_gym/mountain_car_ddpg/mountain_car_ddpg.cpp +++ b/reinforcement_learning_gym/mountain_car_ddpg/mountain_car_ddpg.cpp @@ -156,16 +156,17 @@ int main() returnList, episodes, consecutiveEpisodes, - 100000); + 10000); // Testing the trained agent. agent.Deterministic() = true; // Creating and setting up the gym environment for testing. - gym::Environment envTest("localhost", "4040", "MountainCarContinuous-v0-render"); + gym::Environment envTest("localhost", "4040", "MountainCarContinuous-v0"); // Resets the environment. envTest.reset(); + envTest.render(); double totalReward = 0; size_t totalSteps = 0; From b56cdd9cf8089168851e757df80397f8ce9a5088 Mon Sep 17 00:00:00 2001 From: Ali-Hossam Date: Thu, 11 Apr 2024 14:17:38 +0200 Subject: [PATCH 12/12] removed first envTest.close and second envTest.render as they were preventing reusing the test env. --- reinforcement_learning_gym/acrobot_dqn/acrobot_dqn.cpp | 5 +---- .../bipedal_walker_sac/bipedal_walker_sac.cpp | 2 +- reinforcement_learning_gym/cartpole_dqn/cartpole_dqn.cpp | 2 -- .../lunar_lander_dqn/lunar_lander_dqn.cpp | 2 -- .../mountain_car_dqn/mountain_car_dqn.cpp | 2 -- reinforcement_learning_gym/pendulum_dqn/pendulum_dqn.cpp | 2 -- reinforcement_learning_gym/pendulum_sac/pendulum_sac.cpp | 6 ++---- 7 files changed, 4 insertions(+), 17 deletions(-) diff --git a/reinforcement_learning_gym/acrobot_dqn/acrobot_dqn.cpp b/reinforcement_learning_gym/acrobot_dqn/acrobot_dqn.cpp index 971098bf..3b3aec6e 100644 --- a/reinforcement_learning_gym/acrobot_dqn/acrobot_dqn.cpp +++ b/reinforcement_learning_gym/acrobot_dqn/acrobot_dqn.cpp @@ -186,7 +186,6 @@ int main() // << totalReward << "\t Action taken: " << action; } - envTest.close(); std::cout << envTest.url() << std::endl; /** @@ -197,12 +196,10 @@ int main() agent.Deterministic() = true; // Creating and setting up the gym environment for testing. 
- // envTest.monitor.start("./dummy/", true, true); - envTest.compression(9); + envTest.monitor.start("./dummy/", true, true); // Resets the environment. envTest.reset(); - envTest.render(); totalReward = 0; totalSteps = 0; diff --git a/reinforcement_learning_gym/bipedal_walker_sac/bipedal_walker_sac.cpp b/reinforcement_learning_gym/bipedal_walker_sac/bipedal_walker_sac.cpp index 56f15899..c67fc302 100644 --- a/reinforcement_learning_gym/bipedal_walker_sac/bipedal_walker_sac.cpp +++ b/reinforcement_learning_gym/bipedal_walker_sac/bipedal_walker_sac.cpp @@ -218,7 +218,7 @@ int main() if (envTest.done) { std::cout << " Total steps: " << totalSteps - << "\\t Total reward: " << totalReward << std::endl; + << "\t Total reward: " << totalReward << std::endl; break; } diff --git a/reinforcement_learning_gym/cartpole_dqn/cartpole_dqn.cpp b/reinforcement_learning_gym/cartpole_dqn/cartpole_dqn.cpp index 5953e18f..5b19ee11 100644 --- a/reinforcement_learning_gym/cartpole_dqn/cartpole_dqn.cpp +++ b/reinforcement_learning_gym/cartpole_dqn/cartpole_dqn.cpp @@ -183,7 +183,6 @@ int main() // << totalReward << "\\t Action taken: " << action; } - envTest.close(); std::cout << envTest.url() << std::endl; // A little more training... @@ -203,7 +202,6 @@ int main() // Resets the environment. envTest.reset(); - envTest.render(); totalReward = 0; totalSteps = 0; diff --git a/reinforcement_learning_gym/lunar_lander_dqn/lunar_lander_dqn.cpp b/reinforcement_learning_gym/lunar_lander_dqn/lunar_lander_dqn.cpp index 4a2babed..4c344309 100644 --- a/reinforcement_learning_gym/lunar_lander_dqn/lunar_lander_dqn.cpp +++ b/reinforcement_learning_gym/lunar_lander_dqn/lunar_lander_dqn.cpp @@ -178,7 +178,6 @@ int main() // << totalReward << \"\t Action taken: " << action; } - envTest.close(); std::cout << envTest.url() << std::endl; // A little more training... @@ -204,7 +203,6 @@ int main() // Resets the environment. envTest.reset(); - envTest.render(); totalReward = 0; totalSteps = 0; diff --git a/reinforcement_learning_gym/mountain_car_dqn/mountain_car_dqn.cpp b/reinforcement_learning_gym/mountain_car_dqn/mountain_car_dqn.cpp index c7f19819..657625dc 100644 --- a/reinforcement_learning_gym/mountain_car_dqn/mountain_car_dqn.cpp +++ b/reinforcement_learning_gym/mountain_car_dqn/mountain_car_dqn.cpp @@ -204,7 +204,6 @@ int main() // << totalReward << "\t Action taken: " << action; } - envTest.close(); std::cout << envTest.url() << std::endl; // A little more training... @@ -226,7 +225,6 @@ int main() // Resets the environment. envTest.reset(); - envTest.render(); totalReward = 0; totalSteps = 0; diff --git a/reinforcement_learning_gym/pendulum_dqn/pendulum_dqn.cpp b/reinforcement_learning_gym/pendulum_dqn/pendulum_dqn.cpp index 8f4fa77f..b2e3ac3a 100644 --- a/reinforcement_learning_gym/pendulum_dqn/pendulum_dqn.cpp +++ b/reinforcement_learning_gym/pendulum_dqn/pendulum_dqn.cpp @@ -195,7 +195,6 @@ int main() // << totalReward << "\t Action taken: " << action; } - envTest.close(); std::cout << envTest.url() << std::endl; //A little more training... @@ -216,7 +215,6 @@ int main() // Resets the environment. 
envTest.reset(); - envTest.render(); totalReward = 0; totalSteps = 0; diff --git a/reinforcement_learning_gym/pendulum_sac/pendulum_sac.cpp b/reinforcement_learning_gym/pendulum_sac/pendulum_sac.cpp index 068e1a03..9110bc8d 100644 --- a/reinforcement_learning_gym/pendulum_sac/pendulum_sac.cpp +++ b/reinforcement_learning_gym/pendulum_sac/pendulum_sac.cpp @@ -81,8 +81,8 @@ void Train(gym::Environment& env, { std::cout << "Avg return in last " << returnList.size() << " episodes: " << averageReturn - << "\\t Episode return: " << episodeReturn - << "\\t Total steps: " << agent.TotalSteps() << std::endl; + << "\t Episode return: " << episodeReturn + << "\t Total steps: " << agent.TotalSteps() << std::endl; } } } @@ -183,7 +183,6 @@ int main() // << totalReward << "\\t Action taken: " << action; } - envTest.close(); std::cout << envTest.url() << std::endl; // A little more training... @@ -206,7 +205,6 @@ int main() // Resets the environment. envTest.reset(); - envTest.render(); totalReward = 0; totalSteps = 0;