Refactor rl examples - updated README #223

Closed · wants to merge 12 commits
16 changes: 12 additions & 4 deletions README.md
@@ -10,10 +10,12 @@ https://lab.mlpack.org/.

### 0. Contents

1. [Overview](#1-overview)
2. [Building the examples and usage](#2-Building-the-examples-and-usage)
3. [List of examples](#3-List-of-examples)
4. [Datasets](#4-datasets)
- [0. Contents](#0-contents)
- [1. Overview](#1-overview)
- [2. Building the examples and usage](#2-building-the-examples-and-usage)
- [3. List of examples](#3-list-of-examples)
- [4. Datasets](#4-datasets)
- [5. Setup](#5-setup)

### 1. Overview

@@ -93,3 +95,9 @@ extract all the necessary dataset in order for examples to work perfectly:
cd tools/
./download_data_set.py
```

### 5. Setup
To set up a local Jupyter environment that works with C++ using xeus-cling, run the following command:
Member commented:
But I don't think the intention was for users to run that script directly. It would be better to just use Binderhub or similar.

It's true that you could run this script, but it has a number of assumptions that may not be true for users:

  1. Users may not be using conda.
  2. Users may not be interested in the C++ notebook examples at all, but might be using the Makefile-built examples.
  3. Users may not even be interested in C++ at all and may be focusing on other languages.

So I don't think that I would want to include this in the general README; users will then attempt to run the command, and may encounter problems that may not even be relevant if they're not looking to use Jupyterlab.

I think as an alternative it may be more reasonable to comment that script a little bit better. Or, if we restructured the examples in the repository to organize them by language, it might make more sense to keep this documentation in a directory specific to the C++ notebook examples.

```sh
./script/jupyter-conda-setup.sh <environment_name>
```
32 changes: 17 additions & 15 deletions reinforcement_learning_gym/acrobot_dqn/acrobot_dqn.cpp
@@ -17,6 +17,10 @@
using namespace mlpack;
using namespace ens;

// Set up the state and action space.
constexpr size_t stateDimension = 6;
constexpr size_t actionSize = 3;

// Function to train the agent on the Acrobot-v1 gym environment.
template<typename EnvironmentType,
typename NetworkType,
@@ -49,7 +53,7 @@ void Train(gym::Environment& env,
arma::mat action = {double(agent.Action().action)};

env.step(action);
DiscreteActionEnv::State nextState;
DiscreteActionEnv<stateDimension, actionSize>::State nextState;
nextState.Data() = env.observation;

replayMethod.Store(
@@ -85,22 +89,22 @@
int main()
{
// Initializing the agent.
// Set up the state and action space.
DiscreteActionEnv::State::dimension = 6;
DiscreteActionEnv::Action::size = 3;

// Set up the network.
FFN<MeanSquaredError, GaussianInitialization> module(
MeanSquaredError(), GaussianInitialization(0, 1));
module.Add<Linear>(DiscreteActionEnv::State::dimension, 64);
module.Add<ReLULayer>();
module.Add<Linear>(64, DiscreteActionEnv::Action::size);
module.Add<Linear>(64);
module.Add<ReLU>();
module.Add<Linear>(actionSize);
SimpleDQN<> model(module);

// Set up the policy method.
GreedyPolicy<DiscreteActionEnv> policy(1.0, 1000, 0.1, 0.99);
GreedyPolicy<DiscreteActionEnv<stateDimension, actionSize>>
policy(1.0, 1000, 0.1, 0.99);

// To enable 3-step learning, we set the last parameter of the replay method as 3.
PrioritizedReplay<DiscreteActionEnv> replayMethod(64, 5000, 0.6, 3);
PrioritizedReplay<DiscreteActionEnv<stateDimension, actionSize>>
replayMethod(64, 5000, 0.6, 3);

// Set up training configurations.
TrainingConfig config;
@@ -111,7 +115,7 @@ int main()
config.DoubleQLearning() = true;

// Set up DQN agent.
QLearning<DiscreteActionEnv,
QLearning<DiscreteActionEnv<stateDimension, actionSize>,
decltype(model),
AdamUpdate,
decltype(policy),
@@ -120,7 +124,7 @@

// Preparation for training the agent
// Set up the gym training environment.
gym::Environment env("gym.kurg.org", "4040", "Acrobot-v1");
gym::Environment env("localhost", "4040", "Acrobot-v1");
Member commented:
I'm not sure of the status of gym.kurg.org, but I don't know if this is the right thing to do here, since we would now need to expect a user to be running the gym locally.

Author commented:
True, this would need the user to be running the gym locally. Running gym_tcp_api locally was the only way I could get it to work. I couldn't find any working examples using gym.kurg.org, so I assumed it's no longer functional. Also, the example in the gym_tcp_api directory uses localhost.
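
For readers following this thread: below is a minimal sketch of the client-side check these examples rely on, assuming a gym_tcp_api server is already running locally on port 4040. It uses only the `gym::Environment` calls that already appear in this diff; the include paths and the fixed test action are illustrative assumptions, not part of this PR.

```cpp
// Minimal connectivity check against a locally running gym_tcp_api server.
// Assumes the server is already listening on localhost:4040; the include
// paths and the hard-coded action are assumptions for illustration.
#include <iostream>
#include <armadillo>
#include "gym/environment.hpp"

int main()
{
  // Connect to the local server instead of gym.kurg.org.
  gym::Environment env("localhost", "4040", "CartPole-v0");

  // Reset the environment and take a single fixed step to confirm that
  // observations flow back over the TCP connection.
  env.reset();
  arma::mat action = {1.0};
  env.step(action);

  std::cout << "Observation elements: " << env.observation.n_elem
            << ", done: " << env.done << std::endl;

  env.close();
  return 0;
}
```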


// Initializing training variables.
std::vector<double> returnList;
@@ -144,7 +148,7 @@ int main()
agent.Deterministic() = true;

// Creating and setting up the gym environment for testing.
gym::Environment envTest("gym.kurg.org", "4040", "Acrobot-v1");
gym::Environment envTest("localhost", "4040", "Acrobot-v1");
envTest.monitor.start("./dummy/", true, true);

// Resets the environment.
@@ -182,7 +186,6 @@ int main()
// << totalReward << "\t Action taken: " << action;
}

envTest.close();
std::cout << envTest.url() << std::endl;

/**
@@ -194,10 +197,9 @@

// Creating and setting up the gym environment for testing.
envTest.monitor.start("./dummy/", true, true);

// Resets the environment.
envTest.reset();
envTest.render();

totalReward = 0;
totalSteps = 0;
@@ -19,6 +19,10 @@ using namespace mlpack;
using namespace ens;
using namespace gym;

// Set up the state and action space.
constexpr size_t stateDimension = 24;
constexpr size_t actionSize = 4;

template<typename EnvironmentType,
typename NetworkType,
typename UpdaterType,
@@ -48,7 +52,7 @@ void Train(gym::Environment& env,
arma::mat action = {agent.Action().action};

env.step(action);
ContinuousActionEnv::State nextState;
ContinuousActionEnv<stateDimension, actionSize>::State nextState;
nextState.Data() = env.observation;

replayMethod.Store(
@@ -94,10 +98,6 @@ void Train(gym::Environment& env,
int main()
{
// Initializing the agent.
// Set up the state and action space.
ContinuousActionEnv::State::dimension = 24;
ContinuousActionEnv::Action::size = 4;

bool usePreTrainedModel = true;

// Set up the actor and critic networks.
@@ -107,9 +107,8 @@ int main()
policyNetwork.Add<ReLU>();
policyNetwork.Add<Linear>(128);
policyNetwork.Add<ReLU>();
policyNetwork.Add<Linear>(ContinuousActionEnv::Action::size);
policyNetwork.Add<Linear>(actionSize);
policyNetwork.Add<TanH>();
policyNetwork.ResetParameters();

FFN<EmptyLoss, GaussianInitialization> qNetwork(
EmptyLoss(), GaussianInitialization(0, 0.01));
@@ -118,10 +117,11 @@
qNetwork.Add<Linear>(128);
qNetwork.Add<ReLU>();
qNetwork.Add<Linear>(1);
qNetwork.ResetParameters();


// Set up the replay method.
RandomReplay<ContinuousActionEnv> replayMethod(32, 10000);
RandomReplay<ContinuousActionEnv<stateDimension, actionSize>>
replayMethod(32, 10000);

// Set up training configurations.
TrainingConfig config;
@@ -148,14 +148,14 @@
* The default is to use the pre-trained model (usePreTrainedModel). Otherwise you can
* disable this by changing usePreTrainedModel to false and then recompiling this example.
*/
SAC<ContinuousActionEnv,
SAC<ContinuousActionEnv<stateDimension, actionSize>,
decltype(qNetwork),
decltype(policyNetwork),
AdamUpdate>
agent(config, qNetwork, policyNetwork, replayMethod);

const std::string environment = "BipedalWalker-v3";
const std::string host = "gym.kurg.org";
const std::string host = "127.0.0.1";
const std::string port = "4040";

Environment env(host, port, environment);
@@ -187,7 +187,7 @@ int main()
agent.Deterministic() = true;

// Creating and setting up the gym environment for testing.
gym::Environment envTest("gym.kurg.org", "4040", "BipedalWalker-v3");
gym::Environment envTest(host, port, environment);
envTest.monitor.start("./dummy/", true, true);

// Resets the environment.
@@ -218,7 +218,7 @@ int main()
if (envTest.done)
{
std::cout << " Total steps: " << totalSteps
<< "\\t Total reward: " << totalReward << std::endl;
<< "\t Total reward: " << totalReward << std::endl;
break;
}

48 changes: 29 additions & 19 deletions reinforcement_learning_gym/cartpole_dqn/cartpole_dqn.cpp
@@ -17,6 +17,10 @@
using namespace mlpack;
using namespace ens;

// Set up the state and action space.
constexpr size_t stateDimension = 4;
constexpr size_t actionSize = 2;

template<typename EnvironmentType,
typename NetworkType,
typename UpdaterType,
@@ -45,7 +49,7 @@ void Train(
arma::mat action = {double(agent.Action().action)};

env.step(action);
DiscreteActionEnv::State nextState;
DiscreteActionEnv<stateDimension, actionSize>::State nextState;
nextState.Data() = env.observation;

replayMethod.Store(
@@ -75,26 +79,30 @@ void Train(
{
std::cout << "Avg return in last " << consecutiveEpisodes
<< " episodes: " << averageReturn
<< "\\t Episode return: " << episodeReturn
<< "\\t Total steps: " << agent.TotalSteps() << std::endl;
<< "\t Episode return: " << episodeReturn
<< "\t Total steps: " << agent.TotalSteps() << std::endl;
}
}
}

int main()
{
// Initializing the agent.
// Set up the state and action space.
DiscreteActionEnv::State::dimension = 4;
DiscreteActionEnv::Action::size = 2;
// Set up the network.
SimpleDQN<> model(DiscreteActionEnv::State::dimension,
128,
32,
DiscreteActionEnv::Action::size);
FFN<MeanSquaredError, GaussianInitialization> network(
MeanSquaredError(), GaussianInitialization(0, 1));
network.Add<Linear>(128);
network.Add<ReLU>();
network.Add<Linear>(actionSize);

SimpleDQN<> model(network);

// Set up the policy and replay method.
GreedyPolicy<DiscreteActionEnv> policy(1.0, 1000, 0.1, 0.99);
RandomReplay<DiscreteActionEnv> replayMethod(32, 2000);
GreedyPolicy<DiscreteActionEnv<stateDimension, actionSize>>
policy(1.0, 1000, 0.1, 0.99);
RandomReplay<DiscreteActionEnv<stateDimension, actionSize>>
replayMethod(32, 2000);

// Set up training configurations.
TrainingConfig config;
config.StepSize() = 0.001;
@@ -103,12 +111,16 @@ int main()
config.ExplorationSteps() = 100;
config.DoubleQLearning() = false;
config.StepLimit() = 200;

// Set up DQN agent.
QLearning<DiscreteActionEnv, decltype(model), AdamUpdate, decltype(policy)>
QLearning<DiscreteActionEnv<stateDimension, actionSize>,
decltype(model),
AdamUpdate, decltype(policy)>
agent(config, model, policy, replayMethod);

// Preparation for training the agent.
// Set up the gym training environment.
gym::Environment env("gym.kurg.org", "4040", "CartPole-v0");
gym::Environment env("localhost", "4040", "CartPole-v0");

// Initializing training variables.
std::vector<double> returnList;
@@ -133,7 +145,7 @@ int main()
agent.Deterministic() = true;

// Creating and setting up the gym environment for testing.
gym::Environment envTest("gym.kurg.org", "4040", "CartPole-v0");
gym::Environment envTest("localhost", "4040", "CartPole-v0");
envTest.monitor.start("./dummy/", true, true);

// Resets the environment.
@@ -162,7 +174,7 @@ int main()
if (envTest.done)
{
std::cout << " Total steps: " << totalSteps
<< "\\t Total reward: " << totalReward << std::endl;
<< "\t Total reward: " << totalReward << std::endl;
break;
}

@@ -171,7 +183,6 @@ int main()
// << totalReward << "\\t Action taken: " << action;
}

envTest.close();
std::cout << envTest.url() << std::endl;

// A little more training...
@@ -191,7 +202,6 @@

// Resets the environment.
envTest.reset();
envTest.render();

totalReward = 0;
totalSteps = 0;
@@ -215,7 +225,7 @@
if (envTest.done)
{
std::cout << " Total steps: " << totalSteps
<< "\\t Total reward: " << totalReward << std::endl;
<< "\t Total reward: " << totalReward << std::endl;
break;
}
