From 1edb99126b7a0b889650837d66462237302dc35d Mon Sep 17 00:00:00 2001 From: Ali-Hossam Date: Mon, 1 Apr 2024 14:37:01 +0200 Subject: [PATCH 01/12] updated README --- README.md | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 50c59e4c..815e21c6 100644 --- a/README.md +++ b/README.md @@ -10,10 +10,12 @@ https://lab.mlpack.org/. ### 0. Contents - 1. [Overview](#1-overview) - 2. [Building the examples and usage](#2-Building-the-examples-and-usage) - 3. [List of examples](#3-List-of-examples) - 4. [Datasets](#4-datasets) +- [0. Contents](#0-contents) +- [1. Overview](#1-overview) +- [2. Building the examples and usage](#2-building-the-examples-and-usage) +- [3. List of examples](#3-list-of-examples) +- [4. Datasets](#4-datasets) +- [5. Setup](#5-setup) ### 1. Overview @@ -93,3 +95,9 @@ extract all the necessary dataset in order for examples to work perfectly: cd tools/ ./download_data_set.py ``` + +### 5. Setup +To setup a jupyter local environment that work with C++ using xeus-cling you shall execute the following command: +```sh +./script/jupyter-conda-setup.sh +``` \ No newline at end of file From 5c621a9544f0cb52146f4fb48d6d553c40cfd62d Mon Sep 17 00:00:00 2001 From: Ali-Hossam Date: Sat, 6 Apr 2024 14:39:41 +0200 Subject: [PATCH 02/12] Adapted to work with new DiscreteActionEnv class --- .../acrobot_dqn/acrobot_dqn.cpp | 33 +++++++++++-------- 1 file changed, 19 insertions(+), 14 deletions(-) diff --git a/reinforcement_learning_gym/acrobot_dqn/acrobot_dqn.cpp b/reinforcement_learning_gym/acrobot_dqn/acrobot_dqn.cpp index eb434939..971098bf 100644 --- a/reinforcement_learning_gym/acrobot_dqn/acrobot_dqn.cpp +++ b/reinforcement_learning_gym/acrobot_dqn/acrobot_dqn.cpp @@ -17,6 +17,10 @@ using namespace mlpack; using namespace ens; +// Set up the state and action space. +constexpr size_t stateDimension = 6; +constexpr size_t actionSize = 3; + // Function to train the agent on the Acrobot-v1 gym environment. template::State nextState; nextState.Data() = env.observation; replayMethod.Store( @@ -85,22 +89,22 @@ void Train(gym::Environment& env, int main() { // Initializing the agent. - // Set up the state and action space. - DiscreteActionEnv::State::dimension = 6; - DiscreteActionEnv::Action::size = 3; // Set up the network. FFN module( MeanSquaredError(), GaussianInitialization(0, 1)); - module.Add(DiscreteActionEnv::State::dimension, 64); - module.Add(); - module.Add(64, DiscreteActionEnv::Action::size); + module.Add(64); + module.Add(); + module.Add(actionSize); SimpleDQN<> model(module); // Set up the policy method. - GreedyPolicy policy(1.0, 1000, 0.1, 0.99); + GreedyPolicy> + policy(1.0, 1000, 0.1, 0.99); + // To enable 3-step learning, we set the last parameter of the replay method as 3. - PrioritizedReplay replayMethod(64, 5000, 0.6, 3); + PrioritizedReplay> + replayMethod(64, 5000, 0.6, 3); // Set up training configurations. TrainingConfig config; @@ -111,7 +115,7 @@ int main() config.DoubleQLearning() = true; // Set up DQN agent. - QLearning, decltype(model), AdamUpdate, decltype(policy), @@ -120,7 +124,7 @@ int main() // Preparation for training the agent // Set up the gym training environment. - gym::Environment env("gym.kurg.org", "4040", "Acrobot-v1"); + gym::Environment env("localhost", "4040", "Acrobot-v1"); // Initializing training variables. std::vector returnList; @@ -144,7 +148,7 @@ int main() agent.Deterministic() = true; // Creating and setting up the gym environment for testing. 
- gym::Environment envTest("gym.kurg.org", "4040", "Acrobot-v1"); + gym::Environment envTest("localhost", "4040", "Acrobot-v1"); envTest.monitor.start("./dummy/", true, true); // Resets the environment. @@ -193,8 +197,9 @@ int main() agent.Deterministic() = true; // Creating and setting up the gym environment for testing. - envTest.monitor.start("./dummy/", true, true); - + // envTest.monitor.start("./dummy/", true, true); + envTest.compression(9); + // Resets the environment. envTest.reset(); envTest.render(); From 81448915894c42437bc889f2d5130f7eece2b701 Mon Sep 17 00:00:00 2001 From: Ali-Hossam Date: Sun, 7 Apr 2024 14:05:22 +0200 Subject: [PATCH 03/12] Adapt bidpedal_walker_sac to work with new ContinuousActionEnv class --- .../bipedal_walker_sac/bipedal_walker_sac.cpp | 24 +++++++++---------- 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/reinforcement_learning_gym/bipedal_walker_sac/bipedal_walker_sac.cpp b/reinforcement_learning_gym/bipedal_walker_sac/bipedal_walker_sac.cpp index 3ca5570f..56f15899 100644 --- a/reinforcement_learning_gym/bipedal_walker_sac/bipedal_walker_sac.cpp +++ b/reinforcement_learning_gym/bipedal_walker_sac/bipedal_walker_sac.cpp @@ -19,6 +19,10 @@ using namespace mlpack; using namespace ens; using namespace gym; +// Set up the state and action space. +constexpr size_t stateDimension = 24; +constexpr size_t actionSize = 4; + template::State nextState; nextState.Data() = env.observation; replayMethod.Store( @@ -94,10 +98,6 @@ void Train(gym::Environment& env, int main() { // Initializing the agent. - // Set up the state and action space. - ContinuousActionEnv::State::dimension = 24; - ContinuousActionEnv::Action::size = 4; - bool usePreTrainedModel = true; // Set up the actor and critic networks. @@ -107,9 +107,8 @@ int main() policyNetwork.Add(); policyNetwork.Add(128); policyNetwork.Add(); - policyNetwork.Add(ContinuousActionEnv::Action::size); + policyNetwork.Add(actionSize); policyNetwork.Add(); - policyNetwork.ResetParameters(); FFN qNetwork( EmptyLoss(), GaussianInitialization(0, 0.01)); @@ -118,10 +117,11 @@ int main() qNetwork.Add(128); qNetwork.Add(); qNetwork.Add(1); - qNetwork.ResetParameters(); + // Set up the replay method. - RandomReplay replayMethod(32, 10000); + RandomReplay> + replayMethod(32, 10000); // Set up training configurations. TrainingConfig config; @@ -148,14 +148,14 @@ int main() * To default is to use the usePreTrainedModel. Otherwise you can disable this * by change the usePreTrainedModel to false and then recompile this example. */ - SAC, decltype(qNetwork), decltype(policyNetwork), AdamUpdate> agent(config, qNetwork, policyNetwork, replayMethod); const std::string environment = "BipedalWalker-v3"; - const std::string host = "gym.kurg.org"; + const std::string host = "127.0.0.1"; const std::string port = "4040"; Environment env(host, port, environment); @@ -187,7 +187,7 @@ int main() agent.Deterministic() = true; // Creating and setting up the gym environment for testing. - gym::Environment envTest("gym.kurg.org", "4040", "BipedalWalker-v3"); + gym::Environment envTest(host, port, environment); envTest.monitor.start("./dummy/", true, true); // Resets the environment. 
From bb9e8d3595cafdecce9baa6f80b8fc9db4f65a4e Mon Sep 17 00:00:00 2001 From: Ali-Hossam Date: Sun, 7 Apr 2024 23:29:01 +0200 Subject: [PATCH 04/12] Adapt lunar_lander_dqn to work with new DiscreteActionEnv class --- .../lunar_lander_dqn/lunar_lander_dqn.cpp | 29 +++++++++++-------- 1 file changed, 17 insertions(+), 12 deletions(-) diff --git a/reinforcement_learning_gym/lunar_lander_dqn/lunar_lander_dqn.cpp b/reinforcement_learning_gym/lunar_lander_dqn/lunar_lander_dqn.cpp index d02e2e9c..4a2babed 100644 --- a/reinforcement_learning_gym/lunar_lander_dqn/lunar_lander_dqn.cpp +++ b/reinforcement_learning_gym/lunar_lander_dqn/lunar_lander_dqn.cpp @@ -18,6 +18,10 @@ using namespace mlpack; using namespace ens; +// Set up the state and action space. +constexpr size_t stateDimension = 8; +constexpr size_t actionSize = 4; + template::State nextState; nextState.Data() = env.observation; replayMethod.Store( @@ -82,22 +86,20 @@ void Train( int main() { // Initializing the agent. - // Set up the state and action space. - DiscreteActionEnv::State::dimension = 8; - DiscreteActionEnv::Action::size = 4; - // Set up the network. FFN network( MeanSquaredError(), GaussianInitialization(0, 1)); network.Add(128); - network.Add(); - network.Add(DiscreteActionEnv::Action::size); + network.Add(); + network.Add(actionSize); SimpleDQN<> model(network); // Set up the policy and replay method. - GreedyPolicy policy(1.0, 2000, 0.1, 0.99); - RandomReplay replayMethod(64, 100000); + GreedyPolicy> + policy(1.0, 2000, 0.1, 0.99); + RandomReplay> + replayMethod(64, 100000); // Set up training configurations. TrainingConfig config; @@ -105,12 +107,15 @@ int main() config.DoubleQLearning() = false; // Set up DQN agent. - QLearning + QLearning, + decltype(model), + AdamUpdate, + decltype(policy)> agent(config, model, policy, replayMethod); // Preparation for training the agent. // Set up the gym training environment. - gym::Environment env("gym.kurg.org", "4040", "LunarLander-v2"); + gym::Environment env("localhost", "4040", "LunarLander-v2"); // Initializing training variables. std::vector returnList; @@ -135,7 +140,7 @@ int main() agent.Deterministic() = true; // Creating and setting up the gym environment for testing. - gym::Environment envTest("gym.kurg.org", "4040", "LunarLander-v2"); + gym::Environment envTest("localhost", "4040", "LunarLander-v2"); envTest.monitor.start("./dummy/", true, true); // Resets the environment. From 3bf7dd7b9cdead0316c7971f3baa8b7d6261e606 Mon Sep 17 00:00:00 2001 From: Ali-Hossam Date: Sun, 7 Apr 2024 23:39:23 +0200 Subject: [PATCH 05/12] Adapt cartpole_dqn.cpp to work with new DicreteActionEnv class --- .../mountain_car_dqn/mountain_car_dqn.cpp | 26 ++++++++++--------- 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/reinforcement_learning_gym/mountain_car_dqn/mountain_car_dqn.cpp b/reinforcement_learning_gym/mountain_car_dqn/mountain_car_dqn.cpp index 738923c7..c7f19819 100644 --- a/reinforcement_learning_gym/mountain_car_dqn/mountain_car_dqn.cpp +++ b/reinforcement_learning_gym/mountain_car_dqn/mountain_car_dqn.cpp @@ -16,6 +16,10 @@ using namespace mlpack; using namespace ens; +// Set up the state and action space. +constexpr size_t stateDimension = 2; +constexpr size_t actionSize = 3; + template::State nextState; nextState.Data() = env.observation; // Use an adjusted reward for task completion. @@ -92,22 +96,20 @@ void Train( int main() { // Initializing the agent. - // Set up the state and action space. 
- DiscreteActionEnv::State::dimension = 2; - DiscreteActionEnv::Action::size = 3; - // Set up the network. FFN network( MeanSquaredError(), GaussianInitialization(0, 1)); network.Add(128); - network.Add(); - network.Add(DiscreteActionEnv::Action::size); + network.Add(); + network.Add(actionSize); // Set up the network. SimpleDQN<> model(network); // Set up the policy method. - GreedyPolicy policy(1.0, 1000, 0.1, 0.99); - RandomReplay replayMethod(32, 10000); + GreedyPolicy> + policy(1.0, 1000, 0.1, 0.99); + RandomReplay> + replayMethod(32, 10000); // Set up training configurations. TrainingConfig config; @@ -115,7 +117,7 @@ int main() config.ExplorationSteps() = 400; // Set up DQN agent. - QLearning, decltype(model), AdamUpdate, decltype(policy), @@ -125,7 +127,7 @@ int main() // Preparation for training the agent. // Set up the gym training environment. - gym::Environment env("gym.kurg.org", "4040", "MountainCar-v0"); + gym::Environment env("localhost", "4040", "MountainCar-v0"); // Initializing training variables. std::vector returnList; @@ -164,7 +166,7 @@ int main() agent.Deterministic() = true; // Creating and setting up the gym environment for testing. - gym::Environment envTest("gym.kurg.org", "4040", "MountainCar-v0"); + gym::Environment envTest("localhost", "4040", "MountainCar-v0"); envTest.monitor.start("./dummy/", true, true); // Resets the environment. From ad4f08c85c9c47511899d214c126d76d863c72ff Mon Sep 17 00:00:00 2001 From: Ali-Hossam Date: Sun, 7 Apr 2024 23:49:56 +0200 Subject: [PATCH 06/12] Adapt pendulum_dqn.cpp to work with new DiscreteActionEnv class --- .../pendulum_dqn/pendulum_dqn.cpp | 28 +++++++++++-------- 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/reinforcement_learning_gym/pendulum_dqn/pendulum_dqn.cpp b/reinforcement_learning_gym/pendulum_dqn/pendulum_dqn.cpp index 40868a75..8f4fa77f 100644 --- a/reinforcement_learning_gym/pendulum_dqn/pendulum_dqn.cpp +++ b/reinforcement_learning_gym/pendulum_dqn/pendulum_dqn.cpp @@ -17,6 +17,10 @@ using namespace mlpack; using namespace ens; +// Set up the state and action space. +constexpr size_t stateDimension = 3; +constexpr size_t actionSize = 3; + template::State nextState; nextState.Data() = env.observation; replayMethod.Store( @@ -82,35 +86,35 @@ void Train( int main() { // Initializing the agent. - - // Set up the state and action space. - DiscreteActionEnv::State::dimension = 3; - DiscreteActionEnv::Action::size = 3; - // Set up the network. FFN network( MeanSquaredError(), GaussianInitialization(0, 1)); network.Add(128); network.Add(); - network.Add(DiscreteActionEnv::Action::size); + network.Add(actionSize); SimpleDQN<> model(network); // Set up the policy and replay method. - GreedyPolicy policy(1.0, 1000, 0.1, 0.99); - RandomReplay replayMethod(32, 10000); + GreedyPolicy> + policy(1.0, 1000, 0.1, 0.99); + RandomReplay> + replayMethod(32, 10000); // Set up training configurations. TrainingConfig config; config.ExplorationSteps() = 100; // Set up DQN agent. - QLearning + QLearning, + decltype(model), + AdamUpdate, + decltype(policy)> agent(config, model, policy, replayMethod); // Preparation for training the agent. // Set up the gym training environment. - gym::Environment env("gym.kurg.org", "4040", "Pendulum-v0"); + gym::Environment env("localhost", "4040", "Pendulum-v1"); // Initializing training variables. std::vector returnList; @@ -153,7 +157,7 @@ int main() agent.Deterministic() = true; // Creating and setting up the gym environment for testing. 
- gym::Environment envTest("gym.kurg.org", "4040", "Pendulum-v0"); + gym::Environment envTest("localhost", "4040", "Pendulum-v1"); envTest.monitor.start("./dummy/", true, true); // Resets the environment. From fc108dc6ecb0b00ee2761959d477bae7829475ff Mon Sep 17 00:00:00 2001 From: Ali-Hossam Date: Mon, 8 Apr 2024 12:53:22 +0200 Subject: [PATCH 07/12] Refactor pendulum_sac.cpp to work with the new version of ContinousActionEnv class --- .../pendulum_sac/pendulum_sac.cpp | 22 +++++++++---------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/reinforcement_learning_gym/pendulum_sac/pendulum_sac.cpp b/reinforcement_learning_gym/pendulum_sac/pendulum_sac.cpp index ea5c8e50..78ed6753 100644 --- a/reinforcement_learning_gym/pendulum_sac/pendulum_sac.cpp +++ b/reinforcement_learning_gym/pendulum_sac/pendulum_sac.cpp @@ -17,6 +17,10 @@ using namespace mlpack; using namespace ens; +// Set up the state and action space. +constexpr size_t stateDimension = 3; +constexpr size_t actionSize = 1; + template::State nextState; nextState.Data() = env.observation; replayMethod.Store( @@ -86,17 +90,12 @@ void Train(gym::Environment& env, int main() { // Initializing the agent. - - // Set up the state and action space. - ContinuousActionEnv::State::dimension = 3; - ContinuousActionEnv::Action::size = 1; - // Set up the actor and critic networks. FFN policyNetwork( EmptyLoss(), GaussianInitialization(0, 0.1)); policyNetwork.Add(32); policyNetwork.Add(); - policyNetwork.Add(ContinuousActionEnv::Action::size); + policyNetwork.Add(actionSize); policyNetwork.Add(); FFN qNetwork( @@ -106,7 +105,8 @@ int main() qNetwork.Add(1); // Set up the policy method. - RandomReplay replayMethod(32, 10000); + RandomReplay> + replayMethod(32, 10000); // Set up training configurations. TrainingConfig config; @@ -114,7 +114,7 @@ int main() config.UpdateInterval() = 1; // Set up Soft actor-critic agent. - SAC, decltype(qNetwork), decltype(policyNetwork), AdamUpdate> @@ -122,7 +122,7 @@ int main() // Preparation for training the agent. // Set up the gym training environment. - gym::Environment env("gym.kurg.org", "4040", "Pendulum-v0"); + gym::Environment env("localhost", "4040", "Pendulum-v1"); // Initializing training variables. std::vector returnList; @@ -146,7 +146,7 @@ int main() agent.Deterministic() = true; // Creating and setting up the gym environment for testing. - gym::Environment envTest("gym.kurg.org", "4040", "Pendulum-v0"); + gym::Environment envTest("localhost", "4040", "Pendulum-v1"); envTest.monitor.start("./dummy/", true, true); // Resets the environment. From 89bbe918cb4ae635cb8c86e8fe349a2dca9174a4 Mon Sep 17 00:00:00 2001 From: Ali-Hossam Date: Mon, 8 Apr 2024 13:17:39 +0200 Subject: [PATCH 08/12] Refactor pendulum_sac.cpp to work with the new ContinuousActionEnv class --- reinforcement_learning_gym/pendulum_sac/pendulum_sac.cpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/reinforcement_learning_gym/pendulum_sac/pendulum_sac.cpp b/reinforcement_learning_gym/pendulum_sac/pendulum_sac.cpp index 78ed6753..068e1a03 100644 --- a/reinforcement_learning_gym/pendulum_sac/pendulum_sac.cpp +++ b/reinforcement_learning_gym/pendulum_sac/pendulum_sac.cpp @@ -147,7 +147,6 @@ int main() // Creating and setting up the gym environment for testing. gym::Environment envTest("localhost", "4040", "Pendulum-v1"); - envTest.monitor.start("./dummy/", true, true); // Resets the environment. 
envTest.reset(); @@ -175,7 +174,7 @@ int main() if (envTest.done) { std::cout << " Total steps: " << totalSteps - << "\\t Total reward: " << totalReward << std::endl; + << "\t Total reward: " << totalReward << std::endl; break; } @@ -231,7 +230,7 @@ int main() if (envTest.done) { std::cout << " Total steps: " << totalSteps - << "\\t Total reward: " << totalReward << std::endl; + << "\t Total reward: " << totalReward << std::endl; break; } From a62a1648a7af969cfe7970d2800bb11da1624b38 Mon Sep 17 00:00:00 2001 From: Ali-Hossam Date: Mon, 8 Apr 2024 13:18:27 +0200 Subject: [PATCH 09/12] Replace gym env V0 with V1 --- reinforcement_learning_gym/pendulum_td3/pendulum_td3.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/reinforcement_learning_gym/pendulum_td3/pendulum_td3.cpp b/reinforcement_learning_gym/pendulum_td3/pendulum_td3.cpp index 43d8d6a4..ebb8ff77 100644 --- a/reinforcement_learning_gym/pendulum_td3/pendulum_td3.cpp +++ b/reinforcement_learning_gym/pendulum_td3/pendulum_td3.cpp @@ -146,10 +146,11 @@ int main() agent.Deterministic() = true; // Creating and setting up the gym environment for testing. - gym::Environment envTest("localhost", "4040", "Pendulum-v1-render"); + gym::Environment envTest("localhost", "4040", "Pendulum-v1"); // Resets the environment. envTest.reset(); + envTest.render(); double totalReward = 0; size_t totalSteps = 0; From efc730bd184eddd0cc2804b6b42400a356e7ad90 Mon Sep 17 00:00:00 2001 From: Ali-Hossam Date: Mon, 8 Apr 2024 13:28:38 +0200 Subject: [PATCH 10/12] Update dqn Network --- .../cartpole_dqn/cartpole_dqn.cpp | 46 ++++++++++++------- 1 file changed, 29 insertions(+), 17 deletions(-) diff --git a/reinforcement_learning_gym/cartpole_dqn/cartpole_dqn.cpp b/reinforcement_learning_gym/cartpole_dqn/cartpole_dqn.cpp index 711c6aa0..5953e18f 100644 --- a/reinforcement_learning_gym/cartpole_dqn/cartpole_dqn.cpp +++ b/reinforcement_learning_gym/cartpole_dqn/cartpole_dqn.cpp @@ -17,6 +17,10 @@ using namespace mlpack; using namespace ens; +// Set up the state and action space. +constexpr size_t stateDimension = 4; +constexpr size_t actionSize = 2; + template::State nextState; nextState.Data() = env.observation; replayMethod.Store( @@ -75,8 +79,8 @@ void Train( { std::cout << "Avg return in last " << consecutiveEpisodes << " episodes: " << averageReturn - << "\\t Episode return: " << episodeReturn - << "\\t Total steps: " << agent.TotalSteps() << std::endl; + << "\t Episode return: " << episodeReturn + << "\t Total steps: " << agent.TotalSteps() << std::endl; } } } @@ -84,17 +88,21 @@ void Train( int main() { // Initializing the agent. - // Set up the state and action space. - DiscreteActionEnv::State::dimension = 4; - DiscreteActionEnv::Action::size = 2; // Set up the network. - SimpleDQN<> model(DiscreteActionEnv::State::dimension, - 128, - 32, - DiscreteActionEnv::Action::size); + FFN network( + MeanSquaredError(), GaussianInitialization(0, 1)); + network.Add(128); + network.Add(); + network.Add(actionSize); + + SimpleDQN<> model(network); + // Set up the policy and replay method. - GreedyPolicy policy(1.0, 1000, 0.1, 0.99); - RandomReplay replayMethod(32, 2000); + GreedyPolicy> + policy(1.0, 1000, 0.1, 0.99); + RandomReplay> + replayMethod(32, 2000); + // Set up training configurations. TrainingConfig config; config.StepSize() = 0.001; @@ -103,12 +111,16 @@ int main() config.ExplorationSteps() = 100; config.DoubleQLearning() = false; config.StepLimit() = 200; + // Set up DQN agent. 
- QLearning + QLearning, + decltype(model), + AdamUpdate, decltype(policy)> agent(config, model, policy, replayMethod); + // Preparation for training the agent. // Set up the gym training environment. - gym::Environment env("gym.kurg.org", "4040", "CartPole-v0"); + gym::Environment env("localhost", "4040", "CartPole-v0"); // Initializing training variables. std::vector returnList; @@ -133,7 +145,7 @@ int main() agent.Deterministic() = true; // Creating and setting up the gym environment for testing. - gym::Environment envTest("gym.kurg.org", "4040", "CartPole-v0"); + gym::Environment envTest("localhost", "4040", "CartPole-v0"); envTest.monitor.start("./dummy/", true, true); // Resets the environment. @@ -162,7 +174,7 @@ int main() if (envTest.done) { std::cout << " Total steps: " << totalSteps - << "\\t Total reward: " << totalReward << std::endl; + << "\t Total reward: " << totalReward << std::endl; break; } @@ -215,7 +227,7 @@ int main() if (envTest.done) { std::cout << " Total steps: " << totalSteps - << "\\t Total reward: " << totalReward << std::endl; + << "\t Total reward: " << totalReward << std::endl; break; } From be140e3c65c14879b09054d2e72bce58713dca8a Mon Sep 17 00:00:00 2001 From: Ali-Hossam Date: Mon, 8 Apr 2024 13:47:43 +0200 Subject: [PATCH 11/12] replaced {envname}_render with env.render --- .../mountain_car_ddpg/mountain_car_ddpg.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/reinforcement_learning_gym/mountain_car_ddpg/mountain_car_ddpg.cpp b/reinforcement_learning_gym/mountain_car_ddpg/mountain_car_ddpg.cpp index 348506aa..65fc703c 100644 --- a/reinforcement_learning_gym/mountain_car_ddpg/mountain_car_ddpg.cpp +++ b/reinforcement_learning_gym/mountain_car_ddpg/mountain_car_ddpg.cpp @@ -156,16 +156,17 @@ int main() returnList, episodes, consecutiveEpisodes, - 100000); + 10000); // Testing the trained agent. agent.Deterministic() = true; // Creating and setting up the gym environment for testing. - gym::Environment envTest("localhost", "4040", "MountainCarContinuous-v0-render"); + gym::Environment envTest("localhost", "4040", "MountainCarContinuous-v0"); // Resets the environment. envTest.reset(); + envTest.render(); double totalReward = 0; size_t totalSteps = 0; From b56cdd9cf8089168851e757df80397f8ce9a5088 Mon Sep 17 00:00:00 2001 From: Ali-Hossam Date: Thu, 11 Apr 2024 14:17:38 +0200 Subject: [PATCH 12/12] removed first envTest.close and second envTest.render as they were preventing reusing the test env. --- reinforcement_learning_gym/acrobot_dqn/acrobot_dqn.cpp | 5 +---- .../bipedal_walker_sac/bipedal_walker_sac.cpp | 2 +- reinforcement_learning_gym/cartpole_dqn/cartpole_dqn.cpp | 2 -- .../lunar_lander_dqn/lunar_lander_dqn.cpp | 2 -- .../mountain_car_dqn/mountain_car_dqn.cpp | 2 -- reinforcement_learning_gym/pendulum_dqn/pendulum_dqn.cpp | 2 -- reinforcement_learning_gym/pendulum_sac/pendulum_sac.cpp | 6 ++---- 7 files changed, 4 insertions(+), 17 deletions(-) diff --git a/reinforcement_learning_gym/acrobot_dqn/acrobot_dqn.cpp b/reinforcement_learning_gym/acrobot_dqn/acrobot_dqn.cpp index 971098bf..3b3aec6e 100644 --- a/reinforcement_learning_gym/acrobot_dqn/acrobot_dqn.cpp +++ b/reinforcement_learning_gym/acrobot_dqn/acrobot_dqn.cpp @@ -186,7 +186,6 @@ int main() // << totalReward << "\t Action taken: " << action; } - envTest.close(); std::cout << envTest.url() << std::endl; /** @@ -197,12 +196,10 @@ int main() agent.Deterministic() = true; // Creating and setting up the gym environment for testing. 
- // envTest.monitor.start("./dummy/", true, true); - envTest.compression(9); + envTest.monitor.start("./dummy/", true, true); // Resets the environment. envTest.reset(); - envTest.render(); totalReward = 0; totalSteps = 0; diff --git a/reinforcement_learning_gym/bipedal_walker_sac/bipedal_walker_sac.cpp b/reinforcement_learning_gym/bipedal_walker_sac/bipedal_walker_sac.cpp index 56f15899..c67fc302 100644 --- a/reinforcement_learning_gym/bipedal_walker_sac/bipedal_walker_sac.cpp +++ b/reinforcement_learning_gym/bipedal_walker_sac/bipedal_walker_sac.cpp @@ -218,7 +218,7 @@ int main() if (envTest.done) { std::cout << " Total steps: " << totalSteps - << "\\t Total reward: " << totalReward << std::endl; + << "\t Total reward: " << totalReward << std::endl; break; } diff --git a/reinforcement_learning_gym/cartpole_dqn/cartpole_dqn.cpp b/reinforcement_learning_gym/cartpole_dqn/cartpole_dqn.cpp index 5953e18f..5b19ee11 100644 --- a/reinforcement_learning_gym/cartpole_dqn/cartpole_dqn.cpp +++ b/reinforcement_learning_gym/cartpole_dqn/cartpole_dqn.cpp @@ -183,7 +183,6 @@ int main() // << totalReward << "\\t Action taken: " << action; } - envTest.close(); std::cout << envTest.url() << std::endl; // A little more training... @@ -203,7 +202,6 @@ int main() // Resets the environment. envTest.reset(); - envTest.render(); totalReward = 0; totalSteps = 0; diff --git a/reinforcement_learning_gym/lunar_lander_dqn/lunar_lander_dqn.cpp b/reinforcement_learning_gym/lunar_lander_dqn/lunar_lander_dqn.cpp index 4a2babed..4c344309 100644 --- a/reinforcement_learning_gym/lunar_lander_dqn/lunar_lander_dqn.cpp +++ b/reinforcement_learning_gym/lunar_lander_dqn/lunar_lander_dqn.cpp @@ -178,7 +178,6 @@ int main() // << totalReward << \"\t Action taken: " << action; } - envTest.close(); std::cout << envTest.url() << std::endl; // A little more training... @@ -204,7 +203,6 @@ int main() // Resets the environment. envTest.reset(); - envTest.render(); totalReward = 0; totalSteps = 0; diff --git a/reinforcement_learning_gym/mountain_car_dqn/mountain_car_dqn.cpp b/reinforcement_learning_gym/mountain_car_dqn/mountain_car_dqn.cpp index c7f19819..657625dc 100644 --- a/reinforcement_learning_gym/mountain_car_dqn/mountain_car_dqn.cpp +++ b/reinforcement_learning_gym/mountain_car_dqn/mountain_car_dqn.cpp @@ -204,7 +204,6 @@ int main() // << totalReward << "\t Action taken: " << action; } - envTest.close(); std::cout << envTest.url() << std::endl; // A little more training... @@ -226,7 +225,6 @@ int main() // Resets the environment. envTest.reset(); - envTest.render(); totalReward = 0; totalSteps = 0; diff --git a/reinforcement_learning_gym/pendulum_dqn/pendulum_dqn.cpp b/reinforcement_learning_gym/pendulum_dqn/pendulum_dqn.cpp index 8f4fa77f..b2e3ac3a 100644 --- a/reinforcement_learning_gym/pendulum_dqn/pendulum_dqn.cpp +++ b/reinforcement_learning_gym/pendulum_dqn/pendulum_dqn.cpp @@ -195,7 +195,6 @@ int main() // << totalReward << "\t Action taken: " << action; } - envTest.close(); std::cout << envTest.url() << std::endl; //A little more training... @@ -216,7 +215,6 @@ int main() // Resets the environment. 
envTest.reset(); - envTest.render(); totalReward = 0; totalSteps = 0; diff --git a/reinforcement_learning_gym/pendulum_sac/pendulum_sac.cpp b/reinforcement_learning_gym/pendulum_sac/pendulum_sac.cpp index 068e1a03..9110bc8d 100644 --- a/reinforcement_learning_gym/pendulum_sac/pendulum_sac.cpp +++ b/reinforcement_learning_gym/pendulum_sac/pendulum_sac.cpp @@ -81,8 +81,8 @@ void Train(gym::Environment& env, { std::cout << "Avg return in last " << returnList.size() << " episodes: " << averageReturn - << "\\t Episode return: " << episodeReturn - << "\\t Total steps: " << agent.TotalSteps() << std::endl; + << "\t Episode return: " << episodeReturn + << "\t Total steps: " << agent.TotalSteps() << std::endl; } } } @@ -183,7 +183,6 @@ int main() // << totalReward << "\\t Action taken: " << action; } - envTest.close(); std::cout << envTest.url() << std::endl; // A little more training... @@ -206,7 +205,6 @@ int main() // Resets the environment. envTest.reset(); - envTest.render(); totalReward = 0; totalSteps = 0;