Refactor rl examples - updated README #223
Changes from all commits
@@ -17,6 +17,10 @@
 using namespace mlpack;
 using namespace ens;

+// Set up the state and action space.
+constexpr size_t stateDimension = 6;
+constexpr size_t actionSize = 3;
+
 // Function to train the agent on the Acrobot-v1 gym environment.
 template<typename EnvironmentType,
          typename NetworkType,
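With the state and action sizes now fixed at compile time, the templated environment type recurs throughout the file, so a short alias can keep the later declarations readable. This is a minimal sketch under the assumptions of this diff (the templated DiscreteActionEnv and the example's existing mlpack includes); the alias name AcrobotEnv is hypothetical and not part of the PR:

// Sketch only: assumes the templated DiscreteActionEnv introduced by this PR
// and the same mlpack/ensmallen headers the example already includes.
constexpr size_t stateDimension = 6;  // Acrobot-v1 observation size
constexpr size_t actionSize = 3;      // Acrobot-v1 discrete action count

// Hypothetical alias, introduced here purely for illustration.
using AcrobotEnv = DiscreteActionEnv<stateDimension, actionSize>;

// Later declarations could then read, e.g.:
//   AcrobotEnv::State nextState;
//   GreedyPolicy<AcrobotEnv> policy(1.0, 1000, 0.1, 0.99);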
@@ -49,7 +53,7 @@ void Train(gym::Environment& env,
     arma::mat action = {double(agent.Action().action)};

     env.step(action);
-    DiscreteActionEnv::State nextState;
+    DiscreteActionEnv<stateDimension, actionSize>::State nextState;
     nextState.Data() = env.observation;

     replayMethod.Store(
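For context on what this hunk sits inside, here is a hedged sketch of one experience-collection step of the Train() helper, assembled from the calls visible in the diff (env.step(), env.observation, replayMethod.Store()). The Store() argument order, the env.reward/env.done members, and the TrainAgent() call reflect my reading of the mlpack and gym_tcp_api APIs and should be checked against the full file:

// Hedged sketch of a single interaction step inside Train(); the argument
// order of Store() is assumed, not quoted from the file.
agent.State().Data() = env.observation;               // current observation
agent.SelectAction();                                  // policy picks an action
arma::mat action = {double(agent.Action().action)};

env.step(action);                                      // advance the environment
DiscreteActionEnv<stateDimension, actionSize>::State nextState;
nextState.Data() = env.observation;

replayMethod.Store(agent.State(), agent.Action(), env.reward, nextState,
    env.done, 0.99 /* discount, assumed */);           // assumed argument order
agent.TrainAgent();                                    // assumed mlpack call: one update from the replay buffer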
@@ -85,22 +89,22 @@ void Train(gym::Environment& env,
 int main()
 {
-  // Initializing the agent.
-  DiscreteActionEnv::State::dimension = 6;
-  DiscreteActionEnv::Action::size = 3;
+  // Set up the state and action space.

   // Set up the network.
   FFN<MeanSquaredError, GaussianInitialization> module(
       MeanSquaredError(), GaussianInitialization(0, 1));
-  module.Add<Linear>(DiscreteActionEnv::State::dimension, 64);
-  module.Add<ReLULayer>();
-  module.Add<Linear>(64, DiscreteActionEnv::Action::size);
+  module.Add<Linear>(64);
+  module.Add<ReLU>();
+  module.Add<Linear>(actionSize);
   SimpleDQN<> model(module);

   // Set up the policy method.
-  GreedyPolicy<DiscreteActionEnv> policy(1.0, 1000, 0.1, 0.99);
+  GreedyPolicy<DiscreteActionEnv<stateDimension, actionSize>>
+      policy(1.0, 1000, 0.1, 0.99);

   // To enable 3-step learning, we set the last parameter of the replay method as 3.
-  PrioritizedReplay<DiscreteActionEnv> replayMethod(64, 5000, 0.6, 3);
+  PrioritizedReplay<DiscreteActionEnv<stateDimension, actionSize>>
+      replayMethod(64, 5000, 0.6, 3);

   // Set up training configurations.
   TrainingConfig config;
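For readers new to these components, the constructor arguments can be spelled out with named constants. The sketch below assumes the GreedyPolicy arguments are (initial epsilon, anneal interval, minimum epsilon, decay rate) and the PrioritizedReplay arguments are (batch size, capacity, alpha, n-step horizon), which is consistent with the 3-step comment in the diff; the constant names are introduced only for illustration:

// Hedged sketch: constant names are hypothetical, added here for clarity.
const double initialEpsilon = 1.0;   // start fully exploratory
const size_t annealInterval = 1000;  // steps over which epsilon is annealed
const double minEpsilon = 0.1;       // exploration floor
const double epsilonDecay = 0.99;    // assumed meaning of the fourth argument

GreedyPolicy<DiscreteActionEnv<stateDimension, actionSize>>
    policy(initialEpsilon, annealInterval, minEpsilon, epsilonDecay);

const size_t batchSize = 64;   // samples drawn per update
const size_t capacity = 5000;  // replay buffer size
const double alpha = 0.6;      // prioritization exponent
const size_t nSteps = 3;       // enables 3-step returns, as noted in the diff

PrioritizedReplay<DiscreteActionEnv<stateDimension, actionSize>>
    replayMethod(batchSize, capacity, alpha, nSteps);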
@@ -111,7 +115,7 @@ int main()
   config.DoubleQLearning() = true;

   // Set up DQN agent.
-  QLearning<DiscreteActionEnv,
+  QLearning<DiscreteActionEnv<stateDimension, actionSize>,
             decltype(model),
             AdamUpdate,
             decltype(policy),
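The template argument list is cut off mid-declaration in this hunk. For context, here is a hedged sketch of what the complete agent declaration plausibly looks like with the templated environment; the replay type in the last slot and the constructor argument order follow my reading of mlpack's QLearning API, so check them against the full file:

// Hedged sketch of the full DQN agent declaration; not a quote of the file.
QLearning<DiscreteActionEnv<stateDimension, actionSize>,
          decltype(model),
          AdamUpdate,
          decltype(policy),
          decltype(replayMethod)>
    agent(config, model, policy, replayMethod);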
@@ -120,7 +124,7 @@ int main()

-  // Preparation for training the agent
-  gym::Environment env("gym.kurg.org", "4040", "Acrobot-v1");
+  // Set up the gym training environment.
+  gym::Environment env("localhost", "4040", "Acrobot-v1");

   // Initializing training variables.
   std::vector<double> returnList;

Review comment: I'm not sure of the status of gym.kurg.org, but I don't know if this is the right thing to do here; otherwise we would now expect a user to be running the gym locally.

Author reply: True, this does require the user to run the gym locally. Running gym_tcp_api locally was the only way I could get it to work. I couldn't find any working examples using gym.kurg.org, so I assumed it is no longer functional. Also, the example in the gym_tcp_api directory uses localhost.
@@ -144,7 +148,7 @@ int main()
   agent.Deterministic() = true;

   // Creating and setting up the gym environment for testing.
-  gym::Environment envTest("gym.kurg.org", "4040", "Acrobot-v1");
+  gym::Environment envTest("localhost", "4040", "Acrobot-v1");
   envTest.monitor.start("./dummy/", true, true);

   // Resets the environment.
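Since the testing code only appears here in fragments, a sketch of the evaluation pass it belongs to may help. It is assembled from the calls visible in this diff (Deterministic(), monitor.start(), reset(), render(), step(), close(), url()); the episode loop itself and the envTest.done/envTest.reward members are assumptions, not quotes from the file:

// Hedged sketch of the evaluation pass; the loop structure is assumed.
agent.Deterministic() = true;                   // greedy actions, no exploration

gym::Environment envTest("localhost", "4040", "Acrobot-v1");
envTest.monitor.start("./dummy/", true, true);  // record the episode

envTest.reset();
envTest.render();

double totalReward = 0;
size_t totalSteps = 0;

while (!envTest.done)
{
  agent.State().Data() = envTest.observation;
  agent.SelectAction();
  arma::mat action = {double(agent.Action().action)};

  envTest.step(action);
  totalReward += envTest.reward;
  ++totalSteps;
}

envTest.close();
std::cout << envTest.url() << std::endl;        // link to the recorded run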
@@ -182,7 +186,6 @@ int main()
   //     << totalReward << "\t Action taken: " << action;
  }

  envTest.close();
  std::cout << envTest.url() << std::endl;

  /**
@@ -194,10 +197,9 @@ int main()

  // Creating and setting up the gym environment for testing.
  envTest.monitor.start("./dummy/", true, true);

  // Resets the environment.
  envTest.reset();
  envTest.render();

  totalReward = 0;
  totalSteps = 0;
Review comment: But I don't think the intention was for users to run that script directly; it would be better to just use Binderhub or similar. It's true that you could run this script, but it rests on a number of assumptions that may not hold for users, such as Makefile-built examples. So I don't think I would want to include this in the general README; users will then attempt to run the command and may encounter problems that aren't even relevant if they're not looking to use Jupyterlab. As an alternative, it may be more reasonable to comment that script a little better. Or, if we restructured the examples in the repository to organize them by language, then a directory specific to C++ notebook examples would be a more natural home for this documentation.