Skip to content

Commit

Permalink
minor chnages to accomodate multiple envs. Also rate of gif updates s…
Browse files Browse the repository at this point in the history
…uited for macbook
  • Loading branch information
AGKhalil committed Jun 24, 2019
1 parent 6986e54 commit baf138e
Show file tree
Hide file tree
Showing 2 changed files with 24 additions and 18 deletions.
6 changes: 3 additions & 3 deletions load_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,8 @@
from stable_baselines.results_plotter import load_results, ts2xy

if __name__ == "__main__":
file_name = str(sys.argv[1])
env_name = str(sys.argv[1])
file_name = str(sys.argv[2])

if file_name[:3] == "mod":
model_name = file_name
Expand All @@ -21,12 +22,11 @@
log_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), "tmp")
model_name = os.path.join(dirpath, file_name)

env_name = 'Real-v0'
env = gym.make(env_name)
model = PPO2.load(model_name)

obs = env.reset()
for i in range(1000):
for i in range(10000):
action, _states = model.predict(obs)
obs, rewards, dones, info = env.step(action)
env.render()
36 changes: 21 additions & 15 deletions train_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,15 +16,14 @@
from stable_baselines.bench import Monitor
from stable_baselines.results_plotter import load_results, ts2xy

best_mean_reward, n_steps, old_steps = -np.inf, 0, 0
step_total = 2000000
best_mean_reward, n_steps, old_steps, total_gif_time = -np.inf, 0, 0, 0
step_total = 1000000

if step_total >= 1000000:
n_gifs = 10
n_gifs = 5
else:
n_gifs = 2
log_incs = np.round((step_total / n_gifs) * 25 / 60000)
print(n_gifs)
n_gifs = 2
log_incs = np.round((step_total / n_gifs) * 60 / 60000)
env_name = 'Real-v0'

##############################################Functions###################
Expand All @@ -36,10 +35,11 @@ def callback(_locals, _globals):
:param _locals: (dict)
:param _globals: (dict)
"""
global n_steps, best_mean_reward, old_steps, gif_dir, env_name, log_incs, models_tmp_dir
global n_steps, best_mean_reward, old_steps, gif_dir, env_name, log_incs, models_tmp_dir, total_gif_time
# Print stats every 1000 calls

if abs(n_steps - old_steps) >= log_incs:
gif_start = time.time()
old_steps = n_steps
# Evaluate policy performance
x, y = ts2xy(load_results(log_dir), 'timesteps')
Expand All @@ -63,17 +63,21 @@ def callback(_locals, _globals):

env_gif = gym.make(env_name)
obs = env_gif.reset()
img = env_gif.sim.render(width=300, height=300, camera_name="isometric_view")
img = env_gif.sim.render(
width=200, height=200, camera_name="isometric_view")
for _ in range(5000):
action, _ = model.predict(obs)
obs, _, _, _ = env_gif.step(action)
img = env_gif.sim.render(width=300, height=300, camera_name="isometric_view")
img = env_gif.sim.render(
width=200, height=200, camera_name="isometric_view")
images.append(np.flipud(img))

print("creating gif...")
imageio.mimsave(save_str, [np.array(img)
for i, img in enumerate(images) if i % 2 == 0], fps=29)
print("gif created...")
gif_end = time.time()
total_gif_time += gif_end - gif_start
n_steps += 1

return True
Expand Down Expand Up @@ -122,8 +126,10 @@ def plot_results(log_folder, model_name, plt_dir, title='Learning Curve'):

print("running...")
# Create log dir
models_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), "models/")
models_tmp_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), "models_tmp/")
models_dir = os.path.join(os.path.dirname(
os.path.realpath(__file__)), "models/")
models_tmp_dir = os.path.join(os.path.dirname(
os.path.realpath(__file__)), "models_tmp/")
log_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), "tmp")
gif_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), "tmp_gif/")
plt_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), "plot")
Expand All @@ -138,7 +144,7 @@ def plot_results(log_folder, model_name, plt_dir, title='Learning Curve'):
# env = DummyVecEnv([lambda: env])

# multiprocess environment
n_cpu = 20
n_cpu = 8
env = Monitor(env, log_dir, allow_early_resets=True)
env = SubprocVecEnv([lambda: env for i in range(n_cpu)])
# Add some param noise for exploration
Expand All @@ -148,7 +154,7 @@ def plot_results(log_folder, model_name, plt_dir, title='Learning Curve'):
model.learn(total_timesteps=step_total, callback=callback)
end = time.time()

training_time = end - start
training_time = end - start - total_gif_time

stamp = ' {:%Y-%m-%d %H:%M:%S}'.format(datetime.datetime.now())
model_name = "PPO2_" + env_name + "_" + \
Expand All @@ -172,5 +178,5 @@ def plot_results(log_folder, model_name, plt_dir, title='Learning Curve'):
print("********************************************************************")
while watch_agent == "y" or "Y":
subprocess.Popen(
'''export LD_PRELOAD=/usr/lib/x86_64-linux-gnu/libGLEW.so:/usr/lib/nvidia-410/libGL.so; python load_agent.py '%s' ''' % model_name, shell=True)
watch_agent = input("Do you want to watch your sick gaits? (Y/n):")
'''export LD_PRELOAD=/usr/lib/x86_64-linux-gnu/libGLEW.so:/usr/lib/nvidia-410/libGL.so; python load_agent.py '%s' '%s' ''' % (env_name, model_name), shell=True)
watch_agent = input("Do you want to watch your sick gaits? (Y/n):")

0 comments on commit baf138e

Please sign in to comment.