Hand-crafted curriculum. Also, plotting is much better
AGKhalil committed Jul 18, 2019
1 parent cf853ad commit a07b991
Showing 2 changed files with 188 additions and 39 deletions.
95 changes: 92 additions & 3 deletions automatic_cl.py
@@ -18,14 +18,103 @@
from stable_baselines.results_plotter import load_results, ts2xy
import xml.etree.ElementTree as ET

best_mean_reward, n_steps = -np.inf, 0

def callback(_locals, _globals):
"""
Callback called at each step (for DQN and others) or after n steps (see ACER or PPO2)
:param _locals: (dict)
:param _globals: (dict)
"""
global n_steps, best_mean_reward
# Print stats every 1000 calls
if (n_steps + 1) % 1000 == 0:
# Evaluate policy training performance
x, y = ts2xy(load_results(log_dir), 'timesteps')
if len(x) > 0:
mean_reward = np.mean(y[-100:])
print(x[-1], 'timesteps')
print("Best mean reward: {:.2f} - Last mean reward per episode: {:.2f}".format(best_mean_reward, mean_reward))

# New best model, you could save the agent here
if mean_reward > best_mean_reward:
best_mean_reward = mean_reward
# Example for saving best model
print("Saving new best model")
_locals['self'].save(os.path.join(log_dir, 'best_model_prof.pkl'))
n_steps += 1
# Returning False will stop training early
return True


def moving_average(values, window):
"""
Smooth values by doing a moving average
:param values: (numpy array)
:param window: (int)
:return: (numpy array)
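Example (for illustration): moving_average(np.array([1., 2., 3., 4.]), window=2) -> array([1.5, 2.5, 3.5])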
"""
weights = np.repeat(1.0, window) / window
return np.convolve(values, weights, 'valid')


def plot_results(log_folder, model_name, plt_dir, title='Learning Curve'):
"""
Plot the results.
:param log_folder: (str) the save location of the results to plot
:param model_name: (str) name used for the copied monitor csv and the saved plot files
:param plt_dir: (str) directory in which the plot images are saved
:param title: (str) the title of the task to plot
"""
m_name_csv = model_name + ".csv"
old_file_name = os.path.join(log_folder, "monitor.csv")
new_file_name = os.path.join(log_folder, m_name_csv)
save_name = os.path.join(plt_dir, model_name)

x, y = ts2xy(load_results(log_folder), 'timesteps')
shutil.copy(old_file_name, new_file_name)
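# Note: window=1 leaves the rewards unsmoothed; a larger window (e.g. 50, as used in train_agent.py) would actually smooth the curve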
y = moving_average(y, window=1)
# Truncate x
x = x[len(x) - len(y):]

fig = plt.figure(title)
plt.plot(x, y)
plt.xlabel('Number of Timesteps')
plt.ylabel('Rewards')
plt.title(title + " Smoothed")
print('Saving plot at:', save_name)
plt.savefig(save_name + ".png")
plt.savefig(save_name + ".eps")
print("plots saved...")

models_dir = os.path.join(os.path.dirname(
os.path.realpath(__file__)), "prof/models/")
models_tmp_dir = os.path.join(os.path.dirname(
os.path.realpath(__file__)), "prof/models_tmp/")
log_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), "prof/tmp")
gif_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), "prof/tmp_gif/")
plt_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), "prof/plot")
os.makedirs(log_dir, exist_ok=True)
os.makedirs(gif_dir, exist_ok=True)
os.makedirs(models_dir, exist_ok=True)
os.makedirs(models_tmp_dir, exist_ok=True)
os.makedirs(plt_dir, exist_ok=True)

step_total = 1000
env_name = 'CurriculumLearning-v0'
save_path = os.path.dirname(
os.path.realpath(__file__))
env = gym.make(env_name)
env.save_path = save_path
n_cpu = 8
# env = Monitor(env, log_dir, allow_early_resets=True)
# env = SubprocVecEnv([lambda: env for i in range(n_cpu)])
env = Monitor(env, log_dir, allow_early_resets=True)
env = DummyVecEnv([lambda: env])
model = PPO2(MlpPolicy, env, verbose=1)
model.learn(total_timesteps=100000)
model.learn(total_timesteps=step_total)

stamp = ' {:%Y-%m-%d %H:%M:%S}'.format(datetime.datetime.now())
model_name = "Prof" + "_" + \
str(step_total) + "_" + stamp
model_loc = os.path.join(models_dir, model_name)
model.save(model_loc)

plot_results(log_dir, model_name, plt_dir)
132 changes: 96 additions & 36 deletions train_agent.py
@@ -18,13 +18,13 @@
import xml.etree.ElementTree as ET

best_mean_reward, n_steps, old_steps, total_gif_time = -np.inf, 0, 0, 0
step_total = 50000
step_total = 250000

if step_total >= 1000000:
n_gifs = 5
n_gifs = 2
else:
n_gifs = 2
log_incs = np.round((step_total / n_gifs) * 60 / 60000)
log_incs = np.round((step_total / n_gifs) / 2560)
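# Presumably the interval, in PPO2 updates, between gif/log captures: 2560 looks like the timesteps
# collected per update (n_cpu * the default 128 steps per env). This is an inference, not documented.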
env_name = 'Real-v0'

##############################################Functions###################
@@ -126,6 +126,7 @@ def plot_results(log_folder, model_name, plt_dir, title='Learning Curve'):

def alter_leg(leg_length):
xml_path = os.path.join(gym_real.__path__[0], "envs/assets/real.xml")
print(xml_path)

tree = ET.parse(xml_path)
root = tree.getroot()
@@ -134,7 +135,7 @@ def alter_leg(leg_length):
print(geom.get("fromto"))

for pos in root.findall("worldbody/body/[@name='torso']"):
pos.set("pos", "0 0 " + str(abs(leg_length) + 0.7))
pos.set("pos", "-10.0 0 " + str(abs(leg_length) + 0.7))
print(pos.get('pos'))

tree.write(xml_path)
@@ -150,53 +151,112 @@ def alter_leg(leg_length):
log_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), "tmp")
gif_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), "tmp_gif/")
plt_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), "plot")
# tensor_dir = os.path.join(os.path.dirname(
# os.path.realpath(__file__)), "tensorboard/")
os.makedirs(log_dir, exist_ok=True)
os.makedirs(gif_dir, exist_ok=True)
os.makedirs(models_dir, exist_ok=True)
os.makedirs(models_tmp_dir, exist_ok=True)
os.makedirs(plt_dir, exist_ok=True)
# os.makedirs(tensor_dir, exist_ok=True)

# print(tensor_dir)

# alter_leg(-0.1)

# Create and wrap the environment
env = gym.make(env_name)
# env = Monitor(env, log_dir, allow_early_resets=True)
# env = DummyVecEnv([lambda: env])

# multiprocess environment
n_cpu = 8
n_cpu = 20
env = Monitor(env, log_dir, allow_early_resets=True)
env = SubprocVecEnv([lambda: env for i in range(n_cpu)])
# Add some param noise for exploration

alter_leg(-5.0)

model = PPO2(MlpPolicy, env, verbose=1)
start = time.time()
model.learn(total_timesteps=step_total, callback=callback)
end = time.time()

# del model

alter_leg(-0.3)

model = PPO2(MlpPolicy, env, verbose=1)
start = time.time()
model.learn(total_timesteps=step_total, callback=callback)
end = time.time()
# alter_leg(-5.0)
lengths = [i * -0.1 for i in range(1, 10)]
model_created = False

print(lengths)
counter = 0
all_x = []
all_y = []
vert_x = []

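# Hand-crafted curriculum: train for step_total timesteps at each leg length,
# warm-starting each stage from the model saved at the end of the previous one.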
for i in lengths:
counter += 1
alter_leg(i)
env = gym.make(env_name)
n_cpu = 20
env = Monitor(env, log_dir, allow_early_resets=True)
env = SubprocVecEnv([lambda: env for i in range(n_cpu)])
if not model_created:
# , tensorboard_log="./a2c_cartpole_tensorboard/
model = PPO2(MlpPolicy, env, verbose=1)
else:
model = PPO2.load(model_loc, env=env)
start = time.time()
model.learn(total_timesteps=step_total)
model_loc = os.path.join(models_dir, 'hand')

x, y = ts2xy(load_results(log_dir), 'timesteps')
y = moving_average(y, window=50)
x = x[len(x) - len(y):]
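# Shift this stage's timesteps by the final timestep of the previous stage so the per-stage
# curves concatenate into one curriculum-wide learning curve; vert_x records the stage boundaries.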
for i in x:
if model_created:
all_x.append(i + vert_x[-1])
appended_val = x[-1] + vert_x[-1]
else:
all_x.append(i)
appended_val = x[-1]

vert_x.append(appended_val)
for i in y:
all_y.append(i)
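# Clear the monitor log so the next curriculum stage starts recording from a fresh file.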
os.remove(os.path.join(log_dir, "monitor.csv"))

model.save(model_loc)
env.close()
model_created = True
del env
del model
end = time.time()

training_time = end - start - total_gif_time

stamp = ' {:%Y-%m-%d %H:%M:%S}'.format(datetime.datetime.now())
model_name = "PPO2_" + env_name + "_" + \
str(step_total) + "_" + stamp + "_" + str(training_time)
model_loc = os.path.join(models_dir, model_name)
print(model_loc)
model.save(model_loc)

print("Training time:", training_time)
print("model saved as: " + model_name)

plot_results(log_dir, model_name, plt_dir)

del model # remove to demonstrate saving and loading
print(counter)
print(lengths)
print(all_x)
print(all_y)
print(vert_x)

save_name = os.path.join(plt_dir, 'hand' + str(step_total))

fig = plt.figure('hand' + str(step_total))
plt.plot(all_x, all_y)
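# Dashed vertical lines mark where the leg length was incremented between curriculum stages.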
for i in vert_x:
plt.axvline(x=i, linestyle='--', color='#ccc5c6', label='leg increment')
plt.xlabel('Number of Timesteps')
plt.ylabel('Rewards')
plt.title('hand' + " Smoothed")
plt.savefig(save_name + ".png")
plt.savefig(save_name + ".eps")
print("plots saved...")
plt.show()

# stamp = ' {:%Y-%m-%d %H:%M:%S}'.format(datetime.datetime.now())
# model_name = "PPO2_" + env_name + "_" + \
# str(step_total) + "_" + stamp + "_" + str(training_time)
# model_loc = os.path.join(models_dir, model_name)
# print(model_loc)
# model.save(model_loc)

# print("Training time:", training_time)
# print("model saved as: " + model_name)

# plot_results(log_dir, 'hand', plt_dir)

# del model # remove to demonstrate saving and loading
env = gym.make(env_name)

# Enjoy trained agent
@@ -206,5 +266,5 @@ def alter_leg(leg_length):
print("********************************************************************")
while watch_agent in ("y", "Y"):
subprocess.Popen(
'''export LD_PRELOAD=/usr/lib/x86_64-linux-gnu/libGLEW.so:/usr/lib/nvidia-410/libGL.so; python load_agent.py '%s' '%s' ''' % (env_name, model_name), shell=True)
'''export LD_PRELOAD=/usr/lib/x86_64-linux-gnu/libGLEW.so:/usr/lib/nvidia-410/libGL.so; python load_agent.py '%s' '%s' ''' % (env_name, 'hand'), shell=True)
watch_agent = input("Do you want to watch your sick gaits? (Y/n):")
