-
Notifications
You must be signed in to change notification settings - Fork 13
/
Copy pathmain_add_agents.py
89 lines (78 loc) · 2.45 KB
/
main_add_agents.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import sys
sys.path.insert(1,'env/')
from env import envs
from maddpg import MaDDPG
# --- Sizes handed to the learner and the environment ---------------------
state_dim = 5
action_dim = 1
max_edge = 1
num_agents = 3

# --- Training schedule ---------------------------------------------------
max_episode = 1000000  # exclusive upper bound on the episode index
add_time = 500000      # episode index at which extra agents are added
max_epoch = 1000       # cap on environment steps within one episode

# Not referenced by the rest of this script as far as it is visible here.
done_epoch = 0
catch_time = []

# --- Learner and environment ---------------------------------------------
# Construction order is kept: learner first, then environment, then reset.
maddpg = MaDDPG(num_agents, state_dim, action_dim)
Env = envs.Environ(num_agents, max_edge)
obs = current_state = Env.reset()
def _run_episode(agent, env, episode, max_steps):
    """Run one episode of at most ``max_steps`` environment steps.

    The environment is reset, then on every step the action returned by
    ``agent.noise_action`` is applied with ``env.step`` and the resulting
    transition is passed to ``agent.perceive``.

    When the environment reports ``done`` the step index and reward are
    printed, ``agent.summary(episode)`` is called and the episode ends.
    If the step budget runs out first, a "Time up" line is printed.

    Args:
        agent: learner exposing ``noise_action``, ``perceive`` and
            ``summary`` (the script passes its ``maddpg`` object).
        env: environment exposing ``reset`` and ``step``; ``step`` must
            return a ``(next_state, reward, done)`` triple.
        episode: index of this episode, forwarded to ``agent.summary``.
        max_steps: maximum number of steps before giving up.
    """
    state = env.reset()
    for step in range(max_steps):
        action = agent.noise_action(state)
        next_state, reward, done = env.step(action)
        agent.perceive(state, action, reward, next_state, done)
        state = next_state
        if done:
            print('Done!!!!!!!!!!!! at epoch{} , reward:{}'.format(step, reward))
            # A summary is only recorded for episodes that finish.
            agent.summary(episode)
            break
    else:
        # Reached only when the loop ended without ``break``.
        # NOTE(review): the original timeout check compared the step index
        # with max_epoch - 1, and its indentation was lost in this copy of
        # the file. It is treated here as "budget exhausted without done";
        # confirm that it was not meant to fire after a last-step done too.
        print('Time up >>>>>>>>>>>>>>')


def _train(agent, env, episodes, max_steps):
    """Run ``_run_episode`` once for every index in ``episodes``."""
    for episode in episodes:
        print('episode', episode)
        _run_episode(agent, env, episode, max_steps)


# Phase 1: train with the initial number of agents.
_train(maddpg, Env, range(add_time), max_epoch)

# Add three more agents; a single constant keeps the learner and the
# environment in agreement about how many were added.
extra_agents = 3
maddpg.add_agents(extra_agents)
num_agents += extra_agents
# Rebuild the environment for the new agent count.
current_state = Env.re_create_env(num_agents)

# Phase 2: continue training up to max_episode with the enlarged system.
_train(maddpg, Env, range(add_time, max_episode), max_epoch)