StastTest.py · 62 lines (59 loc) · 3.28 KB
#!/usr/bin/python3
# coding: utf-8
'''
@Time : 2021/6/10 13:19
@Author : Shulu Chen
@FileName: StastTest.py
@Software: PyCharm
'''
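
# Evaluation script: builds the "test_scenarios" multi-agent environment and
# restores a previously trained MADDPG policy checkpoint for testing.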
import argparse
import numpy as np
import tensorflow as tf
import time
import pickle
import matplotlib.pyplot as plt
import maddpg.common.tf_util as U
from maddpg.trainer.maddpg import MADDPGAgentTrainer
import tensorflow.contrib.layers as layers
from PaxBehavior import generate_pax, get_pax
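
# Command-line configuration for the run (mirrors the standard MADDPG train.py arguments).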
def parse_args():
parser = argparse.ArgumentParser("Reinforcement Learning experiments for multiagent environments")
# Environment
parser.add_argument("--scenario", type=str, default="simple_adversary", help="name of the scenario script")
parser.add_argument("--max-episode-len", type=int, default=25, help="maximum episode length")
parser.add_argument("--num-episodes", type=int, default=60000, help="number of episodes")
parser.add_argument("--num-adversaries", type=int, default=0, help="number of adversaries")
parser.add_argument("--good-policy", type=str, default="maddpg", help="policy for good agents")
parser.add_argument("--adv-policy", type=str, default="maddpg", help="policy of adversaries")
# Core training parameters
parser.add_argument("--lr", type=float, default=1e-2, help="learning rate for Adam optimizer")
parser.add_argument("--gamma", type=float, default=0.95, help="discount factor")
parser.add_argument("--batch-size", type=int, default=1024, help="number of episodes to optimize at the same time")
parser.add_argument("--num-units", type=int, default=64, help="number of units in the mlp")
# Checkpointing
parser.add_argument("--exp-name", type=str, default=None, help="name of the experiment")
parser.add_argument("--save-dir", type=str, default="/tmp/policy/", help="directory in which training state and model should be saved")
parser.add_argument("--save-rate", type=int, default=1000, help="save model once every time this many episodes are completed")
parser.add_argument("--load-dir", type=str, default="", help="directory in which training state and model are loaded")
# Evaluation
parser.add_argument("--restore", action="store_true", default=False)
parser.add_argument("--display", action="store_true", default=False)
parser.add_argument("--benchmark", action="store_true", default=False)
parser.add_argument("--benchmark-iters", type=int, default=100000, help="number of iterations run for benchmarking")
parser.add_argument("--benchmark-dir", type=str, default="./benchmark_files/", help="directory where benchmark data is saved")
parser.add_argument("--plots-dir", type=str, default="./learning_curves/", help="directory where plot data is saved")
return parser.parse_args()
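
# Environment construction helper, following the MADDPG train.py pattern:
# load the scenario script, build its world, and wrap it in MultiAgentEnv.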
def make_env(scenario_name):
from environment import MultiAgentEnv
# import multiagent.scenarios as scenarios
import scenarios
# load scenario from script
scenario = scenarios.load(scenario_name + ".py").Scenario()
# create world
world = scenario.make_world()
# create multiagent environment
    env = MultiAgentEnv(world, scenario.reset_world, scenario.reward, scenario.observation, done_callback=scenario.fulled)
return env
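
# Build the test environment and restore the trained policy weights.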
env = make_env("test_scenarios")
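# NOTE: U.load_state restores variables into the current default TensorFlow session,
# so a session (e.g. U.single_threaded_session()) and the agent networks are assumed
# to already exist before this call.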
U.load_state(r"D:\maddpg_model4\policy")  # raw string so backslashes are not treated as escape sequences
while True: