base update
mahi97 committed Oct 26, 2021
1 parent e9abdab commit db2e346
Showing 5 changed files with 329 additions and 45 deletions.
53 changes: 47 additions & 6 deletions pz_risk/core/board.py
@@ -22,6 +22,39 @@ def __init__(self, graph: nx.Graph, info, pos=None):
self.last_attack = (None, None)
self.state = GameState.StartTurn
self.info = info
self.n_grps = info['num_of_groups']
self.n_cards = self.g.number_of_nodes() + self.info['num_of_wild']

def valid_actions(self, player):
"""
:player
return:
bool: isDeterministic?
list: action list
"""
acts = []
if self.state == GameState.Reinforce:
acts = self.player_nodes(player)
elif self.state == GameState.Card:
acts = [0, 1] if len(self.players[player].cards) < 5 else [1]
elif self.state == GameState.Attack:
edges = self.player_attack_edges(player)
acts = [(1, (None, None))]
# assert action[0] <= 1, 'Attack Finished should be 0 or 1: {}'.format(action[0])
acts += [(0, e) for e in edges]
elif self.state == GameState.Move:
u = max(0, self.g.nodes[self.last_attack[1]]['units'] - 3)
acts = [i for i in range(u+1)]
elif self.state == GameState.Fortify:
cc = self.player_connected_components(player)
acts = [(1, None, None, None)]
for c in cc:
for a in c:
for b in c:
if a != b and self.g.nodes[a]['units'] > 1:
acts.append((0, a, b, self.g.nodes[a]['units'] - 1))

return self.state != GameState.Attack, acts
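
The new valid_actions method enumerates every legal action for the current GameState and also reports whether applying one is deterministic (only attacks resolve by dice). A minimal sketch, not part of this commit, of how a random agent could consume it; `board` is assumed to be a Board instance and `player` an int player id:

import random

def sample_valid_action(board, player):
    # valid_actions returns (deterministic, actions); deterministic is False
    # only in GameState.Attack, where the outcome depends on dice rolls.
    deterministic, actions = board.valid_actions(player)
    return random.choice(actions)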

def can_fortify(self, player):
cc = self.player_connected_components(player)
@@ -94,7 +127,7 @@ def player_units(self, player):
return sum([n[1]['units'] for n in self.g.nodes(data=True) if n[1]['player'] == player])

def player_group_reward(self, player):
- group = {gid + 1: True for gid in range(self.info['num_of_group'])}
+ group = {gid + 1: True for gid in range(self.n_grps)}
for n in self.g.nodes(data=True):
if n[1]['player'] != player:
group[n[1]['gid']] = False
@@ -128,8 +161,10 @@ def player_attack_edges(self, player):
ee.append((e[1], e[0]))
return ee

- def reset(self, n_agent, n_unit_per_agent, n_cell_per_agent):
+ def reset(self, n_agent):
n_cells = self.g.number_of_nodes()
+ n_cell_per_agent = n_cells // n_agent
+ n_unit_per_agent = self.info['num_of_unit']
assert n_cell_per_agent * n_agent == n_cells

remaining_cells = [i for i in self.g.nodes()]
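
The new reset signature derives both per-agent quantities instead of taking them as parameters, so the agent count must now divide the node count exactly. A quick illustration (hypothetical numbers, using the '6node' map registered at the bottom of this file):

board = BOARDS['6node']                              # 6 nodes
n_cell_per_agent = board.g.number_of_nodes() // 4    # == 1 for 4 agents
# 1 * 4 != 6, so board.reset(4) would fail the assert; 2, 3 or 6 agents work.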
@@ -178,10 +213,11 @@ def apply_best_match(self, player):
if cnt == 3:
break
else:
- match_type = CardType.Artillery if ct[CardType.Artillery] >= 3 - cnt\
- else CardType.Cavalry if ct[CardType.Cavalry] >= 3 - cnt\
+ match_type = CardType.Artillery if ct[CardType.Artillery] >= 3 - cnt \
+ else CardType.Cavalry if ct[CardType.Cavalry] >= 3 - cnt \
else CardType.Infantry
- used += [self.players[player].cards[match_type].pop(-1) for _ in range(3 - cnt) if len(self.players[player].cards[match_type])]
+ used += [self.players[player].cards[match_type].pop(-1) for _ in range(3 - cnt) if
+ len(self.players[player].cards[match_type])]
self.players[player].placement += CARD_FIX_SCORE[match_type]
for c in used:
c.owner = -1
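
The chained conditional above picks a three-of-a-kind fallback once no mixed set is available, preferring Artillery, then Cavalry, then Infantry. An illustrative stand-alone sketch of that rule using plain strings (the real code uses CardType and the players' card lists from core):

cnt = 1                                             # one distinct type already matched
ct = {'Artillery': 1, 'Cavalry': 2, 'Infantry': 0}  # remaining cards per type
match_type = ('Artillery' if ct['Artillery'] >= 3 - cnt
              else 'Cavalry' if ct['Cavalry'] >= 3 - cnt
              else 'Infantry')
print(match_type)  # 'Cavalry': Artillery lacks the 2 cards still needed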
@@ -245,7 +281,8 @@ def step(self, agent, actions, left=None):
self.g.nodes[actions[1]]['units'] -= int(actions[3])
self.g.nodes[actions[2]]['units'] += int(actions[3])

- self.next_state(agent, self.state, attack_succeed, attack_finished, len(self.player_nodes(agent)) == len(self.g.nodes()))
+ self.next_state(agent, self.state, attack_succeed, attack_finished,
+ len(self.player_nodes(agent)) == len(self.g.nodes()))
if self.state == GameState.StartTurn and self.players[agent].deserve_card:
self.give_card(agent)

@@ -261,5 +298,9 @@ def register_map(name, filepath):

BOARDS[name] = Board(g, m['info'])


print(os.getcwd())
register_map('world', './maps/world.json')
register_map('4node', './maps/4node.json')
register_map('6node', './maps/6node.json')
register_map('8node', './maps/8node.json')
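
Maps are registered at import time (hence the print(os.getcwd()) debug line: the paths resolve against the current working directory). A hypothetical extra registration; the JSON schema is not shown in this diff, but from Board.__init__ and Board.reset the 'info' block must provide at least 'num_of_groups', 'num_of_wild' and 'num_of_unit':

register_map('10node', './maps/10node.json')   # hypothetical map file
board = BOARDS['10node']
print(board.n_cards)                           # number_of_nodes() + info['num_of_wild']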
87 changes: 48 additions & 39 deletions pz_risk/risk_env.py
@@ -1,7 +1,7 @@
import math
import random

- from gym.spaces import Discrete, MultiDiscrete, Dict
+ from gym.spaces import Discrete, MultiDiscrete, Dict, MultiBinary, Box, Tuple
from pettingzoo import AECEnv
from pettingzoo.utils import agent_selector
from pettingzoo.utils import wrappers
@@ -16,6 +16,7 @@
from core.gamestate import GameState

from loguru import logger
from copy import deepcopy

from utils import *
from agents.sampling import SAMPLING
@@ -33,15 +34,15 @@
]


- def env():
+ def env(n_agent=6, board_name='world'):
"""
The env function wraps the environment in 3 wrappers by default. These
wrappers contain logic that is common to many pettingzoo environments.
We recommend you use at least the OrderEnforcingWrapper on your own environment
to provide sane error messages. You can find full documentation for these methods
elsewhere in the developer documentation.
"""
- env = RiskEnv()
+ env = RiskEnv(n_agent, board_name)
env = wrappers.CaptureStdoutWrapper(env)
env = risk_wrappers.AssertInvalidActionsWrapper(env)
env = wrappers.OrderEnforcingWrapper(env)
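
A sketch of typical construction with the new arguments, mirroring the __main__ block at the bottom of this file; OrderEnforcingWrapper is outermost, so API misuse is caught before actions reach AssertInvalidActionsWrapper:

e = env(n_agent=4, board_name='world')
e.reset()
# The raw RiskEnv stays reachable through the wrapper chain, as used
# at the bottom of this file for num_turns / num_moves.
print(type(e.unwrapped))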
@@ -58,7 +59,7 @@ class RiskEnv(AECEnv):
metadata = {'render.modes': ['human'], "name": "rps_v2"}

def __init__(self, n_agent=6, board_name='world'):
- '''
+ """
- n_agent: number of agents
- board_name: one of the registered boards ('world', '4node', '6node', '8node')
The init method takes in environment arguments and
Expand All @@ -68,27 +69,29 @@ def __init__(self, n_agent=6, board_name='world'):
- observation_spaces
These attributes should not be changed after initialization.
- '''
+ """
super().__init__()
self.board = BOARDS[board_name]
- n_nodes = self.board.g.number_of_nodes()
- n_edges = self.board.g.number_of_edges()
+ self.n_nodes = self.board.g.number_of_nodes()
+ self.n_edges = self.board.g.number_of_edges()
+ self.n_grps = self.board.n_grps
+ self.n_cards = self.board.n_cards
self.n_agents = n_agent
self.possible_agents = [r for r in range(n_agent)]
self.agent_name_mapping = dict(zip(self.possible_agents, list(range(len(self.possible_agents)))))

# Gym spaces are defined and documented here: https://gym.openai.com/docs/#spaces
- self.action_spaces = {agent: {GameState.Reinforce: Discrete(n_nodes),
- GameState.Attack: MultiDiscrete([2, n_edges]),  # +1 for Skip
- GameState.Fortify: MultiDiscrete([2, n_nodes, n_nodes, 100]),  # Last dim for Skip
- GameState.StartTurn: Discrete(1),
- GameState.EndTurn: Discrete(1),
- GameState.Card: Discrete(2),
- GameState.Move: Discrete(100)
- } for agent in self.possible_agents}
- self.observation_spaces = {agent: Discrete(MAX_UNIT) for agent in self.possible_agents}  # placement
- self.observation_spaces['board'] = Dict({})
- self.observation_spaces['cards'] = MultiDiscrete([MAX_CARD for _ in range(n_agent)])
- self.observation_spaces['my_cards'] = Discrete(2)
+ self.action_spaces = {GameState.Reinforce: Discrete(self.n_nodes),
+ GameState.Attack: MultiDiscrete([2, self.n_edges]),  # +1 for Skip
+ GameState.Fortify: MultiDiscrete([2, self.n_nodes, self.n_nodes, 100]),  # Last dim for Skip
+ # GameState.StartTurn: Discrete(1),
+ # GameState.EndTurn: Discrete(1),
+ GameState.Card: Discrete(2),
+ GameState.Move: Discrete(100)
+ }
+ # self.action_spaces = Box(0, 1000, shape=[self.n_nodes + self.n_edges + self.n_nodes + self.n_nodes + 100 + 1 + 1 + 1])
+ self.observation_spaces = None  # Core.Board()

self.agents = []
self.rewards = {}
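
After this change the action spaces are keyed by GameState and shared by all agents, rather than duplicated per agent. A small sampling sketch, assuming `envir` is a constructed RiskEnv:

from core.gamestate import GameState

space = envir.action_spaces[GameState.Attack]   # MultiDiscrete([2, n_edges])
a = space.sample()
# a[0] == 1 means "stop attacking"; a[1] indexes an edge, matching the
# (skip, edge) tuples produced by Board.valid_actions.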
@@ -136,10 +139,10 @@ def render_info(self, mode="human"):
plt.pause(0.001)

def render(self, mode="human"):
- '''
+ """
Renders the environment. In human mode, it can print to terminal, open
up a graphical window, or open up some other display that a human can see and understand.
- '''
+ """
plt.figure(0)
plt.clf()

@@ -169,18 +172,13 @@ def render(self, mode="human"):
print('Wait for it')

def observe(self, agent):
- '''
+ """
Observe should return the observation of the specified agent. This function
should return a sane observation (though not necessarily the most up to date possible)
at any time after reset() is called.
- '''
- # observation of one agent is the previous state of the other
+ """

- return {'board': self.board,
- 'my_card': self.board.players[agent].cards,
- 'placement': self.board.players[agent].placement,
- 'game_state': self.board.state,
- 'cards': [len(p.cards) for p in self.board.players]}
+ return self.board
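
observe now returns the Board itself rather than a per-agent dict, so everything the removed dict carried is still reachable through the board's own attributes. A sketch of the equivalents, assuming `obs` came from e.last() and `agent` is the current agent id:

game_state = obs.state                             # was obs['game_state']
my_cards = obs.players[agent].cards                # was obs['my_card']
placement = obs.players[agent].placement           # was obs['placement']
card_counts = [len(p.cards) for p in obs.players]  # was obs['cards']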

def close(self):
"""
@@ -208,8 +206,8 @@ def reset(self):
self.rewards = {agent: 0 for agent in self.agents}
self._cumulative_rewards = {agent: 0 for agent in self.agents}
self.dones = {agent: False for agent in self.agents}
- self.infos = {agent: {} for agent in self.agents}
- self.board.reset(len(self.agents), 20, 7)
+ self.infos = {agent: {'nodes': self.n_nodes, 'agents': self.n_agents} for agent in self.agents}
+ self.board.reset(len(self.agents))
self.num_turns = 0
self.num_moves = 1
'''
@@ -218,12 +216,20 @@ def reset(self):
self._agent_selector = agent_selector(self.agents)
self.agent_selection = self._agent_selector.next()

self.land_hist = {a: [] for a in self.possible_agents}
self.unit_hist = {a: [] for a in self.possible_agents}
self.place_hist = {a: [] for a in self.possible_agents}

def reward(self, agent):
return 0.0

def done(self, agent):
- return False
+ return len(self.board.player_nodes(agent)) == 0

# def get_action(self, action):
# # [self.n_nodes + self.n_edges + self.n_nodes + self.n_nodes + 100 + 1 + 1 + 1]
# if self.board.state == GameState.Reinforce:
# action
def step(self, action):
"""
step(action) takes in an action for the current agent (specified by
@@ -243,9 +249,11 @@ def step(self, action):

agent = self.agent_selection
state = self.board.state
- logger.info('Player: {}, State: {}, Actions: {}'.format(agent, state, action))
+ # logger.info('Player: {}, State: {}, Actions: {}'.format(agent, state, action))

self._cumulative_rewards[agent] = 0
# if len(action) == self.action_spaces.shape[0]:
# action = self.get_action(action)

self.board.step(agent, action)

@@ -274,15 +282,16 @@ def step(self, action):
if self.board.state == GameState.EndTurn:
self.dones = {agent: True for agent in self.agents}
# Adds .rewards to ._cumulative_rewards
- self._accumulate_rewards()
+ # self._accumulate_rewards()


if __name__ == '__main__':
- e = env()
+ e = env(2, 'world')
e.reset()
# e.render()
winner = -1
- for agent in e.agent_iter():
+ for i, agent in enumerate(e.agent_iter()):
+ print(i)
obs, rew, done, info = e.last()
if done:
continue
Expand All @@ -294,8 +303,8 @@ def step(self, action):
if all(e.dones.values()):
winner = agent
break
- # e.render()
- # e.render()
- # plt.show()
+ e.render()
+ e.render()
+ plt.show()
logger.info('Done in {} Turns and {} Moves. Winner is Player {}'
.format(e.unwrapped.num_turns, e.unwrapped.num_moves, winner))