base update
mahi97 committed Oct 26, 2021
1 parent e9abdab commit db2e346
Showing 5 changed files with 329 additions and 45 deletions.
53 changes: 47 additions & 6 deletions pz_risk/core/board.py
@@ -22,6 +22,39 @@ def __init__(self, graph: nx.Graph, info, pos=None):
self.last_attack = (None, None)
self.state = GameState.StartTurn
self.info = info
self.n_grps = info['num_of_groups']
self.n_cards = self.g.number_of_nodes() + self.info['num_of_wild']

def valid_actions(self, player):
"""
:player
return:
bool: isDeterministic?
list: action list
"""
acts = []
if self.state == GameState.Reinforce:
acts = self.player_nodes(player)
elif self.state == GameState.Card:
acts = [0, 1] if len(self.players[player].cards) < 5 else [1]
elif self.state == GameState.Attack:
edges = self.player_attack_edges(player)
acts = [(1, (None, None))]
# assert action[0] <= 1, 'Attack Finished should be 0 or 1: {}'.format(action[0])
acts += [(0, e) for e in edges]
elif self.state == GameState.Move:
u = max(0, self.g.nodes[self.last_attack[1]]['units'] - 3)
acts = [i for i in range(u+1)]
elif self.state == GameState.Fortify:
cc = self.player_connected_components(player)
acts = [(1, None, None, None)]
for c in cc:
for a in c:
for b in c:
if a != b and self.g.nodes[a]['units'] > 1:
acts.append((0, a, b, self.g.nodes[a]['units'] - 1))

return self.state != GameState.Attack, acts
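
The new valid_actions method enumerates every legal action for the current GameState and also reports whether applying one is deterministic (only attacks resolve by dice). A minimal sketch, not part of this commit, of how a random agent could consume it; `board` is assumed to be a Board instance and `player` an int player id:

import random

def sample_valid_action(board, player):
    # valid_actions returns (deterministic, actions); deterministic is False
    # only in GameState.Attack, where the outcome depends on dice rolls.
    deterministic, actions = board.valid_actions(player)
    return random.choice(actions)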

def can_fortify(self, player):
cc = self.player_connected_components(player)
@@ -94,7 +127,7 @@ def player_units(self, player):
return sum([n[1]['units'] for n in self.g.nodes(data=True) if n[1]['player'] == player])

def player_group_reward(self, player):
- group = {gid + 1: True for gid in range(self.info['num_of_group'])}
+ group = {gid + 1: True for gid in range(self.n_grps)}
for n in self.g.nodes(data=True):
if n[1]['player'] != player:
group[n[1]['gid']] = False
@@ -128,8 +161,10 @@ def player_attack_edges(self, player):
ee.append((e[1], e[0]))
return ee

- def reset(self, n_agent, n_unit_per_agent, n_cell_per_agent):
+ def reset(self, n_agent):
n_cells = self.g.number_of_nodes()
+ n_cell_per_agent = n_cells // n_agent
+ n_unit_per_agent = self.info['num_of_unit']
assert n_cell_per_agent * n_agent == n_cells

remaining_cells = [i for i in self.g.nodes()]
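
The new reset signature derives both per-agent quantities instead of taking them as parameters, so the agent count must now divide the node count exactly. A quick illustration (hypothetical numbers, using the '6node' map registered at the bottom of this file):

board = BOARDS['6node']                              # 6 nodes
n_cell_per_agent = board.g.number_of_nodes() // 4    # == 1 for 4 agents
# 1 * 4 != 6, so board.reset(4) would fail the assert; 2, 3 or 6 agents work.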
@@ -178,10 +213,11 @@ def apply_best_match(self, player):
if cnt == 3:
break
else:
- match_type = CardType.Artillery if ct[CardType.Artillery] >= 3 - cnt\
- else CardType.Cavalry if ct[CardType.Cavalry] >= 3 - cnt\
+ match_type = CardType.Artillery if ct[CardType.Artillery] >= 3 - cnt \
+ else CardType.Cavalry if ct[CardType.Cavalry] >= 3 - cnt \
else CardType.Infantry
- used += [self.players[player].cards[match_type].pop(-1) for _ in range(3 - cnt) if len(self.players[player].cards[match_type])]
+ used += [self.players[player].cards[match_type].pop(-1) for _ in range(3 - cnt) if
+ len(self.players[player].cards[match_type])]
self.players[player].placement += CARD_FIX_SCORE[match_type]
for c in used:
c.owner = -1
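
The chained conditional above picks a three-of-a-kind fallback once no mixed set is available, preferring Artillery, then Cavalry, then Infantry. An illustrative stand-alone sketch of that rule using plain strings (the real code uses CardType and the players' card lists from core):

cnt = 1                                             # one distinct type already matched
ct = {'Artillery': 1, 'Cavalry': 2, 'Infantry': 0}  # remaining cards per type
match_type = ('Artillery' if ct['Artillery'] >= 3 - cnt
              else 'Cavalry' if ct['Cavalry'] >= 3 - cnt
              else 'Infantry')
print(match_type)  # 'Cavalry': Artillery lacks the 2 cards still needed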
@@ -245,7 +281,8 @@ def step(self, agent, actions, left=None):
self.g.nodes[actions[1]]['units'] -= int(actions[3])
self.g.nodes[actions[2]]['units'] += int(actions[3])

- self.next_state(agent, self.state, attack_succeed, attack_finished, len(self.player_nodes(agent)) == len(self.g.nodes()))
+ self.next_state(agent, self.state, attack_succeed, attack_finished,
+ len(self.player_nodes(agent)) == len(self.g.nodes()))
if self.state == GameState.StartTurn and self.players[agent].deserve_card:
self.give_card(agent)

@@ -261,5 +298,9 @@ def register_map(name, filepath):

BOARDS[name] = Board(g, m['info'])


print(os.getcwd())
register_map('world', './maps/world.json')
register_map('4node', './maps/4node.json')
register_map('6node', './maps/6node.json')
register_map('8node', './maps/8node.json')
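
Maps are registered at import time (hence the print(os.getcwd()) debug line: the paths resolve against the current working directory). A hypothetical extra registration; the JSON schema is not shown in this diff, but from Board.__init__ and Board.reset the 'info' block must provide at least 'num_of_groups', 'num_of_wild' and 'num_of_unit':

register_map('10node', './maps/10node.json')   # hypothetical map file
board = BOARDS['10node']
print(board.n_cards)                           # number_of_nodes() + info['num_of_wild']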
87 changes: 48 additions & 39 deletions pz_risk/risk_env.py
@@ -1,7 +1,7 @@
import math
import random

- from gym.spaces import Discrete, MultiDiscrete, Dict
+ from gym.spaces import Discrete, MultiDiscrete, Dict, MultiBinary, Box, Tuple
from pettingzoo import AECEnv
from pettingzoo.utils import agent_selector
from pettingzoo.utils import wrappers
@@ -16,6 +16,7 @@
from core.gamestate import GameState

from loguru import logger
from copy import deepcopy

from utils import *
from agents.sampling import SAMPLING
@@ -33,15 +34,15 @@
]


- def env():
+ def env(n_agent=6, board_name='world'):
"""
The env function wraps the environment in 3 wrappers by default. These
wrappers contain logic that is common to many pettingzoo environments.
We recommend you use at least the OrderEnforcingWrapper on your own environment
to provide sane error messages. You can find full documentation for these methods
elsewhere in the developer documentation.
"""
- env = RiskEnv()
+ env = RiskEnv(n_agent, board_name)
env = wrappers.CaptureStdoutWrapper(env)
env = risk_wrappers.AssertInvalidActionsWrapper(env)
env = wrappers.OrderEnforcingWrapper(env)
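
A sketch of typical construction with the new arguments, mirroring the __main__ block at the bottom of this file; OrderEnforcingWrapper is outermost, so API misuse is caught before actions reach AssertInvalidActionsWrapper:

e = env(n_agent=4, board_name='world')
e.reset()
# The raw RiskEnv stays reachable through the wrapper chain, as used
# at the bottom of this file for num_turns / num_moves.
print(type(e.unwrapped))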
@@ -58,7 +59,7 @@ class RiskEnv(AECEnv):
metadata = {'render.modes': ['human'], "name": "rps_v2"}

def __init__(self, n_agent=6, board_name='world'):
- '''
+ """
- n_agent: number of agents
- board_name: one of the registered boards ('world', '4node', '6node', '8node')
The init method takes in environment arguments and
Expand All @@ -68,27 +69,29 @@ def __init__(self, n_agent=6, board_name='world'):
- observation_spaces
These attributes should not be changed after initialization.
- '''
+ """
super().__init__()
self.board = BOARDS[board_name]
- n_nodes = self.board.g.number_of_nodes()
- n_edges = self.board.g.number_of_edges()
+ self.n_nodes = self.board.g.number_of_nodes()
+ self.n_edges = self.board.g.number_of_edges()
+ self.n_grps = self.board.n_grps
+ self.n_cards = self.board.n_cards
self.n_agents = n_agent
self.possible_agents = [r for r in range(n_agent)]
self.agent_name_mapping = dict(zip(self.possible_agents, list(range(len(self.possible_agents)))))

# Gym spaces are defined and documented here: https://gym.openai.com/docs/#spaces
- self.action_spaces = {agent: {GameState.Reinforce: Discrete(n_nodes),
- GameState.Attack: MultiDiscrete([2, n_edges]),  # +1 for Skip
- GameState.Fortify: MultiDiscrete([2, n_nodes, n_nodes, 100]),  # Last dim for Skip
- GameState.StartTurn: Discrete(1),
- GameState.EndTurn: Discrete(1),
- GameState.Card: Discrete(2),
- GameState.Move: Discrete(100)
- } for agent in self.possible_agents}
- self.observation_spaces = {agent: Discrete(MAX_UNIT) for agent in self.possible_agents}  # placement
- self.observation_spaces['board'] = Dict({})
- self.observation_spaces['cards'] = MultiDiscrete([MAX_CARD for _ in range(n_agent)])
- self.observation_spaces['my_cards'] = Discrete(2)
+ self.action_spaces = {GameState.Reinforce: Discrete(self.n_nodes),
+ GameState.Attack: MultiDiscrete([2, self.n_edges]),  # +1 for Skip
+ GameState.Fortify: MultiDiscrete([2, self.n_nodes, self.n_nodes, 100]),  # Last dim for Skip
+ # GameState.StartTurn: Discrete(1),
+ # GameState.EndTurn: Discrete(1),
+ GameState.Card: Discrete(2),
+ GameState.Move: Discrete(100)
+ }
+ # self.action_spaces = Box(0, 1000, shape=[self.n_nodes + self.n_edges + self.n_nodes + self.n_nodes + 100 + 1 + 1 + 1])
+ self.observation_spaces = None  # Core.Board()

self.agents = []
self.rewards = {}
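
After this change the action spaces are keyed by GameState and shared by all agents, rather than duplicated per agent. A small sampling sketch, assuming `envir` is a constructed RiskEnv:

from core.gamestate import GameState

space = envir.action_spaces[GameState.Attack]   # MultiDiscrete([2, n_edges])
a = space.sample()
# a[0] == 1 means "stop attacking"; a[1] indexes an edge, matching the
# (skip, edge) tuples produced by Board.valid_actions.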
@@ -136,10 +139,10 @@ def render_info(self, mode="human"):
plt.pause(0.001)

def render(self, mode="human"):
- '''
+ """
Renders the environment. In human mode, it can print to terminal, open
up a graphical window, or open up some other display that a human can see and understand.
- '''
+ """
plt.figure(0)
plt.clf()

@@ -169,18 +172,13 @@ def render(self, mode="human"):
print('Wait for it')

def observe(self, agent):
- '''
+ """
Observe should return the observation of the specified agent. This function
should return a sane observation (though not necessarily the most up to date possible)
at any time after reset() is called.
- '''
- # observation of one agent is the previous state of the other
+ """

- return {'board': self.board,
- 'my_card': self.board.players[agent].cards,
- 'placement': self.board.players[agent].placement,
- 'game_state': self.board.state,
- 'cards': [len(p.cards) for p in self.board.players]}
+ return self.board
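
observe now returns the Board itself rather than a per-agent dict, so everything the removed dict carried is still reachable through the board's own attributes. A sketch of the equivalents, assuming `obs` came from e.last() and `agent` is the current agent id:

game_state = obs.state                             # was obs['game_state']
my_cards = obs.players[agent].cards                # was obs['my_card']
placement = obs.players[agent].placement           # was obs['placement']
card_counts = [len(p.cards) for p in obs.players]  # was obs['cards']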

def close(self):
"""
@@ -208,8 +206,8 @@ def reset(self):
self.rewards = {agent: 0 for agent in self.agents}
self._cumulative_rewards = {agent: 0 for agent in self.agents}
self.dones = {agent: False for agent in self.agents}
- self.infos = {agent: {} for agent in self.agents}
- self.board.reset(len(self.agents), 20, 7)
+ self.infos = {agent: {'nodes': self.n_nodes, 'agents': self.n_agents} for agent in self.agents}
+ self.board.reset(len(self.agents))
self.num_turns = 0
self.num_moves = 1
'''
@@ -218,12 +216,20 @@ def reset(self):
self._agent_selector = agent_selector(self.agents)
self.agent_selection = self._agent_selector.next()

self.land_hist = {a: [] for a in self.possible_agents}
self.unit_hist = {a: [] for a in self.possible_agents}
self.place_hist = {a: [] for a in self.possible_agents}

def reward(self, agent):
return 0.0

def done(self, agent):
- return False
+ return len(self.board.player_nodes(agent)) == 0

# def get_action(self, action):
# # [self.n_nodes + self.n_edges + self.n_nodes + self.n_nodes + 100 + 1 + 1 + 1]
# if self.board.state == GameState.Reinforce:
# action
def step(self, action):
"""
step(action) takes in an action for the current agent (specified by
@@ -243,9 +249,11 @@ def step(self, action):

agent = self.agent_selection
state = self.board.state
- logger.info('Player: {}, State: {}, Actions: {}'.format(agent, state, action))
+ # logger.info('Player: {}, State: {}, Actions: {}'.format(agent, state, action))

self._cumulative_rewards[agent] = 0
# if len(action) == self.action_spaces.shape[0]:
# action = self.get_action(action)

self.board.step(agent, action)

@@ -274,15 +282,16 @@ def step(self, action):
if self.board.state == GameState.EndTurn:
self.dones = {agent: True for agent in self.agents}
# Adds .rewards to ._cumulative_rewards
- self._accumulate_rewards()
+ # self._accumulate_rewards()


if __name__ == '__main__':
- e = env()
+ e = env(2, 'world')
e.reset()
# e.render()
winner = -1
- for agent in e.agent_iter():
+ for i, agent in enumerate(e.agent_iter()):
+ print(i)
obs, rew, done, info = e.last()
if done:
continue
Expand All @@ -294,8 +303,8 @@ def step(self, action):
if all(e.dones.values()):
winner = agent
break
- # e.render()
- # e.render()
- # plt.show()
+ e.render()
+ e.render()
+ plt.show()
logger.info('Done in {} Turns and {} Moves. Winner is Player {}'
.format(e.unwrapped.num_turns, e.unwrapped.num_moves, winner))