diff --git a/ChangeLog.md b/ChangeLog.md index 5d13cf4c..ef8a8785 100644 --- a/ChangeLog.md +++ b/ChangeLog.md @@ -1,6 +1,11 @@ # ChangeLog -### v2.0.7 +### v2.1.1 + +Removed max episode timesteps from the gym registration of the Lux AI env. Users are now expected to specify this themselves. + + +### v2.1.0 Added [advanced_specs](https://github.com/Lux-AI-Challenge/Lux-Design-S2/blob/main/docs/advanced_specs.md) document that goes over CPU engine code in depth diff --git a/README.md b/README.md index 771673e6..88efd876 100644 --- a/README.md +++ b/README.md @@ -42,12 +42,12 @@ Each supported programming language/solution type has its own starter kit, you c The kits folder in this repository holds all of the available starter kits you can use to start competing and building an AI agent. The readme shows you how to get started with your language of choice and run a match. We strongly recommend reading through the documentation for your language of choice in the links below - [Python](https://github.com/Lux-AI-Challenge/Lux-Design-S2/tree/main/kits/python/) +- [Reinforcement Learning (Python)](https://github.com/Lux-AI-Challenge/Lux-Design-S2/tree/main/kits/rl/) - [C++](https://github.com/Lux-AI-Challenge/Lux-Design-S2/tree/main/kits/cpp/) - [Javascript](https://github.com/Lux-AI-Challenge/Lux-Design-S2/tree/main/kits/js/) - [Java](https://github.com/Lux-AI-Challenge/Lux-Design-S2/tree/main/kits/java/) - [Go](https://github.com/rooklift/golux2/) - (A working bare-bones Go kit) - Typescript - TBA - Want to use another language but it's not supported? Feel free to suggest that language to our issues or even better, create a starter kit for the community to use and make a PR to this repository. See our [CONTRIBUTING.md](https://github.com/Lux-AI-Challenge/Lux-Design-S2/tree/main/CONTRIBUTING.md) document for more information on this. @@ -72,7 +72,7 @@ We are proud to announce our sponsors [QuantCo](https://quantco.com/), [Regressi ## Core Contributors -We like to extend thanks to some of our early core contributors: [@duanwilliam](https://github.com/duanwilliam) (Frontend), [@programjames](https://github.com/programjames) (Map generation, Engine optimization), and [@themmj](https://github.com/themmj) (C++ kit, Engine optimization). +We like to extend thanks to some of our early core contributors: [@duanwilliam](https://github.com/duanwilliam) (Frontend), [@programjames](https://github.com/programjames) (Map generation, Engine optimization), and [@themmj](https://github.com/themmj) (C++ kit, Go kit, Engine optimization). We further like to extend thanks to some of our core contributors during the beta period: [@LeFiz](https://github.com/LeFiz) (Game Design/Architecture), [@jmerle](https://github.com/jmerle) (Visualizer) diff --git a/environment.yml b/environment.yml index 226c59fb..ee6564ea 100644 --- a/environment.yml +++ b/environment.yml @@ -11,6 +11,7 @@ dependencies: - tqdm - yaml - gym=0.19 + - cv2 - pip: - pettingzoo diff --git a/examples/jax_env.py b/examples/jax_env.py deleted file mode 100644 index e69de29b..00000000 diff --git a/kits/README.md b/kits/README.md index 03d3f862..db1df193 100644 --- a/kits/README.md +++ b/kits/README.md @@ -1,6 +1,6 @@ # Lux AI Season 2 Kits -This folder contains all kits for the Lux AI Challenge Season 2. It covers the [Kit Structure](#kit-structure), [Forward Simulation](#forward-simulation), Envionment [Actions](#environment-actions) and [Observations](#environment-observations), as well as the general [Kit API](#kit-api). 
For those interested in the RL starter kits/baselines, we highly recommend reading those respective docs as they don't use the standard Kit API. +This folder contains all kits for the Lux AI Challenge Season 2. It covers the [Kit Structure](#kit-structure), [Forward Simulation](#forward-simulation), Envionment [Actions](#environment-actions) and [Observations](#environment-observations), as well as the general [Kit API](#kit-api). For those interested in the [RL starter kits/baselines](https://github.com/Lux-AI-Challenge/Lux-Design-S2/tree/main/kits/rl), we highly recommend reading those respective docs as they don't use the standard Kit API. In each starter kit folder we give you all the tools necessary to compete. Make sure to read the README document carefully. For debugging, you may log to standard error e.g. `console.error("hello")` or `print("hello", file=sys.stderr)`, and will be recorded by the competition servers. diff --git a/kits/rl-sb3-jax-env/README.md b/kits/rl-sb3-jax-env/README.md deleted file mode 100644 index 6d3f6659..00000000 --- a/kits/rl-sb3-jax-env/README.md +++ /dev/null @@ -1 +0,0 @@ -WIP \ No newline at end of file diff --git a/kits/rl-sb3-jax-env/agent.py b/kits/rl-sb3-jax-env/agent.py deleted file mode 100644 index 6cfdac10..00000000 --- a/kits/rl-sb3-jax-env/agent.py +++ /dev/null @@ -1,59 +0,0 @@ -from lux.kit import obs_to_game_state, GameState -from lux.config import EnvConfig -from lux.utils import direction_to, my_turn_to_place_factory -import numpy as np -import sys -from wrappers import SimpleSingleUnitDiscreteController -from wrappers import SingleUnitObservationWrapper -import torch as th -class Agent(): - def __init__(self, player: str, env_cfg: EnvConfig) -> None: - self.player = player - self.opp_player = "player_1" if self.player == "player_0" else "player_0" - np.random.seed(0) - self.env_cfg: EnvConfig = env_cfg - - # load our RL policy - th.load("") - - def bid_policy(self, step: int, obs, remainingOverageTime: int = 60): - return dict(faction="AlphaStrike", bid=0) - def factory_placement_policy(self, step: int, obs, remainingOverageTime: int = 60): - if obs["teams"][self.player]["metal"] == 0: - return dict() - potential_spawns = list(zip(*np.where(obs["board"]["valid_spawns_mask"] == 1))) - potential_spawns_set = set(potential_spawns) - done_search = False - # if player == "player_1": - ice_diff = np.diff(obs["board"]["ice"]) - pot_ice_spots = np.argwhere(ice_diff == 1) - if len(pot_ice_spots) == 0: - pot_ice_spots = potential_spawns - trials = 5 - while trials > 0: - pos_idx = np.random.randint(0, len(pot_ice_spots)) - pos = pot_ice_spots[pos_idx] - - area = 3 - for x in range(area): - for y in range(area): - check_pos = [pos[0] + x - area // 2, pos[1] + y - area // 2] - if tuple(check_pos) in potential_spawns_set: - done_search = True - pos = check_pos - break - if done_search: - break - if done_search: - break - trials -= 1 - spawn_loc = potential_spawns[np.random.randint(0, len(potential_spawns))] - if not done_search: - pos = spawn_loc - - metal = obs["teams"][self.player]["metal"] - return dict(spawn=pos, metal=metal, water=metal) - - def act(self, step: int, obs, remainingOverageTime: int = 60): - - return actions diff --git a/kits/rl-sb3-jax-env/lux/unit.py b/kits/rl-sb3-jax-env/lux/unit.py deleted file mode 100644 index 8bba5ee1..00000000 --- a/kits/rl-sb3-jax-env/lux/unit.py +++ /dev/null @@ -1,77 +0,0 @@ -import math -import sys -from typing import List -import numpy as np -from dataclasses import dataclass -from lux.cargo import 
UnitCargo -from lux.config import EnvConfig - -# a[1] = direction (0 = center, 1 = up, 2 = right, 3 = down, 4 = left) -move_deltas = np.array([[0, 0], [0, -1], [1, 0], [0, 1], [-1, 0]]) - -@dataclass -class Unit: - team_id: int - unit_id: str - unit_type: str # "LIGHT" or "HEAVY" - pos: np.ndarray - power: int - cargo: UnitCargo - env_cfg: EnvConfig - unit_cfg: dict - action_queue: List - - @property - def agent_id(self): - if self.team_id == 0: return "player_0" - return "player_1" - - def action_queue_cost(self, game_state): - cost = self.env_cfg.ROBOTS[self.unit_type].ACTION_QUEUE_POWER_COST - return cost - - def move_cost(self, game_state, direction): - board = game_state.board - target_pos = self.pos + move_deltas[direction] - if target_pos[0] < 0 or target_pos[1] < 0 or target_pos[1] >= len(board.rubble) or target_pos[0] >= len(board.rubble[0]): - # print("Warning, tried to get move cost for going off the map", file=sys.stderr) - return None - factory_there = board.factory_occupancy_map[target_pos[0], target_pos[1]] - if factory_there not in game_state.teams[self.agent_id].factory_strains and factory_there != -1: - # print("Warning, tried to get move cost for going onto a opposition factory", file=sys.stderr) - return None - rubble_at_target = board.rubble[target_pos[0]][target_pos[1]] - - return math.floor(self.unit_cfg.MOVE_COST + self.unit_cfg.RUBBLE_MOVEMENT_COST * rubble_at_target) - def move(self, direction, repeat=0, n=1): - if isinstance(direction, int): - direction = direction - else: - pass - return np.array([0, direction, 0, 0, repeat, n]) - - def transfer(self, transfer_direction, transfer_resource, transfer_amount, repeat=0, n=1): - assert transfer_resource < 5 and transfer_resource >= 0 - assert transfer_direction < 5 and transfer_direction >= 0 - return np.array([1, transfer_direction, transfer_resource, transfer_amount, repeat, n]) - - def pickup(self, pickup_resource, pickup_amount, repeat=0, n=1): - assert pickup_resource < 5 and pickup_resource >= 0 - return np.array([2, 0, pickup_resource, pickup_amount, repeat, n]) - - def dig_cost(self, game_state): - return self.unit_cfg.DIG_COST - def dig(self, repeat=0, n=1): - return np.array([3, 0, 0, 0, repeat, n]) - - def self_destruct_cost(self, game_state): - return self.unit_cfg.SELF_DESTRUCT_COST - def self_destruct(self, repeat=0, n=1): - return np.array([4, 0, 0, 0, repeat, n]) - - def recharge(self, x, repeat=0, n=1): - return np.array([5, 0, 0, x, repeat, n]) - - def __str__(self) -> str: - out = f"[{self.team_id}] {self.unit_id} {self.unit_type} at {self.pos}" - return out \ No newline at end of file diff --git a/kits/rl-sb3-jax-env/wrappers/__init__.py b/kits/rl-sb3-jax-env/wrappers/__init__.py deleted file mode 100644 index 9b072569..00000000 --- a/kits/rl-sb3-jax-env/wrappers/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -from .controllers import SimpleDiscreteController, SimpleSingleUnitDiscreteController -from .obs_wrappers import ImageObservationWrapper, SingleUnitObservationWrapper -from .sb3 import SB3Wrapper diff --git a/kits/rl-sb3-jax-env/wrappers/controllers.py b/kits/rl-sb3-jax-env/wrappers/controllers.py deleted file mode 100644 index 87932b64..00000000 --- a/kits/rl-sb3-jax-env/wrappers/controllers.py +++ /dev/null @@ -1,244 +0,0 @@ -from typing import Dict - -import numpy as np -import numpy.typing as npt -from gym import spaces - -from luxai_s2.actions import format_action_vec -from luxai_s2.config import EnvConfig -from luxai_s2.state import ObservationStateDict - - -class Controller: - def 
__init__(self, action_space: spaces.Space) -> None: - self.action_space = action_space - - def action_to_lux_action( - self, agent: str, obs: Dict[str, ObservationStateDict], action: npt.NDArray - ): - """ - Takes as input the current "raw observation" and the parameterized action and returns - an action formatted for the Lux env - """ - raise NotImplementedError() - - -class SimpleSingleUnitDiscreteController(Controller): - def __init__(self, env_cfg: EnvConfig) -> None: - """ - A simple controller that controls only the heavy unit that will get spawned. This assumes for whichever environment wrapper you use - you have defined a policy to generate the first factory action - - For the heavy unit - - 4 cardinal direction movement (4 dims) - - a move center no-op action (1 dim) - - transfer action just for transferring ice in 4 cardinal directions or center (5) - - pickup action for each resource (5 dims) - - dig action (1 dim) - - It does not include - - self destruct action - - recharge action - - planning (via actions executing multiple times or repeating actions) - - factory actions - - transferring power or resources other than ice - """ - self.env_cfg = env_cfg - self.move_act_dims = 5 - self.transfer_act_dims = 1 # 5 * 5 - self.pickup_act_dims = 5 - self.dig_act_dims = 1 - - self.move_dim_high = self.move_act_dims - self.transfer_dim_high = self.move_dim_high + self.transfer_act_dims - self.pickup_dim_high = self.transfer_dim_high + self.pickup_act_dims - self.dig_dim_high = self.pickup_dim_high + self.dig_act_dims - - total_act_dims = self.dig_dim_high - # action_space = spaces.Box(0, 1, shape=(total_act_dims,)) - action_space = spaces.Discrete(total_act_dims) - super().__init__(action_space) - - def _is_move_action(self, id): - return id < self.move_dim_high - - def _get_move_action(self, id): - return np.array([0, id, 0, 0, 0, 1]) - - def _is_transfer_action(self, id): - return id < self.transfer_dim_high - - def _get_transfer_action(self, id): - id = id - self.move_dim_high - transfer_dir = id % 5 - # resource_type = id // 5 - return np.array( - [1, 0, 0, self.env_cfg.max_transfer_amount, 0, 1] - ) - - def _is_pickup_action(self, id): - return id < self.pickup_dim_high - - def _get_pickup_action(self, id): - id = id - self.transfer_dim_high - return np.array([2, 0, id % 5, self.env_cfg.max_transfer_amount, 0, 1]) - - def _is_dig_action(self, id): - return id < self.dig_dim_high - - def _get_dig_action(self, id): - return np.array([3, 0, 0, 0, 0, 1]) - - def action_to_lux_action( - self, agent: str, obs: Dict[str, ObservationStateDict], action: npt.NDArray - ): - shared_obs = obs["player_0"] - lux_action = dict() - factories = shared_obs["factories"][agent] - units = shared_obs["units"][agent] - for unit_id in units.keys(): - unit = units[unit_id] - pos = unit["pos"] - unit_related_action = action - choice = action #unit_related_action.argmax() - action_queue = [] - if self._is_move_action(choice): - action_queue = [self._get_move_action(choice)] - elif self._is_transfer_action(choice): - action_queue = [self._get_transfer_action(choice)] - elif self._is_pickup_action(choice): - action_queue = [self._get_pickup_action(choice)] - - elif self._is_dig_action(choice): - action_queue = [self._get_dig_action(choice)] - lux_action[unit_id] = action_queue - # only control the first unit! 
- break - return lux_action - - -class SimpleDiscreteController(Controller): - def __init__(self, env_cfg: EnvConfig) -> None: - """ - A simple controller that uses a discrete action parameterization for Lux AI S2. It includes - - For units - - 4 cardinal direction movement (4 dims) - - a move center no-op action (1 dim) - - transfer action for each combination of the (4 cardinal directions plus center) x (resource type or power) (5*5 = 25 dims) - - pickup action for each resource (5 dims) - - dig action (1 dim) - - For factories - - all actions (build light, heavy, or water) (3 dims) - - - It does not include - - self destruct action - - recharge action - - planning (via actions executing multiple times or repeating actions) - - Sampling from this controller will always result in a valid action, albeit sometimes disastrous - """ - self.env_cfg = env_cfg - self.move_act_dims = 5 - self.transfer_act_dims = 5 * 5 - self.pickup_act_dims = 5 - self.dig_act_dims = 1 - # self.self_destruct_act_dims = 1 - # self.recharge_act_dims = 1 - self.factory_act_dims = 3 # 0 = light, 1 = heavy, 2 = water - - self.move_dim_high = self.move_act_dims - self.transfer_dim_high = self.move_dim_high + self.transfer_act_dims - self.pickup_dim_high = self.transfer_dim_high + self.pickup_act_dims - self.dig_dim_high = self.pickup_dim_high + self.dig_act_dims - - self.factory_dim_high = 3 # self.dig_dim_high + self.factory_act_dims - - total_act_dims = self.factory_dim_high - # action_space = spaces.Discrete(total_act_dims) - action_space = spaces.Box( - 0, 1, shape=(env_cfg.map_size, env_cfg.map_size, total_act_dims) - ) - - super().__init__(action_space) - - # note that all the _is_x_action are meant to be called in a if, elseif... cascade/waterfall - # to understand how _get_x_action works to map the parameterization back to the original action space see luxai_s2/actions.py - def _is_move_action(self, id): - return id < self.move_dim_high - - def _get_move_action(self, id): - return np.array([0, id, 0, 0, 0, 1]) - - def _is_transfer_action(self, id): - return id < self.transfer_dim_high - - def _get_transfer_action(self, id): - id = id - self.move_dim_high - transfer_dir = id % 5 - resource_type = id // 5 - return np.array( - [1, transfer_dir, resource_type, self.env_cfg.max_transfer_amount, 0, 1] - ) - - def _is_pickup_action(self, id): - return id < self.pickup_dim_high - - def _get_pickup_action(self, id): - id = id - self.transfer_dim_high - return np.array([2, 0, id % 5, self.env_cfg.max_transfer_amount, 0, 1]) - - def _is_dig_action(self, id): - return id < self.dig_dim_high - - def _get_dig_action(self, id): - return np.array([3, 0, 0, 0, 0, 1]) - - # def _is_self_destruct_action(self, id): - # return id < self.move_act_dims + self.transfer_act_dims + self.self_destruct_dims - # def _get_self_destruct_action(self, id): - # return [2, 0, 0, 0, 0, 1] - - def action_to_lux_action( - self, agent: str, obs: Dict[str, ObservationStateDict], action: npt.NDArray - ): - """ - Generate an action compatible with LuxAI_S2 engine for a single player - """ - shared_obs = obs["player_0"] - lux_action = dict() - factories = shared_obs["factories"][agent] - units = shared_obs["units"][agent] - for unit_id in units.keys(): - unit = units[unit_id] - pos = unit["pos"] - action_here = action[pos[0], pos[1]] - unit_related_action = action_here[ - : -self.factory_act_dims - ] # assuming factory action is always the final few dimensions - choice = unit_related_action.argmax() - action_queue = [] - # if 
self._is_move_action(choice): - # action_queue = [self._get_move_action(choice)] - # elif self._is_transfer_action(choice): - # action_queue = [self._get_transfer_action(choice)] - # elif self._is_pickup_action(choice): - # action_queue = [self._get_pickup_action(choice)] - # elif self._is_dig_action(choice): - # action_queue = [self._get_dig_action(choice)] - - lux_action[unit_id] = action_queue - - for unit_id in factories.keys(): - factory = factories[unit_id] - pos = factory["pos"] - - action_here = action[pos[0], pos[1]] - factory_related_action = action_here[ - -self.factory_act_dims : - ] # assuming factory action is always the final few dimensions - choice = factory_related_action.argmax() - lux_action[unit_id] = choice - return lux_action diff --git a/kits/rl-sb3-jax-env/wrappers/lux.py b/kits/rl-sb3-jax-env/wrappers/lux.py deleted file mode 100644 index 7822d033..00000000 --- a/kits/rl-sb3-jax-env/wrappers/lux.py +++ /dev/null @@ -1,27 +0,0 @@ -""" -Wrappers that allow users to insert heuristics into the environment reset and step functions -""" -from typing import Dict - -import gym -import numpy as np -import numpy.typing as npt -from gym import spaces - -import luxai_s2.env -from luxai_s2.env import LuxAI_S2 -from luxai_s2.state import ObservationStateDict -from luxai_s2.utils import my_turn_to_place_factory -from luxai_s2.wrappers.controllers import ( - Controller, - SimpleDiscreteController, - SimpleSingleUnitDiscreteController, -) - - -class FactoryControlWrapper(gym.Wrapper): - def __init__(self, env: gym.Env) -> None: - super().__init__(env) - - def step(self, action): - return super().step(action) diff --git a/kits/rl-sb3-jax-env/wrappers/obs_wrappers.py b/kits/rl-sb3-jax-env/wrappers/obs_wrappers.py deleted file mode 100644 index b1260058..00000000 --- a/kits/rl-sb3-jax-env/wrappers/obs_wrappers.py +++ /dev/null @@ -1,193 +0,0 @@ -from typing import Callable, Dict - -import gym -import numpy as np -import numpy.typing as npt -from gym import spaces - -import luxai_s2.env -from luxai_s2.env import LuxAI_S2 -from luxai_s2.state import ObservationStateDict -from luxai_s2.unit import ActionType, BidActionType, FactoryPlacementActionType -from luxai_s2.utils import my_turn_to_place_factory -from luxai_s2.wrappers.controllers import ( - Controller, - SimpleDiscreteController, - SimpleSingleUnitDiscreteController, -) - -class SingleUnitObservationWrapper(gym.ObservationWrapper): - """ - A state based observation to work with in pair with the SimpleSingleUnitDiscreteController - - It contains info only on the first heavy unit, the first factory you own, and some useful features. If there are no owned heavy units the observation is just zero. 
- No information about the opponent is included - - - Included features: - - First heavy unit's stats - - Position of closest ice tile - - First factory - - """ - - def __init__(self, env: gym.Env) -> None: - super().__init__(env) - self.observation_space = spaces.Box(-999, 999, shape=(13,)) - - def observation( - self, obs: Dict[str, ObservationStateDict] - ) -> Dict[str, npt.NDArray]: - observation = dict() - shared_obs = obs["player_0"] - ice_map = shared_obs["board"]["ice"] - ice_tile_locations = np.argwhere(ice_map == 1) - for agent in obs.keys(): - factories = shared_obs["factories"][agent] - factory_vec = np.zeros(2) - for k in factories.keys(): - factory = factories[k] - factory_vec = np.array(factory["pos"]) / self.env.state.env_cfg.map_size - break - units = shared_obs["units"][agent] - for k in units.keys(): - unit = units[k] - cargo_space = self.env.state.env_cfg.ROBOTS[ - unit["unit_type"] - ].CARGO_SPACE - battery_cap = self.env.state.env_cfg.ROBOTS[ - unit["unit_type"] - ].BATTERY_CAPACITY - cargo_vec = np.array( - [ - unit["power"] / battery_cap, - unit["cargo"]["ice"] / cargo_space, - unit["cargo"]["ore"] / cargo_space, - unit["cargo"]["water"] / cargo_space, - unit["cargo"]["metal"] / cargo_space, - ] - ) - unit_type = ( - 0 if unit["unit_type"] == "LIGHT" else 1 - ) # note that build actions use 0 to encode Light - unit_vec = np.concatenate( - [unit["pos"], [unit_type], cargo_vec, [unit["team_id"]]], axis=-1 - ) - unit_vec[:2] /= self.env.state.env_cfg.map_size - - pos = np.array(unit["pos"]) - # engineered features - - # compute closest ice tile - ice_tile_distances = np.mean((ice_tile_locations - pos) ** 2, 1) - closest_ice_tile = ( - ice_tile_locations[np.argmin(ice_tile_distances)] - / self.env.state.env_cfg.map_size - ) - obs_vec = np.concatenate( - [unit_vec, factory_vec, closest_ice_tile], axis=-1 - ) - - observation[agent] = obs_vec - break - if agent not in observation: - observation[agent] = np.zeros(13) - return observation - - -class ImageObservationWrapper(gym.ObservationWrapper): - def __init__(self, env: gym.Env) -> None: - super().__init__(env) - obs_dims = 23 # see _convert_obs function for how this is computed - self.map_size = self.env.env_cfg.map_size - self.observation_space = spaces.Box( - -999, 999, shape=(self.map_size, self.map_size, obs_dims) - ) - - def observation( - self, obs: Dict[str, ObservationStateDict] - ) -> Dict[str, npt.NDArray]: - shared_obs = obs["player_0"] - unit_mask = np.zeros((self.map_size, self.map_size, 1)) - unit_data = np.zeros( - (self.map_size, self.map_size, 9) - ) # power(1) + cargo(4) + unit_type(1) + unit_pos(2) + team(1) - factory_mask = np.zeros_like(unit_mask) - factory_data = np.zeros( - (self.map_size, self.map_size, 8) - ) # power(1) + cargo(4) + factory_pos(2) + team(1) - for agent in obs.keys(): - factories = shared_obs["factories"][agent] - units = shared_obs["units"][agent] - - for unit_id in units.keys(): - unit = units[unit_id] - # we encode everything but unit_id or action queue - cargo_space = self.env.state.env_cfg.ROBOTS[ - unit["unit_type"] - ].CARGO_SPACE - battery_cap = self.env.state.env_cfg.ROBOTS[ - unit["unit_type"] - ].BATTERY_CAPACITY - cargo_vec = np.array( - [ - unit["power"] / battery_cap, - unit["cargo"]["ice"] / cargo_space, - unit["cargo"]["ore"] / cargo_space, - unit["cargo"]["water"] / cargo_space, - unit["cargo"]["metal"] / cargo_space, - ] - ) - unit_type = ( - 0 if unit["unit_type"] == "LIGHT" else 1 - ) # note that build actions use 0 to encode Light - unit_vec = np.concatenate( 
- [unit["pos"], [unit_type], cargo_vec, [unit["team_id"]]], axis=-1 - ) - unit_vec[:2] /= self.env.state.env_cfg.map_size - - # note that all data is stored as map[x, y] format - unit_data[unit["pos"][0], unit["pos"][1]] = unit_vec - unit_mask[unit["pos"][0], unit["pos"][1]] = 1 - - for unit_id in factories.keys(): - factory = factories[unit_id] - # we encode everything but strain_id or unit_id - cargo_vec = np.array( - [ - factory["power"], - factory["cargo"]["ice"], - factory["cargo"]["ore"], - factory["cargo"]["water"], - factory["cargo"]["metal"], - ] - ) - cargo_vec = cargo_vec * 1 / 1000 - - factory_vec = np.concatenate( - [factory["pos"], cargo_vec, [factory["team_id"]]], axis=-1 - ) - factory_vec[:2] /= self.env.state.env_cfg.map_size - factory_data[factory["pos"][0], factory["pos"][1]] = factory_vec - factory_mask[factory["pos"][0], factory["pos"][1]] = 1 - - image_features = np.concatenate( - [ - np.expand_dims(shared_obs["board"]["lichen"], -1) - / self.env.state.env_cfg.MAX_LICHEN_PER_TILE, - np.expand_dims(shared_obs["board"]["rubble"], -1) - / self.env.state.env_cfg.MAX_RUBBLE, - np.expand_dims(shared_obs["board"]["ice"], -1), - np.expand_dims(shared_obs["board"]["ore"], -1), - unit_mask, - unit_data, - factory_mask, - factory_data, - ], - axis=-1, - ) - - new_obs = dict() - for agent in self.all_agents: - new_obs[agent] = image_features - return new_obs diff --git a/kits/rl-sb3-jax-env/wrappers/sb3.py b/kits/rl-sb3-jax-env/wrappers/sb3.py deleted file mode 100644 index 7e3126f5..00000000 --- a/kits/rl-sb3-jax-env/wrappers/sb3.py +++ /dev/null @@ -1,142 +0,0 @@ -from typing import Callable, Dict - -import gym -import numpy as np -import numpy.typing as npt -from gym import spaces - -import luxai_s2.env -from luxai_s2.env import LuxAI_S2 -from luxai_s2.state import ObservationStateDict -from luxai_s2.unit import ActionType, BidActionType, FactoryPlacementActionType -from luxai_s2.utils import my_turn_to_place_factory -from luxai_s2.wrappers.controllers import ( - Controller, - SimpleDiscreteController, - SimpleSingleUnitDiscreteController, -) - - -class SB3Wrapper(gym.Wrapper): - def __init__( - self, - env: LuxAI_S2, - bid_policy: Callable[ - [str, ObservationStateDict], Dict[str, BidActionType] - ] = None, - factory_placement_policy: Callable[ - [str, ObservationStateDict], Dict[str, FactoryPlacementActionType] - ] = None, - heuristic_policy: Callable[ - [str, ObservationStateDict], Dict[str, ActionType] - ] = None, - controller: Controller = None, - ) -> None: - """ - A environment wrapper for Stable Baselines 3. It reduces the LuxAI_S2 env - into a single phase game and places the first two phases (bidding and factory placement) into the env.reset function so that - interacting agents directly start generating actions to play the third phase of the game. - - It's highly recommended to use one of the observation wrappers as well - - Parameters - ---------- - bid_policy: Function - A function accepting player: str and obs: ObservationStateDict as input that returns a bid action - such as dict(bid=10, faction="AlphaStrike"). By default will bid 0 - factory_placement_policy: Function - A function accepting player: str and obs: ObservationStateDict as input that returns a factory placement action - such as dict(spawn=np.array([2, 4]), metal=150, water=150). 
By default will spawn in a random valid location with metal=150, water=150 - controller : Controller - A controller that parameterizes the action space into something more usable and converts parameterized actions to lux actions. - See luxai_s2/wrappers/controllers.py for available controllers and how to make your own - - heuristic_policy: Function - A function accepting player: str and obs: ObservationStateDict as input and returns a lux action. This can be provided by the user - to define custom logic or a model to generate actions for any of the units or factories. For any action generate for a unit or factory, it will - override the original action for that unit or factory when the step function is called. By defalt this is None and not used - """ - gym.Wrapper.__init__(self, env) - self.env = env - if controller is None: - controller = SimpleDiscreteController(self.env.state.env_cfg) - self.controller = controller - - self.action_space = controller.action_space - - obs_dims = 23 # see _convert_obs function for how this is computed - self.map_size = self.env.env_cfg.map_size - self.observation_space = spaces.Box( - -999, 999, shape=(self.map_size, self.map_size, obs_dims) - ) - - # The simplified wrapper removes the first two phases of the game by using predefined policies (trained or heuristic) - # to handle those two phases during each reset - if factory_placement_policy is None: - - def factory_placement_policy(player, obs: ObservationStateDict): - potential_spawns = np.array( - list(zip(*np.where(obs["board"]["valid_spawns_mask"] == 1))) - ) - spawn_loc = potential_spawns[ - np.random.randint(0, len(potential_spawns)) - ] - return dict(spawn=spawn_loc, metal=150, water=150) - - self.factory_placement_policy = factory_placement_policy - if bid_policy is None: - - def bid_policy(player, obs: ObservationStateDict): - faction = "AlphaStrike" - if player == "player_1": - faction = "MotherMars" - return dict(bid=0, faction=faction) - - self.bid_policy = bid_policy - - self.heuristic_policy = heuristic_policy - - self.prev_obs = None - # list of all agents regardless of status - self.all_agents = [] - - def step(self, action: Dict[str, npt.NDArray]): - lux_action = dict() - for agent in self.all_agents: - if agent in action: - lux_action[agent] = self.controller.action_to_lux_action( - agent=agent, obs=self.prev_obs, action=action[agent] - ) - else: - lux_action[agent] = dict() - if self.heuristic_policy is not None: - heuristic_lux_action = self.heuristic_policy( - agent, self.prev_obs[agent] - ) - # override keys - for k in heuristic_lux_action: - lux_action[agent][k] = heuristic_lux_action[k] - obs, reward, done, info = self.env.step(lux_action) - self.prev_obs = obs - return obs, reward, done, info - - def reset(self, **kwargs): - obs = self.env.reset(**kwargs) - self.all_agents = self.env.agents - action = dict() - for agent in self.all_agents: - action[agent] = self.bid_policy(agent, obs[agent]) - obs, _, _, _ = self.env.step(action) - while self.env.state.real_env_steps < 0: - action = dict() - for agent in self.all_agents: - if my_turn_to_place_factory( - obs["player_0"]["teams"][agent]["place_first"], - self.env.state.env_steps, - ): - action[agent] = self.factory_placement_policy(agent, obs[agent]) - else: - action[agent] = dict() - obs, _, _, _ = self.env.step(action) - self.prev_obs = obs - return obs diff --git a/kits/rl-sb3/README.md b/kits/rl-sb3/README.md deleted file mode 100644 index 6d3f6659..00000000 --- a/kits/rl-sb3/README.md +++ /dev/null @@ -1 +0,0 @@ -WIP \ No 
newline at end of file diff --git a/kits/rl-sb3/agent.py b/kits/rl-sb3/agent.py deleted file mode 100644 index 6cfdac10..00000000 --- a/kits/rl-sb3/agent.py +++ /dev/null @@ -1,59 +0,0 @@ -from lux.kit import obs_to_game_state, GameState -from lux.config import EnvConfig -from lux.utils import direction_to, my_turn_to_place_factory -import numpy as np -import sys -from wrappers import SimpleSingleUnitDiscreteController -from wrappers import SingleUnitObservationWrapper -import torch as th -class Agent(): - def __init__(self, player: str, env_cfg: EnvConfig) -> None: - self.player = player - self.opp_player = "player_1" if self.player == "player_0" else "player_0" - np.random.seed(0) - self.env_cfg: EnvConfig = env_cfg - - # load our RL policy - th.load("") - - def bid_policy(self, step: int, obs, remainingOverageTime: int = 60): - return dict(faction="AlphaStrike", bid=0) - def factory_placement_policy(self, step: int, obs, remainingOverageTime: int = 60): - if obs["teams"][self.player]["metal"] == 0: - return dict() - potential_spawns = list(zip(*np.where(obs["board"]["valid_spawns_mask"] == 1))) - potential_spawns_set = set(potential_spawns) - done_search = False - # if player == "player_1": - ice_diff = np.diff(obs["board"]["ice"]) - pot_ice_spots = np.argwhere(ice_diff == 1) - if len(pot_ice_spots) == 0: - pot_ice_spots = potential_spawns - trials = 5 - while trials > 0: - pos_idx = np.random.randint(0, len(pot_ice_spots)) - pos = pot_ice_spots[pos_idx] - - area = 3 - for x in range(area): - for y in range(area): - check_pos = [pos[0] + x - area // 2, pos[1] + y - area // 2] - if tuple(check_pos) in potential_spawns_set: - done_search = True - pos = check_pos - break - if done_search: - break - if done_search: - break - trials -= 1 - spawn_loc = potential_spawns[np.random.randint(0, len(potential_spawns))] - if not done_search: - pos = spawn_loc - - metal = obs["teams"][self.player]["metal"] - return dict(spawn=pos, metal=metal, water=metal) - - def act(self, step: int, obs, remainingOverageTime: int = 60): - - return actions diff --git a/kits/rl-sb3/lux/cargo.py b/kits/rl-sb3/lux/cargo.py deleted file mode 100644 index ad43536a..00000000 --- a/kits/rl-sb3/lux/cargo.py +++ /dev/null @@ -1,8 +0,0 @@ -from dataclasses import dataclass - -@dataclass -class UnitCargo: - ice: int = 0 - ore: int = 0 - water: int = 0 - metal: int = 0 diff --git a/kits/rl-sb3/lux/config.py b/kits/rl-sb3/lux/config.py deleted file mode 100644 index 1ea9ef1b..00000000 --- a/kits/rl-sb3/lux/config.py +++ /dev/null @@ -1,136 +0,0 @@ -import dataclasses -from argparse import Namespace -from dataclasses import dataclass -from typing import Dict, List - - -def convert_dict_to_ns(x): - if isinstance(x, dict): - for k in x: - x[k] = convert_dict_to_ns(x) - return Namespace(x) - - -@dataclass -class UnitConfig: - METAL_COST: int = 100 - POWER_COST: int = 500 - CARGO_SPACE: int = 1000 - BATTERY_CAPACITY: int = 1500 - CHARGE: int = 1 - INIT_POWER: int = 50 - MOVE_COST: int = 1 - RUBBLE_MOVEMENT_COST: float = 1 - DIG_COST: int = 5 - DIG_RUBBLE_REMOVED: int = 1 - DIG_RESOURCE_GAIN: int = 2 - DIG_LICHEN_REMOVED: int = 10 - SELF_DESTRUCT_COST: int = 10 - RUBBLE_AFTER_DESTRUCTION: int = 1 - ACTION_QUEUE_POWER_COST: int = 1 - - -@dataclass -class EnvConfig: - ## various options that can be configured if needed - - ### Variable parameters that don't affect game logic much ### - max_episode_length: int = 1000 - map_size: int = 48 - verbose: int = 1 - - # this can be disabled to improve env FPS but assume your actions are well 
formatted - # During online competition this is set to True - validate_action_space: bool = True - - ### Constants ### - # you can only ever transfer in/out 1000 as this is the max cargo space. - max_transfer_amount: int = 10000 - MIN_FACTORIES: int = 2 - MAX_FACTORIES: int = 5 - CYCLE_LENGTH: int = 50 - DAY_LENGTH: int = 30 - UNIT_ACTION_QUEUE_SIZE: int = 20 # when set to 1, then no action queue is used - - MAX_RUBBLE: int = 100 - FACTORY_RUBBLE_AFTER_DESTRUCTION: int = 50 - INIT_WATER_METAL_PER_FACTORY: int = ( - 150 # amount of water and metal units given to each factory - ) - INIT_POWER_PER_FACTORY: int = 1000 - - #### LICHEN #### - MIN_LICHEN_TO_SPREAD: int = 20 - LICHEN_LOST_WITHOUT_WATER: int = 1 - LICHEN_GAINED_WITH_WATER: int = 1 - MAX_LICHEN_PER_TILE: int = 100 - POWER_PER_CONNECTED_LICHEN_TILE: int = 1 - - # cost of watering with a factory is `ceil(# of connected lichen tiles) / (this factor) + 1` - LICHEN_WATERING_COST_FACTOR: int = 10 - - #### Bidding System #### - BIDDING_SYSTEM: bool = True - - #### Factories #### - FACTORY_PROCESSING_RATE_WATER: int = 100 - ICE_WATER_RATIO: int = 4 - FACTORY_PROCESSING_RATE_METAL: int = 50 - ORE_METAL_RATIO: int = 5 - # game design note: Factories close to resource cluster = more resources are refined per turn - # Then the high ice:water and ore:metal ratios encourages transfer of refined resources between - # factories dedicated to mining particular clusters which is more possible as it is more compact - - FACTORY_CHARGE: int = 50 - FACTORY_WATER_CONSUMPTION: int = 1 - # game design note: with a positve water consumption, game becomes quite hard for new competitors. - # so we set it to 0 - - #### Collision Mechanics #### - POWER_LOSS_FACTOR: float = 0.5 - - #### Units #### - ROBOTS: Dict[str, UnitConfig] = dataclasses.field( - default_factory=lambda: dict( - LIGHT=UnitConfig( - METAL_COST=10, - POWER_COST=50, - INIT_POWER=50, - CARGO_SPACE=100, - BATTERY_CAPACITY=150, - CHARGE=1, - MOVE_COST=1, - RUBBLE_MOVEMENT_COST=0.05, - DIG_COST=5, - SELF_DESTRUCT_COST=5, - DIG_RUBBLE_REMOVED=2, - DIG_RESOURCE_GAIN=2, - DIG_LICHEN_REMOVED=10, - RUBBLE_AFTER_DESTRUCTION=1, - ACTION_QUEUE_POWER_COST=1, - ), - HEAVY=UnitConfig( - METAL_COST=100, - POWER_COST=500, - INIT_POWER=500, - CARGO_SPACE=1000, - BATTERY_CAPACITY=3000, - CHARGE=10, - MOVE_COST=20, - RUBBLE_MOVEMENT_COST=1, - DIG_COST=60, - SELF_DESTRUCT_COST=100, - DIG_RUBBLE_REMOVED=20, - DIG_RESOURCE_GAIN=20, - DIG_LICHEN_REMOVED=100, - RUBBLE_AFTER_DESTRUCTION=10, - ACTION_QUEUE_POWER_COST=10, - ), - ) - ) - - @classmethod - def from_dict(cls, data): - data["ROBOTS"]["LIGHT"] = UnitConfig(**data["ROBOTS"]["LIGHT"]) - data["ROBOTS"]["HEAVY"] = UnitConfig(**data["ROBOTS"]["HEAVY"]) - return cls(**data) diff --git a/kits/rl-sb3/lux/factory.py b/kits/rl-sb3/lux/factory.py deleted file mode 100644 index 13e87e82..00000000 --- a/kits/rl-sb3/lux/factory.py +++ /dev/null @@ -1,55 +0,0 @@ -import math -from sys import stderr -import numpy as np -from dataclasses import dataclass -from lux.cargo import UnitCargo -from lux.config import EnvConfig -@dataclass -class Factory: - team_id: int - unit_id: str - strain_id: int - power: int - cargo: UnitCargo - pos: np.ndarray - # lichen tiles connected to this factory - # lichen_tiles: np.ndarray - env_cfg: EnvConfig - - def build_heavy_metal_cost(self, game_state): - unit_cfg = self.env_cfg.ROBOTS["HEAVY"] - return unit_cfg.METAL_COST - def build_heavy_power_cost(self, game_state): - unit_cfg = self.env_cfg.ROBOTS["HEAVY"] - return unit_cfg.POWER_COST - def 
can_build_heavy(self, game_state): - return self.power >= self.build_heavy_power_cost(game_state) and self.cargo.metal >= self.build_heavy_metal_cost(game_state) - def build_heavy(self): - return 1 - - def build_light_metal_cost(self, game_state): - unit_cfg = self.env_cfg.ROBOTS["LIGHT"] - return unit_cfg.METAL_COST - def build_light_power_cost(self, game_state): - unit_cfg = self.env_cfg.ROBOTS["LIGHT"] - return unit_cfg.POWER_COST - def can_build_light(self, game_state): - return self.power >= self.build_light_power_cost(game_state) and self.cargo.metal >= self.build_light_metal_cost(game_state) - - def build_light(self): - return 0 - - def water_cost(self, game_state): - """ - Water required to perform water action - """ - owned_lichen_tiles = (game_state.board.lichen_strains == self.strain_id).sum() - return np.ceil(owned_lichen_tiles / self.env_cfg.LICHEN_WATERING_COST_FACTOR) - def can_water(self, game_state): - return self.cargo.water >= self.water_cost(game_state) - def water(self): - return 2 - - @property - def pos_slice(self): - return slice(self.pos[0] - 1, self.pos[0] + 2), slice(self.pos[1] - 1, self.pos[1] + 2) diff --git a/kits/rl-sb3/lux/kit.py b/kits/rl-sb3/lux/kit.py deleted file mode 100644 index 8de70cd7..00000000 --- a/kits/rl-sb3/lux/kit.py +++ /dev/null @@ -1,150 +0,0 @@ -from dataclasses import dataclass, field -from typing import Dict -import numpy as np -from lux.cargo import UnitCargo -from lux.config import EnvConfig -from lux.team import Team, FactionTypes -from lux.unit import Unit -from lux.factory import Factory -def process_action(action): - return to_json(action) -def to_json(obj): - if isinstance(obj, np.ndarray): - return obj.tolist() - elif isinstance(obj, np.integer): - return int(obj) - elif isinstance(obj, np.floating): - return float(obj) - elif isinstance(obj, list) or isinstance(obj, tuple): - return [to_json(s) for s in obj] - elif isinstance(obj, dict): - out = {} - for k in obj: - out[k] = to_json(obj[k]) - return out - else: - return obj -def from_json(state): - if isinstance(state, list): - return np.array(state) - elif isinstance(state, dict): - out = {} - for k in state: - out[k] = from_json(state[k]) - return out - else: - return state - -def process_obs(player, game_state, step, obs): - if step == 0: - # at step 0 we get the entire map information - game_state = from_json(obs) - else: - # use delta changes to board to update game state - obs = from_json(obs) - for k in obs: - if k != 'board': - game_state[k] = obs[k] - else: - if "valid_spawns_mask" in obs[k]: - game_state["board"]["valid_spawns_mask"] = obs[k]["valid_spawns_mask"] - for item in ["rubble", "lichen", "lichen_strains"]: - for k, v in obs["board"][item].items(): - k = k.split(",") - x, y = int(k[0]), int(k[1]) - game_state["board"][item][x, y] = v - return game_state - -def obs_to_game_state(step, env_cfg: EnvConfig, obs): - - units = dict() - for agent in obs["units"]: - units[agent] = dict() - for unit_id in obs["units"][agent]: - unit_data = obs["units"][agent][unit_id] - cargo = UnitCargo(**unit_data["cargo"]) - unit = Unit( - **unit_data, - unit_cfg=env_cfg.ROBOTS[unit_data["unit_type"]], - env_cfg=env_cfg - ) - unit.cargo = cargo - units[agent][unit_id] = unit - - - factory_occupancy_map = np.ones_like(obs["board"]["rubble"], dtype=int) * -1 - factories = dict() - for agent in obs["factories"]: - factories[agent] = dict() - for unit_id in obs["factories"][agent]: - f_data = obs["factories"][agent][unit_id] - cargo = UnitCargo(**f_data["cargo"]) - factory = Factory( - 
**f_data, - env_cfg=env_cfg - ) - factory.cargo = cargo - factories[agent][unit_id] = factory - factory_occupancy_map[factory.pos_slice] = factory.strain_id - teams = dict() - for agent in obs["teams"]: - team_data = obs["teams"][agent] - faction = FactionTypes[team_data["faction"]] - teams[agent] = Team(**team_data, agent=agent) - - return GameState( - env_cfg=env_cfg, - env_steps=step, - board=Board( - rubble=obs["board"]["rubble"], - ice=obs["board"]["ice"], - ore=obs["board"]["ore"], - lichen=obs["board"]["lichen"], - lichen_strains=obs["board"]["lichen_strains"], - factory_occupancy_map=factory_occupancy_map, - factories_per_team=obs["board"]["factories_per_team"], - valid_spawns_mask=obs["board"]["valid_spawns_mask"] - ), - units=units, - factories=factories, - teams=teams - - ) - -@dataclass -class Board: - rubble: np.ndarray - ice: np.ndarray - ore: np.ndarray - lichen: np.ndarray - lichen_strains: np.ndarray - factory_occupancy_map: np.ndarray - factories_per_team: int - valid_spawns_mask: np.ndarray -@dataclass -class GameState: - """ - A GameState object at step env_steps. Copied from luxai_s2/state/state.py - """ - env_steps: int - env_cfg: dict - board: Board - units: Dict[str, Dict[str, Unit]] = field(default_factory=dict) - factories: Dict[str, Dict[str, Factory]] = field(default_factory=dict) - teams: Dict[str, Team] = field(default_factory=dict) - @property - def real_env_steps(self): - """ - the actual env step in the environment, which subtracts the time spent bidding and placing factories - """ - if self.env_cfg.BIDDING_SYSTEM: - # + 1 for extra factory placement and + 1 for bidding step - return self.env_steps - (self.board.factories_per_team * 2 + 1) - else: - return self.env_steps - - - # various utility functions - def is_day(self): - return self.real_env_steps % self.env_cfg.CYCLE_LENGTH < self.env_cfg.DAY_LENGTH - diff --git a/kits/rl-sb3/lux/team.py b/kits/rl-sb3/lux/team.py deleted file mode 100644 index b33fa6af..00000000 --- a/kits/rl-sb3/lux/team.py +++ /dev/null @@ -1,52 +0,0 @@ -from dataclasses import dataclass -from enum import Enum -from lux.config import EnvConfig -TERM_COLORS = False -try: - from termcolor import colored - TERM_COLORS=True -except: - pass -@dataclass -class FactionInfo: - color: str = "none" - alt_color: str = "red" - faction_id: int = -1 - -class FactionTypes(Enum): - Null = FactionInfo(color="gray", faction_id=0) - AlphaStrike = FactionInfo(color="yellow", faction_id=1) - MotherMars = FactionInfo(color="green", faction_id=2) - TheBuilders = FactionInfo(color="blue", faction_id=3) - FirstMars = FactionInfo(color="red", faction_id=4) - -class Team: - def __init__(self, team_id: int, agent: str, faction: FactionTypes = None, water=0, metal=0, factories_to_place=0, factory_strains=[], place_first=False, bid=0) -> None: - self.faction = faction - self.team_id = team_id - # the key used to differentiate ownership of things in state - self.agent = agent - - self.water = water - self.metal = metal - self.factories_to_place = factories_to_place - self.factory_strains = factory_strains - # whether this team gets to place factories down first or not. The bid winner has this set to True. - # If tied, player_0's team has this True - self.place_first = place_first - def state_dict(self): - return dict( - team_id=self.team_id, - faction=self.faction.name, - # note for optimization, water,metal, factories_to_place doesn't change after the early game. 
- water=self.init_water, - metal=self.init_metal, - factories_to_place=self.factories_to_place, - factory_strains=self.factory_strains, - place_first=self.place_first, - ) - def __str__(self) -> str: - out = f"[Player {self.team_id}]" - if TERM_COLORS: - return colored(out, self.faction.value.color) - return out \ No newline at end of file diff --git a/kits/rl-sb3/lux/utils.py b/kits/rl-sb3/lux/utils.py deleted file mode 100644 index 043fc2f8..00000000 --- a/kits/rl-sb3/lux/utils.py +++ /dev/null @@ -1,26 +0,0 @@ -def my_turn_to_place_factory(place_first: bool, step: int): - if place_first: - if step % 2 == 1: - return True - else: - if step % 2 == 0: - return True - return False - -# direction (0 = center, 1 = up, 2 = right, 3 = down, 4 = left) -def direction_to(src, target): - ds = target - src - dx = ds[0] - dy = ds[1] - if dx == 0 and dy == 0: - return 0 - if abs(dx) > abs(dy): - if dx > 0: - return 2 - else: - return 4 - else: - if dy > 0: - return 3 - else: - return 1 \ No newline at end of file diff --git a/kits/rl-sb3/main.py b/kits/rl-sb3/main.py deleted file mode 100644 index 67b0944e..00000000 --- a/kits/rl-sb3/main.py +++ /dev/null @@ -1,62 +0,0 @@ -import json -from typing import Dict -import sys -from argparse import Namespace - -from agent import Agent -from lux.config import EnvConfig -from lux.kit import GameState, process_obs, to_json, from_json, process_action, obs_to_game_state -### DO NOT REMOVE THE FOLLOWING CODE ### -agent_dict = dict() # store potentially multiple dictionaries as kaggle imports code directly -agent_prev_obs = dict() -def agent_fn(observation, configurations): - """ - agent definition for kaggle submission. - """ - global agent_dict - step = observation.step - - - player = observation.player - remainingOverageTime = observation.remainingOverageTime - if step == 0: - env_cfg = EnvConfig.from_dict(configurations["env_cfg"]) - agent_dict[player] = Agent(player, env_cfg) - agent_prev_obs[player] = dict() - agent = agent_dict[player] - agent = agent_dict[player] - obs = process_obs(player, agent_prev_obs[player], step, json.loads(observation.obs)) - agent_prev_obs[player] = obs - agent.step = step - if obs["real_env_steps"] < 0: - actions = agent.early_setup(step, obs, remainingOverageTime) - else: - actions = agent.act(step, obs, remainingOverageTime) - - return process_action(actions) - -if __name__ == "__main__": - - def read_input(): - """ - Reads input from stdin - """ - try: - return input() - except EOFError as eof: - raise SystemExit(eof) - step = 0 - player_id = 0 - configurations = None - i = 0 - while True: - inputs = read_input() - obs = json.loads(inputs) - - observation = Namespace(**dict(step=obs["step"], obs=json.dumps(obs["obs"]), remainingOverageTime=obs["remainingOverageTime"], player=obs["player"], info=obs["info"])) - if i == 0: - configurations = obs["info"]["env_cfg"] - i += 1 - actions = agent_fn(observation, dict(env_cfg=configurations)) - # send actions to engine - print(json.dumps(actions)) \ No newline at end of file diff --git a/kits/rl-sb3/nn.py b/kits/rl-sb3/nn.py deleted file mode 100644 index e69de29b..00000000 diff --git a/kits/rl-sb3/wrappers/__init__.py b/kits/rl-sb3/wrappers/__init__.py deleted file mode 100644 index 9b072569..00000000 --- a/kits/rl-sb3/wrappers/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -from .controllers import SimpleDiscreteController, SimpleSingleUnitDiscreteController -from .obs_wrappers import ImageObservationWrapper, SingleUnitObservationWrapper -from .sb3 import SB3Wrapper diff --git 
a/kits/rl-sb3/wrappers/controllers.py b/kits/rl-sb3/wrappers/controllers.py deleted file mode 100644 index 87932b64..00000000 --- a/kits/rl-sb3/wrappers/controllers.py +++ /dev/null @@ -1,244 +0,0 @@ -from typing import Dict - -import numpy as np -import numpy.typing as npt -from gym import spaces - -from luxai_s2.actions import format_action_vec -from luxai_s2.config import EnvConfig -from luxai_s2.state import ObservationStateDict - - -class Controller: - def __init__(self, action_space: spaces.Space) -> None: - self.action_space = action_space - - def action_to_lux_action( - self, agent: str, obs: Dict[str, ObservationStateDict], action: npt.NDArray - ): - """ - Takes as input the current "raw observation" and the parameterized action and returns - an action formatted for the Lux env - """ - raise NotImplementedError() - - -class SimpleSingleUnitDiscreteController(Controller): - def __init__(self, env_cfg: EnvConfig) -> None: - """ - A simple controller that controls only the heavy unit that will get spawned. This assumes for whichever environment wrapper you use - you have defined a policy to generate the first factory action - - For the heavy unit - - 4 cardinal direction movement (4 dims) - - a move center no-op action (1 dim) - - transfer action just for transferring ice in 4 cardinal directions or center (5) - - pickup action for each resource (5 dims) - - dig action (1 dim) - - It does not include - - self destruct action - - recharge action - - planning (via actions executing multiple times or repeating actions) - - factory actions - - transferring power or resources other than ice - """ - self.env_cfg = env_cfg - self.move_act_dims = 5 - self.transfer_act_dims = 1 # 5 * 5 - self.pickup_act_dims = 5 - self.dig_act_dims = 1 - - self.move_dim_high = self.move_act_dims - self.transfer_dim_high = self.move_dim_high + self.transfer_act_dims - self.pickup_dim_high = self.transfer_dim_high + self.pickup_act_dims - self.dig_dim_high = self.pickup_dim_high + self.dig_act_dims - - total_act_dims = self.dig_dim_high - # action_space = spaces.Box(0, 1, shape=(total_act_dims,)) - action_space = spaces.Discrete(total_act_dims) - super().__init__(action_space) - - def _is_move_action(self, id): - return id < self.move_dim_high - - def _get_move_action(self, id): - return np.array([0, id, 0, 0, 0, 1]) - - def _is_transfer_action(self, id): - return id < self.transfer_dim_high - - def _get_transfer_action(self, id): - id = id - self.move_dim_high - transfer_dir = id % 5 - # resource_type = id // 5 - return np.array( - [1, 0, 0, self.env_cfg.max_transfer_amount, 0, 1] - ) - - def _is_pickup_action(self, id): - return id < self.pickup_dim_high - - def _get_pickup_action(self, id): - id = id - self.transfer_dim_high - return np.array([2, 0, id % 5, self.env_cfg.max_transfer_amount, 0, 1]) - - def _is_dig_action(self, id): - return id < self.dig_dim_high - - def _get_dig_action(self, id): - return np.array([3, 0, 0, 0, 0, 1]) - - def action_to_lux_action( - self, agent: str, obs: Dict[str, ObservationStateDict], action: npt.NDArray - ): - shared_obs = obs["player_0"] - lux_action = dict() - factories = shared_obs["factories"][agent] - units = shared_obs["units"][agent] - for unit_id in units.keys(): - unit = units[unit_id] - pos = unit["pos"] - unit_related_action = action - choice = action #unit_related_action.argmax() - action_queue = [] - if self._is_move_action(choice): - action_queue = [self._get_move_action(choice)] - elif self._is_transfer_action(choice): - action_queue = 
[self._get_transfer_action(choice)] - elif self._is_pickup_action(choice): - action_queue = [self._get_pickup_action(choice)] - - elif self._is_dig_action(choice): - action_queue = [self._get_dig_action(choice)] - lux_action[unit_id] = action_queue - # only control the first unit! - break - return lux_action - - -class SimpleDiscreteController(Controller): - def __init__(self, env_cfg: EnvConfig) -> None: - """ - A simple controller that uses a discrete action parameterization for Lux AI S2. It includes - - For units - - 4 cardinal direction movement (4 dims) - - a move center no-op action (1 dim) - - transfer action for each combination of the (4 cardinal directions plus center) x (resource type or power) (5*5 = 25 dims) - - pickup action for each resource (5 dims) - - dig action (1 dim) - - For factories - - all actions (build light, heavy, or water) (3 dims) - - - It does not include - - self destruct action - - recharge action - - planning (via actions executing multiple times or repeating actions) - - Sampling from this controller will always result in a valid action, albeit sometimes disastrous - """ - self.env_cfg = env_cfg - self.move_act_dims = 5 - self.transfer_act_dims = 5 * 5 - self.pickup_act_dims = 5 - self.dig_act_dims = 1 - # self.self_destruct_act_dims = 1 - # self.recharge_act_dims = 1 - self.factory_act_dims = 3 # 0 = light, 1 = heavy, 2 = water - - self.move_dim_high = self.move_act_dims - self.transfer_dim_high = self.move_dim_high + self.transfer_act_dims - self.pickup_dim_high = self.transfer_dim_high + self.pickup_act_dims - self.dig_dim_high = self.pickup_dim_high + self.dig_act_dims - - self.factory_dim_high = 3 # self.dig_dim_high + self.factory_act_dims - - total_act_dims = self.factory_dim_high - # action_space = spaces.Discrete(total_act_dims) - action_space = spaces.Box( - 0, 1, shape=(env_cfg.map_size, env_cfg.map_size, total_act_dims) - ) - - super().__init__(action_space) - - # note that all the _is_x_action are meant to be called in a if, elseif... 
cascade/waterfall - # to understand how _get_x_action works to map the parameterization back to the original action space see luxai_s2/actions.py - def _is_move_action(self, id): - return id < self.move_dim_high - - def _get_move_action(self, id): - return np.array([0, id, 0, 0, 0, 1]) - - def _is_transfer_action(self, id): - return id < self.transfer_dim_high - - def _get_transfer_action(self, id): - id = id - self.move_dim_high - transfer_dir = id % 5 - resource_type = id // 5 - return np.array( - [1, transfer_dir, resource_type, self.env_cfg.max_transfer_amount, 0, 1] - ) - - def _is_pickup_action(self, id): - return id < self.pickup_dim_high - - def _get_pickup_action(self, id): - id = id - self.transfer_dim_high - return np.array([2, 0, id % 5, self.env_cfg.max_transfer_amount, 0, 1]) - - def _is_dig_action(self, id): - return id < self.dig_dim_high - - def _get_dig_action(self, id): - return np.array([3, 0, 0, 0, 0, 1]) - - # def _is_self_destruct_action(self, id): - # return id < self.move_act_dims + self.transfer_act_dims + self.self_destruct_dims - # def _get_self_destruct_action(self, id): - # return [2, 0, 0, 0, 0, 1] - - def action_to_lux_action( - self, agent: str, obs: Dict[str, ObservationStateDict], action: npt.NDArray - ): - """ - Generate an action compatible with LuxAI_S2 engine for a single player - """ - shared_obs = obs["player_0"] - lux_action = dict() - factories = shared_obs["factories"][agent] - units = shared_obs["units"][agent] - for unit_id in units.keys(): - unit = units[unit_id] - pos = unit["pos"] - action_here = action[pos[0], pos[1]] - unit_related_action = action_here[ - : -self.factory_act_dims - ] # assuming factory action is always the final few dimensions - choice = unit_related_action.argmax() - action_queue = [] - # if self._is_move_action(choice): - # action_queue = [self._get_move_action(choice)] - # elif self._is_transfer_action(choice): - # action_queue = [self._get_transfer_action(choice)] - # elif self._is_pickup_action(choice): - # action_queue = [self._get_pickup_action(choice)] - # elif self._is_dig_action(choice): - # action_queue = [self._get_dig_action(choice)] - - lux_action[unit_id] = action_queue - - for unit_id in factories.keys(): - factory = factories[unit_id] - pos = factory["pos"] - - action_here = action[pos[0], pos[1]] - factory_related_action = action_here[ - -self.factory_act_dims : - ] # assuming factory action is always the final few dimensions - choice = factory_related_action.argmax() - lux_action[unit_id] = choice - return lux_action diff --git a/kits/rl-sb3/wrappers/lux.py b/kits/rl-sb3/wrappers/lux.py deleted file mode 100644 index 7822d033..00000000 --- a/kits/rl-sb3/wrappers/lux.py +++ /dev/null @@ -1,27 +0,0 @@ -""" -Wrappers that allow users to insert heuristics into the environment reset and step functions -""" -from typing import Dict - -import gym -import numpy as np -import numpy.typing as npt -from gym import spaces - -import luxai_s2.env -from luxai_s2.env import LuxAI_S2 -from luxai_s2.state import ObservationStateDict -from luxai_s2.utils import my_turn_to_place_factory -from luxai_s2.wrappers.controllers import ( - Controller, - SimpleDiscreteController, - SimpleSingleUnitDiscreteController, -) - - -class FactoryControlWrapper(gym.Wrapper): - def __init__(self, env: gym.Env) -> None: - super().__init__(env) - - def step(self, action): - return super().step(action) diff --git a/kits/rl-sb3/wrappers/obs_wrappers.py b/kits/rl-sb3/wrappers/obs_wrappers.py deleted file mode 100644 index b1260058..00000000 --- 
a/kits/rl-sb3/wrappers/obs_wrappers.py +++ /dev/null @@ -1,193 +0,0 @@ -from typing import Callable, Dict - -import gym -import numpy as np -import numpy.typing as npt -from gym import spaces - -import luxai_s2.env -from luxai_s2.env import LuxAI_S2 -from luxai_s2.state import ObservationStateDict -from luxai_s2.unit import ActionType, BidActionType, FactoryPlacementActionType -from luxai_s2.utils import my_turn_to_place_factory -from luxai_s2.wrappers.controllers import ( - Controller, - SimpleDiscreteController, - SimpleSingleUnitDiscreteController, -) - -class SingleUnitObservationWrapper(gym.ObservationWrapper): - """ - A state based observation to work with in pair with the SimpleSingleUnitDiscreteController - - It contains info only on the first heavy unit, the first factory you own, and some useful features. If there are no owned heavy units the observation is just zero. - No information about the opponent is included - - - Included features: - - First heavy unit's stats - - Position of closest ice tile - - First factory - - """ - - def __init__(self, env: gym.Env) -> None: - super().__init__(env) - self.observation_space = spaces.Box(-999, 999, shape=(13,)) - - def observation( - self, obs: Dict[str, ObservationStateDict] - ) -> Dict[str, npt.NDArray]: - observation = dict() - shared_obs = obs["player_0"] - ice_map = shared_obs["board"]["ice"] - ice_tile_locations = np.argwhere(ice_map == 1) - for agent in obs.keys(): - factories = shared_obs["factories"][agent] - factory_vec = np.zeros(2) - for k in factories.keys(): - factory = factories[k] - factory_vec = np.array(factory["pos"]) / self.env.state.env_cfg.map_size - break - units = shared_obs["units"][agent] - for k in units.keys(): - unit = units[k] - cargo_space = self.env.state.env_cfg.ROBOTS[ - unit["unit_type"] - ].CARGO_SPACE - battery_cap = self.env.state.env_cfg.ROBOTS[ - unit["unit_type"] - ].BATTERY_CAPACITY - cargo_vec = np.array( - [ - unit["power"] / battery_cap, - unit["cargo"]["ice"] / cargo_space, - unit["cargo"]["ore"] / cargo_space, - unit["cargo"]["water"] / cargo_space, - unit["cargo"]["metal"] / cargo_space, - ] - ) - unit_type = ( - 0 if unit["unit_type"] == "LIGHT" else 1 - ) # note that build actions use 0 to encode Light - unit_vec = np.concatenate( - [unit["pos"], [unit_type], cargo_vec, [unit["team_id"]]], axis=-1 - ) - unit_vec[:2] /= self.env.state.env_cfg.map_size - - pos = np.array(unit["pos"]) - # engineered features - - # compute closest ice tile - ice_tile_distances = np.mean((ice_tile_locations - pos) ** 2, 1) - closest_ice_tile = ( - ice_tile_locations[np.argmin(ice_tile_distances)] - / self.env.state.env_cfg.map_size - ) - obs_vec = np.concatenate( - [unit_vec, factory_vec, closest_ice_tile], axis=-1 - ) - - observation[agent] = obs_vec - break - if agent not in observation: - observation[agent] = np.zeros(13) - return observation - - -class ImageObservationWrapper(gym.ObservationWrapper): - def __init__(self, env: gym.Env) -> None: - super().__init__(env) - obs_dims = 23 # see _convert_obs function for how this is computed - self.map_size = self.env.env_cfg.map_size - self.observation_space = spaces.Box( - -999, 999, shape=(self.map_size, self.map_size, obs_dims) - ) - - def observation( - self, obs: Dict[str, ObservationStateDict] - ) -> Dict[str, npt.NDArray]: - shared_obs = obs["player_0"] - unit_mask = np.zeros((self.map_size, self.map_size, 1)) - unit_data = np.zeros( - (self.map_size, self.map_size, 9) - ) # power(1) + cargo(4) + unit_type(1) + unit_pos(2) + team(1) - factory_mask = 
np.zeros_like(unit_mask) - factory_data = np.zeros( - (self.map_size, self.map_size, 8) - ) # power(1) + cargo(4) + factory_pos(2) + team(1) - for agent in obs.keys(): - factories = shared_obs["factories"][agent] - units = shared_obs["units"][agent] - - for unit_id in units.keys(): - unit = units[unit_id] - # we encode everything but unit_id or action queue - cargo_space = self.env.state.env_cfg.ROBOTS[ - unit["unit_type"] - ].CARGO_SPACE - battery_cap = self.env.state.env_cfg.ROBOTS[ - unit["unit_type"] - ].BATTERY_CAPACITY - cargo_vec = np.array( - [ - unit["power"] / battery_cap, - unit["cargo"]["ice"] / cargo_space, - unit["cargo"]["ore"] / cargo_space, - unit["cargo"]["water"] / cargo_space, - unit["cargo"]["metal"] / cargo_space, - ] - ) - unit_type = ( - 0 if unit["unit_type"] == "LIGHT" else 1 - ) # note that build actions use 0 to encode Light - unit_vec = np.concatenate( - [unit["pos"], [unit_type], cargo_vec, [unit["team_id"]]], axis=-1 - ) - unit_vec[:2] /= self.env.state.env_cfg.map_size - - # note that all data is stored as map[x, y] format - unit_data[unit["pos"][0], unit["pos"][1]] = unit_vec - unit_mask[unit["pos"][0], unit["pos"][1]] = 1 - - for unit_id in factories.keys(): - factory = factories[unit_id] - # we encode everything but strain_id or unit_id - cargo_vec = np.array( - [ - factory["power"], - factory["cargo"]["ice"], - factory["cargo"]["ore"], - factory["cargo"]["water"], - factory["cargo"]["metal"], - ] - ) - cargo_vec = cargo_vec * 1 / 1000 - - factory_vec = np.concatenate( - [factory["pos"], cargo_vec, [factory["team_id"]]], axis=-1 - ) - factory_vec[:2] /= self.env.state.env_cfg.map_size - factory_data[factory["pos"][0], factory["pos"][1]] = factory_vec - factory_mask[factory["pos"][0], factory["pos"][1]] = 1 - - image_features = np.concatenate( - [ - np.expand_dims(shared_obs["board"]["lichen"], -1) - / self.env.state.env_cfg.MAX_LICHEN_PER_TILE, - np.expand_dims(shared_obs["board"]["rubble"], -1) - / self.env.state.env_cfg.MAX_RUBBLE, - np.expand_dims(shared_obs["board"]["ice"], -1), - np.expand_dims(shared_obs["board"]["ore"], -1), - unit_mask, - unit_data, - factory_mask, - factory_data, - ], - axis=-1, - ) - - new_obs = dict() - for agent in self.all_agents: - new_obs[agent] = image_features - return new_obs diff --git a/kits/rl-sb3/wrappers/sb3.py b/kits/rl-sb3/wrappers/sb3.py deleted file mode 100644 index 7e3126f5..00000000 --- a/kits/rl-sb3/wrappers/sb3.py +++ /dev/null @@ -1,142 +0,0 @@ -from typing import Callable, Dict - -import gym -import numpy as np -import numpy.typing as npt -from gym import spaces - -import luxai_s2.env -from luxai_s2.env import LuxAI_S2 -from luxai_s2.state import ObservationStateDict -from luxai_s2.unit import ActionType, BidActionType, FactoryPlacementActionType -from luxai_s2.utils import my_turn_to_place_factory -from luxai_s2.wrappers.controllers import ( - Controller, - SimpleDiscreteController, - SimpleSingleUnitDiscreteController, -) - - -class SB3Wrapper(gym.Wrapper): - def __init__( - self, - env: LuxAI_S2, - bid_policy: Callable[ - [str, ObservationStateDict], Dict[str, BidActionType] - ] = None, - factory_placement_policy: Callable[ - [str, ObservationStateDict], Dict[str, FactoryPlacementActionType] - ] = None, - heuristic_policy: Callable[ - [str, ObservationStateDict], Dict[str, ActionType] - ] = None, - controller: Controller = None, - ) -> None: - """ - A environment wrapper for Stable Baselines 3. 
It reduces the LuxAI_S2 env - into a single phase game and places the first two phases (bidding and factory placement) into the env.reset function so that - interacting agents directly start generating actions to play the third phase of the game. - - It's highly recommended to use one of the observation wrappers as well - - Parameters - ---------- - bid_policy: Function - A function accepting player: str and obs: ObservationStateDict as input that returns a bid action - such as dict(bid=10, faction="AlphaStrike"). By default will bid 0 - factory_placement_policy: Function - A function accepting player: str and obs: ObservationStateDict as input that returns a factory placement action - such as dict(spawn=np.array([2, 4]), metal=150, water=150). By default will spawn in a random valid location with metal=150, water=150 - controller : Controller - A controller that parameterizes the action space into something more usable and converts parameterized actions to lux actions. - See luxai_s2/wrappers/controllers.py for available controllers and how to make your own - - heuristic_policy: Function - A function accepting player: str and obs: ObservationStateDict as input and returns a lux action. This can be provided by the user - to define custom logic or a model to generate actions for any of the units or factories. For any action generate for a unit or factory, it will - override the original action for that unit or factory when the step function is called. By defalt this is None and not used - """ - gym.Wrapper.__init__(self, env) - self.env = env - if controller is None: - controller = SimpleDiscreteController(self.env.state.env_cfg) - self.controller = controller - - self.action_space = controller.action_space - - obs_dims = 23 # see _convert_obs function for how this is computed - self.map_size = self.env.env_cfg.map_size - self.observation_space = spaces.Box( - -999, 999, shape=(self.map_size, self.map_size, obs_dims) - ) - - # The simplified wrapper removes the first two phases of the game by using predefined policies (trained or heuristic) - # to handle those two phases during each reset - if factory_placement_policy is None: - - def factory_placement_policy(player, obs: ObservationStateDict): - potential_spawns = np.array( - list(zip(*np.where(obs["board"]["valid_spawns_mask"] == 1))) - ) - spawn_loc = potential_spawns[ - np.random.randint(0, len(potential_spawns)) - ] - return dict(spawn=spawn_loc, metal=150, water=150) - - self.factory_placement_policy = factory_placement_policy - if bid_policy is None: - - def bid_policy(player, obs: ObservationStateDict): - faction = "AlphaStrike" - if player == "player_1": - faction = "MotherMars" - return dict(bid=0, faction=faction) - - self.bid_policy = bid_policy - - self.heuristic_policy = heuristic_policy - - self.prev_obs = None - # list of all agents regardless of status - self.all_agents = [] - - def step(self, action: Dict[str, npt.NDArray]): - lux_action = dict() - for agent in self.all_agents: - if agent in action: - lux_action[agent] = self.controller.action_to_lux_action( - agent=agent, obs=self.prev_obs, action=action[agent] - ) - else: - lux_action[agent] = dict() - if self.heuristic_policy is not None: - heuristic_lux_action = self.heuristic_policy( - agent, self.prev_obs[agent] - ) - # override keys - for k in heuristic_lux_action: - lux_action[agent][k] = heuristic_lux_action[k] - obs, reward, done, info = self.env.step(lux_action) - self.prev_obs = obs - return obs, reward, done, info - - def reset(self, **kwargs): - obs = 
self.env.reset(**kwargs) - self.all_agents = self.env.agents - action = dict() - for agent in self.all_agents: - action[agent] = self.bid_policy(agent, obs[agent]) - obs, _, _, _ = self.env.step(action) - while self.env.state.real_env_steps < 0: - action = dict() - for agent in self.all_agents: - if my_turn_to_place_factory( - obs["player_0"]["teams"][agent]["place_first"], - self.env.state.env_steps, - ): - action[agent] = self.factory_placement_policy(agent, obs[agent]) - else: - action[agent] = dict() - obs, _, _, _ = self.env.step(action) - self.prev_obs = obs - return obs diff --git a/kits/rl/README.md b/kits/rl/README.md new file mode 100644 index 00000000..373e7853 --- /dev/null +++ b/kits/rl/README.md @@ -0,0 +1,8 @@ +# Reinforcement Learning Kits + +This folder has all the reinforcement learning starter kits. Check the following READMEs to get started with RL using each of the supported RL libraries. At the moment the Jax-based environment has not been integrated into the starter kits; that is still a work in progress. + +- [Stable Baselines 3](https://github.com/Lux-AI-Challenge/Lux-Design-S2/tree/main/kits/rl/sb3) +- CleanRL Style (Single File Implementation) - WIP + +For a tutorial series on how to implement a simple RL agent, check out these Kaggle notebooks! \ No newline at end of file diff --git a/kits/rl/sb3/.gitignore b/kits/rl/sb3/.gitignore new file mode 100644 index 00000000..c8d4db10 --- /dev/null +++ b/kits/rl/sb3/.gitignore @@ -0,0 +1,5 @@ +logs +*.pth +*.tar.gz +luxai_s2 +*.zip \ No newline at end of file diff --git a/kits/rl/sb3/README.md b/kits/rl/sb3/README.md new file mode 100644 index 00000000..6ef87f50 --- /dev/null +++ b/kits/rl/sb3/README.md @@ -0,0 +1,57 @@ +# Stable Baselines 3 Simple RL Kit + +This is a simple Stable Baselines 3 RL kit based on part 2 of the [RL tutorial series]() + +`train.py` implements a very simple approach to training an RL agent to dig ice and deliver it back to factories to generate water and survive longer. + +The general structure of this RL agent is that we use a heuristic policy to handle bidding and factory placement, then train the RL agent on solving the normal phase of the game. The goal of this RL agent is to survive as long as possible without worrying about growing lichen yet. + +## Training + +To use the training code, run `python train.py --help` to see all options, and to train an agent run + +``` +python train.py --n-envs 10 --log-path logs/exp_1 --seed 42 +``` + +which trains an RL agent using the PPO algorithm with 10 parallel environments for 7,500,000 interactions. To view the training progress and various logged metrics (including Lux AI S2 specific metrics like total ice dug and water produced) you can use TensorBoard as shown below. By the end of training you should see that the evaluation episode length increases over time to reach 1000, meaning the agent has learned to dig ice and produce water well enough to survive. This trained agent should also surpass the default rule-based Python agent. + +``` +tensorboard --logdir logs +``` + +You should see your agent generally learn to dig more ice, produce more water, and survive longer during evaluation (eval/mean_ep_length increases). Note that RL is incredibly unstable to train and sometimes a bad seed may impact training results. For ideas on how to improve your agent, stay tuned for a part 3 tutorial showing how to use invalid action masking and more tricks to solve complex multi-agent games like Lux AI S2.
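Before moving on to evaluation, it can help to see in one place the environment stack that `train.py` assembles. The sketch below mirrors the wrappers used in `train.py` (SB3Wrapper, SimpleUnitDiscreteController, SimpleUnitObservationWrapper, plus the CustomEnvWrapper defined inside `train.py` itself); the `gym.make` keyword arguments and the small hyperparameters here are illustrative assumptions rather than the script's exact settings, and it assumes you run it from the `kits/rl/sb3` directory:

```
import gym
from gym.wrappers import TimeLimit
from luxai_s2.utils.heuristics.factory_placement import place_near_random_ice
from luxai_s2.wrappers import SB3Wrapper
from stable_baselines3.ppo import PPO

from train import CustomEnvWrapper  # single-agent conversion + shaped reward (defined in train.py)
from wrappers import SimpleUnitDiscreteController, SimpleUnitObservationWrapper

# Build roughly the same wrapper stack as train.py's make_env
# (the gym.make keyword arguments here are assumptions)
env = gym.make("LuxAI_S2-v0", verbose=0, collect_stats=True)
env = SB3Wrapper(
    env,
    factory_placement_policy=place_near_random_ice,        # heuristic factory placement
    controller=SimpleUnitDiscreteController(env.env_cfg),  # flat discrete action space
)
env = SimpleUnitObservationWrapper(env)  # 13-dim feature vector per player
env = CustomEnvWrapper(env)              # single-agent view + ice/water shaped reward
env = TimeLimit(env, max_episode_steps=200)  # short horizon for faster experimentation

model = PPO("MlpPolicy", env, n_steps=2048, batch_size=512, verbose=1)
model.learn(total_timesteps=50_000)  # the full runs in train.py use millions of interactions
model.save("logs/quickstart/models/latest_model")
```

The full script builds on this by adding vectorized environments (SubprocVecEnv) and the TensorboardCallback/EvalCallback logging shown in `train.py`.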
+ + ## Evaluation + + To start evaluating with the CLI tool and eventually submit to the competition, we need to save our best model (stored in /models/best_model.zip) to the root directory. Alternatively you can modify `MODEL_WEIGHTS_RELATIVE_PATH` in agent.py to point to where the model file is. If you ran the training script above it will save the trained agent to `logs/exp_1/models/best_model.zip`. + + Once that is set up, you can test and watch your trained agent on the nice HTML visualizer by running the following + + ``` + luxai-s2 main.py main.py --out=replay.html + ``` + + Open up `replay.html` and you can look at what your agent is doing. If training was successful, you should notice it picking up power, digging ice, and transferring it back to the factory. + + + ## Submitting to Kaggle + + To submit your trained agent, first create a .tar.gz file + + ``` + tar -cvzf submission.tar.gz * + ``` + + and submit that to the competition. Make sure that `MODEL_WEIGHTS_RELATIVE_PATH` is pointing to a .zip file in your folder or else the agent won't run. + + ## Tips for Improving your Agent + + This tutorial agent trains a policy that can efficiently control a single heavy robot which learns to pick up power, constantly dig ice, transfer ice back to the factory, and survive the full 1000 turns of the game. A simple improvement would be to add lichen planting to the action space / controller or program it directly as a rule in the agent.py file, allowing you to score points by the end of the game as well as generate more power. + + Another easy idea is to modify the `agent.py` code so that you spawn multiple factories and multiple heavy robots, and simply run the trained policy on each heavy robot. + + + If you want to look into more scalable solutions, it's critical to first figure out how to model multiple units at once. This kit shows you how to control a single heavy robot effectively but not multiple. Another thing to consider is what observations and features would be the most useful. Finally, you can always try to develop a more complex action controller in addition to developing better reward functions. + + If you feel you are experienced enough, you can take a look at [last season's winning solution by team Toad Brigade](https://www.kaggle.com/competitions/lux-ai-2021/discussion/294993) or [our paper: Emergent collective intelligence from massive-agent cooperation and competition](https://arxiv.org/abs/2301.01609), which show how to use convolutional neural nets and various other techniques (e.g. invalid action masking) to control a massive number of units at once. \ No newline at end of file diff --git a/kits/rl/sb3/agent.py b/kits/rl/sb3/agent.py new file mode 100644 index 00000000..8e0783a1 --- /dev/null +++ b/kits/rl/sb3/agent.py @@ -0,0 +1,118 @@ +""" +This file is where your agent's logic is kept. Define a bidding policy, a factory placement policy, and a policy for playing the normal phase of the game. + +The tutorial trains an RL agent to play the normal phase and uses heuristics for the other two phases. + +Note that like the other kits, you can only debug print to standard error e.g.
print("message", file=sys.stderr) +""" + +import os.path as osp +import sys + +import numpy as np +import torch as th + +from lux.config import EnvConfig +from nn import load_policy +from wrappers import SimpleUnitDiscreteController, SimpleUnitObservationWrapper + +# change this to use weights stored elsewhere +# make sure the model weights are submitted with the other code files +# any files in the logs folder are not necessary +MODEL_WEIGHTS_RELATIVE_PATH = "./best_model.zip" + + +class Agent: + def __init__(self, player: str, env_cfg: EnvConfig) -> None: + self.player = player + self.opp_player = "player_1" if self.player == "player_0" else "player_0" + np.random.seed(0) + self.env_cfg: EnvConfig = env_cfg + + directory = osp.dirname(__file__) + # load our RL policy + self.policy = load_policy(osp.join(directory, MODEL_WEIGHTS_RELATIVE_PATH)) + self.policy.eval() + + self.controller = SimpleUnitDiscreteController(self.env_cfg) + + def bid_policy(self, step: int, obs, remainingOverageTime: int = 60): + return dict(faction="AlphaStrike", bid=0) + + def factory_placement_policy(self, step: int, obs, remainingOverageTime: int = 60): + if obs["teams"][self.player]["metal"] == 0: + return dict() + potential_spawns = list(zip(*np.where(obs["board"]["valid_spawns_mask"] == 1))) + potential_spawns_set = set(potential_spawns) + done_search = False + # if player == "player_1": + ice_diff = np.diff(obs["board"]["ice"]) + pot_ice_spots = np.argwhere(ice_diff == 1) + if len(pot_ice_spots) == 0: + pot_ice_spots = potential_spawns + trials = 5 + while trials > 0: + pos_idx = np.random.randint(0, len(pot_ice_spots)) + pos = pot_ice_spots[pos_idx] + + area = 3 + for x in range(area): + for y in range(area): + check_pos = [pos[0] + x - area // 2, pos[1] + y - area // 2] + if tuple(check_pos) in potential_spawns_set: + done_search = True + pos = check_pos + break + if done_search: + break + if done_search: + break + trials -= 1 + spawn_loc = potential_spawns[np.random.randint(0, len(potential_spawns))] + if not done_search: + pos = spawn_loc + + metal = obs["teams"][self.player]["metal"] + return dict(spawn=pos, metal=metal, water=metal) + + def act(self, step: int, obs, remainingOverageTime: int = 60): + # first convert observations using the same observation wrapper you used for training + # note that SimpleUnitObservationWrapper takes input as the full observation for both players and returns an obs for players + raw_obs = dict(player_0=obs, player_1=obs) + obs = SimpleUnitObservationWrapper.convert_obs(raw_obs, env_cfg=self.env_cfg) + obs = obs[self.player] + + obs = th.from_numpy(obs).float() + with th.no_grad(): + # NOTE: we set deterministic to False here, which is only recommended for RL agents + # that create too many invalid actions (less of an issue if you train with invalid action masking) + + # to improve performance, we have a rule based action mask generator for the controller used + # which will force the agent to generate actions that are valid only. 
+ action_mask = ( + th.from_numpy(self.controller.action_masks(self.player, raw_obs)) + .unsqueeze(0) # we unsqueeze/add an extra batch dimension = + .bool() + ) + actions = ( + self.policy.act( + obs.unsqueeze(0), deterministic=False, action_masks=action_mask + ) + .cpu() + .numpy() + ) + + # use our controller which we trained with in train.py to generate a Lux S2 compatible action + lux_action = self.controller.action_to_lux_action( + self.player, raw_obs, actions[0] + ) + + # commented code below adds watering lichen which can easily improve your agent + # shared_obs = raw_obs[self.player] + # factories = shared_obs["factories"][self.player] + # for unit_id in factories.keys(): + # factory = factories[unit_id] + # if 1000 - step < 50 and factory["cargo"]["water"] > 100: + # lux_action[unit_id] = 2 # water and grow lichen at the very end of the game + + return lux_action diff --git a/kits/rl-sb3-jax-env/lux/cargo.py b/kits/rl/sb3/lux/cargo.py similarity index 99% rename from kits/rl-sb3-jax-env/lux/cargo.py rename to kits/rl/sb3/lux/cargo.py index ad43536a..c166ed67 100644 --- a/kits/rl-sb3-jax-env/lux/cargo.py +++ b/kits/rl/sb3/lux/cargo.py @@ -1,5 +1,6 @@ from dataclasses import dataclass + @dataclass class UnitCargo: ice: int = 0 diff --git a/kits/rl-sb3-jax-env/lux/config.py b/kits/rl/sb3/lux/config.py similarity index 100% rename from kits/rl-sb3-jax-env/lux/config.py rename to kits/rl/sb3/lux/config.py diff --git a/kits/rl-sb3-jax-env/lux/factory.py b/kits/rl/sb3/lux/factory.py similarity index 78% rename from kits/rl-sb3-jax-env/lux/factory.py rename to kits/rl/sb3/lux/factory.py index 13e87e82..32f5b84a 100644 --- a/kits/rl-sb3-jax-env/lux/factory.py +++ b/kits/rl/sb3/lux/factory.py @@ -1,9 +1,13 @@ import math +from dataclasses import dataclass from sys import stderr + import numpy as np -from dataclasses import dataclass + from lux.cargo import UnitCargo from lux.config import EnvConfig + + @dataclass class Factory: team_id: int @@ -19,22 +23,31 @@ class Factory: def build_heavy_metal_cost(self, game_state): unit_cfg = self.env_cfg.ROBOTS["HEAVY"] return unit_cfg.METAL_COST + def build_heavy_power_cost(self, game_state): unit_cfg = self.env_cfg.ROBOTS["HEAVY"] return unit_cfg.POWER_COST + def can_build_heavy(self, game_state): - return self.power >= self.build_heavy_power_cost(game_state) and self.cargo.metal >= self.build_heavy_metal_cost(game_state) + return self.power >= self.build_heavy_power_cost( + game_state + ) and self.cargo.metal >= self.build_heavy_metal_cost(game_state) + def build_heavy(self): return 1 def build_light_metal_cost(self, game_state): unit_cfg = self.env_cfg.ROBOTS["LIGHT"] return unit_cfg.METAL_COST + def build_light_power_cost(self, game_state): unit_cfg = self.env_cfg.ROBOTS["LIGHT"] return unit_cfg.POWER_COST + def can_build_light(self, game_state): - return self.power >= self.build_light_power_cost(game_state) and self.cargo.metal >= self.build_light_metal_cost(game_state) + return self.power >= self.build_light_power_cost( + game_state + ) and self.cargo.metal >= self.build_light_metal_cost(game_state) def build_light(self): return 0 @@ -45,11 +58,15 @@ def water_cost(self, game_state): """ owned_lichen_tiles = (game_state.board.lichen_strains == self.strain_id).sum() return np.ceil(owned_lichen_tiles / self.env_cfg.LICHEN_WATERING_COST_FACTOR) + def can_water(self, game_state): return self.cargo.water >= self.water_cost(game_state) + def water(self): return 2 @property def pos_slice(self): - return slice(self.pos[0] - 1, self.pos[0] + 2), 
slice(self.pos[1] - 1, self.pos[1] + 2) + return slice(self.pos[0] - 1, self.pos[0] + 2), slice( + self.pos[1] - 1, self.pos[1] + 2 + ) diff --git a/kits/rl-sb3-jax-env/lux/kit.py b/kits/rl/sb3/lux/kit.py similarity index 93% rename from kits/rl-sb3-jax-env/lux/kit.py rename to kits/rl/sb3/lux/kit.py index 8de70cd7..5b2a64ba 100644 --- a/kits/rl-sb3-jax-env/lux/kit.py +++ b/kits/rl/sb3/lux/kit.py @@ -1,13 +1,19 @@ from dataclasses import dataclass, field from typing import Dict + import numpy as np + from lux.cargo import UnitCargo from lux.config import EnvConfig -from lux.team import Team, FactionTypes -from lux.unit import Unit from lux.factory import Factory +from lux.team import FactionTypes, Team +from lux.unit import Unit + + def process_action(action): return to_json(action) + + def to_json(obj): if isinstance(obj, np.ndarray): return obj.tolist() @@ -24,6 +30,8 @@ def to_json(obj): return out else: return obj + + def from_json(state): if isinstance(state, list): return np.array(state) @@ -33,7 +41,8 @@ def from_json(state): out[k] = from_json(state[k]) return out else: - return state + return state + def process_obs(player, game_state, step, obs): if step == 0: @@ -43,11 +52,13 @@ def process_obs(player, game_state, step, obs): # use delta changes to board to update game state obs = from_json(obs) for k in obs: - if k != 'board': + if k != "board": game_state[k] = obs[k] else: if "valid_spawns_mask" in obs[k]: - game_state["board"]["valid_spawns_mask"] = obs[k]["valid_spawns_mask"] + game_state["board"]["valid_spawns_mask"] = obs[k][ + "valid_spawns_mask" + ] for item in ["rubble", "lichen", "lichen_strains"]: for k, v in obs["board"][item].items(): k = k.split(",") @@ -55,8 +66,9 @@ def process_obs(player, game_state, step, obs): game_state["board"][item][x, y] = v return game_state + def obs_to_game_state(step, env_cfg: EnvConfig, obs): - + units = dict() for agent in obs["units"]: units[agent] = dict() @@ -70,7 +82,6 @@ def obs_to_game_state(step, env_cfg: EnvConfig, obs): ) unit.cargo = cargo units[agent][unit_id] = unit - factory_occupancy_map = np.ones_like(obs["board"]["rubble"], dtype=int) * -1 factories = dict() @@ -79,10 +90,7 @@ def obs_to_game_state(step, env_cfg: EnvConfig, obs): for unit_id in obs["factories"][agent]: f_data = obs["factories"][agent][unit_id] cargo = UnitCargo(**f_data["cargo"]) - factory = Factory( - **f_data, - env_cfg=env_cfg - ) + factory = Factory(**f_data, env_cfg=env_cfg) factory.cargo = cargo factories[agent][unit_id] = factory factory_occupancy_map[factory.pos_slice] = factory.strain_id @@ -103,14 +111,14 @@ def obs_to_game_state(step, env_cfg: EnvConfig, obs): lichen_strains=obs["board"]["lichen_strains"], factory_occupancy_map=factory_occupancy_map, factories_per_team=obs["board"]["factories_per_team"], - valid_spawns_mask=obs["board"]["valid_spawns_mask"] + valid_spawns_mask=obs["board"]["valid_spawns_mask"], ), units=units, factories=factories, - teams=teams - + teams=teams, ) + @dataclass class Board: rubble: np.ndarray @@ -121,17 +129,21 @@ class Board: factory_occupancy_map: np.ndarray factories_per_team: int valid_spawns_mask: np.ndarray + + @dataclass class GameState: """ A GameState object at step env_steps. 
Copied from luxai_s2/state/state.py """ + env_steps: int env_cfg: dict board: Board units: Dict[str, Dict[str, Unit]] = field(default_factory=dict) factories: Dict[str, Dict[str, Factory]] = field(default_factory=dict) teams: Dict[str, Team] = field(default_factory=dict) + @property def real_env_steps(self): """ @@ -143,8 +155,6 @@ def real_env_steps(self): else: return self.env_steps - # various utility functions def is_day(self): return self.real_env_steps % self.env_cfg.CYCLE_LENGTH < self.env_cfg.DAY_LENGTH - diff --git a/kits/rl-sb3-jax-env/lux/team.py b/kits/rl/sb3/lux/team.py similarity index 81% rename from kits/rl-sb3-jax-env/lux/team.py rename to kits/rl/sb3/lux/team.py index b33fa6af..beaa6a42 100644 --- a/kits/rl-sb3-jax-env/lux/team.py +++ b/kits/rl/sb3/lux/team.py @@ -1,18 +1,24 @@ from dataclasses import dataclass from enum import Enum + from lux.config import EnvConfig + TERM_COLORS = False try: from termcolor import colored - TERM_COLORS=True -except: + + TERM_COLORS = True +except: pass + + @dataclass class FactionInfo: color: str = "none" alt_color: str = "red" faction_id: int = -1 + class FactionTypes(Enum): Null = FactionInfo(color="gray", faction_id=0) AlphaStrike = FactionInfo(color="yellow", faction_id=1) @@ -20,8 +26,20 @@ class FactionTypes(Enum): TheBuilders = FactionInfo(color="blue", faction_id=3) FirstMars = FactionInfo(color="red", faction_id=4) + class Team: - def __init__(self, team_id: int, agent: str, faction: FactionTypes = None, water=0, metal=0, factories_to_place=0, factory_strains=[], place_first=False, bid=0) -> None: + def __init__( + self, + team_id: int, + agent: str, + faction: FactionTypes = None, + water=0, + metal=0, + factories_to_place=0, + factory_strains=[], + place_first=False, + bid=0, + ) -> None: self.faction = faction self.team_id = team_id # the key used to differentiate ownership of things in state @@ -31,9 +49,10 @@ def __init__(self, team_id: int, agent: str, faction: FactionTypes = None, water self.metal = metal self.factories_to_place = factories_to_place self.factory_strains = factory_strains - # whether this team gets to place factories down first or not. The bid winner has this set to True. + # whether this team gets to place factories down first or not. The bid winner has this set to True. 
# If tied, player_0's team has this True self.place_first = place_first + def state_dict(self): return dict( team_id=self.team_id, @@ -45,8 +64,9 @@ def state_dict(self): factory_strains=self.factory_strains, place_first=self.place_first, ) + def __str__(self) -> str: out = f"[Player {self.team_id}]" if TERM_COLORS: return colored(out, self.faction.value.color) - return out \ No newline at end of file + return out diff --git a/kits/rl-sb3/lux/unit.py b/kits/rl/sb3/lux/unit.py similarity index 73% rename from kits/rl-sb3/lux/unit.py rename to kits/rl/sb3/lux/unit.py index 8bba5ee1..0ba65f4f 100644 --- a/kits/rl-sb3/lux/unit.py +++ b/kits/rl/sb3/lux/unit.py @@ -1,19 +1,22 @@ import math import sys +from dataclasses import dataclass from typing import List + import numpy as np -from dataclasses import dataclass + from lux.cargo import UnitCargo from lux.config import EnvConfig # a[1] = direction (0 = center, 1 = up, 2 = right, 3 = down, 4 = left) move_deltas = np.array([[0, 0], [0, -1], [1, 0], [0, 1], [-1, 0]]) + @dataclass class Unit: team_id: int unit_id: str - unit_type: str # "LIGHT" or "HEAVY" + unit_type: str # "LIGHT" or "HEAVY" pos: np.ndarray power: int cargo: UnitCargo @@ -23,7 +26,8 @@ class Unit: @property def agent_id(self): - if self.team_id == 0: return "player_0" + if self.team_id == 0: + return "player_0" return "player_1" def action_queue_cost(self, game_state): @@ -33,16 +37,28 @@ def action_queue_cost(self, game_state): def move_cost(self, game_state, direction): board = game_state.board target_pos = self.pos + move_deltas[direction] - if target_pos[0] < 0 or target_pos[1] < 0 or target_pos[1] >= len(board.rubble) or target_pos[0] >= len(board.rubble[0]): + if ( + target_pos[0] < 0 + or target_pos[1] < 0 + or target_pos[1] >= len(board.rubble) + or target_pos[0] >= len(board.rubble[0]) + ): # print("Warning, tried to get move cost for going off the map", file=sys.stderr) return None factory_there = board.factory_occupancy_map[target_pos[0], target_pos[1]] - if factory_there not in game_state.teams[self.agent_id].factory_strains and factory_there != -1: + if ( + factory_there not in game_state.teams[self.agent_id].factory_strains + and factory_there != -1 + ): # print("Warning, tried to get move cost for going onto a opposition factory", file=sys.stderr) return None rubble_at_target = board.rubble[target_pos[0]][target_pos[1]] - - return math.floor(self.unit_cfg.MOVE_COST + self.unit_cfg.RUBBLE_MOVEMENT_COST * rubble_at_target) + + return math.floor( + self.unit_cfg.MOVE_COST + + self.unit_cfg.RUBBLE_MOVEMENT_COST * rubble_at_target + ) + def move(self, direction, repeat=0, n=1): if isinstance(direction, int): direction = direction @@ -50,22 +66,28 @@ def move(self, direction, repeat=0, n=1): pass return np.array([0, direction, 0, 0, repeat, n]) - def transfer(self, transfer_direction, transfer_resource, transfer_amount, repeat=0, n=1): + def transfer( + self, transfer_direction, transfer_resource, transfer_amount, repeat=0, n=1 + ): assert transfer_resource < 5 and transfer_resource >= 0 assert transfer_direction < 5 and transfer_direction >= 0 - return np.array([1, transfer_direction, transfer_resource, transfer_amount, repeat, n]) - + return np.array( + [1, transfer_direction, transfer_resource, transfer_amount, repeat, n] + ) + def pickup(self, pickup_resource, pickup_amount, repeat=0, n=1): assert pickup_resource < 5 and pickup_resource >= 0 return np.array([2, 0, pickup_resource, pickup_amount, repeat, n]) - + def dig_cost(self, game_state): return 
self.unit_cfg.DIG_COST + def dig(self, repeat=0, n=1): return np.array([3, 0, 0, 0, repeat, n]) def self_destruct_cost(self, game_state): return self.unit_cfg.SELF_DESTRUCT_COST + def self_destruct(self, repeat=0, n=1): return np.array([4, 0, 0, 0, repeat, n]) @@ -74,4 +96,4 @@ def recharge(self, x, repeat=0, n=1): def __str__(self) -> str: out = f"[{self.team_id}] {self.unit_id} {self.unit_type} at {self.pos}" - return out \ No newline at end of file + return out diff --git a/kits/rl-sb3-jax-env/lux/utils.py b/kits/rl/sb3/lux/utils.py similarity index 92% rename from kits/rl-sb3-jax-env/lux/utils.py rename to kits/rl/sb3/lux/utils.py index 043fc2f8..0f68ece2 100644 --- a/kits/rl-sb3-jax-env/lux/utils.py +++ b/kits/rl/sb3/lux/utils.py @@ -7,6 +7,7 @@ def my_turn_to_place_factory(place_first: bool, step: int): return True return False + # direction (0 = center, 1 = up, 2 = right, 3 = down, 4 = left) def direction_to(src, target): ds = target - src @@ -16,11 +17,11 @@ def direction_to(src, target): return 0 if abs(dx) > abs(dy): if dx > 0: - return 2 + return 2 else: return 4 else: if dy > 0: return 3 else: - return 1 \ No newline at end of file + return 1 diff --git a/kits/rl-sb3-jax-env/main.py b/kits/rl/sb3/main.py similarity index 65% rename from kits/rl-sb3-jax-env/main.py rename to kits/rl/sb3/main.py index 67b0944e..0895bebb 100644 --- a/kits/rl-sb3-jax-env/main.py +++ b/kits/rl/sb3/main.py @@ -1,22 +1,33 @@ import json -from typing import Dict import sys from argparse import Namespace +from typing import Dict from agent import Agent from lux.config import EnvConfig -from lux.kit import GameState, process_obs, to_json, from_json, process_action, obs_to_game_state +from lux.kit import ( + GameState, + from_json, + obs_to_game_state, + process_action, + process_obs, + to_json, +) + ### DO NOT REMOVE THE FOLLOWING CODE ### -agent_dict = dict() # store potentially multiple dictionaries as kaggle imports code directly +agent_dict = ( + dict() +) # store potentially multiple dictionaries as kaggle imports code directly agent_prev_obs = dict() + + def agent_fn(observation, configurations): """ agent definition for kaggle submission. 
""" global agent_dict step = observation.step - - + player = observation.player remainingOverageTime = observation.remainingOverageTime if step == 0: @@ -28,15 +39,18 @@ def agent_fn(observation, configurations): obs = process_obs(player, agent_prev_obs[player], step, json.loads(observation.obs)) agent_prev_obs[player] = obs agent.step = step - if obs["real_env_steps"] < 0: - actions = agent.early_setup(step, obs, remainingOverageTime) + if step == 0: + actions = agent.bid_policy(step, obs, remainingOverageTime) + elif obs["real_env_steps"] < 0: + actions = agent.factory_placement_policy(step, obs, remainingOverageTime) else: actions = agent.act(step, obs, remainingOverageTime) return process_action(actions) + if __name__ == "__main__": - + def read_input(): """ Reads input from stdin @@ -45,6 +59,7 @@ def read_input(): return input() except EOFError as eof: raise SystemExit(eof) + step = 0 player_id = 0 configurations = None @@ -52,11 +67,19 @@ def read_input(): while True: inputs = read_input() obs = json.loads(inputs) - - observation = Namespace(**dict(step=obs["step"], obs=json.dumps(obs["obs"]), remainingOverageTime=obs["remainingOverageTime"], player=obs["player"], info=obs["info"])) + + observation = Namespace( + **dict( + step=obs["step"], + obs=json.dumps(obs["obs"]), + remainingOverageTime=obs["remainingOverageTime"], + player=obs["player"], + info=obs["info"], + ) + ) if i == 0: configurations = obs["info"]["env_cfg"] i += 1 actions = agent_fn(observation, dict(env_cfg=configurations)) # send actions to engine - print(json.dumps(actions)) \ No newline at end of file + print(json.dumps(actions)) diff --git a/kits/rl/sb3/nn.py b/kits/rl/sb3/nn.py new file mode 100644 index 00000000..121368df --- /dev/null +++ b/kits/rl/sb3/nn.py @@ -0,0 +1,66 @@ +""" +Code for neural network inference and loading SB3 model weights +""" +import sys +import zipfile + +import torch as th +import torch.nn as nn + + +class Net(nn.Module): + def __init__(self, action_dims=12): + super(Net, self).__init__() + self.action_dims = action_dims + self.mlp = nn.Sequential( + nn.Linear(13, 128), + nn.Tanh(), + nn.Linear(128, 128), + nn.Tanh(), + ) + self.action_net = nn.Sequential( + nn.Linear(128, action_dims), + ) + + def act(self, x, action_masks, deterministic=False): + latent_pi = self.forward(x) + action_logits = self.action_net(latent_pi) + action_logits[~action_masks] = -1e8 # mask out invalid actions + dist = th.distributions.Categorical(logits=action_logits) + if not deterministic: + return dist.sample() + else: + return dist.mode + + def forward(self, x): + x = self.mlp(x) + return x + + +import io +import os.path as osp + + +def load_policy(model_path): + # load .pth or .zip + if model_path[-4:] == ".zip": + with zipfile.ZipFile(model_path) as archive: + file_path = "policy.pth" + with archive.open(file_path, mode="r") as param_file: + file_content = io.BytesIO() + file_content.write(param_file.read()) + file_content.seek(0) + sb3_state_dict = th.load(file_content, map_location="cpu") + else: + sb3_state_dict = th.load(model_path, map_location="cpu") + + model = Net() + loaded_state_dict = {} + + # this code here works assuming the first keys in the sb3 state dict are aligned with the ones you define above in Net + for sb3_key, model_key in zip(sb3_state_dict.keys(), model.state_dict().keys()): + loaded_state_dict[model_key] = sb3_state_dict[sb3_key] + print("loaded", sb3_key, "->", model_key, file=sys.stderr) + + model.load_state_dict(loaded_state_dict) + return model diff --git a/examples/sb3.py 
b/kits/rl/sb3/train.py similarity index 56% rename from examples/sb3.py rename to kits/rl/sb3/train.py index ff9b3547..0f998f60 100644 --- a/examples/sb3.py +++ b/kits/rl/sb3/train.py @@ -1,3 +1,8 @@ +""" +Implementation of RL agent. Note that luxai_s2 and stable_baselines3 are packages not available during the competition running (ATM) +""" + + import copy import os.path as osp @@ -7,26 +12,25 @@ import torch.nn as nn from gym import spaces from gym.wrappers import TimeLimit -from stable_baselines3.common.callbacks import BaseCallback, EvalCallback +from luxai_s2.state import ObservationStateDict, StatsStateDict +from luxai_s2.utils.heuristics.factory_placement import place_near_random_ice +from luxai_s2.wrappers import SB3Wrapper +from stable_baselines3.common.callbacks import ( + BaseCallback, + CheckpointCallback, + EvalCallback, +) from stable_baselines3.common.evaluation import evaluate_policy from stable_baselines3.common.monitor import Monitor from stable_baselines3.common.utils import set_random_seed from stable_baselines3.common.vec_env import ( DummyVecEnv, SubprocVecEnv, - VecCheckNan, VecVideoRecorder, ) from stable_baselines3.ppo import PPO -from luxai_s2.state import ObservationStateDict, StatsStateDict, create_empty_stats -from luxai_s2.utils.heuristics.factory import build_single_heavy -from luxai_s2.utils.heuristics.factory_placement import place_near_random_ice -from luxai_s2.wrappers import ( - SB3Wrapper, - SimpleSingleUnitDiscreteController, - SingleUnitObservationWrapper, -) +from wrappers import SimpleUnitDiscreteController, SimpleUnitObservationWrapper class CustomEnvWrapper(gym.Wrapper): @@ -42,104 +46,48 @@ def step(self, action): opp_agent = "player_1" opp_factories = self.env.state.factories[opp_agent] - for k in opp_factories: + for k in opp_factories.keys(): factory = opp_factories[k] - factory.cargo.water = 1000 # set enemy factories to have 1000 water to keep them alive the whole around and treat the game as single-agent + # set enemy factories to have 1000 water to keep them alive the whole around and treat the game as single-agent + factory.cargo.water = 1000 + # submit actions for just one agent to make it single-agent + # and save single-agent versions of the data below action = {agent: action} - obs, reward, done, info = super().step(action) - - # this is the observation seen by both agents - shared_obs: ObservationStateDict = self.env.prev_obs[agent] + obs, _, done, info = self.env.step(action) + obs = obs[agent] done = done[agent] - # we collect stats on teams here: + # we collect stats on teams here. 
These are useful stats that can be used to help generate reward functions stats: StatsStateDict = self.env.state.stats[agent] - - # compute reward - # we simply want to encourage the heavy units to move to ice tiles - # and mine them and then bring them back to the factory and dump it - # as well as survive as long as possible - - factories = shared_obs["factories"][agent] - factory_pos = None - for unit_id in factories: - factory = factories[unit_id] - # note that ice converts to water at a 4:1 ratio - factory_pos = np.array(factory["pos"]) - break - units = shared_obs["units"][agent] - unit_deliver_ice_reward = 0 - unit_move_to_ice_reward = 0 - unit_overmining_penalty = 0 - penalize_power_waste = 0 - - ice_map = shared_obs["board"]["ice"] - ice_tile_locations = np.argwhere(ice_map == 1) - - def manhattan_dist(p1, p2): - return abs(p1[0] - p2[0]) + abs(p1[1] - p2[1]) - - unit_power = 0 - for unit_id in units: - unit = units[unit_id] - if unit["unit_type"] == "HEAVY": - pos = np.array(unit["pos"]) - ice_tile_distances = np.mean((ice_tile_locations - pos) ** 2, 1) - closest_ice_tile = ice_tile_locations[np.argmin(ice_tile_distances)] - dist_to_ice = manhattan_dist(closest_ice_tile, pos) - unit_power = unit["power"] - if unit["cargo"]["ice"] < 20: - - dist_penalty = min( - 1.0, dist_to_ice / (10) - ) # go beyond 12 squares manhattan dist and no reward - unit_move_to_ice_reward += ( - 1 - dist_penalty - ) * 0.1 # encourage unit to move to ice - else: - if factory_pos is not None: - dist_to_factory = manhattan_dist(pos, factory_pos) - dist_penalty = min(1.0, dist_to_factory / 10) - unit_deliver_ice_reward = ( - 0.2 + (1 - dist_penalty) * 0.1 - ) # encourage unit to move back to factory - if action[agent] == 15 and unit["power"] < 70: - # penalize the agent for trying to dig with insufficient power, which wastes 10 power for trying to update the action queue - penalize_power_waste -= 0.005 - - # save some stats to the info object so we can record it with our SB3 logger + info = dict() metrics = dict() metrics["ice_dug"] = ( stats["generation"]["ice"]["HEAVY"] + stats["generation"]["ice"]["LIGHT"] ) metrics["water_produced"] = stats["generation"]["water"] + + # we save these two to see often the agent updates robot action queues and how often enough + # power to do so and succeed (less frequent updates = more power is saved) metrics["action_queue_updates_success"] = stats["action_queue_updates_success"] metrics["action_queue_updates_total"] = stats["action_queue_updates_total"] - metrics["unit_deliver_ice_reward"] = unit_deliver_ice_reward - metrics["unit_move_to_ice_reward"] = unit_move_to_ice_reward - + # we can save the metrics to info so we can use tensorboard to log them to get a glimpse into how our agent is behaving info["metrics"] = metrics - reward = ( - 0 - + unit_move_to_ice_reward - + unit_deliver_ice_reward - + unit_overmining_penalty - + metrics["water_produced"] / 10 + penalize_power_waste - ) - reward = reward + reward = 0 if self.prev_step_metrics is not None: + # we check how much ice and water is produced and reward the agent for generating both ice_dug_this_step = metrics["ice_dug"] - self.prev_step_metrics["ice_dug"] water_produced_this_step = ( metrics["water_produced"] - self.prev_step_metrics["water_produced"] ) - # reward += ice_dug_this_step # reward agent for digging ice - # reward += water_produced_this_step * 100 # reward agent even more producing water by delivering ice back to base + # we reward water production more as it is the most important resource for 
survival + reward = ice_dug_this_step / 100 + water_produced_this_step + self.prev_step_metrics = copy.deepcopy(metrics) - return obs["player_0"], reward, done, info + return obs, reward, done, info def reset(self, **kwargs): obs = self.env.reset(**kwargs)["player_0"] @@ -203,16 +151,16 @@ def _init() -> gym.Env: # Add a SB3 wrapper to make it work with SB3 and simplify the action space with the controller # this will remove the bidding phase and factory placement phase. For factory placement we use # the provided place_near_random_ice function which will randomly select an ice tile and place a factory near it. + env = SB3Wrapper( env, - controller=SimpleSingleUnitDiscreteController(env.state.env_cfg), factory_placement_policy=place_near_random_ice, - heuristic_policy=build_single_heavy, + controller=SimpleUnitDiscreteController(env.env_cfg), ) - env = SingleUnitObservationWrapper( + env = SimpleUnitObservationWrapper( env ) # changes observation to include a few simple features - env = CustomEnvWrapper(env) # convert to single agent and add our reward + env = CustomEnvWrapper(env) # convert to single agent, add our reward env = TimeLimit( env, max_episode_steps=max_episode_steps ) # set horizon to 100 to make training faster. Default is 1000 @@ -224,11 +172,6 @@ def _init() -> gym.Env: return _init -env_id = "LuxAI_S2-v0" - -from collections import defaultdict - - class TensorboardCallback(BaseCallback): def __init__(self, tag: str, verbose=0): super().__init__(verbose) @@ -236,7 +179,7 @@ def __init__(self, tag: str, verbose=0): def _on_step(self) -> bool: c = 0 - + for i, done in enumerate(self.locals["dones"]): if done: info = self.locals["infos"][i] @@ -247,10 +190,18 @@ def _on_step(self) -> bool: return True -def evaluate(args, model): +def save_model_state_dict(save_path, model): + # save the policy state dict for kaggle competition submission + state_dict = model.policy.to("cpu").state_dict() + th.save(state_dict, save_path) + + +def evaluate(args, env_id, model): model = model.load(args.model_path) video_length = 1000 # default horizon - eval_env = SubprocVecEnv([make_env(env_id, i, max_episode_steps=1000) for i in range(args.n_envs)]) + eval_env = SubprocVecEnv( + [make_env(env_id, i, max_episode_steps=1000) for i in range(args.n_envs)] + ) eval_env = VecVideoRecorder( eval_env, osp.join(args.log_path, "eval_videos"), @@ -259,18 +210,13 @@ def evaluate(args, model): name_prefix=f"evaluation_video", ) eval_env.reset() - out =evaluate_policy(model, eval_env, render=False, deterministic=False) + out = evaluate_policy(model, eval_env, render=False, deterministic=False) print(out) -def train(args, model: PPO): - eval_env = SubprocVecEnv([make_env(env_id, i, max_episode_steps=1000) for i in range(4)]) - video_length = 1000 - eval_env = VecVideoRecorder( - eval_env, - osp.join(args.log_path, "eval_videos"), - record_video_trigger=lambda x: x == 0, - video_length=video_length, - name_prefix=f"evaluation-{env_id}", + +def train(args, env_id, model: PPO): + eval_env = SubprocVecEnv( + [make_env(env_id, i, max_episode_steps=1000) for i in range(4)] ) eval_callback = EvalCallback( eval_env, @@ -279,39 +225,47 @@ def train(args, model: PPO): eval_freq=24_000, deterministic=False, render=False, + n_eval_episodes=5, ) + model.learn( args.total_timesteps, callback=[TensorboardCallback(tag="train_metrics"), eval_callback], ) - model.save(args.log_path, "latest_model") + model.save(osp.join(args.log_path, "models/latest_model")) def main(args): print("Training with args", args) - 
set_random_seed(args.seed) - - env = SubprocVecEnv([make_env(env_id, i, max_episode_steps=args.max_episode_steps) for i in range(args.n_envs)]) + if args.seed is not None: + set_random_seed(args.seed) + env_id = "LuxAI_S2-v0" + env = SubprocVecEnv( + [ + make_env(env_id, i, max_episode_steps=args.max_episode_steps) + for i in range(args.n_envs) + ] + ) env.reset() - rollout_steps = 4_000 + rollout_steps = 4000 policy_kwargs = dict(net_arch=(128, 128)) model = PPO( "MlpPolicy", env, n_steps=rollout_steps // args.n_envs, batch_size=800, - learning_rate=1e-3, + learning_rate=3e-4, policy_kwargs=policy_kwargs, verbose=1, - n_epochs=3, - target_kl=0.07, - gamma=0.97, + n_epochs=2, + target_kl=0.05, + gamma=0.99, tensorboard_log=osp.join(args.log_path), ) if args.eval: - evaluate(args, model) + evaluate(args, env_id, model) else: - train(args, model) + train(args, env_id, model) if __name__ == "__main__": diff --git a/kits/rl/sb3/wrappers/__init__.py b/kits/rl/sb3/wrappers/__init__.py new file mode 100644 index 00000000..19ed27ac --- /dev/null +++ b/kits/rl/sb3/wrappers/__init__.py @@ -0,0 +1,2 @@ +from .controllers import SimpleUnitDiscreteController +from .obs_wrappers import SimpleUnitObservationWrapper diff --git a/kits/rl/sb3/wrappers/controllers.py b/kits/rl/sb3/wrappers/controllers.py new file mode 100644 index 00000000..899bfa1d --- /dev/null +++ b/kits/rl/sb3/wrappers/controllers.py @@ -0,0 +1,223 @@ +import sys +from typing import Any, Dict + +import numpy as np +import numpy.typing as npt +from gym import spaces + + +# Controller class copied here since you won't have access to the luxai_s2 package directly on the competition server +class Controller: + def __init__(self, action_space: spaces.Space) -> None: + self.action_space = action_space + + def action_to_lux_action( + self, agent: str, obs: Dict[str, Any], action: npt.NDArray + ): + """ + Takes as input the current "raw observation" and the parameterized action and returns + an action formatted for the Lux env + """ + raise NotImplementedError() + + def action_masks(self, agent: str, obs: Dict[str, Any]): + """ + Generates a boolean action mask indicating in each discrete dimension whether it would be valid or not + """ + raise NotImplementedError() + + +class SimpleUnitDiscreteController(Controller): + def __init__(self, env_cfg) -> None: + """ + A simple controller that controls only the robot that will get spawned. 
+ Moreover, it will always try to spawn one heavy robot if there are none regardless of action given + + For the robot unit + - 4 cardinal direction movement (4 dims) + - a move center no-op action (1 dim) + - transfer action just for transferring ice in 4 cardinal directions or center (5) + - pickup action for power (1 dims) + - dig action (1 dim) + - no op action (1 dim) - equivalent to not submitting an action queue which costs power + + It does not include + - self destruct action + - recharge action + - planning (via actions executing multiple times or repeating actions) + - factory actions + - transferring power or resources other than ice + + To help understand how to this controller works to map one action space to the original lux action space, + see how the lux action space is defined in luxai_s2/spaces/action.py + + """ + self.env_cfg = env_cfg + self.move_act_dims = 4 + self.transfer_act_dims = 5 + self.pickup_act_dims = 1 + self.dig_act_dims = 1 + self.no_op_dims = 1 + + self.move_dim_high = self.move_act_dims + self.transfer_dim_high = self.move_dim_high + self.transfer_act_dims + self.pickup_dim_high = self.transfer_dim_high + self.pickup_act_dims + self.dig_dim_high = self.pickup_dim_high + self.dig_act_dims + self.no_op_dim_high = self.dig_dim_high + self.no_op_dims + + self.total_act_dims = self.no_op_dim_high + action_space = spaces.Discrete(self.total_act_dims) + super().__init__(action_space) + + def _is_move_action(self, id): + return id < self.move_dim_high + + def _get_move_action(self, id): + # move direction is id + 1 since we don't allow move center here + return np.array([0, id + 1, 0, 0, 0, 1]) + + def _is_transfer_action(self, id): + return id < self.transfer_dim_high + + def _get_transfer_action(self, id): + id = id - self.move_dim_high + transfer_dir = id % 5 + return np.array([1, transfer_dir, 0, self.env_cfg.max_transfer_amount, 0, 1]) + + def _is_pickup_action(self, id): + return id < self.pickup_dim_high + + def _get_pickup_action(self, id): + return np.array([2, 0, 4, self.env_cfg.max_transfer_amount, 0, 1]) + + def _is_dig_action(self, id): + return id < self.dig_dim_high + + def _get_dig_action(self, id): + return np.array([3, 0, 0, 0, 0, 1]) + + def action_to_lux_action( + self, agent: str, obs: Dict[str, Any], action: npt.NDArray + ): + shared_obs = obs["player_0"] + lux_action = dict() + units = shared_obs["units"][agent] + for unit_id in units.keys(): + unit = units[unit_id] + choice = action + action_queue = [] + no_op = False + if self._is_move_action(choice): + action_queue = [self._get_move_action(choice)] + elif self._is_transfer_action(choice): + action_queue = [self._get_transfer_action(choice)] + elif self._is_pickup_action(choice): + action_queue = [self._get_pickup_action(choice)] + elif self._is_dig_action(choice): + action_queue = [self._get_dig_action(choice)] + else: + # action is a no_op, so we don't update the action queue + no_op = True + + # simple trick to help agents conserve power is to avoid updating the action queue + # if the agent was previously trying to do that particular action already + if len(unit["action_queue"]) > 0 and len(action_queue) > 0: + same_actions = (unit["action_queue"][0] == action_queue[0]).all() + if same_actions: + no_op = True + if not no_op: + lux_action[unit_id] = action_queue + + break + + factories = shared_obs["factories"][agent] + if len(units) == 0: + for unit_id in factories.keys(): + lux_action[unit_id] = 1 # build a single heavy + + return lux_action + + def action_masks(self, agent: str, 
obs: Dict[str, Any]): + """ + Defines a simplified action mask for this controller's action space + + Doesn't account for whether robot has enough power + """ + + # compute a factory occupancy map that will be useful for checking if a board tile + # has a factory and which team's factory it is. + shared_obs = obs[agent] + factory_occupancy_map = ( + np.ones_like(shared_obs["board"]["rubble"], dtype=int) * -1 + ) + factories = dict() + for player in shared_obs["factories"]: + factories[player] = dict() + for unit_id in shared_obs["factories"][player]: + f_data = shared_obs["factories"][player][unit_id] + f_pos = f_data["pos"] + # store in a 3x3 space around the factory position it's strain id. + factory_occupancy_map[ + f_pos[0] - 1 : f_pos[0] + 2, f_pos[1] - 1 : f_pos[1] + 2 + ] = f_data["strain_id"] + + units = shared_obs["units"][agent] + action_mask = np.zeros((self.total_act_dims), dtype=bool) + for unit_id in units.keys(): + action_mask = np.zeros(self.total_act_dims) + # movement is always valid + action_mask[:4] = True + + # transferring is valid only if the target exists + unit = units[unit_id] + pos = np.array(unit["pos"]) + # a[1] = direction (0 = center, 1 = up, 2 = right, 3 = down, 4 = left) + move_deltas = np.array([[0, 0], [0, -1], [1, 0], [0, 1], [-1, 0]]) + for i, move_delta in enumerate(move_deltas): + transfer_pos = np.array( + [pos[0] + move_delta[0], pos[1] + move_delta[1]] + ) + # check if theres a factory tile there + if ( + transfer_pos[0] < 0 + or transfer_pos[1] < 0 + or transfer_pos[0] >= len(factory_occupancy_map) + or transfer_pos[1] >= len(factory_occupancy_map[0]) + ): + continue + factory_there = factory_occupancy_map[transfer_pos[0], transfer_pos[1]] + if factory_there in shared_obs["teams"][agent]["factory_strains"]: + action_mask[ + self.transfer_dim_high - self.transfer_act_dims + i + ] = True + + factory_there = factory_occupancy_map[pos[0], pos[1]] + on_top_of_factory = ( + factory_there in shared_obs["teams"][agent]["factory_strains"] + ) + + # dig is valid only if on top of tile with rubble or resources or lichen + board_sum = ( + shared_obs["board"]["ice"][pos[0], pos[1]] + + shared_obs["board"]["ore"][pos[0], pos[1]] + + shared_obs["board"]["rubble"][pos[0], pos[1]] + + shared_obs["board"]["lichen"][pos[0], pos[1]] + ) + if board_sum > 0 and not on_top_of_factory: + action_mask[ + self.dig_dim_high - self.dig_act_dims : self.dig_dim_high + ] = True + + # pickup is valid only if on top of factory tile + if on_top_of_factory: + action_mask[ + self.pickup_dim_high - self.pickup_act_dims : self.pickup_dim_high + ] = True + action_mask[ + self.dig_dim_high - self.dig_act_dims : self.dig_dim_high + ] = False + + # no-op is always valid + action_mask[-1] = True + break + return action_mask diff --git a/kits/rl/sb3/wrappers/obs_wrappers.py b/kits/rl/sb3/wrappers/obs_wrappers.py new file mode 100644 index 00000000..f6c889da --- /dev/null +++ b/kits/rl/sb3/wrappers/obs_wrappers.py @@ -0,0 +1,90 @@ +from typing import Any, Dict + +import gym +import numpy as np +import numpy.typing as npt +from gym import spaces + + +class SimpleUnitObservationWrapper(gym.ObservationWrapper): + """ + A simple state based observation to work with in pair with the SimpleUnitDiscreteController + + It contains info only on the first robot, the first factory you own, and some useful features. If there are no owned robots the observation is just zero. + No information about the opponent is included. This will generate observations for all teams. 
+ + Included features: + - First robot's stats + - distance vector to closest ice tile + - distance vector to first factory + + """ + + def __init__(self, env: gym.Env) -> None: + super().__init__(env) + self.observation_space = spaces.Box(-999, 999, shape=(13,)) + + def observation(self, obs): + return SimpleUnitObservationWrapper.convert_obs(obs, self.env.state.env_cfg) + + # we make this method static so the submission/evaluation code can use this as well + @staticmethod + def convert_obs(obs: Dict[str, Any], env_cfg: Any) -> Dict[str, npt.NDArray]: + observation = dict() + shared_obs = obs["player_0"] + ice_map = shared_obs["board"]["ice"] + ice_tile_locations = np.argwhere(ice_map == 1) + + for agent in obs.keys(): + obs_vec = np.zeros( + 13, + ) + + factories = shared_obs["factories"][agent] + factory_vec = np.zeros(2) + for k in factories.keys(): + # here we track a normalized position of the first friendly factory + factory = factories[k] + factory_vec = np.array(factory["pos"]) / env_cfg.map_size + break + units = shared_obs["units"][agent] + for k in units.keys(): + unit = units[k] + + # store cargo+power values scaled to [0, 1] + cargo_space = env_cfg.ROBOTS[unit["unit_type"]].CARGO_SPACE + battery_cap = env_cfg.ROBOTS[unit["unit_type"]].BATTERY_CAPACITY + cargo_vec = np.array( + [ + unit["power"] / battery_cap, + unit["cargo"]["ice"] / cargo_space, + unit["cargo"]["ore"] / cargo_space, + unit["cargo"]["water"] / cargo_space, + unit["cargo"]["metal"] / cargo_space, + ] + ) + unit_type = ( + 0 if unit["unit_type"] == "LIGHT" else 1 + ) # note that build actions use 0 to encode Light + # normalize the unit position + pos = np.array(unit["pos"]) / env_cfg.map_size + unit_vec = np.concatenate( + [pos, [unit_type], cargo_vec, [unit["team_id"]]], axis=-1 + ) + + # we add some engineered features down here + # compute closest ice tile + ice_tile_distances = np.mean( + (ice_tile_locations - np.array(unit["pos"])) ** 2, 1 + ) + # normalize the ice tile location + closest_ice_tile = ( + ice_tile_locations[np.argmin(ice_tile_distances)] / env_cfg.map_size + ) + obs_vec = np.concatenate( + [unit_vec, factory_vec - pos, closest_ice_tile - pos], axis=-1 + ) + break + observation[agent] = obs_vec + + return observation diff --git a/kits/rl/tutorials/README.md b/kits/rl/tutorials/README.md new file mode 100644 index 00000000..567867c1 --- /dev/null +++ b/kits/rl/tutorials/README.md @@ -0,0 +1,2 @@ +# Tutorials for Lux AI Season 2 + diff --git a/kits/rl/tutorials/assets/rl_loop.jpg b/kits/rl/tutorials/assets/rl_loop.jpg new file mode 100644 index 00000000..0b516376 Binary files /dev/null and b/kits/rl/tutorials/assets/rl_loop.jpg differ diff --git a/kits/rl/tutorials/ppo_policy.webm b/kits/rl/tutorials/ppo_policy.webm new file mode 100644 index 00000000..618a67ec Binary files /dev/null and b/kits/rl/tutorials/ppo_policy.webm differ diff --git a/kits/rl/tutorials/ppo_policy_vec_env.webm b/kits/rl/tutorials/ppo_policy_vec_env.webm new file mode 100644 index 00000000..e50424ae Binary files /dev/null and b/kits/rl/tutorials/ppo_policy_vec_env.webm differ diff --git a/kits/rl/tutorials/random_interaction.webm b/kits/rl/tutorials/random_interaction.webm new file mode 100644 index 00000000..4ed1083c Binary files /dev/null and b/kits/rl/tutorials/random_interaction.webm differ diff --git a/kits/rl/tutorials/rl-with-lux-1-intro-to-rl.ipynb b/kits/rl/tutorials/rl-with-lux-1-intro-to-rl.ipynb new file mode 100644 index 00000000..7927874b --- /dev/null +++ b/kits/rl/tutorials/rl-with-lux-1-intro-to-rl.ipynb 
@@ -0,0 +1 @@ +{"metadata":{"kernelspec":{"language":"python","display_name":"Python 3","name":"python3"},"language_info":{"name":"python","version":"3.7.12","mimetype":"text/x-python","codemirror_mode":{"name":"ipython","version":3},"pygments_lexer":"ipython3","nbconvert_exporter":"python","file_extension":".py"}},"nbformat_minor":4,"nbformat":4,"cells":[{"cell_type":"markdown","source":"## Setup Code\n\nBefore we start lets install some dependencies","metadata":{"_uuid":"8f2839f25d086af736a60e9eeb907d3b93b6e0e5","_cell_guid":"b1076dfc-b9ad-4769-8c92-a6c4dae69d19"}},{"cell_type":"code","source":"# verify version\n!python --version\n!pip install --upgrade luxai_s2\n!pip install gym==0.19 pyglet\n!cp -r ../input/lux-ai-season-2/* .","metadata":{"_kg_hide-input":true,"_kg_hide-output":true,"scrolled":true,"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"%%writefile /opt/conda/lib/python3.7/site-packages/luxai_s2/version.py\n__version__ = \"\"","metadata":{"_kg_hide-input":true,"_kg_hide-output":true,"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"markdown","source":"# Introduction to Reinforcement Learning 🤖\n\nLooking to try out RL for the Lux AI competition on Kaggle? If you haven't tried RL before, this tutorial is a perfect quick start into how RL generally works, and how to program a basic agent. This is part 1 of a 3 part series on training RL agents leading up to a succesful RL agent submittable to the Lux AI competition. Part 1 will cover the basics of RL, how gym environments work, and how to train an agent and scale up training.\n\n\nLet's dig in, welcome to the world of reinforcement learning!\n\nIn AI, RL is a framework of **learning via interaction**, often trying to **maximize the reinforcing reward**. Humans and animals alike sort of naturally follow the paradigm in learning behaviors. We reward a dog for doing a trick via a positive reward signal by giving it a treat. We penalize a dog for peeing on the floor by giving a negative reward signal (saying no!) or giving no reward. For a fun video of reinforcement learning you can watch this chicken below immediately learn to peck a particular colored dot via reinforcing it's actions with food.","metadata":{}},{"cell_type":"code","source":"from IPython.display import IFrame\nIFrame(\"https://www.youtube.com/embed/spfpBrBjntg\", 640, 480,)","metadata":{"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"markdown","source":"RL has seen a massive growth in applications in recent years thanks to the rise of deep learning resulting in Deep Reinforcement Learning. Deep learning has enabled models trained via RL to solve far more complex tasks, including [mining diamonds in Minecraft](https://danijar.com/project/dreamerv3/), [managing a nuclear fusion reactor](https://www.deepmind.com/blog/accelerating-fusion-science-through-learned-plasma-control), or [controlling a legged robot](https://manipulation-locomotion.github.io/).\n\n\nThis tutorial will cover the practical basics of RL and how to program a simple deep RL agent. For those more interested in some of the history, math, and more advanced details of RL I highly recommend reading the following seminal book on RL by Richard S. Sutton and Andrew G. Barto: http://www.incompleteideas.net/book/RLbook2020.pdf","metadata":{}},{"cell_type":"markdown","source":"## 1 The Environment Loop\n\nThe core component of RL is the environment loop. 
It's what enables an agent in an environment to repeatedly interact and improve itself.\n\nWe discretize time in an environment into individual **time steps** labeled as `t` below. At each time step, the agent uses the current environment observation (also known as state) $S_t$ to produce an action $A_t$ and executes that action in the environment. The environment updates in response to the action and gives the agent a new observation $S_{t+1}$ as well as a reward signal $R_{t+1}$ telling the agent how well it is doing. Eventually the environment will tell the agent it is completed and this completes one full episode.\n\n![](https://github.com/Lux-AI-Challenge/Lux-Design-S2/blob/main/kits/rl/tutorials/assets/rl_loop.jpg?raw=1)\n\nThese days, in deep RL the agent is typically a deep neural network that takes an observation as input and produces an action as output.\n\nWhile the agent is continously interacting, it will also periodically update its neural network via an optimization algorithm with a few objectives. Training in RL requires balancing exploration and exploitation. While the overall goal of the agent is to exploit and to **maximize return** (the sum of rewards in an episode), you can easily learn suboptimal behaviors if you don't explore sufficiently in an environment to find better strategies. This explore vs exploitation problem is a foundational problem in RL that is still researched to this day.\n\nFor a deep dive into the math and algorithms the [Spinning Up project](https://spinningup.openai.com/en/latest/) provides a great tutorial on some of the modern deep RL algorithms.\n","metadata":{}},{"cell_type":"markdown","source":"### 1.1 Environment Loop Code\n\nLet's get programming! The [Gym](https://github.com/openai/gym) package has now become the de-facto standard of the environment API and we will write code to show how to create an environment and interact with it here\n\n*Note: Recently the [Farama Foundation](https://farama.org/) which now manages the Gym package (now called [Gymnasium](https://www.gymlibrary.dev/)) has made several changes to the Gym API which is incompatible with most environments and RL libraries at the moment. This tutorial will be using the original API.*\n\nLet's first import a few packages.","metadata":{}},{"cell_type":"code","source":"import gym\nimport numpy as np\nimport matplotlib.pyplot as plt\nfrom PIL import Image","metadata":{"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"markdown","source":"To create an environment with `gym`, any environemnt that is registered with gym can be created with `gym.make(env_id)`. To start looking at what it looks like you have to first call `env.reset()` to start from a clean state. It's highly recommended to also use `env.seed` to seed environment to ensure reproducible resultss.\n\n`env.render()` will render the environment with a display window (if possible). For users without access to GUI (e.g. 
on Google Colab, Kaggle notebooks etc.), you can call `env.render(\"rgb_array\")` to get an RGB image and display that image to see what the current state looks like\n\nFor this tutorial we will play around with the CartPole environment where the task is to keep the pole upright by simply moving the black box left and right.","metadata":{}},{"cell_type":"code","source":"env_id = \"CartPole-v1\"\nenv = gym.make(env_id)\nenv.seed(0)\nenv.reset()\nimg = env.render(\"rgb_array\")\nenv.close()","metadata":{"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"markdown","source":"Now let's try interacting with the environment. All environments will first define an observation space and action space. These spaces define the shape and structure of the observations returned by the environment and the actions it accepts.\n\nNeatly, the action space `env.action_space` allows you to randomly sample actions to try out. We will write a simple environment loop with random actions below. The loop will repeatedly ask for an action and we will step forward in the environment with that action via `env.step(action)`. \n\nYou might notice that `env.step` returns 4 items, `obs`, `reward`, `done`, and `info`. `obs` is the next observation. `reward` is the scalard reward signal given. `done` represents a somewhat ambiguous meaning. When it's `True` it means the episode is completed and you must call `env.reset()` before stepping through it again. Episode completion can occur for a number of reasons depending on the chosen environment. For CartPole-v0, `done` is `True` whenever a time limit is reached or if the pole falls down too far. Finally `info` usually is not important, but may contain some useful information depending on the environment.\n\nFor users without a GUI, we also provide a simple animation function to record and save videos","metadata":{}},{"cell_type":"code","source":"def animate(imgs, video_name=None, _return=True):\n # using cv2 to generate videos\n import cv2\n import os\n import string\n import random\n video_name = video_name if video_name is not None else ''.join(random.choice(string.ascii_letters) for i in range(18))+'.webm'\n height, width, layers = imgs[0].shape\n video = cv2.VideoWriter(video_name, cv2.VideoWriter_fourcc(*'VP80'), 20, (width,height))\n for img in imgs:\n img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)\n video.write(img)\n video.release()\n if _return:\n from IPython.display import Video\n return Video(video_name)","metadata":{},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"# The environment loop\nobs = env.reset() # always reset before starting a new episode!\nimgs = []\nfor i in range(100):\n action = env.action_space.sample() # sample a random action\n obs, reward, done, info = env.step(action) # get the new observation and reward\n imgs += [env.render(\"rgb_array\")] # save to video\n if done: env.reset()\nenv.close() # close the display window and free up resources\nanimate(imgs, \"random_interaction.webm\") # generate the video replay","metadata":{},"execution_count":null,"outputs":[]},{"cell_type":"markdown","source":"## 2 Reinforcement Learning\n\nNow that we know how an environment works, we can try to solve it via RL. The optimization code for an RL algorithm is out of the scope of the tutorial so we will rely on a popular RL library called [Stable Baselines 3 (SB3)](https://github.com/DLR-RM/stable-baselines3). 
Run the command below to install it","metadata":{}},{"cell_type":"code","source":"!pip install stable-baselines3","metadata":{},"execution_count":null,"outputs":[]},{"cell_type":"markdown","source":"### 2.1 RL Training\nTraining RL algorithms for single-agent environments like CartPole is simple with SB3.\n\nThe algorithm we will use is called PPO. While the specifics of the algorithm are out of the scope, the general way an RL algorithm like PPO works is via a two stage process that constantly repeats.\n\n1. Collect interaction samples with sampled actions (observation, action, reward) from an environment and store into a replay buffer.\n\n2. Sample from the replay buffer and optimize the policy to maximize the return\n\nBelow is some example code which trains a policy by interacting for up to 10,000 timesteps with the environment and then evaluates it. You will notice that compared to using random actions, this policy can keep the pole upright for much longer (success!)","metadata":{}},{"cell_type":"code","source":"from stable_baselines3 import PPO\n\nenv = gym.make(\"CartPole-v1\")\n\n# create a PPO algorithm powered agent\nmodel = PPO(\"MlpPolicy\", env, verbose=1)\n# learn with a budget of 10,000 environment interactions\nmodel.learn(total_timesteps=10_000)\n\n# evaluate and watch the learned policy\nobs = env.reset()\nimgs = []\nfor i in range(1000):\n action, _states = model.predict(obs, deterministic=True)\n obs, reward, done, info = env.step(action)\n imgs += [env.render(\"rgb_array\")] # save to video\n # VecEnv resets automatically\n if done:\n obs = env.reset()\nenv.close()\nanimate(imgs, \"ppo_policy.webm\") # generate the video replay","metadata":{},"execution_count":null,"outputs":[]},{"cell_type":"markdown","source":"### 2.2 Scaling up Deep RL\n\nRL is often known for being a promising direction, but a fairly inefficient one. The particular algorithm used above is called PPO and has been a staple of the RL research community as it is generally easy to tune and very robust. However it is very **sample inefficient**, meaning it needs many many environment interactions in order to learn something. One way of dealing with this problem is to simply scale up the training by making it run faster. (The other way is to use different algorithms e.g. off-policy ones such as SAC but that's a topic beyond the scope of this tutorial)","metadata":{}},{"cell_type":"markdown","source":"A simple way to increase the speed at which we sample from an environment is to sample from many environments simultaneously. This enables us to leverage the power of parallel computation that is fast in neural networks and even faster when using a GPU/TPU. We can run `n_envs` environments simultaneously to form a single **Vectorized Environment**.\n\nVectorized environments **batch** an environment so that returned observations have an additional batch dimension (e.g. if it was shape `(3,)` it's new shape is `(B, 3)`) and accepted actions must also have this batch dimension, with `B` equal to the number of parallel environments. As vectorized environments accept a batch of actions, with GPUs/TPUs we can easily generate this batch of actions far faster than generating them one at a time, improving the speed at which we sample from an environment.\n\nTry changing the `n_envs` parameter below. You will notice a massive speed up in the training time as `n_envs` is larger. 
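As a quick sanity check of the batch dimension described above, here is a small illustrative snippet (not part of the tutorial's training code) using the same CartPole environment:

```python
from stable_baselines3.common.env_util import make_vec_env

# Observations from a vectorized environment gain a leading batch dimension:
# a single CartPole observation has shape (4,), so 4 parallel copies
# return a batch of shape (4, 4) from reset().
vec_env = make_vec_env("CartPole-v1", n_envs=4)
batched_obs = vec_env.reset()
print(batched_obs.shape)  # (4, 4)
vec_env.close()
```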
Note that `n_envs` shouldn't be higher than the number of cores your CPU has.","metadata":{}},{"cell_type":"code","source":"from stable_baselines3.common.env_util import make_vec_env\n\nn_envs = 2 # configure how many environments to run in parallel.\n\nenv = make_vec_env(\"CartPole-v1\", n_envs=n_envs)\nmodel = PPO(\"MlpPolicy\", env, verbose=1)\nmodel.learn(total_timesteps=10_000)\n\nobs = env.reset()\nimgs = []\nfor i in range(1000):\n action, _states = model.predict(obs, deterministic=True)\n obs, reward, done, info = env.step(action)\n imgs += [env.render(\"rgb_array\")] # save to video\n # VecEnv resets automatically so no need to do below\n # if done: env.reset()\nenv.close()\nanimate(imgs, \"ppo_policy_vec_env.webm\") # generate the video replay","metadata":{},"execution_count":null,"outputs":[]},{"cell_type":"markdown","source":"## 3 Final Thoughts\n\nAnd that's the basics of programming a deep RL agent! Pick an environment, vectorize it, and run it through a RL library like SB3.\n\nFor more complicated environments like robotics or multi-agent environments there's a lot more work involved. This may include **reward shaping**, **hyperparameter tuning**, as well as \n\nPart 2 of the tutorial series will these details on how to use RL to tackle parts of the Lux AI Season2 environment.","metadata":{}},{"cell_type":"code","source":"","metadata":{},"execution_count":null,"outputs":[]}]} \ No newline at end of file diff --git a/kits/rl/tutorials/rl-with-lux-2-rl-problem-solving.ipynb b/kits/rl/tutorials/rl-with-lux-2-rl-problem-solving.ipynb new file mode 100644 index 00000000..1771a028 --- /dev/null +++ b/kits/rl/tutorials/rl-with-lux-2-rl-problem-solving.ipynb @@ -0,0 +1 @@ +{"metadata":{"kernelspec":{"name":"python3","display_name":"Python 3","language":"python"},"language_info":{"name":"python","version":"3.7.12","mimetype":"text/x-python","codemirror_mode":{"name":"ipython","version":3},"pygments_lexer":"ipython3","nbconvert_exporter":"python","file_extension":".py"}},"nbformat_minor":5,"nbformat":4,"cells":[{"cell_type":"markdown","source":"## Setup Code\n\nBefore we start lets install some dependencies","metadata":{}},{"cell_type":"code","source":"# verify version\n!python --version\n!pip install --upgrade luxai_s2\n!pip install gym==0.21 stable-baselines3\n!cp -r ../input/lux-ai-season-2/* .","metadata":{"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"%%writefile /opt/conda/lib/python3.7/site-packages/luxai_s2/version.py\n__version__ = \"\"","metadata":{"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"markdown","source":"# Reinforcement Learning for Lux AI Season 2 🤖\n\nPart 2 of the RL series will now dig into building a working RL agent for the Lux AI Challenge, Season 2!\n\nLux AI is designed to be intuitive to understand, but heavily layered in complexity and interactions of game mechanics in an multi-agent cooperative and competitive environment. \n\nLux AI Season 2's rules can be found here: https://www.lux-ai.org/specs-s2. Make sure to read them to learn how to the game works, and the rest of this tutorial will be much easier to understand.\n\nPart 1 of the series covered the single-agent RL setup, but Lux AI Season 2 is multi-agent! Moreover, the environment has different phases and a complex action space which makes it difficult to learn or use of the box. \n\nThis tutorial will cover simple tools and tricks on how to reduce a complex problem into a easier one! We will primarily focus on three things: \n\n1. 
Simplifying the action space with controllers/action wrappers\n2. Simplifying observations\n3. Transforming the three phase Lux AI game into a single phase game\n\nThis starter kit is also implemented in https://github.com/Lux-AI-Challenge/Lux-Design-S2/tree/main/kits/rl/sb3\n","metadata":{}},{"cell_type":"markdown","source":"## 1. Simplifying the Action Space\n\nThe action space is quite complicated in Lux S2 as each robot can move, dig, transfer/pickup, all in addition to being able to combine any sequence of these primitives into an action queue of up to length 20. For machine learning, such a massive action space leads to the [curse of dimensionality](https://en.wikipedia.org/wiki/Curse_of_dimensionality), making any ML algorithm have a much harder time to learn something useful, especially in RL.\n\nTo handle this, we can program a custom Controller that translates actions from one action space to the original action space and adds a few tricks and heuristics to be integrated with RL training. Since the original lux action space is large, this controller can be a little complicated. For those who want to dive straight into training you can use the controller as is. \n\nFor a high-level overview this controller will\n- Define a massively simplified action space\n- Translate actions from the discrete action space into the Lux S2 action space `action_to_lux_action`\n- Add a heuristic factory action to build one Heavy robot\n- Generate action masks where False = an action is invalid\n\nOverall, the action space of the controller is a discrete action space with just 12 dimensions to control just one heavy robot. It allows for a robot's 4 directional movement, transferring ice in 4 directions in addition to center, picking up power, digging, and a no-op action. This doesn't include factory actions, self destruct, recharging, transferring other types of resources, or longer planned action queues in the action space, which are all open problems for you to potentially tackle!\n\nThe controller also includes a trick to allow agents to reduce power costs incurred by action queue updates. The controller skips updating action queues if the existing action queue is the same as the new one the agent wants to use for the robot.\n\nWhile this simplification doesn't include adding in more complex things like more heavy robots or planting lichen, it will train out a succesful policy that with simple modifications, will beat the majority of bots using the rule-based starter kits.\n\nMore advanced usages can consider how to model the actions of different types of units on a game board (e.g. heavy, light, or factory) by using a MultiDiscrete action space. A more practical and likely winning solution can be to use a image-like controller by generating actions for each tile on the board and only using the actions with friendly units on that tile. 
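To make the MultiDiscrete idea above slightly more concrete, here is a minimal sketch; the entity counts and the factory action count are illustrative assumptions for the example, not values used by this starter kit:

```python
from gym import spaces

# Illustrative sketch only: one discrete sub-action per controlled entity.
# MAX_ROBOTS, MAX_FACTORIES and FACTORY_ACT_DIMS are assumptions for the
# example, not constants from the kit.
MAX_ROBOTS, MAX_FACTORIES = 10, 5
ROBOT_ACT_DIMS = 12    # the same 12-dim simplified robot action space described above
FACTORY_ACT_DIMS = 4   # e.g. build light, build heavy, water lichen, no-op

multi_discrete_space = spaces.MultiDiscrete(
    [ROBOT_ACT_DIMS] * MAX_ROBOTS + [FACTORY_ACT_DIMS] * MAX_FACTORIES
)
print(multi_discrete_space.sample().shape)  # (15,)
```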
See [Season 1's solution by ToadBrigade](https://www.kaggle.com/competitions/lux-ai-2021/discussion/294993) and our previous [research paper: Emergent Collective Intelligence from Massive-Agent Cooperation and Competition](https://arxiv.org/abs/2301.01609) for how a image-like controller can work.\n","metadata":{"tags":[]}},{"cell_type":"code","source":"import sys\nfrom typing import Any, Dict\n\nimport numpy as np\nimport numpy.typing as npt\nfrom gym import spaces\n\n\n# Controller class copied here since you won't have access to the luxai_s2 package directly on the competition server\nclass Controller:\n def __init__(self, action_space: spaces.Space) -> None:\n self.action_space = action_space\n\n def action_to_lux_action(\n self, agent: str, obs: Dict[str, Any], action: npt.NDArray\n ):\n \"\"\"\n Takes as input the current \"raw observation\" and the parameterized action and returns\n an action formatted for the Lux env\n \"\"\"\n raise NotImplementedError()\n\n def action_masks(self, agent: str, obs: Dict[str, Any]):\n \"\"\"\n Generates a boolean action mask indicating in each discrete dimension whether it would be valid or not\n \"\"\"\n raise NotImplementedError()\n\n\nclass SimpleUnitDiscreteController(Controller):\n def __init__(self, env_cfg) -> None:\n \"\"\"\n A simple controller that controls only the robot that will get spawned.\n Moreover, it will always try to spawn one heavy robot if there are none regardless of action given\n\n For the robot unit\n - 4 cardinal direction movement (4 dims)\n - a move center no-op action (1 dim)\n - transfer action just for transferring ice in 4 cardinal directions or center (5)\n - pickup action for power (1 dims)\n - dig action (1 dim)\n - no op action (1 dim) - equivalent to not submitting an action queue which costs power\n\n It does not include\n - self destruct action\n - recharge action\n - planning (via actions executing multiple times or repeating actions)\n - factory actions\n - transferring power or resources other than ice\n\n To help understand how to this controller works to map one action space to the original lux action space,\n see how the lux action space is defined in luxai_s2/spaces/action.py\n\n \"\"\"\n self.env_cfg = env_cfg\n self.move_act_dims = 4\n self.transfer_act_dims = 5\n self.pickup_act_dims = 1\n self.dig_act_dims = 1\n self.no_op_dims = 1\n\n self.move_dim_high = self.move_act_dims\n self.transfer_dim_high = self.move_dim_high + self.transfer_act_dims\n self.pickup_dim_high = self.transfer_dim_high + self.pickup_act_dims\n self.dig_dim_high = self.pickup_dim_high + self.dig_act_dims\n self.no_op_dim_high = self.dig_dim_high + self.no_op_dims\n\n self.total_act_dims = self.no_op_dim_high\n action_space = spaces.Discrete(self.total_act_dims)\n super().__init__(action_space)\n\n def _is_move_action(self, id):\n return id < self.move_dim_high\n\n def _get_move_action(self, id):\n # move direction is id + 1 since we don't allow move center here\n return np.array([0, id + 1, 0, 0, 0, 1])\n\n def _is_transfer_action(self, id):\n return id < self.transfer_dim_high\n\n def _get_transfer_action(self, id):\n id = id - self.move_dim_high\n transfer_dir = id % 5\n return np.array([1, transfer_dir, 0, self.env_cfg.max_transfer_amount, 0, 1])\n\n def _is_pickup_action(self, id):\n return id < self.pickup_dim_high\n\n def _get_pickup_action(self, id):\n return np.array([2, 0, 4, self.env_cfg.max_transfer_amount, 0, 1])\n\n def _is_dig_action(self, id):\n return id < self.dig_dim_high\n\n def _get_dig_action(self, id):\n 
return np.array([3, 0, 0, 0, 0, 1])\n\n def action_to_lux_action(\n self, agent: str, obs: Dict[str, Any], action: npt.NDArray\n ):\n shared_obs = obs[\"player_0\"]\n lux_action = dict()\n units = shared_obs[\"units\"][agent]\n for unit_id in units.keys():\n unit = units[unit_id]\n choice = action\n action_queue = []\n no_op = False\n if self._is_move_action(choice):\n action_queue = [self._get_move_action(choice)]\n elif self._is_transfer_action(choice):\n action_queue = [self._get_transfer_action(choice)]\n elif self._is_pickup_action(choice):\n action_queue = [self._get_pickup_action(choice)]\n elif self._is_dig_action(choice):\n action_queue = [self._get_dig_action(choice)]\n else:\n # action is a no_op, so we don't update the action queue\n no_op = True\n\n # simple trick to help agents conserve power is to avoid updating the action queue\n # if the agent was previously trying to do that particular action already\n if len(unit[\"action_queue\"]) > 0 and len(action_queue) > 0:\n same_actions = (unit[\"action_queue\"][0] == action_queue[0]).all()\n if same_actions:\n no_op = True\n if not no_op:\n lux_action[unit_id] = action_queue\n\n break\n\n factories = shared_obs[\"factories\"][agent]\n if len(units) == 0:\n for unit_id in factories.keys():\n lux_action[unit_id] = 1 # build a single heavy\n\n return lux_action\n\n def action_masks(self, agent: str, obs: Dict[str, Any]):\n \"\"\"\n Defines a simplified action mask for this controller's action space\n\n Doesn't account for whether robot has enough power\n \"\"\"\n\n # compute a factory occupancy map that will be useful for checking if a board tile\n # has a factory and which team's factory it is.\n shared_obs = obs[agent]\n factory_occupancy_map = (\n np.ones_like(shared_obs[\"board\"][\"rubble\"], dtype=int) * -1\n )\n factories = dict()\n for player in shared_obs[\"factories\"]:\n factories[player] = dict()\n for unit_id in shared_obs[\"factories\"][player]:\n f_data = shared_obs[\"factories\"][player][unit_id]\n f_pos = f_data[\"pos\"]\n # store in a 3x3 space around the factory position it's strain id.\n factory_occupancy_map[\n f_pos[0] - 1 : f_pos[0] + 2, f_pos[1] - 1 : f_pos[1] + 2\n ] = f_data[\"strain_id\"]\n\n units = shared_obs[\"units\"][agent]\n action_mask = np.zeros((self.total_act_dims), dtype=bool)\n for unit_id in units.keys():\n action_mask = np.zeros(self.total_act_dims)\n # movement is always valid\n action_mask[:4] = True\n\n # transferring is valid only if the target exists\n unit = units[unit_id]\n pos = np.array(unit[\"pos\"])\n # a[1] = direction (0 = center, 1 = up, 2 = right, 3 = down, 4 = left)\n move_deltas = np.array([[0, 0], [0, -1], [1, 0], [0, 1], [-1, 0]])\n for i, move_delta in enumerate(move_deltas):\n transfer_pos = np.array(\n [pos[0] + move_delta[0], pos[1] + move_delta[1]]\n )\n # check if theres a factory tile there\n if (\n transfer_pos[0] < 0\n or transfer_pos[1] < 0\n or transfer_pos[0] >= len(factory_occupancy_map)\n or transfer_pos[1] >= len(factory_occupancy_map[0])\n ):\n continue\n factory_there = factory_occupancy_map[transfer_pos[0], transfer_pos[1]]\n if factory_there in shared_obs[\"teams\"][agent][\"factory_strains\"]:\n action_mask[\n self.transfer_dim_high - self.transfer_act_dims + i\n ] = True\n\n factory_there = factory_occupancy_map[pos[0], pos[1]]\n on_top_of_factory = (\n factory_there in shared_obs[\"teams\"][agent][\"factory_strains\"]\n )\n\n # dig is valid only if on top of tile with rubble or resources or lichen\n board_sum = (\n 
shared_obs[\"board\"][\"ice\"][pos[0], pos[1]]\n + shared_obs[\"board\"][\"ore\"][pos[0], pos[1]]\n + shared_obs[\"board\"][\"rubble\"][pos[0], pos[1]]\n + shared_obs[\"board\"][\"lichen\"][pos[0], pos[1]]\n )\n if board_sum > 0 and not on_top_of_factory:\n action_mask[\n self.dig_dim_high - self.dig_act_dims : self.dig_dim_high\n ] = True\n\n # pickup is valid only if on top of factory tile\n if on_top_of_factory:\n action_mask[\n self.pickup_dim_high - self.pickup_act_dims : self.pickup_dim_high\n ] = True\n action_mask[\n self.dig_dim_high - self.dig_act_dims : self.dig_dim_high\n ] = False\n\n # no-op is always valid\n action_mask[-1] = True\n break\n return action_mask\n","metadata":{"execution":{"iopub.status.busy":"2023-02-01T00:50:34.093747Z","iopub.execute_input":"2023-02-01T00:50:34.094256Z","iopub.status.idle":"2023-02-01T00:50:34.233834Z","shell.execute_reply.started":"2023-02-01T00:50:34.094157Z","shell.execute_reply":"2023-02-01T00:50:34.232507Z"},"trusted":true},"execution_count":1,"outputs":[]},{"cell_type":"markdown","source":"## 2. Simplifying the Observation Space\n\nLux S2 is fully observable which means you can see everything on the map, the opponents units etc. However, this is very high dimensional and not necessarily easy to learn from due to the curse of dimensionality (again!). We want to simplify this observation space in a way that contains sufficient information to learn a good policy but is also easy to learn from.\n\nFor this tutorial, we will create a state-based observation space (no image like features e.g. the rubble, ice, ore maps) with some feature engineering that includes useful information such as the distance to the closest factory and ice tile. The wrapper we provide below will use the `gym.ObservationWrapper` interface. Note that since we are focusing on just controlling one heavy robot, the observation wrapper is written to only support one heavy robot (and returns 0 if there are none).\n\n\nMore advanced solutions can look into using the full set of observations and designing the appropriate neural net architecture to process them. One idea would be to use convolutional neural networks to process board features like images. See [Season 1's solution by ToadBrigade](https://www.kaggle.com/competitions/lux-ai-2021/discussion/294993) and our previous [research paper: Emergent Collective Intelligence from Massive-Agent Cooperation and Competition](https://arxiv.org/abs/2301.01609) for example architectures and feature engineering choices.\n","metadata":{"tags":[]}},{"cell_type":"code","source":"from typing import Any, Dict\n\nimport gym\nimport numpy as np\nimport numpy.typing as npt\nfrom gym import spaces\n\n\nclass SimpleUnitObservationWrapper(gym.ObservationWrapper):\n \"\"\"\n A simple state based observation to work with in pair with the SimpleUnitDiscreteController\n\n It contains info only on the first robot, the first factory you own, and some useful features. If there are no owned robots the observation is just zero.\n No information about the opponent is included. 
This will generate observations for all teams.\n\n Included features:\n - First robot's stats\n - distance vector to closest ice tile\n - distance vector to first factory\n\n \"\"\"\n\n def __init__(self, env: gym.Env) -> None:\n super().__init__(env)\n self.observation_space = spaces.Box(-999, 999, shape=(13,))\n\n def observation(self, obs):\n return SimpleUnitObservationWrapper.convert_obs(obs, self.env.state.env_cfg)\n\n # we make this method static so the submission/evaluation code can use this as well\n @staticmethod\n def convert_obs(obs: Dict[str, Any], env_cfg: Any) -> Dict[str, npt.NDArray]:\n observation = dict()\n shared_obs = obs[\"player_0\"]\n ice_map = shared_obs[\"board\"][\"ice\"]\n ice_tile_locations = np.argwhere(ice_map == 1)\n\n for agent in obs.keys():\n obs_vec = np.zeros(\n 13,\n )\n\n factories = shared_obs[\"factories\"][agent]\n factory_vec = np.zeros(2)\n for k in factories.keys():\n # here we track a normalized position of the first friendly factory\n factory = factories[k]\n factory_vec = np.array(factory[\"pos\"]) / env_cfg.map_size\n break\n units = shared_obs[\"units\"][agent]\n for k in units.keys():\n unit = units[k]\n\n # store cargo+power values scaled to [0, 1]\n cargo_space = env_cfg.ROBOTS[unit[\"unit_type\"]].CARGO_SPACE\n battery_cap = env_cfg.ROBOTS[unit[\"unit_type\"]].BATTERY_CAPACITY\n cargo_vec = np.array(\n [\n unit[\"power\"] / battery_cap,\n unit[\"cargo\"][\"ice\"] / cargo_space,\n unit[\"cargo\"][\"ore\"] / cargo_space,\n unit[\"cargo\"][\"water\"] / cargo_space,\n unit[\"cargo\"][\"metal\"] / cargo_space,\n ]\n )\n unit_type = (\n 0 if unit[\"unit_type\"] == \"LIGHT\" else 1\n ) # note that build actions use 0 to encode Light\n # normalize the unit position\n pos = np.array(unit[\"pos\"]) / env_cfg.map_size\n unit_vec = np.concatenate(\n [pos, [unit_type], cargo_vec, [unit[\"team_id\"]]], axis=-1\n )\n\n # we add some engineered features down here\n # compute closest ice tile\n ice_tile_distances = np.mean(\n (ice_tile_locations - np.array(unit[\"pos\"])) ** 2, 1\n )\n # normalize the ice tile location\n closest_ice_tile = (\n ice_tile_locations[np.argmin(ice_tile_distances)] / env_cfg.map_size\n )\n obs_vec = np.concatenate(\n [unit_vec, factory_vec - pos, closest_ice_tile - pos], axis=-1\n )\n break\n observation[agent] = obs_vec\n\n return observation","metadata":{"execution":{"iopub.status.busy":"2023-02-01T00:50:34.236567Z","iopub.execute_input":"2023-02-01T00:50:34.236932Z","iopub.status.idle":"2023-02-01T00:50:34.255263Z","shell.execute_reply.started":"2023-02-01T00:50:34.236898Z","shell.execute_reply":"2023-02-01T00:50:34.254085Z"},"trusted":true},"execution_count":2,"outputs":[]},{"cell_type":"markdown","source":"## 3. Transforming Lux S2 into a Single Phase\n\nNormally RL frameworks like Stable Baselines 3, RLlib, Tianshou etc. expect the action space and observation space to be consistent throughout an episode. Lux S2 does not conform to this as we add some additional complexity like bidding and factory placement phases. A simple way to get around this is to **upgrade the reset function.**\n\nPreviously we saw that `env.reset()` resets an environment to a clean slate. We will upgrade this function by building a environment wrapper that not only resets to the clean slate, but also handles the bidding and factory placement phases so effectively agents that are learning start from game states with factories already placed.\n\nBelow will build a wrapper that works with the SB3 package. 
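Before defining the wrapper, this is roughly how the pieces are intended to fit together once the next few cells have run; this is only a sketch, and `SB3Wrapper`, `SimpleUnitDiscreteController`, `zero_bid`, and `place_near_random_ice` are all defined below (`LuxAI_S2-v0` is the id registered by the `luxai_s2` package):

```python
import gym

# Sketch of the intended composition; every name referenced here is defined
# in the cells that follow.
lux_env = gym.make("LuxAI_S2-v0")
controller = SimpleUnitDiscreteController(lux_env.env_cfg)  # env_cfg is assumed to be exposed by the env
env = SB3Wrapper(
    lux_env,
    bid_policy=zero_bid,
    factory_placement_policy=place_near_random_ice,
    controller=controller,
)
env = SimpleUnitObservationWrapper(env)
obs = env.reset()  # bidding and factory placement are handled inside this reset
```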
To do this, we want to provide the wrapper a bidding policy and factory placement policy which will be used by all teams to handle the first two phases in the reset function. The code below does just that by overriding the environment's reset function in the wrapper. \n\nFurthermore, we want to use the Controller we defined earlier, so that is also an argument to the SB3Wrapper and we use it to transform actions inside the `env.step` function","metadata":{"tags":[]}},{"cell_type":"code","source":"from typing import Callable, Dict\n\nimport gym\nimport numpy as np\nimport numpy.typing as npt\nfrom gym import spaces\n\nimport luxai_s2.env\nfrom luxai_s2.env import LuxAI_S2\nfrom luxai_s2.state import ObservationStateDict\nfrom luxai_s2.unit import ActionType, BidActionType, FactoryPlacementActionType\nfrom luxai_s2.utils import my_turn_to_place_factory\nfrom luxai_s2.wrappers.controllers import (\n Controller,\n)\n\n\nclass SB3Wrapper(gym.Wrapper):\n def __init__(\n self,\n env: LuxAI_S2,\n bid_policy: Callable[\n [str, ObservationStateDict], Dict[str, BidActionType]\n ] = None,\n factory_placement_policy: Callable[\n [str, ObservationStateDict], Dict[str, FactoryPlacementActionType]\n ] = None,\n controller: Controller = None,\n ) -> None:\n \"\"\"\n A environment wrapper for Stable Baselines 3. It reduces the LuxAI_S2 env\n into a single phase game and places the first two phases (bidding and factory placement) into the env.reset function so that\n interacting agents directly start generating actions to play the third phase of the game.\n\n It also accepts a Controller that translates action's in one action space to a Lux S2 compatible action\n\n Parameters\n ----------\n bid_policy: Function\n A function accepting player: str and obs: ObservationStateDict as input that returns a bid action\n such as dict(bid=10, faction=\"AlphaStrike\"). By default will bid 0\n factory_placement_policy: Function\n A function accepting player: str and obs: ObservationStateDict as input that returns a factory placement action\n such as dict(spawn=np.array([2, 4]), metal=150, water=150). 
By default will spawn in a random valid location with metal=150, water=150\n controller : Controller\n A controller that parameterizes the action space into something more usable and converts parameterized actions to lux actions.\n See luxai_s2/wrappers/controllers.py for available controllers and how to make your own\n \"\"\"\n gym.Wrapper.__init__(self, env)\n self.env = env\n \n assert controller is not None\n \n # set our controller and replace the action space\n self.controller = controller\n self.action_space = controller.action_space\n\n # The simplified wrapper removes the first two phases of the game by using predefined policies (trained or heuristic)\n # to handle those two phases during each reset\n if factory_placement_policy is None:\n def factory_placement_policy(player, obs: ObservationStateDict):\n potential_spawns = np.array(\n list(zip(*np.where(obs[\"board\"][\"valid_spawns_mask\"] == 1)))\n )\n spawn_loc = potential_spawns[\n np.random.randint(0, len(potential_spawns))\n ]\n return dict(spawn=spawn_loc, metal=150, water=150)\n\n self.factory_placement_policy = factory_placement_policy\n if bid_policy is None:\n def bid_policy(player, obs: ObservationStateDict):\n faction = \"AlphaStrike\"\n if player == \"player_1\":\n faction = \"MotherMars\"\n return dict(bid=0, faction=faction)\n\n self.bid_policy = bid_policy\n\n self.prev_obs = None\n\n def step(self, action: Dict[str, npt.NDArray]):\n \n # here, for each agent in the game we translate their action into a Lux S2 action\n lux_action = dict()\n for agent in self.env.agents:\n if agent in action:\n lux_action[agent] = self.controller.action_to_lux_action(\n agent=agent, obs=self.prev_obs, action=action[agent]\n )\n else:\n lux_action[agent] = dict()\n \n # lux_action is now a dict mapping agent name to an action\n obs, reward, done, info = self.env.step(lux_action)\n self.prev_obs = obs\n return obs, reward, done, info\n\n def reset(self, **kwargs):\n # we upgrade the reset function here\n \n # we call the original reset function first\n obs = self.env.reset(**kwargs)\n \n # then use the bid policy to go through the bidding phase\n action = dict()\n for agent in self.env.agents:\n action[agent] = self.bid_policy(agent, obs[agent])\n obs, _, _, _ = self.env.step(action)\n \n # while real_env_steps < 0, we are in the factory placement phase\n # so we use the factory placement policy to step through this\n while self.env.state.real_env_steps < 0:\n action = dict()\n for agent in self.env.agents:\n if my_turn_to_place_factory(\n obs[\"player_0\"][\"teams\"][agent][\"place_first\"],\n self.env.state.env_steps,\n ):\n action[agent] = self.factory_placement_policy(agent, obs[agent])\n else:\n action[agent] = dict()\n obs, _, _, _ = self.env.step(action)\n self.prev_obs = obs\n \n return obs\n","metadata":{"execution":{"iopub.status.busy":"2023-02-01T00:50:34.257115Z","iopub.execute_input":"2023-02-01T00:50:34.257901Z","iopub.status.idle":"2023-02-01T00:50:35.633247Z","shell.execute_reply.started":"2023-02-01T00:50:34.257864Z","shell.execute_reply":"2023-02-01T00:50:35.631953Z"},"trusted":true},"execution_count":3,"outputs":[]},{"cell_type":"markdown","source":"### Defining a Bid and Factory Placement policy\n\nTo test the code above, we can program some heuristic bid and factory placement policies","metadata":{}},{"cell_type":"code","source":"def zero_bid(player, obs):\n # a policy that always bids 0\n faction = \"AlphaStrike\"\n if player == \"player_1\":\n faction = \"MotherMars\"\n return dict(bid=0, 
faction=faction)\n\ndef place_near_random_ice(player, obs):\n \"\"\"\n This policy will place a single factory with all the starting resources\n near a random ice tile\n \"\"\"\n if obs[\"teams\"][player][\"metal\"] == 0:\n return dict()\n potential_spawns = list(zip(*np.where(obs[\"board\"][\"valid_spawns_mask\"] == 1)))\n potential_spawns_set = set(potential_spawns)\n done_search = False\n \n # simple numpy trick to find locations adjacent to ice tiles.\n ice_diff = np.diff(obs[\"board\"][\"ice\"])\n pot_ice_spots = np.argwhere(ice_diff == 1)\n if len(pot_ice_spots) == 0:\n pot_ice_spots = potential_spawns\n \n # pick a random ice spot and search around it for spawnable locations.\n trials = 5\n while trials > 0:\n pos_idx = np.random.randint(0, len(pot_ice_spots))\n pos = pot_ice_spots[pos_idx]\n area = 3\n for x in range(area):\n for y in range(area):\n check_pos = [pos[0] + x - area // 2, pos[1] + y - area // 2]\n if tuple(check_pos) in potential_spawns_set:\n done_search = True\n pos = check_pos\n break\n if done_search:\n break\n if done_search:\n break\n trials -= 1\n \n if not done_search:\n spawn_loc = potential_spawns[np.random.randint(0, len(potential_spawns))]\n pos = spawn_loc\n \n # this will spawn a factory at pos and with all the starting metal and water\n metal = obs[\"teams\"][player][\"metal\"]\n return dict(spawn=pos, metal=metal, water=metal)","metadata":{"execution":{"iopub.status.busy":"2023-02-01T00:50:35.636509Z","iopub.execute_input":"2023-02-01T00:50:35.637301Z","iopub.status.idle":"2023-02-01T00:50:35.659903Z","shell.execute_reply.started":"2023-02-01T00:50:35.637234Z","shell.execute_reply":"2023-02-01T00:50:35.655104Z"},"trusted":true},"execution_count":4,"outputs":[]},{"cell_type":"markdown","source":"So **without the wrapper**, when we reset the environment it looks like this:","metadata":{}},{"cell_type":"code","source":"import matplotlib.pyplot as plt\nenv = gym.make(\"LuxAI_S2-v0\")\nenv.reset(seed=0)\nimg = env.render(\"rgb_array\")\nplt.imshow(img)","metadata":{"execution":{"iopub.status.busy":"2023-02-01T00:50:35.664031Z","iopub.execute_input":"2023-02-01T00:50:35.664436Z","iopub.status.idle":"2023-02-01T00:50:35.905239Z","shell.execute_reply.started":"2023-02-01T00:50:35.664381Z","shell.execute_reply":"2023-02-01T00:50:35.903835Z"},"trusted":true},"execution_count":5,"outputs":[{"execution_count":5,"output_type":"execute_result","data":{"text/plain":""},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"
","image/png":"iVBORw0KGgoAAAANSUhEUgAAAQEAAAD8CAYAAAB3lxGOAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8qNh9FAAAACXBIWXMAAAsTAAALEwEAmpwYAAA7oUlEQVR4nO19adAs11ne83bP+q33u6uudCVdSZZky0a+RjIYTIyDAQsTR6ZSUKIqWFW4ApXgAFX8QEBVIOVyBRLAP1LBhR0clMQLLoxjkxJgWWXjsiMsyZZky1qvpGvpLrrrt8/a3W9+dM90n2Wmz/T0zDfLeVTf1fSZs/byznmffhdiZlhYWMwvnL2egIWFxd7CCgELizmHFQIWFnMOKwQsLOYcVghYWMw5rBCwsJhzjEwIENFdRPQcEZ0kovtGNY6FhcVwoFHYCRCRC+B5AD8F4DSARwH8IjM/nftgFhYWQ2FUO4EfAnCSmV9i5haAzwC4e0RjWVhYDIHCiPq9BsCriePTAH64V+W1SpGvXq6MaCrDgxnwgkAsy7F/yrGvvuMQ4DqOMJ4fMJTdoPGEqM+RGRwi0JAngKN/5Gti1O24Tv4EDP/0pZ1LzHxILh+VENCtTbhGRPQrAH4FAI4ulfGp990xoqkMD88PcHGnJpTlqUZRhqeAAOXhIc1pT9YpOA5WFypwEoXbjSaanq/MR+5JN0W5ljofdW1ym2qpANcZbkPKzPCZEUjXhKDeiMp8DE+97tzmgSzXPitO/Pd//L6ufFTqwGkA1yaOjwE4m6zAzB9j5juZ+c61SnFE07CwsEjDqITAowBuJqIbiKgE4B4AXxzRWBYWFkNgJOoAM3tE9EEA/wDABfAJZv7eKMYaF0bpbanrO69tYrLrgBn1Vlv4XuY6dCi6jrJlz6IuBMzwA3GtXhAo23gZruMIKowp9ljdnxqMihMAMz8A4IFR9W8xOPyAUZOEAJAucAqOg0px+FvFCwL4gcg/eH6g4Q1EOETmyruEbHzLaMaaVLf9kQkBi8kEc7bnaZwElsV4Yc2GLSzmHHYnYAhZJ2XmXG0FJgmO/Kovp10AaTbapl2rtgyjemk3f7BCwACuQziwVBXKdhot1NveyMaUb3r5QWSIpB8AOKQhGAd8VBwiLJVLwnhZSDkdXIdQKYm3XMvzBWKQEK4tOX7bD4RVOA6h4DiZeYJRIe2ambTRgUAjZTmtEDAAEaHoukJZXg/GpIEIcMmBI28Hcumb4CbOm+4BYKj3O0tExoTya1MLywlYWMw5rBCwsJhzzIw6wMxoSvqlDuWCO7StOtDRycezLyWi1LFCXVp9386k2tMnN9yyIVD4Tn64+eaB5KyHmU4WPT0v5HV/sM47ygCma50dIQBgp9lC2+9vAbd/sQp3yvY/zJxuiNKjPO02qBQLqBRn03dDxy9YqJiyx8HCwiJvWCFgYTHnmBl1oIOkHqb1i89pHJ2vuu7dfS5jUQ/9kpIfVd4gbEdidco3IMrYoCEJRsXJ6HqdRrXC9PzMlBCQ11x0HKxUy0JZISdCYLFSQrUU69J+EGB9t4FR2BFqn/+OVU2nDlTegHsoxRqboolH8rwGAaOlOc/5kX6zIgbMMFNCQIZDIfs9Cka44DiCMuX54zVjNXEEkmUAIxIAicJpMLzRrUN+Tsdtu0UpRk/TBMsJWFjMOawQsLCYc8yMOkDoGALF27TSmA0CGOjPCeS4awxJv34FqpFRaDwk1vP8AM2EI1ToJzEaFUoH1yGBo2AOIxBBmrfJfl9eKxCtN1EmEsedYdICps4uHwDMkBAAgOVKac/G5u5//erkB1m3BzMczfsKQXeFql/XWm3BG7LgEPYtVMYmBGTHLD9gNL3BvTNljsTEeKojD4yW2q9OamztMWCIyzWUECCiUwC2AfgAPGa+k4j2A/grAMcBnALwC8y8Psw4hnMZ9RAThbys4bSk25igv2b5zSBPi8F+uwHdd+OONjHMbiWP/fI/Z+YTzHxndHwfgIeY+WYAD0XHFhYWE4pRKM13A7g/+nw/gPeNYAwLC4ucMCwnwAC+REQM4M+Z+WMAjjDzOQBg5nNEdHjYSU4LZItB0/fHWbaOIcklGQdJO0KHCZRk3SiU+gqfmDj2A8ZmvSn0Uy0Vc4k2nBXas6M9t9myC6V1vZeapum9MYw6MOyVfTszn40e9AeJ6FnThnIasnlFVt3RTN+VLYNUy0IdJyB7YpYMchNYZMMkGB0NpQ4w89no/xcAfB5hNuLzRHQUAKL/X+jR1qYhs7CYAGQWAkS0SETLnc8AfhrAUwjTjd0bVbsXwBeGnaSFhcXoMIw6cATA56PtTAHAp5j574noUQCfJaIPAHgFwM8PP80pAatbOmG732O3l40ToNR2smFQJ0y3aFSj6YfEAz9gtJKZiyl0zhrVa1mdTi57hyoh4Hv2pTGWSvYN9ZrpIjtncbJIv66qgZe2zohfN2YWAsz8EoA3a8ovA3jXMJOaVsgXXT3WtOkRcTcNpKklBwgOWLqhmdX8gFDLHCHCGKPRbqPliVaFa4vVsdnRyYZABKBUEI2MPD+ALwtgqV0APY+Stg5djol85J+mX5DQOY0hPtJMWQzOAkxlftZbI2saMmFee+w115l+R8CFIckxpYESVKh7ldHCOhBZWMw57E4gIxTdPzpM+zHSqwRpdSSdtVua2DZqIgaR0tbwV8Vkm8F6nXcUPEGvHvOL5ps+1izDCoGM2G22UW/Hab4DDg1t1BszPmZWH9TQwEgsVd7Ka2521dMNcKVqAdStnmxQFBZIAk0nUSTsNFvCHEoFFyXJGSgLHIdQLor96MK4tSRbBmb13Cvh1aiH4JBPpvZ8a6I2pXQzLbBCICOabR+7zVgI6Mgj9U0BlGO5nUk/QBTjULLykS0IiVjKRdDpWyyTiUFdWDLZoKjpeUq+wlyEABGclH78IEDb95VyRVjInoUaPmT0tNvkw3ICFhZzDisELCzmHFYdyAyd/p+iAmi2+to2GtJRbuko78TUkOMs1RFeqSXKJHsihGuDWCfZD0hJ5bYX8Rz6GQKRrg7pVCaNumVgUCRD13cW9OItRnl+rRDICIZGxzfgAAKZvJMILWaGL1UKmJUci64jWs0RASBVl05GG+q8T1cciCS9GSRzADJvwNhXreytd53mXMu8RRpHoO23R3m6QVE+xKAul6ZsHZk3rBDICVleV021bQvtza//ILCknxksJ2BhMeewQsDCYs5h1QEDeEGAjd2GsH1vtNupHMDzh9+M546c6B5X2jX88Mm/w0JzK27DoirBCN+DJ/vygjAseBwdl1AtFQRyziHAddT5CCUU/pMk+VjjRUiKURNp+hkfWp4v6MocWV0JtKiGUKPOFwmoapuGBNRYaMrehzJMyMMsGIfKZYWAAYKAsdUQQ26lCQAAOLPvRjx6/Ce7x8uNddx+6iuodoQA642FAqnM8wM02rFxDEGXXk1k+YHQGFB8OwEwiSQfQzYo6kGqpS12hPB8X/EQpMS/wBCOUbJBEdS19/I+lDGSxzUvxrEPrBAYJYj6
XsCpJgYtZgaWE7CwmHPYncAASDMEkssqrR2s7Z7vHi82NuEGqs27AskOiEh8Vxz6DWTfIsrGQYqxUELf7gwjGxiNG0o0pMioKS5TnYXkdsIXiL0s1fNh0E+yy84cJ/yVaS9YIWAIE0MgGSe+/494/ZlHusfEQcwH9IFDjkBOlVwXbkXctIWcQLJNarda6JxskgZE4bLlCMWsV55HhADi+e0YL/UbXucEBanIxKCo17NvkvJsWmCFwAhR9hooeY3usc5DsBdkqz5HVtwyGuuMgWcaAdLPWlYLwbwwzYZJqZwAEX2CiC4Q0VOJsv1E9CARvRD9fy3x3e8Q0Ukieo6I3j2qiVtYWOQDE2LwLwHcJZVp8w0S0W0A7gHwxqjNnxFpDNotLCwmBqnqADN/jYiOS8V3A3hn9Pl+AF8F8NtR+WeYuQngZSI6iTAhycM5zXfPkOYsVC8u4vHr3oFmodotu/7SM7j+0tP9O1ZIQIJDLFUhJSKQSyInMEzOLWVtyr5a8kYc98aXdbq5zqAHallaZCHdecvsMTgadsAPAmGejibk+jDIygn0yjd4DYB/StQ7HZUpmLU0ZPXiIh6+6T3Yqu7vlv34s58ThIByi0TPlpyKSrnA1P0nLiJSOIG8WHv1VlYNjMaJXqP1t/3TeBZqOIJUwyhN3724hlHkMGQgNJRKvh1xnFzJjrztBIzFo01DZmExGcgqBHrlGzwN4NpEvWMAzmafnoWFxaiRVR3o5Bv8Q4j5Br8I4FNE9KcArgZwM4BHtD1MIxJbskZxAS03VmNq5WUsNLeEbU+pXU/tUqIEcs2hkafmLhjUZO5DDY6iqjmqMZRDhEDiSdJSlYU9EzjRjuR2RCCJEyEyYTx0+Z/0ryinAalCgIg+jZAEPEhEpwH8PsKHX8k3yMzfI6LPAngagAfg15jZwERu8iHfYA/f9DN48tjbu8fLjXX87JOfwEJru1tWae307bPXA2+i28u8gUIjkFo4Os7ADC3fx26zJZWKcyq5DhbLJaGsWioKD5QfBKi12kIdvXGQzGXIX3ftIrtFBYdQLJhEOxZDnpsIb51X4yTA5O3AL/b4SptvkJk/DODDw0xqGrBbWsb6wmHhoq7Ur2Clsd49NiXQBNLJ6JcoapciCHR19vIm1IVOk60Re4bXSkw70DDzeRnr6JKdCuOw7HNpPv6kGhRZByILizmHNRseAD1ec4x7GnMBE4cleTMvcyu9ykaFtFeWkworBAzBEJ/36y89K1z0heYWin5z6HfoPXkC+djgDjO9CeVw4qFnnaxG5GMspKYKE0Ol+8xotMXsRiXXhSN4SHF0PSS2UnkKyUBIk7A2k+unjK3vVfEqJIN2Jsj7Z8cKAUPI1+6NZ/4JbzwT20VlffjVHHd6nVP3a6jaFGVwKILsWcdKT/LKgoxr1eViBIucgOcH2A1awqxWq2U4cJV2wvxk7z90zqVIDKqnUU3DlgdMjZAyd54jLCdgYTHnsELAwmLOMffqgImO1tnqC1v+Hs32iiZkAExSajAlhCgUhyHS+CWEMUNYqKOLtpNpnoKzVKdMJAE7gUwGgaxvd7b46jrENpA4CdNh9fdNirNSBthow2NAo+3hta1dBBz0rSfr/JP2TuDs2k34P3f8u+4xcYCf/e7/xE2Xvjd034rDjE63N+kHYrtOv6JzTo9U6QkUHAcr1dhakwHUm220EunKWbI/6FaUeAOSrIxM1iWvA9Do/4YnKO0ZJwDlgitUzFsszL0QCBho+77WSGUvkFXyt90SLi8djfthH61CRak3cQYrmgmpuQ9FEBFcyfNS79Unh0VL9zTMilGeV53HaJ6wnICFxZzDCgELiznHTKsDbd9HvdXuq+c1oxRXojZgphrkpUHksdVbamzi9le/HvfJjJXaZYks6w4otFVsbpTIRmId7lrmDDbvguOgWoxvOQbQ8jxBFaPo3yQH0/I8+AnOxiVC0XWF81ZwRTsC5tBhSSQdNd6H0lhGZJ7GG1LuRwYBcB0n1fFLaWeJweHQaHs4tyl68impuqA5ngx6QEDavXBg5xze+/jHhbKC5MCp1Yk1Rjaj0puLroPVJKHHwOVdX3AqCg16xBF3JY/BUsHFatUV5lQuuCGBFsFnRrMmrZ97rG3AxcnWo2GZnpdItqkUHBSUsNF7j5kWAkCPh3wGQWAUAk8qw+jYqgxQrCP7vWdNCibN24l+/QId1n+yQBgtwZcVkyeWLCwsxoqJ2AkwRxFVI3Siu/STmvooNWqdTv8dOAWIv44BEPjj2yHI43MQzmGawAg5lCCxje9cqjx+6XqpLeL4LJw23f3SSakGqZ5ckGXGOjsBlo77jjtBmAgh0PZ9nNmI03OtVCqC7qhDrdXG5d2aWCgZsfgBCzcqABy93UF1X3xJauuMs98JwOJOWuo33T2IxH+0KJSBa+9wUKzGdTZOMy4+p0oB1bFIM2bGO0u+WVUyTNNx0tOOgYs7u8K7/MVyEUtSRCDj+aR4Fsqza3kBNmqN7jyJwvHLhfh2doiwUimnS3dpqSahvMPgvxrpkihyHQeVYkHo3s2aK27EmAghEDCj1oqfwmoxPSKZFwRCG0BD1mienOoaYfmIZCGWcm1MrcjSLjE5wOJBQnmZuvOrp6cmzBXyPLOSfo22JwiBckpIrp7z0ZzctDkFzGhJ4b0qRfFWpugNwrgg8xZEhILjTCQHICNrGrI/IKIzRPRE9PeexHc2DZmFxRQhaxoyAPgIM5+I/h4AYNOQWVhMIbKmIeuFu5EhDZn83rXtq9FkZTQ9X9nu697xy3Vq6ywMVl8HOJCNhTRzNE5N1bteEBB2LzFatbhOc0vlG3R6usm2UstcSBGCZO+20DhIdE5Rxxa9CON2vfV2c7AUIUgNA65CPM/j3nA70VZfmJE0CXcK1IAOhuEEPkhE7wfwGIDfYuZ1ZExDdrAqZiDarDexWW/2HVwrAFLqAMCrj/lyJdWWIIMHWC/3VyFqToPx0jf81Ac6T6cWXWE/4yATgyJ9vWxiIGDp5UjkRZh+hoYdORuICKWCi1JGDmQSkdVO4KMAbgJwAsA5AH8Sleuunf5eTKQhWy7t3QmdPJMSC4vxIpMQYObzzOwzcwDg4wi3/IBNQ2ZhMXXIpA4Q0dFOVmIAPweg8+ZgiDRkBi905S0p9zqQ66X/3ssORGlNeqenluskDWriV4NCPaWdqn8rffefXhc6AxZheEeMP0RSq7iNahikfb2XhctQIgKxLiaSML4SoYjZ6DrLGaDT6swDsqYheycRnUB4t5wC8KsAsqch477PMNwScPQHCKXF+OJsnw9w4bnBeQE25ADSQ0qrJKBy70hGCLrgF2G+vOQxcNXqEgquI5QNzhIwLu/W0Wh7iRK1n0du+GmcOnhb93i1fgk/8ezfoOLVEm2knjWOOLvNNlperN07RFhbrKLopmj3Gg9O1pn6JSD5F4EY2G60UW/1s/gCFssllIux92HAjO1GS7A8rRYLqJbmK0t21jRkf9Gnfu5pyMgBlq8iwdLPb4s3iikxqBMAapnJzkETvgqDk2xyFB2Gav2WBcy
cSq4CwJnVG/DM0Tu7x4e3XsU73L8NRXi//iGureX5Qn4+lwj7FtTIRmkI+VX9ue01NgNo+R7afn+BUy4WUE6ELmcOjZ6SQqDozp87zfyt2MLCQoAVAhYWc46J8B0A+tOCHAD1dYaf2KI2d3T6vtpTWp2OoZKklqbMSHVyAYW6qUjEkWBQE5NsCSJOMd4hNFqeJnsvhDYl14VrEKBCiRoEUaHfX7uAa66cFI6dwOurEnXX0cfIiInQlKIGqXNj+DKhF3mQ9lXJtJRBfx4BCP1NkhGJ/YAjTiJu5/kBWl6KLtQdrzccJwyGOg0k48QIgX7wWsCpb0pRYiTHO51nl87uTL4pmTVlOqMf6aITJL2VAR/irUHEcCR934QjOL2+pXGLjY8dIhxbW8bigF57uvF/9Pkv4m0vPBCPgwBFv6Vx0+3fj3zGvCDAxa1dAz5TfeWje5TTQ56rEYrlc7hZb2JL5kmk4XeaLdSabeF7ZQmkuR+ksSrFQqon7KRgKoQAAAQmwjkTzMyFdAQekIWzT28TsPqCLO2h6zVW2viFoA1KYwEN+tEhgOCB3LOnUf1a6shbeTrymwalTiRwlMhGiuwa3TpGDcsJWFjMOawQsLCYc0yMOjBo3ja9DYCq78v1AokFDMAKCRcErPQVknBxmUOhbi4a35GYk152jkEodeWQ3rKn39F9srGQ6gloYkewf6GKlQRv0PJ9XN6tayPlyv2L36vbegL1vWaN4gK+cfN7sVXd3y277srzuPPUQ9JY/fsJx+pvTBbXU60VU+8rktdG2n6UMGXSCXGksZqeh42EtygRYbFcHGugE1NMjBAYFvqLnW4dKAuKDlGostqBoPN1ueiUNFfqjNLrrFTKKBeHuzSdmw6Ird/qrTau7NYzOU0p51FDxCXRdop47qq34MLKtUK5LARMdGktUWgyRxM9Xbkg6hUymqNUx/MD4ceFEFojYvJkgFUHhkGWh8nCYtJghYCFxZxjMtUBQ36gV5DK5OdUn4LIWCRZzw8CJTWW67iKMYxg1ELRP4p7W/856mqEqdHESnm8fGr5fnROJJ1Xnl/qi/p00xyHfezfeU0oW65fVvvWKvz5vGqTDbG2K2uol5a6x27gYa12Aa7g46ZzDEvnFljmDeR1EcELAjie5E+XslQCwXVGa3Q0kULA6B24Qm4BspGd3jswEB94ZnhBooxD3TlpWeYQYdmRDT+caB8V+fgxwI5s+aOZAEEhAuXr++qVTVHf7mmw0h+6sOX+gARsdyzN+e53pcrNHfyLxz+OwImV4ILfQiC3UQjHHCH1/f+O/xSeuO4d3eN9tUu459GPYLmxkRhf1f91c9LZbcjUgmhbwNioNTVu42pBsqjoOlhbrOZ7XiRMpBAYJdLeKTBYSVDK6GF4krjQpo+WLlSXjJBQ6s/Yh2XpNGQuvyAmbKYEAqPa2smU529UN3yzUMVOebV7Mot+E4HBaNkIRhUBBxrLU2UwYay0BDt5wHICFhZzjsnZCUwQ1W76S6T9cUhT+PNE2jnrpbgPOq+M6zBtpjo5jQrRDisakJj148nRoMiRLVCEo846lTToiW+7daR28g99r3gqg0RNGtTmZmKEQHqir3wgkzxEobcXxwUoFgogR4ySI3vshfkSVaualIhjPd5vmxrLyHYK/RVqYtLXSRStVstK9h5l/JHZ9gMbtTrageyuNJp74eZz38Zi/Ur3uNquodiui4QvxNPYLFTx+PGfEAjFY+sn8frz3xbayFvqsJ94LWEdcW0MzQ+JFLbe8wNsN0SnJ/m+cxzCQqkolLY8H02ZhOyBiREC44Rs9OMkHnBmRqngosDSQ69pF/YV1xmVPqvr1+QHXusNySKjuFQpYW2hOuQMs8EPAuw0mkJEolH+GNx4/knceP5JpTzQJTKNUHfL+Ob178L60pFogoy3fv8h3Prat4U2gcy/cPhP8oFV6mghXm0/YGzXW2pYukRBwXFCQ6REWdPzsdNspYwVwiQN2bVE9BUieoaIvkdEvxGV7yeiB4nohej/a4k2NhWZhcWUwIQY9BAmF3kDgLcB+LUo3dh9AB5i5psBPBQd21RkFhZTBpNAo+cQJhgBM28T0TMIswrdDeCdUbX7AXwVwG8jYyqyvULqe1uoqkC/uiaQIxLpiASdE08/XVlrc9P9JjkUC91wEG7Lk3DGFBGHQHAcEtKBM9R08iOdg8IRSUQlM8peDZXWbres6LW0odWVsOxJTkYTMUkeC0DkvqS+HhbtjlTjpYABCpJ8g1kIdmBATiDKSfgWAN8EcKSTe4CZzxHR4aiaUSqyZBqyA5XxUROytxcRwXWQOMsEctWTLD8Uch77UFAMPh/WKPxZHr9el5vkOlLFMxtbOLsR16oUCzh+cBWFMXi7EQHH1laEsu1GC69e2Ry4L5P73SRiVGdeHZRrV/Cvv/YhwTu0yJ5idKXYEnDoaZqcIJF4H4Wh28XxA8XKCJoQ6yLX0PJ8XNjakeajLKsnjJ8+IloC8DkAv8nMW31+KdJenIUFzB8D8DEAuHG1OjLRr7XskhJQENA98eGF0Vv0qGsmDWHTfz6jNIbJgtCzOj79ASs2fSND582MXDZOpF0PYka5XRNJYUOJn+Va9yKBU+nEIS6akbEQERURCoBPMvPfRMXnieho9P1RABeicpuKzMJiimDydoAQJht5hpn/NPHVFwHcG32+F8AXEuX3EFGZiG7AQKnILCwsxg0TdeDtAH4JwHeJ6Imo7HcB/CGAzxLRBwC8AuDnASBzKrIcIJuZ6IkXSTchEpxjOpZe8u5K61SS085V5R+AtDyHxn0PuLkfIyfXE9qw8KmNdP30s/PTlxJFthTJGk6oFsR1CEq0d+lmc/QUX89ZJPvWk47yrNV5Z4XJ24Gvo/ft/q4ebXJPRWYCrfWVhmUXijQePTrhIQ8wSs010FMSY8HeywA1tJuJYMqac1JGSN5JhYFYRlAFRSA7/nRsBiVyMJUF7hEhWSF4c8TMWQxmOVnK6zhWCT+Tdhazikmjc/OF9SK0sJhzzNxOIIl0DcxiUtFv697rK33KuRhuEXASdzwHQLspxo6QnXwAKFGDdIZZvXaF/e6/XnsLhY8K31v3bT3MfT4zQkBn9MGkyeQjGszpL4SmsNdFH9UmUfZsGxcGdUPNf/woR2BqvXQOQLY8PPaGIg7fGN/y9Z0A3/1KHa16XM91JNsFIoSiIXE9dDYkjiNwGV3iuI+AYWi24j3YbB3BLWCIyzYxQkA24Mmt35Qy7QWVjjmy9lIrzp6euNc7J5M3ASb3h04oFKuEhbWESbTLYBLDyxNL1qBRDEo5tLw6bzUEu8o5p3MLaRGsRgHLCVhYzDmsELCwmHNMjDowLujCN6VtLvMmGPv2pdnqjm1shJFsruzW4VL0+0DAQqmIhVKxf8McodvK6+oobaRGgeQduX7BAwpxWbPGqNc9tNpxw5LrgKSnwnXEjT5BDQCqZF8mlUBk2XkNeqO0NOOxsF0aSWCOiRQCOicS3YkRK5g9LA6JBkSsaafonKTSjqPSnbP2m9WLTkbT8/D9y5tC2bG1lf
EJAdalgJOrqJaAshBgTT+vPtPE95+Oy/wgwHajJbSrlgpKBKmC6whb5jDluti3o5lyAJGwliMLaRkCg8jGWju2vi36w6oDFhZzDisELCzmHJOpDkBVCVI3sj3s/VN1S2L4cv7wMaLX6y6x2ExJMFMJzPsDwusQMCvRh0yQFqFIt2XvHKe+BpTUOGZ1/Ua+Aho3HxPoXxMm+4V2vy9GCNJwAro6KdPrVcf0teJECoHVhQr2L1QGVnSymBfUWx7Obm6LhiUGJ36UUD3GzBopxjImDVMXyji/uYMrO/W+teSHveA6uOHgvr7hzANmnLq0gVrL65Z5gZjSu9ecdA+8cAY0XA9BvKVcx8FiuSj0VXQdwU5Ay09pZySSgwzNQy6979fyURA7Cm0EVApR8UuSMIgn6kQKgYLjoFoqjiXKTCeykOjtJZOH6lk2mVkWOZJV+KjkZrZ2OjQNY9gnr1fRdYxiBdZabew02waziKETkrII1P66ywQvs5JPwtEk/zS6C3W/+qZtU9oYpUFT2pjvBCwnYGEx57BCwMJizjER6oDrEFYqpe5xWlosU7R9H/VWW9HLkvCCAMuVsrDlr7XaaCXYQhNyxgQM6I2BNBOUi3Tb27Q6RkRpTuTHwj4Xi2tOnJmJHey0mqi3e2/1A2a0/WBgX5He53GwfsJUcv3tTwbZhYvnXwwxTlFncqRrbewhTep6xcgoZWLJsdMwEUKgWHBx7f7V3Puttzy8cnlTYZGTWCoXcd2BfXCj+NDMwOn1TVzZbYgVNSRPHjB5UFV9V/WQ05JVGqu6UeHg9Q5uemu5KwRadcbjD2xgZ308r17S3IiBhGdfAnK0Y4dICBVuqotrHYgSw3UElxLpWuo/UKeoEoq6OmoTYwyThuwPiOgMET0R/b0n0WagNGSdV4Ly37BgGLwiRCekVDRmx0WY4r8Or5z8b1TYw5cSQ4MIIIdADqlCcxJA+vus3988wGQn0ElD9m0iWgbwLSJ6MPruI8z8x8nKUhqyqwF8mYhuGVewUQsLi8GQuhNg5nPM/O3o8zaAThqyXrgbURoyZn4ZQCcNmYWFxQRimDRkbwfwQSJ6P4DHEO4W1mGYhiyJpufj5Usb3eN91TL2LVS62zE/CPDa5o5A1mmtQSR4BlZujbaHly9uJCIFEZarZexfjNN1t/xAcarJBI1Vm6Lxc0iYiWU6QyBVB1ajEWl4A+Ude/p51EJ6537m+RauvBYb/QQ+sLMZGNgKjG/LLRsLlRcd3PqjFRQrcen5F9o497yXaGRmN8CQLP2Y4TrSqdU6xqlzZI1LYrLe12/5l3jx8A90j9dqF/Hupz6JansXWTBMGrKPAvgQwnV+CMCfAPhl9D5Hcn/dXIQHK0XUWjGLLHusMQP1ticYrJhGl1HHFY/9gIWxCcDaYgVLlXK3rNH2IEON/pJNA9aRgForsj7HccteR1GZjoTMqrhL1ij1HUZ9R9T4dEScpqORciwiqOviCwBuAdh3xEVlKd4Qb70WgEjVXIXr3eNhVqNWDx4RyIT0u7h0NU4dfEO3Zm3rVXhOdo4/cxoyZj7PzD4zBwA+jnjLb5SGjJk/xsx3MvOdyyWbudzCYq+QOQ1ZJw9hhJ8D8FT02aYhs7CYIgyThuwXiegEwh3MKQC/CgBZ0pC5RWD5SLwJcoJA2KL7QXqgidyg2Ys5RFgul4QNdtPz0E5zP0yNkMO9DYES6y1VCQurTnduzIytyz7azf52ALp+FRVdM0ejHawmSo5aB335BorOayFhv9/yA+w2WyYz0I+nFPVeTeABm+d91Dbj61jfzteuoXuKSDXgUU5Ptw6JdSSO4ND2GRy/+Ez3eLG5hbP7bkTJb6bM5iltKe11iGkAeNMNC/zZ339d9/jCs4zXvicSVoNGmzGtJ7dyAFyztoK1BDGoc3k9s76Ny7t1oY5uLM0zlxiblYeQwZD5zMM3FnDLj5bhRFqT12I8+VAN6+dEjkT1rDMjFGUYZV/SPFx6a7venbkO4fZjR7CS4F8u7dTw9NmLmr7TLeRMhIAQWIqAQlFqFBAgWew5JI5PRGJEYsS2LvJxshaRug65n07/cufJEs8pwU9wAJeXjuJzd34QtfKy0lcSj//bH/kWM98pl0+ExSAIcEuJZTps5IGW09DpdYgUy7K87Eg0LwvU8R2gUAIcl7pV0sbXEow5CnydhVwWuEQouPFOwHWy9WlCqOkaeW1xHURAximMDYWghWIQ75QLfgutQhnN4kKm/qwDkYXFnGMydgIMBL6wT55acI/Ppm16VQj8uCb7rH/nb+BUo68TfzZ1liKQoKsqumy3n0QdEhO9pkUeEubYh7mguJJuokND2dYP32WuIDDcwIPrDxaXoYOJEAKtXcYr30yQMxvTKQXS/RRUwyBAfejkfjbO+3j6a43uAxQEjN0rovcdQ9X3ZaMj5nRuBay5ybW6dspxp5/EF2vVMo6trcZ1SO8xOqgBUy9VQPbSI81M01SaguPg2P5lFN34NfZ2o4Uru/0jLZkiEykLUSguN9bx7qc+ibZb6tMC+G6P8okQAl4TuPKyeDrSToZy8gzClOvq7SUxKluZQXMMALWtADubCRIQKmeiPvDcoyx9XsqZZM15g5jnUX4QO8fJh6xcLODovqW+Y+tiBXY7TM5RNszR92bEW8iEXhIOEdYWql1h1TmveQmBPFBp7+K2s9nfwltOwMJizmGFgIXFnGMi1AEZLpEQAJLB8PxgmvnCvhBUEgIqS4Rk/MtWE2jsBlo7gL7HUpnnlrC7cFCISlOpXUGhVUsOr6B3aqzktDVRcuR2g+j5/dQ0DXsZGtWo23qZ9EzSDZ3qLFUS1RpG0/OEyTMzygXR1D3VcCzRNh5Kr6rI9wMxJOVLpw5lfzomUgjsW6zg8PJi99gLArxyedMo4u10gaF7Mk7cVcXCSnyVz77QxlNfrXdvaOaQE0i2DJjhs3ijer4oOC7vvx5fe+9/gleMDaF+8Mv/Bdc992VhfDniLoF7GLWIa5HryFFyTKw+GZwecpwZjmxBQyonEXIZQjOhEjPgoH8k37Yf4Llzl4V+Di0v4o1XHxLGevbcpdT7U3Yo0qYhhySI2cwuZBhMpBDoyOLOCdJap0nH2rdDhmThpKEToScuyK1nMDlgJ/ErluPdlZcB0ajQ3QEM3E59/0CKdJleWE7AwmLOYYWAhcWcYyLVARkOEQ4uLfTXFR2Ge6wJqohGR5dOim0OLC2g5MrxC8Q6Jmm41xYqqBTjep4f4Pz2Tv85croDExh48VtNFBK+FDvrfvjOX+LYkqwAQ3RyYhBeuu1nsb7/eLessbAGXzIoeeX1P4X1I7cmSkTlmgAcPfVPOPLKY0I7J2TihHpKCG8StY0rtTq+e/qCvGJho11rt7V5D9N0Z4K6zXckArGTzqtTUqoQjt9eRjFxrq+c9nH51Vi3L7gOrt63jGLCv6GquT8YrBKMzIIvks5YSSFco7ZK/8I6ROvMXjDV9CZDCEg3i3w1iYCVahn9QAWgdNwDrcQna+M0cOlF0fpspVIyesjTsFguYTExpabn4+LOLvwBW
FqtTQwzXntBNP8MAkkAsIZQlIxsAiKcve6tOHP8R8R60p1x8dhbcPHYW/rOsrxzCYdOPSqWSteMoD6I8j2402hhV5NyzOReVSwSI9Y8+b28reVOxURJkrMolAhX31JEeTEu85osCAEH4Q+Q/DZAgXwxmcE0GoYkS8SifpgMIQCg361gZl/Omrr6BzKPUNJTE446bZ6p7ojTTXrJ0JkYU79fIG2d2YLlBCws5hwTtBMYEgxw00FQj3+5nDah5MoGI7Mr0ZMgMCq1K1jcjMM7+m4JjcX9AA0m+1vlFeysxNHkiAMs7l4EOJmqLdS/OS4IzYeSG4leRj5pa9EZKyXbUWSspDHEUcoS2mEQMOo7gRDERYnWBNa+/5f3m3lumLI6FSn9GM5pdoRAALSfXxD2NiUfOH5APBNyKuqZBTNue/h/4Gbnf3eLNg69Do/e9XvwE8ZC6SCceuN7cPqWd3ZLis1d/LO//nUUW3GI64AZTlK4MIOdsH23J5bt3kyhsVZMXsdojy+/uycpOggHovxr7DK+86W6UBbG6khwBH6AFy9ckSILqQ9mqoGTIXp5RI4SqUKAiCoAvgagHNX/a2b+fSLaD+CvABxHGGPwF6K8AyCi3wHwAQA+gF9n5n8YyezFmQJtkktQnNdAxswoNbZQSPwc1JcODv6TRQSvvAivHFtw+oWKNiGmbCykWMghnxu8k2NByOun6VtrkZeYEwdAsxZoQoeJc277vjZ0WBK6smmByc9iE8BPMPObAZwAcBcRvQ3AfQAeYuabATwUHctpyO4C8GdENK+PooXFxMMkDRkz8050WIz+GGG6sfuj8vsBvC/6fDdsGjILi6mBEScQ/ZJ/C8DrAPw3Zv4mER1h5nMAwMzniOhwVH3gNGSzgILj4MjKoqAbbjea2G5kDJ89JMLtqeh3sbB7Cbc89mkEbnzZz930Y9g6eNNAffuFEl644x64Xry2A2e/i0OnnxCMhRwN7Zfma8esesS9+oZ3o7ZyJF7H1nlcd/KrcBPBNiFxPR2jm36ZmHoZ5ujn1dvT0BTc/UfoKhNUr87sqoiREIjyBpwgon0APk9Eb+pTXTcb5UoIaciqwxvv7DUKroMjK2LUnLMbGKsQkBlrWb+t1i7jlm99Smizu3r1wEIgKJRw8o57hLKbH/00DrwqCgGdhRwkfVuxeWKG7DT+8ut/Gpeuub17fOj0kzj60teBhBAiBCDB/VzHUYi2JKbsiI5bmCUMRJUz8waAryLU9c93shBF/+/Yg9o0ZDMP0vyNc0yLPGGShuxQtAMAEVUB/CSAZxGmG7s3qnYvgC9En20aMguLKYKJOnAUwP0RL+AA+Cwz/18iehjAZ4noAwBeAfDzADKlIZtXmBjKDNRZ8pAIRKKxTqbfUGY4zV24iehDIEJ76YDw0t0rLaCReAVJHKDa3IYTeMlmELP7qKHT/UIJjfKSMFvfEdVF3y2ivnAAXjfZBqParsEJkupB5xuDJSY+j3ufIb+x1Wkd2lgZRrXMkCoEmPk7ABQPE2a+DOBdPdp8GMCHM89qhiFbvxkJgsg4pUtoEUUhp5L9kHIDOZJXHxMhCBxhxPSbnnHo8S/g0GN/3S3xy0t47v0fRVCJOZDTt74L569/a7e/UmMLdzz4R1jeOB33JL/M1yz88tE34Ykf//eCgKkvHRTqbBx6Hb7x3g+DohPi+G28+dG/xLWnHha6ZohORqwx8pGFUOh0pM5rdMjHPnAYi8XZsRicBkRPvM6k1aSxHBNPIL2kMFncsc6TreioMxFzFOqbqKyf6R57lWUQizx/u7KMdiXOhefVqgic/reXbvVesYLd1asBpzdPFBTKqCXMmB2/ldgVSAMY+EfNMOdnhDmxobWwsOgFuxPICSYpwCYWBqmJU1eS91qVMMEjHGvOYYVATvCZsVlrCNF9aq203HAqfUUO4aobCmJkoQ0fl894CU4ACpkQFolOLpwkEqJaJOW6P3z6cRRacTadVmUZr93wI/CLlW6bnWvehIt3/KtunaBYQVAQIxStXnwBa+ef7Y5XbNVQam73X73qVIjFrddww1N/K4TYfu3Gt4ekY4Tq7mVc9eqjID8kHZ3AF7iHcNZm23zZGSgvzaCjiSjErMEAyo+HZPSVNyZCCMgXbBpVNM8PcHZjW4g/b7oTIGnxN91ZxsJqrKmdeb6FK+e8rrkds/6GcmTnHTnmNlhJsX79s1/Cdc98qXu8uf84Ll3z5lgIEGHz1ndg89Z39F3D4Vcewxse/oRiMag42gjGQuqtvXrpJdz+j/9VKNs5cFwQAkubp3H7N/4cxcQbC5dI8RqUx9PdV4qwyPHmy0L57YVhkuUELCzmHFYIWFjMOSZCHZhGjJL0YwY4GaRijDwYITT0oWBA+65RknXSfCjzWKp6YtpulmGFQEac39rFRi0m1AIOeYGhwcCTfy9Gu2k3WXC/6xoGaUJcx4cEdkggKolIkScFOIJAW90+ix///G8hoKQzDitCT9bmy81tFCSiQht4Q16vXEAEOf7Q2x76Y/iFOLSz6zdR8ZsCB+A6jtAqdJ6S+Yje/EQ/zLodgRUCGeEFvhB7jnmYlJAxmBl1iVQPgpBr7r5LCM0F1XBakrEQAKVMfRBEixrX94S4hB0BoCO5BEEQMWzig9j5QqyWBrnOwu5FtY6jIx11xKAiZTI91HJkoVmC5QQsLOYcVghYWMw5rDqQEQulIvyFeJPsB4yterNv+m3ZWaizW5aN45QudIZBcj0l5Va0XZeNheTIvR3vpC5EjbzTIu2dN2mi7ch9L5aK2L8QRzpmMC5t19AakEshilKMJTp3iBSbAB0HIKsCyvFAM9k7JK+HS4TFclHgQBptLzVVegdWCGTE2kIV+xI3dNPzsdtsIfDFp1Uw1WHxQYl1+2ShOhYhzP3HcUPVWKjrHNQ9VB5C1T4xjMoVSEKIWBUwaW9D9Po4hMUdWKzixHVXdY+9gPHIy2ewvttQ5qj0I41FidodoSAKAerWFTqXZZ7MEZB+LYOiK6LGIFUKroOrVpbgJvIlXtzaRdOr92mVaD+qic065F8+/bXWlaoPU4oMUOoN6HuY6Js1ggmK8JAfDFMrNq0gkL5P5n3gyBNR17VupyEV6HcemTAik8Hce+szTuJtyKBWh5YTsLCYc9idQE7obEnld9PCr3akosc763DvLUeylUkBRqQ6JNBu9bfPochuYBr97dRf+PQ62n7GqODLma1i3iJRppm1TtVR6/RfiDzOoLBCICcUHRfXH9in6M6DhrcCVGtEd38b7rWN7p3ve4znH2lg61IyFyAgyYnIoCh5TOpgJEYoCvV/qRrLlKZpyKv07x0pInJYT33/L7dVHjBHaaVVF2TSMy9nnWv3r6DkJgKhKNxDVJiGDNNxiOBqnKdMMUwasj8A8G8AdCw5fpeZH4ja7EEasr2F4xCWKqX0ihngrjooXO11rQjbLUaxLOvt4i8GR7G80nR7ksPvRIciTyHW6RWwJ/U27KH767kE6ltHR3oaGwv1qZP1UVosl1ApTudvqsmsO2nIdoioCODrRPR30XcfYeY/
TlaW0pBdDeDLRHSLDTZqYTGZGCYNWS/cDZuGzMJiajBMGrKfAfBBIno/gMcA/FaUlXgu05CNEq16gJ1zflfp9z0h+U4EUW+nKNIwS5yA+r5f1ff796xvYbKNbnk+Lu/EgUB8ZniBaiik4wSUsUzUa/Wt4sDbfYeApXIJjtNfPRmWnNtLDJOG7KMAPoTwfvgQgD8B8MswfDk+a2nIRonLp32cfLwuPNC+H0gPi/jCv/P+X+YEnBRtPrQOlEjADA9P3HPc8spuA4+8fFb6HnBJ3ZCqlnzSQ6eLIqQbX+ENVKsAkuonuy66Lm46vB9lSd+f3kdeReY0ZMx8npl9Di0+Po54yz9wGrIVm4YsHcnwAtP43m/KQdLfLCFzGrJOHsIIPwfgqeizTUNmYTFFGCYN2f8iohMIf6NOAfhVABOfhmwSw4DPcsbbLMjvdGR6Uz93GCYN2S/1aTOxacgubtcU7ypFLGgNfrhfFQDQehCmGQKtVss4tLzYc769IBsCdawR5e91XotJOJDSolHHjzAuY87p0dFwC8pbe10dE78FyMKjvz0AlPqzhZVq2dhuYWKsG0YZVz2JWqst5APQ7QsUqz9tYhG1jcxz66wH5TLByswQnUdU9j5UHYEkox+N5Z3e2SRhDtTLMigj9M5COiMf+VjzhkCqn2Z01KlI0vGsgYhQKRaMhYB1ILKwmHNYIWBhMeeYGHVgWDAzWp7fN7IPoNfb9xJt38dOQ7T8WSgVVeMUDBpHIESqkQ/p+Y1kI5IdinJE2rv9XnVMesrk36DhKGYdMyQEgPPbu6i3PekL8dDXWKjlBSMrKQnrtQY2as24DwJuu/oQKpq03t0Hm0JdVyT0NE+z4jUoB/OOeiW1DqdJjwwwUb+7dVI4gGSJ7t19tyzxRddQiqQ6Ul/JsedABsyQEED4K598MMb6o69htbn7T2+EbrtJS6ABhpTDiQuWfxzVEcdSO9HMm3Ueiubz6oeBBMGAdbQEo0kdyYRwHh78JCwnYGEx57BCwMJizjG16kCt1cZGLY5Sy2C0DUMsJ1FZAQ6/3oETva5nBi6dDLBzUdS3J9HSUIbsCBPIxkI62gDpGkgqeWg6v4z7bJ1NQBpKVcKtbysLjV97wcf62YkxXp0YTK0QaPs+thrN9IopKFYJ+68nuKXwbgl8xvZ5wu6luE6eAkDU44E8eXdF/5f0W1nXD8tEspDRo05unEA2rn9Qa79CiXD05qKQr3D7YgMbiiubhVUHLCzmHFYIWFjMOSZSHfADRtPz+275ckkDPqFotD3BqKnlZ9NjNT5G6fq/QZ1RIwsHoLzmDBg764HQtt2AArnvousI/hxF151F9wIBEykEthpN7DSV+FkCJs3yL0+8eOGKcOP5ASsPs+7OZMXCR3Yg6mEslKwDVkKXj/opUHrXZRdSDPk0xkSJouYu4/EHxDRcfhsK5HYHlxZw9eqyUKHgzPaGeSKFAHMYf65vnT3/vRoNmBmetPZZFnh5QjJ8RLMWaAhFXT6CGK7joFiYr0hXsy3iLCwsUjHTQmCQXWw3PZgmgEZWBxaLPYaR+bG9bhOpDmQBAVitVlBwJbmm2UknVYkyEfzvE9gNbwZmYNkLUFhOjySULPKDAJd3aiPZuk8CWTeN0PEGyZKC4+Cq1SUhj+DyiLJITTJmRwhQGKqrWho8fHlwGkJUoCUAS8u9auvR9Dxs1OoI/FE8rnoDnnFB6xiVU1+jRr9fetd1cGR1EeXCzDwGmTDT6oCFhUU6rBCwsJhzWCFgYTHnoEnwjiOiiwB2AVxKqzuFOAi7rmnDrK7temY+JBdOhBAAACJ6jJnv3Ot55A27runDLK9NB6sOWFjMOawQsLCYc0ySEPjYXk9gRLDrmj7M8toUTAwnYGFhsTeYpJ2AhYXFHmDPhQAR3UVEzxHRSSK6b6/nMyiI6BNEdIGInkqU7SeiB4nohej/a4nvfida63NE9O69mXU6iOhaIvoKET1DRN8jot+Iyqd6bURUIaJHiOjJaF3/MSqf6nUNBY4SduzFHwAXwIsAbgRQAvAkgNv2ck4Z1vAOAD8I4KlE2X8GcF/0+T4AfxR9vi1aYxnADdHa3b1eQ491HQXwg9HnZQDPR/Of6rUhdF9Yij4XAXwTwNumfV3D/O31TuCHAJxk5peYuQXgMwDu3uM5DQRm/hqAK1Lx3QDujz7fD+B9ifLPMHOTmV8GcBLhOZg4MPM5Zv529HkbwDMArsGUr41D7ESHxeiPMeXrGgZ7LQSuAfBq4vh0VDbtOMLM54DwYQJwOCqfyvUS0XEAb0H4qzn1ayMil4ieAHABwIPMPBPryoq9FgI6P89Zfl0xdesloiUAnwPwm8y81a+qpmwi18bMPjOfAHAMwA8R0Zv6VJ+adWXFXguB0wCuTRwfAzAL6SHOE9FRAIj+fyEqn6r1ElERoQD4JDP/TVQ8E2sDAGbeAPBVAHdhhtY1KPZaCDwK4GYiuoGISgDuAfDFPZ5THvgigHujz/cC+EKi/B4iKhPRDQBuBvDIHswvFRRG4/gLAM8w858mvprqtRHRISLaF32uAvhJAM9iytc1FPaamQTwHoTM84sAfm+v55Nh/p8GcA5AG+GvxgcAHADwEIAXov/vT9T/vWitzwH4mb2ef591/RjCbe93ADwR/b1n2tcG4HYAj0fregrAf4jKp3pdw/xZi0ELiznHXqsDFhYWewwrBCws5hxWCFhYzDmsELCwmHNYIWBhMeewQsDCYs5hhYCFxZzDCgELiznH/wfjRQWttVMM/wAAAABJRU5ErkJggg==\n"},"metadata":{"needs_background":"light"}}]},{"cell_type":"markdown","source":"**With the wrapper**, when we reset the environment it looks like this:","metadata":{}},{"cell_type":"code","source":"import matplotlib.pyplot as plt\nenv = gym.make(\"LuxAI_S2-v0\")\nenv = SB3Wrapper(env, zero_bid, place_near_random_ice, controller=SimpleUnitDiscreteController(env.env_cfg))\nenv.reset(seed=0)\nimg = 
env.render(\"rgb_array\")\nplt.imshow(img)","metadata":{"execution":{"iopub.status.busy":"2023-02-01T00:50:35.906793Z","iopub.execute_input":"2023-02-01T00:50:35.907717Z","iopub.status.idle":"2023-02-01T00:50:36.223125Z","shell.execute_reply.started":"2023-02-01T00:50:35.907681Z","shell.execute_reply":"2023-02-01T00:50:36.221831Z"},"trusted":true},"execution_count":6,"outputs":[{"execution_count":6,"output_type":"execute_result","data":{"text/plain":""},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"
","image/png":"iVBORw0KGgoAAAANSUhEUgAAAQEAAAD8CAYAAAB3lxGOAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8qNh9FAAAACXBIWXMAAAsTAAALEwEAmpwYAAA840lEQVR4nO19adAs11ne856e9Vvvd1dd6Uq6kizJlowsIxkMTozBgIWJkakUlEiCVYUrUAkOUMUPBFQFUi5XIAX4Rwpc2MFBSbzgAhyblAALlYWwkS3JlmRL1na1WLqL7vrts3b3mx/dM9NnmekzPcs3y3lU39X0mbP19s55n/MuxMxwcHCYX4i9noCDg8PewgkBB4c5hxMCDg5zDicEHBzmHE4IODjMOZwQcHCYc4xMCBDRHUT0HBGdIKJ7RjWOg4PDYKBR2AkQkQfgeQA/BuAkgEcB/Bwzf2fogzk4OAyEUa0Evg/ACWZ+iZkbAD4L4M4RjeXg4DAAciPq9woAryWOTwL4/m6V10p5vny5NKKpDA5mwA9DuWyI/dMQ++o5DgGeENJ4QcjQVoPWE6IeR3YQRKABLwDH/6j3xKrbcV38CRj+Oxd2LjDzIbV8VELAdG7SPSKiXwTwiwBwdKmIT7//thFNZXD4QYjzOxWpbJhqFGV4CwjQXh4yXPZknZwQWF0oQSQKt2t11P1Am4/ak2mKai19Pvq5qW3KhRw8MdiClJkRMCNU7glBfxC1+VheetO1HQay3PusuPV//ON3TeWjUgdOArgycXwMwOlkBWb+ODPfzsy3r5XyI5qGg4NDGkYlBB4FcD0RXUNEBQB3AfjiiMZycHAYACNRB5jZJ6IPAfh7AB6ATzLz06MYa1wYpbelqe9hLROTXYfMqDaa0vcq12FC3hPakj2LuhAyIwjlc/XDUFvGq/CEkFQYW+yxuj81GBUnAGa+D8B9o+rfoX8EIaOiCAEgXeDkhEApP/ij4ochglDmH/wgNPAGMgSRvfKuIBvfMpqxJtVtf2RCwGEywZztfRongeUwXjizYQeHOYdbCVhC1UmZeai2ApMEoW71DWkVQIaFtm3Xui3DqDbt5g9OCFjAE4QDS2WpbKfWQLXpj2xM9aFXX0SGTPoBgCADwdjnqyKIsFQsSONlIeVM8AShVJAfuYYfSMQgITq35PjNIJTOQghCTojMPMGokHbPbNqYQKCRspxOCFiAiJD3PKlsWC/GpIEI8EhAqMuBofRN8BLXzfQCMPTnnRUiY0L5tamF4wQcHOYcTgg4OMw5ZkYdYGbUFf3ShGLOG9hWHWjp5ONZlxJR6liRLq3vtzPp9vTJBbdqCBTtyQ8232EgOetBppNFTx8WhvV8sMk7ygK25zo7QgDATr2BZtDbAm7/YhnelK1/mDndEKVLedpjUMrnUMrPpu+GiV9w0DFlr4ODg8Ow4YSAg8OcY2bUgRaSepjRL35I45h81U1790MZi7rol5T8qPMGUTuSq9NwA6KMDQaSYFScjKnXaVQrbK/PTAkB9ZzzQmClXJTKckMiBBZLBZQLHV06CEOs79YwCjtC4/vfsqpp1YHOG3AXpdhgUzTxSF7XMGQ0DNd5eKTfrIgBO8yUEFAhKGK/R8EI54SQlCk/GK8Zq40jkCoDGLEASBROg+GN6TzU93TctluUYvQ0TXCcgIPDnMMJAQeHOcfMqAOEliFQZ5lWGLNBAAO9OYEhrhoj0q9XgW5kFBkPyfX8IEQ94QgV+UmMRoUywRMkcRTMUQQiKPO2We+r5wrE55sok4nj1jBpAVNnlw8AZkgIAMByqbBnY3P7v151hgdVtwczhGG/QtJdoevXlUZT8obMCcK+hdLYhIDqmBWEjLrfv3emypHYGE+15IHVqfaqkxpbewwY4HYNJASI6BUA2wACAD4z305E+wH8BYDjAF4B8LPMvD7IOJZzGfUQE4VhWcMZSbcxwXzPhjeDYVoM9loNmL4bd7SJQVYrw1gv/zAz38rMt8fH9wB4gJmvB/BAfOzg4DChGIXSfCeAe+PP9wJ4/wjGcHBwGBIG5QQYwJeIiAH8KTN/HMARZj4DAMx8hogODzrJaYFqMWi7f5xl6RiRXIpxkLIiFEygJOtGkdTX+MTEcRAyNqt1qZ9yIT+UaMNZYbw6xmubLbtQWtd7qWnaPhuDqAOD3tl3MPPp+EW/n4ietW2opiGbV2TVHe30XdUySLcsNHECqidmwSI3gUM2TILR0UDqADOfjv9/DsDnEWUjPktERwEg/v+5Lm1dGjIHhwlAZiFARItEtNz6DODHATyFKN3Y3XG1uwF8YdBJOjg4jA6DqANHAHw+Xs7kAHyamf+OiB4F8Dki+iCAVwH8zODTnBKwvqSTlvtdVnvZOAFKbacaBrXCdMtGNYZ+SD4IQkYjmbmYIuesUW3LmnRy1TtUCwHftS+DsVSyb+j3zBTZOYuTRfp91Q28jHVGvN2YWQgw80sA3mIovwjg3YNMalqh3nT92NCmS8TdNJChlhogOGTlgWbW8wNCLxNShDFGrdlEw5etCtcWy2Ozo1MNgQhAIScbGflBiEAVwEq7EGYeJe08TDkmhiP/DP2CpM5pDPGRZspicBZgK/OzPhpZ05BJ89pjr7nW9FsCLgpJjikNlKBDX6uMFs6ByMFhzuFWAhmh6f7xYdqPkVklSKuj6Kzt0sSy0RAxiLS2lr8qNssMNuu8o+AJuvU4vGi+6WPNMpwQyIjdehPVZifNd8iRoY3+YHaOmfUXNTIwkku1XXnDw657ugGeUi2EvtRTDYqiAkWgmSSKgp16Q5pDIeehoDgDZYEQhGJe7scUxq2h2DIw69deC69GXQSHejGN19sQtSmlm2mBEwIZUW8G2K13hICJPNJ3CqAdq+1s+gHiGIeKlY9qQUjESi6CVt9ymUoMmsKSqQZFdd/X8hUORQgQQaT0E4QhmkGglWvCQvUsNPAho6fdJh+OE3BwmHM4IeDgMOdw6oAlAgDniVCLF4+bOQ87gXz5OCbLBIBDzMiHAYJQ8iiyshUwkY5qS6Htiekhx1mpI22pJcoUeyIArJTJBkUE0lK57UU8h16GQGSqQyaVyaBuWRgUqTD1nQXdeItRXl8nBCwQALjfy+Oj+TLOt25Gablr/TyA9/sN/JuNSwh2q+1yRkQgJhEqhBYzy4IjrqPmWPSEbDVHBIB0XToZbai1n645ECl6M0jlAFTegLGvXNpb7zoD36LyFmkcgbHfLuXpBkXDIQZNuTRV68hhwwkBC5wnwkfzZZwlsrrTdQCfzxVwyCvgXah2rTfVti20N7/+/cCRfnZwnIAFaqBoBdDHQ98gwrkhZD92cBg13FPq4DDncOpARuRrIZa2A7AAtldyCPL6KuG5w2/B31x5dfu41Kzg+0/8LRbqW+0yVjwPGdE+eFJV8MMoLHgnOi6hXMhJ5JwgwBO6niyVUPRPkuRjgxchaUZNZOhnfGj4gaQrc2x1JdGiBkKNWl8koJNuBhLQYKGpeh+qsCEPs2AcKpcTAhmxuB1gaStAZUl0fSlO77sWzSM/3D5e
rq3jlle+jHJLCLDZWChUyvwgRK3ZMY4hmNKrySw/EBkDyo4/AJNM8jFUg6IupFrXg9HDDwLNQ5AS/wIDOEapBkXQz72b96GKkbyuw2Ice8AJgQGQ9xk5nxF2U6pSeISpJgYdZgZOCGQFARcP5bC5L+coaIephhMCGbG5P750aiSPBEqNHaztnm0fL9Y24YW6zbsGxQ6ISN4rjvwGskse1ThIMxZK6NutYVQDo3FDi4YUGzV1ynRnIbWd9AU6Xpb69bDoJ9lla44TvmXaDU4IZAT3ePlbuPXVh/BTT/99+5g47PABPSBISORUwfPglWSdI+IEkm0sJm2AyckmaUAUPftqhGI2K88jQghZdWoZL/Ua3uQEBaXIxqCo27tvk/JsWuCEwAhR9KtYqa23j00egt2gWvVpJgcZjXXGwDONAOlXLauF4LAwzYZJqXYCRPRJIjpHRE8lyvYT0f1E9EL8/7XEd79JRCeI6Dkies+oJj5OCESmwP2AmFGY9p8Ih7mAjbHQnwO4Qykz5hskopsA3AXg5rjNnxAZDNqnDIc4xPuDBvKW+8DEjDdygB8OGiOemYPD4EhVB5j5ISI6rhTfCeBd8ed7ATwI4Dfi8s8ycx3Ay0R0AlFCkoeHNN89QSEM8W/X13HIy7dNgVWiqJpfxBNX/RDq+TIKzLg9aKLaeBzL29/q3blGAhIEsVKFtIhAHsmcwCA5t7QgJtq6WvFGHPfCl026ucmgB3pZWmQh03XL7DE4mqVfEIbSPIUh5PogyMoJdMs3eAWAryXqnYzLNExTGrIgZDR2K22pB+gP06WFPC4ceDu2yvsBAN8AsLS9g8OnO0JAe0Tid0tNRaXdYGr/0yki0jiBYbH2+qOsGxiNE91G6237Z/AsNHAEqYZRhr67cQ2jyGHIQGQoldwdEWKoZMewfQesxaNLQ+bgMBnIKgS65Rs8CeDKRL1jAE5nn56Dg8OokVUdaOUb/D3I+Qa/CODTRPRHAC4HcD2ARwad5MQgsSSr5RfQ8DpqTKW4jIX6lrTsKTS7xxJoQaEEhppDY5iau2RQk7kPPTiKruboxlCCCKHCk6SlKot6JnCiHantiEAKJ0Jkw3iY8j+ZtyinAalCgIg+g4gEPEhEJwH8DqKXX8s3yMxPE9HnAHwHgA/gl5nZwkRu8qE+YA9f9xN48tg72sfLtXX85JOfxEJju11Wauz07LPbC2+j26u8gUYjkF44Os7ADo0gwG5d3TGR51TwBBaLBamsXMhLL1QQhqg0mlIds3GQymWoX7ftIttFOUHI52yiHcshz22Ed0TtTJ41gc3uwM91+cqYb5CZPwLgI4NMahqwW1jG+sJh6aauVC/JxkGWr4tEOln9EsXtUgSBqc5ePoSm0GmqNWLX8FqJaYcGZn5YxjqmZKfSOKz6XNqPP6kGRS6oiIPDnMOZDfeBLtsc457GXMDGYUldzKvcSreyUSFty3JS4YSAJRjy+371hWelm75Q30I+qA+8h96VJ1CPLZ4w24dQDSceedapasRwjIX0VGFyqPSAGbWmnN2o4HkQkocUx/dDYSu1t5AshDRJ52Zz/7Sxzb1qXoVk0c4Gw/7ZcULAEuq9u/nU13DzqY5dVNaXX89xZ9Y5Tb+Guk1RBociqJ51rPWknlmY8VxNuRjBMifgByF2w4Y0q9VyEQKe1k6an+r9h9a1lIlB/TLqadiGAVsjpMydDxGOE3BwmHM4IeDgMOeYe3XARkdrLfWlJX+XZntFEzIAJiU1mBZCFJrDEBn8EqKYISzVMUXbyTRPyVmqVSaTgK1AJv1A1bdbS3z9POQ2UDgJ22HNz02Ks1IGuGjDY0Ct6eP1rV2EHPasp+r8k7YncHrtOvzf2/5j+5g4xE9++3/hugtPD9y35jBj0u1t+oHcrtWv7JzTJVV6AjkhsFLuWGsygGq9iUYiXTkr9gftigpvQIqVkc15qecBGPR/ywuU9o4TgGLOkyoOWyzMvRAIGWgGgdFIZS+QVfI3vQIuLh3t9MMBGrmSVm/iDFYME9JzH8ogIniK56XZq08Ni5buaZgVo7yuJo/RYcJxAg4Ocw4nBBwc5hwzrQ40gwDVRrOnnlePU1zJ2oCdajAsDWIYS72l2iZuee0rnT6ZsVK5qJBl7QGltprNjRbZSK7Dbcuc/uadEwLlfOeRYwAN35dUMYr/TXIwDd9HkOBsPCLkPU+6bjlPtiNgjhyWZNLR4H2ojGVF5hm8IdV+VBAAT4hUxy+tnSMGB0Ot6ePMpuzJp6XqguF4MugBCWnPwoGdM3jf45+QynKKA6dRJzYY2YxKb857AqtJQo+Bi7uB5FQUGfTII+4qHoOFnIfVsifNqZjzIgItRsCMekU5f+5ybn2enGo9GpWZeYlkm1JOIDeBmapnWggAXV7yGQSBkQt9pQyjY6syQLOO7LXPmhRMht2JXv0CLdZ/skAYLcGXFZMnlhwcHMaKiVgJMMcRVWO0orv0kprmKDV6nVb/LYgc5F/HEAiD8a0Q1PE5jOYwTWBEHEqYWMa3btUwfum6qS3y+CxdNtPz0kqpBqWeWpBlxiY7AVaOe447QZgIIdAMApza6KTnWimVJN3RhEqjiYu7FblQMWIJQpYeVAA4eotAeV/nllTWGae/FYLllbTSb7p7EMn/GJErAlfeJpAvd+psnGScf06XArpjkWHMjE+W+rDqZJih46SnHQPnd3alvfzFYh5LSkQg6/mkeBaqs2v4ITYqtfY8iaLxi7nO4yyIsFIqpkt35VRtQnlHwX8N0iVR5AmBUj4nde9lzRU3YkyEEAiZUWl03sJyPj0imR+GUhvAQNYY3pzyGmH5iGIhlnJvbK3I0m4xCWDxIKG4TO35VdNTEw4V6jyzkn61pi8JgWJKSK6u8zFc3LQ5hcxoKOG9Snn5UaZ4B2FcUHkLIkJOiInkAFRkTUP2u0R0ioieiP/em/hu5tKQOTjMMrKmIQOAjzLzrfHffQBmNg2Zg8MsI2sasm64ExnSkKn7rs1Ajyarou4H2nLftMev1qmsszRYdR3gUDUWMszROjVV93phSNi9wGhUOnXqWzrfYNLTbZaVRuZCiRCkerdFxkGyc4o+tuxF2GnXXW+3BysRgvQw4Drk6zzuBbeIl/rSjJRJeFOgBrQwCCfwISL6AIDHAPw6M68jYxqyg2U5A9FmtY7Nar3n4EYBkFIHAF57LFAr6bYEGTzAurm/SlFzaoyXvhqkvtDDdGoxFfYyDrIxKDLXyyYGQlY2R2IvwvQrNOjI2UBEKOQ8FDJyIJOIrHYCHwNwHYBbAZwB8IdxuenemZ/FRBqy5cLeXdDJMylxcBgvMgkBZj7LzAEzhwA+gWjJD7g0ZA4OU4dM6gARHW1lJQbw0wBaOwcDpCGz2NBVl6Tc7UCtl/57rzoQpTXpnp5arZM0qOlsDUr1tHa6/q313Xt6bZgMWKThhRx/iJRWnTa6YZBxey8Ll6FFBGJTTCRpfC1CEbPVfVYzQKfVmQdkTUP2LiK6FdHT8gqAXwKQPQ0Z93yH4RWAo99DKCx2bs722RDnnuufF2BLDiA9pLR
OAmrPjmKEYAp+EeXLSx4Dl60uIecJqax/loBxcbeKWtNPlOj9PHLNj+OVgze1j1erF/Ajz/41Sn4l0Ubp2eCIs1tvouF3tHtBhLXFMvJeinZv8OBkk6lfAop/EYiB7VoT1UYviy9gsVhAMd/xPgyZsV1rSJan5XwO5cJ8ZcnOmobsz3rUH3oaMhLA8mUkWfoFTflBsSUGTQJAL7NZORjCV6F/kk2NosPQrd+ygJlTyVUAOLV6DZ45env7+PDWa3in9zeRCO/VP+Rza/iBlJ/PI8K+BT2yURoiftV8bbuNzQAagY9m0FvgFPM5FBOhy5kjo6ekEMh78+dOM39n7ODgIMEJAQeHOcdE+A4AvWlBDoHqOiNILFHrOyZ9X+8prU7LUElRS1NmpDu5gCLdVCbiSDKo6ZBsCSJOM94h1Bq+IXsvpDYFz4NnEaBCixoEWaHfXzmHKy6dkI5F6PdUidrn0cPIiIlQV6IG6XNjBCqhF3uQ9lTJjJRBbx4BiPxNkhGJg5BjTqLTzg9CNPwUXag9XncIEQVDnQaScWKEQC/4DeCVrytRYhTHO5Nnl8nuTH0omQ1lJqMf5aYTFL2VgQDyo0HEEIq+b8MRnFzfMrjFdo4FEY6tLWOxT6890/g/+PwX8fYX7uuMgxD5oGFw0+3dj3rF/DDE+a1dCz5T3/IxvcrpIc/1CMXqNdys1rGl8iTK8Dv1Bir1pvS9dgpkeB6UsUr5XKon7KRgKoQAAIQ2wjkT7MyFTAQekIWzT28Tsr5BlvbSdRsrbfxc2ASlsYAW/ZgQQvJA7trTqH4tTeStOh11p0GrEwscLbKRJrtGdx6jhuMEHBzmHE4IODjMOSZGHeg3b5vZBkDX99V6ocIChmCNhAtD1vqKSLhOmaBIN5eN70jOSa86xyCSumpIb9XT7+g+1VhI9wS0sSPYv1DGSoI3aAQBLu5WjZFy1f7l7/VlPYF63rNafgFfvf592Crvb5dddel53P7KA8pYvfuJxuptTNapp1srpj5XpJ4bGfvRwpQpF0QoY9V9HxsJb1EiwmIxP9ZAJ7aYGCEwKMw3O906UBUULaJQZ7VDSedrc9Epaa70GaXXWSkVUcwPdmtaDx3QsX6rNpq4tFvN5DSlXUcDEZdEU+Tx3GVvxbmVK6VyVQjY6NJGotBmjjZ6unZD9DtkNUeljh+E0o8LIbJGxOTJAKcODIIsL5ODw6TBCQEHhznHZKoDlvxAtyCVyc+pPgWxsUiyXhCGWmosT3iaMYxk1ELxP5p7W+85mmpEqdHkSsPYfGoEQXxNFJ1XnV/qRn26aY7gAPt3XpfKlqsX9b6NCv9wttpUQ6zt0hqqhaX2sRf6WKucgyf5uJkcw9K5BVZ5A/W8iOCHIYSv+NOlnCqB4InRGh1NpBCw2gPXyC1ANbIzeweG8gvPDD9MlHGkOyctywQRloVq+CHidVTs48cAC9XyxzABgkYEqvf3tUubsr7d1WClN0xhy4M+Cdj2WIbr3etOFes7+FePfwKh6CjBuaCBUG2jEY5DhNL3Px//MTxx1Tvbx/sqF3DXox/Fcm0jMb6u/5vmZLLbUKkF2baAsVGpG9zG9YJkUd4TWFssD/e6KJhIITBKpO0pMFhLUMroYniSuNG2r5YpVJeKiFDqzdhHZek05FB+QWzYTAUERrmxkynP36ge+HqujJ3iavti5oM6QovRshGMOkIODZan2mDSWGkJdoYBxwk4OMw5JmclMEFUu+0vkfHHIU3hHybSrlk3xb3feWU8D9tmupPTqBCvsOIBidk8nhoNioRqgSIdtc5TS4Oe+LZdR2mn/tB3i6fST9Skfm1uJkYIpCf6Gg5Ukoco8vbiTgHyuRxIyFFyVI+9KF+iblWTEnGsy/62rbGMaqfQW6EmJnOdRNFquahl79HGH5ltP7BRqaIZqu5Ko3kWrj/zTSxWL7WPy80K8s2qTPhCvoz1XBmPH/8RiVA8tn4Cbzz7TamNuqSO+umcS1RHPjeG4YdECVvvByG2a7LTk/rcCUFYKOSl0oYfoK6SkF0wMUJgnFCNfkTiBWdmFHIecqy89IZ2UV+dOqPSZ0392vzAG70hWWYUl0oFrC2UB5xhNgRhiJ1aXYpINMofg2vPPolrzz6plYemRKYxql4RX7/63VhfOhJPkPG27z6AG1//ptQmVPkXjv5JvrBaHSPkux2EjO1qQw9LlyjICREZIiXK6n6AnXojZawINmnIriSiLxPRM0T0NBH9aly+n4juJ6IX4v+vJdq4VGQODlMCG2LQR5Rc5E0A3g7gl+N0Y/cAeICZrwfwQHzsUpE5OEwZbAKNnkGUYATMvE1EzyDKKnQngHfF1e4F8CCA30DGVGR7hdR9W+iqQK+6NlAjEpmIBJMTTy9d2Whz0/4mORRL3XAYLcuTEGOKiEMgCEFSOnCGnk5+pHPQOCKFqGRG0a+g1Nhtl+X9hjG0uhaWPcnJGCImqWMBiN2X9O1h2e5IN14KGaAwyTfYhWAH+uQE4pyEbwXwdQBHWrkHmPkMER2Oq1mlIkumITtQGh81oXp7ERE8gcRVJpCnX2T1pVDz2EeCov/5sEHhz/L6dbvdpNZRKp7a2MLpjU6tUj6H4wdXkRuDtxsRcGxtRSrbrjXw2qXNvvuyed5tIka15tVCsXIJ/+6hD0veoXn2NaMrzZaAI0/T5ASJ5OcoCt0ujx9qVkYwhFiXuYaGH+Dc1o4yH+20usL67SOiJQB/BeDXmHmrxy9F2sZZVMD8cQAfB4BrV8sjE/1Gyy4lAQUB7Qsf3RizRY9+zmQgbHrPZ5TGMFkQeVZ3Ln/Imk3fyNDamVHLxom0+0HMKDYrMilsKfGz3OtuJHAqnTjATbMyFiKiPCIB8Clm/uu4+CwRHY2/PwrgXFzuUpE5OEwRbHYHCFGykWeY+Y8SX30RwN3x57sBfCFRfhcRFYnoGvSViszBwWHcsFEH3gHg5wF8m4ieiMt+C8DvAfgcEX0QwKsAfgYAMqciGwJUMxMz8aLoJkSSc0zL0ktdXRmdSoa0ctX5ByAtz6F1330u7sfIyXWFMSx8aiNTP73s/MylRLEtRbKGiNSCTh2CFu1dediEmeLrOotk32bSUZ21Pu+ssNkd+Aq6P+7v7tJm6KnIbGC0vjKw7FKRwaPHJDzUAUapuYZmSmIs2HsZoId2sxFMWXNOqojIO6UwlMsIuqAIVcefls2gQg6mssBdIiRrBO8QMXMWg1kulrYdxzrhZ9POYVYxaXTucOG8CB0c5hwztxJIIl0Dc5hU9Fq6d/vKnHKuAy8PiMQTzyHQrMuxI1QnHwBa1CCTYVa3VWGv56/b2kLjo6J9656tB3nOZ0YImIw+mAyZfGSDOfONMBR2u+mjWiSqnm3jQr9uqMMfP84RmFovnQNQLQ+PvSmPw9d2HvnqTohvf7mKRrVTzxOK7QIRItGQuB8mGxIhJC6jTRz3EDAMw1K8C5ttIrglDHDbJkYIqAY8Q+s3pcx4Q5Vjjq299Iqzpyfu9crJZifA5vkwCYV8mbCwljCJ9hhMcnh5YsUaNI5BqYaW1+eth2DXOe
d0biEtgtUo4DgBB4c5hxMCDg5zjolRB8YFU/imtMXlsAnGnn0ZlrpjGxtRJJtLu1V4FP8+ELBQyGOhkO/dcIgwLeVNdbQ2SqNQ8Y5cP+cDuU5ZvcKoVn00mp2GBU+AlLfCE/JCn6AHANWyL5NOILLqvAazUVqa8VjULo0ksMdECgGTE4npwsgV7F4WQbIBERvaaTon6bTjqHTnrP1m9aJTUfd9fPfiplR2bG1lfEKATSng1Cq6JaAqBNjQz2vP1PHd73TKgjDEdq0htSsXcloEqZwnpCVzlHJd7lsYphxCJqzVyEJGhsAisrHRjq1ni95w6oCDw5zDCQEHhznHZKoD0FWC1IVsF3v/VN2SGIGaP3yM6LbdJRfbKQl2KoF9f0B0H0JmLfqQDdIiFJmW7K3j1G1ARY1j1s/fylfA4OZjA/M2YbJfGNf7coQgAydgqpMyvW51bLcVJ1IIrC6UsH+h1Leik8W8oNrwcXpzWzYssbjwo4TuMWbXSDOWsWmYeqKMs5s7uLRT7VlLfdlznsA1B/f1DGceMuOVCxuoNPx2mR/KKb27zcn0wktXwMD1EORHyhMCi8W81FfeE5KdgJGfMs5IJgcZhpdc2e838lGQO4psBHQKUfNLUtCPJ+pECoGcECgX8mOJMtOKLCR7e6nkoX6VbWaWRY5kFT46uZmtnQl1yxj2yfuV94RVrMBKo4mdetNiFh2YhKQqAo2/7irBy6zlkxCG5J9WT6HpV9+2bUobqzRoWhv7lYDjBBwc5hxOCDg4zDkmQh3wBGGlVGgfp6XFskUzCFBtNDW9LAk/DLFcKkpL/kqjiUaCLbQhZ2zAgNkYyDBBtci0vE2rY0WUDon8WNjnYXFNdDIzscBOo45qs/tSP2RGMwj79hXpfh376ydKJdfb/qSfVbh8/eUQ4xR3pka6NsYeMqSu14yMUiaWHDsNEyEE8jkPV+5fHXq/1YaPVy9uaixyEkvFPK46sA9eHB+aGTi5volLuzW5ooHkGQZsXlRd39U95IxklcGqblQ4eLXAdW8rtoVAo8p4/L4N7KyPZ+slzY0YSHj2JaBGOxZEUqhwW13c6ECUGK4luLRI10r/oT5FnVA01dGbWGOQNGS/S0SniOiJ+O+9iTZ9pSFrbQmqf4OCYbFFiFZIqXjMloswdf5avHLyv1FhDzclBgYRQIJAgnShOQkg83PW628eYLMSaKUh+yYRLQP4BhHdH3/3UWb+g2RlJQ3Z5QD+gYhuGFewUQcHh/6QuhJg5jPM/M348zaAVhqybrgTcRoyZn4ZQCsNmYODwwRikDRk7wDwISL6AIDHEK0W1mGZhiyJuh/g5Qsb7eN95SL2LZTay7EgDPH65o5E1hmtQRT4FlZutaaPl89vJCIFEZbLRexf7KTrbgSh5lSTCQarNk3j54gwk8tMhkC6DqxHIzLwBtoee/p1NELZcz/1fAOXXu8Y/YQBsLMZWtgKjG/JrRoLFRcFbvzBEvKlTunZF5o487yfaGRnN8BQLP2Y4Qnl0hod4/Q5ssElMVnvKzf8FF48/D3t47XKebznqU+h3NxFFgyShuxjAD6M6Dw/DOAPAfwCul8jtb92LsKDpTwqjQ6LrHqsMQPVpi8ZrNhGl9HHlY+DkKWxCcDaYglLpWK7rNb0oUKP/pJNAzaRgEYrsh7HnZbdjuIyEwmZVXFXrFGqO4zqjqzxmYg4Q0cj5VhkUNvFFwC8HLDviIfSUmdBvPV6CCJdc5Xud5eXWY9a3X9EIBvS7/zS5Xjl4JvaNStbr8EX2Tn+zGnImPksMwfMHAL4BDpLfqs0ZMz8cWa+nZlvXy64zOUODnuFzGnIWnkIY/w0gKfizy4NmYPDFGGQNGQ/R0S3IlrBvALglwAgSxoyLw8sH+ksgkQYSkv0IEwPNDE0GNZiggjLxYK0wK77Pppp7oepEXK4uyFQ4nwLZcLCqmjPjZmxdTFAs97bDsDUr6qiLxbyKAqCV9dVHhPCnIcgJ7BZbcgBOk2Vdb8X+ev4uuYS9vuNIMRuvWE1F+N4WlH3xXXoA5tnA1Q2O/exuj1cu4b2JSLdgEe7PO06JNdROIJD26dw/Pwz7ePF+hZO77sWhaCeMpunjKW01yGmAeDN1yzw537nDe3jc88yXn9aJqz6jTZjW09tJQBcsbaCtQQxaHJ5PbW+jYu7VamOaSxNn08UcMwUqrq8ymcevjaHG36wCBFrTX6D8eQDFayfkTkS3bMunVC8+dB+XFMP4NUaVgQh5wR21pbwT69f0IhXs7Vd95fQE4Rbjh3BSoJ/ubBTwXdOn9fqpu3ZdxtKFQJSYCkCcnmlUUiAYrEnSB6fiOSIxOjYuqjHyVpE+nmo/bT6VztPlviigCDBAVxcOoq/uv1DqBSXtb6SePw//MA3mPl2tXwiLAZBgFdInKZgKw+0IQ2dXodIsywblh2JYbNAH18AuQIgPGpXSRvfSDAaBFVpq4oixwZQNucUMBbXd0AZ4guY4BEh53VWAp7IdmFtCDVTI78pE5NEQMYpjA25sIF82Fkp54IGGrki6vmFTP05B6I5h/CDvl8e8vcwCovD0DEZKwEGwkBaJ08tuMtn2zbdKoRBpyYHbN7zt3CqGab6p2bckXRZaqk+iTokJ3pNizzUbazOiMon06kN4VddW9YP3uVQQWB4oQ8v6C8uQwsTIQQau4xXv54gZzamUwqk+ynohkGAvlev9rNxNsB3Hqq1X6AwZOxekr3vGLq+rxodMZvrJNFk4GtBGTsQOEY+bhZ14/I44BBJtletQqzr6WvlIo6trXbqkNljtF8Dpm6qgOqlR4aZptko5ITAsf3LyHudbeztWgOXdntHWrJFd9GW1q7Tcrm2jvc89Sk0vUKPFsC3u5RPhBDw68Cll+XL0a+XlE2YclO9vSRGVSszGI4BoLIVYmczQQJC50z0F567lOlzSKIOwsPhAt4s6sj1eBO1viHfs9Zx8iUr5nM4um+pa5/RHLsYMKmWdaphjrk3K0MkldBLQhBhbaHcFlat6zosITAMlJq7uOl09l14xwk4aPAZ2GQBbwpIMofBMRErAYfJgQBwXDRxV25rFvOtOhgwkULAI5ICQDIYfhBOM1/YE5JKQkBpiZCMf9moA7Xd0GgH0PNYKfO9AnYXDkpRaZpBFUhsN5XA+Kncdt/zNoXu1lJq9aPn91LTDKGeIqMafVmvhu9O0g2t6qxUktUaRt33pckzM4o52dQ91XAs0bYzlFnKqs8DcaTYJKGrQ9nfjokUAvsWSzi8vNg+9sMQr17ctIp4O11gmN6MW+8oY2Glc5dPv9DEUw9W2w80c8QJJFuGzAhYflD9QBYcF/dfjYfe91/h5zuGUDf+8x/jxpf/qX0sCFiyeKCCQCYGAdYMX9QoOTZWnwxODznODKHaNZDOSYDll4UV4oIZEOgdybcZhHjuzEWpn0PLi7j58kPSWM+euZD6fKoORcY05FB4CbazCxkEEykEWrK4dYFM5I5aYtwdsiQLJw2tCD2dgqH1DCYBFolfsYxPl5m7G6dHYP9orwD6bqfvP5AmXaYXjhicc7AQf
S0kGQCLiPd3mA04ITDnaC6XEeZ7bQYqIEJj36LbNpghTKQ6oEIQ4eDSQm9dUTC8Y3VQSTY6unBCbnNgaQEFT41fINexScO9tlBCKd+p5wchzm7v9J4jpzswgYEXv1FHLuFLsbMeRPvyCseWZAUYspMTg/DSTT+J9f3H22W1hTUEikHJg8fegRdWrkGu2sUDjRlHX/kaDn/3MQBAWMihuR2gEYSaF6EWwptkbeNSpYpvnzynDZEUJ5Vm05j3ME13Nq1NhEIgttJ5tUoKJcLxW4rIJ671pZMBLr7W0e1znsDl+5aRT/g3lA3PB4N1gpFZ8kUyGSupzwPFbbX+pfOQrTO7wVbTmwwhoDws6t0kAlbKRfQC5YDCcR+00rlYGyeBCy/K1mcrpYLVS56GxWIBi4kp1f0A53d2EfSxuDbq1cx4/QXZ/DMMFQHABkJRMbIJiXD6qrfh1PEfkOspT8ZTqzfgqdUbes7ypvXTuLHx1eiwUQd2dI84gv4iqs/gTq2BXUPKMSsnLvU4Zs2T36vLWm5VTJQkOYtcgXD5DXkUFztlfp0lISAQ/QCpuwEa1JvJDKbRMCRZIhb1wmQIAQC9HgU7+3I21DW/kMMIJT014ajT5pnqjjjdpJcKk4kx9foFMtaZLThOwMFhzjFBK4EBwQDXBcJq55dLNAkFTzUYmV2JngSBUapcwuJmJ7xj4BVQW9wfBSjoA43iCnZWOtHkiEMs7p4HOJmqLdK/uVMQmQ8lFxLdjHzSzkU1OlLbUWysZDDE0coS2mEYMqo7oRTERYvWBDbu/6vrzWEumLI6FWn9WM5pdoRACDSfX5DWNoUAOH5AvhJqKuqZBTNuevh/4nrxf9pFG4fegEfv+G0ECWOhdBBeufm9OHnDu9ol+fou/uVf/gryjU6I65AZIilcmMEiat/uiVW7N1uolojR1qb0tcJRgFm2tUAks5JTrO0yvvWlqlQWGU8mOIIgxIvnLimRhfQXM9XAyRImdWXUSBUCRFQC8BCAYlz/L5n5d4hoP4C/AHAcUYzBn43zDoCIfhPABwEEAH6Fmf9+JLOXZwo0SS1Bfl4DGTOjUNtCLvFzUF062P9PFhH84iL8YseCM8iVjAkxVWMhzUIOw3nAWzkWpLx+hr6NFnmJOXEI1CuhIXSYPOdmEBhDhyVhKpsW2Pws1gH8CDO/BcCtAO4gorcDuAfAA8x8PYAH4mM1DdkdAP6EiOb1VXRwmHjYpCFjZt6JD/PxHyNKN3ZvXH4vgPfHn++ES0Pm4DA1sOIE4l/ybwB4A4A/ZuavE9ERZj4DAMx8hogOx9X7TkM2C8gJgSMri5JuuF2rY7uWMXz2gIiWp7LfxcLuBdzw2GcQep3bfua6f4Gtg9f11XeQK+CF2+6C53fO7cDpb+PQySckYyFhoP3SfO2YdY+41970HlRWjnTOY+ssrjrxILyE9yMUrqdldNMrE1M3wxzzvLp7GtqC2/9IXWWCRpQOoIpYCYE4b8CtRLQPwOeJ6M09qptmo90JKQ1ZeXDjnb1GzhM4siJHzTm9gbEKAZWxVvXbcuUibvjGp6U2u6uX9y0EwlwBJ267Syq7/tHP4MBrshAwWchB0bc1mydmqE7jL7/xx3Hhilvax4dOPomjL30FSAghQgiS3M9NHIVsS2LLjpi4hVlCX1Q5M28AeBCRrn+2lYUo/n/LHtSlIZt5kOFvnGM6DBM2acgOxSsAEFEZwI8CeBZRurG742p3A/hC/NmlIXNwmCLYqANHAdwb8wICwOeY+f8R0cMAPkdEHwTwKoCfAZApDdm8wsZQpq/OkodEIJKNdTL9hjJD1HfhNSrJztFcOiBtuvuFBdQSW5DEIcr1bYjQTzaDnN1HD50e5AqoFZek2QZCVhcDL4/qwgH47WQbjHKzAhEm1YPWNxanmPg87nWGumNr0jqMsTKsatkhVQgw87cAvNVQfhHAu7u0+QiAj2Se1QxDtX6zEgSxcUqb0CKKQ04l+yHtARKKVx8TIQyFNGL6Q8849PgXcOixv2yXBMUlPPeBjyEsdTiQkze+G2evflu7v0JtC7fd//tY3jjZ6UndzDec+MWjb8YTP/SfJAFTXToo1dk49AZ89X0fAcUXRARNvOXRP8eVrzwsdc2QnYzYYOSjCqHI6Uif1+gwHPvAQSwWZ8dicBoQv/Emk1abxmpMPIn0UsJkccs6T7Wio9ZE7JGrbqK0fqp97JeWQSzz/M3SMpqlTi48v1JGKHo/Xqaz9/Ml7K5eDojuPFGYK6KSMGMWQSOxKlAGsPCPmmHOzwpzYkPr4ODQDW4lMCTYpACbWJjmaQji0Xcfg0ALEzzCseYcTggMCQEzNis1KdpOpZGWG06nr0gQLrsmJ0cW2ghw8ZSf4ASgkQlRkezkwkkiIa5FSq77wycfR67RyabTKC3j9Wt+AEG+1G6zc8Wbcf62f92uE+ZLCHNyhKLV8y9g7eyz7fHyjQoK9d6hyw1OhVjceh3XPPU3Uojt1699R0Q6xijvXsRlrz0KCiLSUYSBxD1Es7Zb5qvOQMPSDFqaiEbMWgyg/XgoRl/DxkQIAfWGTaOK5gchTm9sS/HnbVcCpJz8dbcXsbDa0dROPd/ApTN+29yO2fxACdV5R425DdZSrF/97Jdw1TNfah9v7j+OC1e8pSMEiLB54zuxeeM7e57D4Vcfw5se/qRmMag52kjGQvqjvXrhJdzyj/9dKts5cFwSAkubJ3HLV/8U+cSOhUekeQ2q45meK01YDPHhy0L57YVhkuMEHBzmHE4IODjMOSZCHZhGjJL0YwY4GaRijDwYITL0obBP+65RknXKfCjzWLp6YttuluGEQEac3drFRqVDqIUc8QIDg4En/06OdtOss+R+1zYMMoS47hwSWJAcFpxIkyc5CEmgrW6fxg99/tcRUtIZhzWhp2rzxfo2cgpRYQy8oZ6vWkAENf7Q2x/4AwS5TmhnL6ijFNQlDsATQmoVOU+pfER3fqIXZt2OwAmBjPDDQIo9xzxISsgOmBlVhVQPw4hrbu8lROaCejgtxVgIgFamvwiyRY0X+FJcwpYAMJFckiCIGTb5RWx9IVdLg1pnYfe8XkeYSEcTMahJmUwvtRpZaJbgOAEHhzmHEwIODnMOpw5kxEIhj2Chs0gOQsZWtd4z/bbqLNRaLavGcVoXJsMgtZ6WciterqvGQmrk3pZ3UhuyRt5qkbbnTYZoO2rfi4U89i90Ih0zGBe2K2j0yaUQxSnGEp0LIs0mwMQBqKqAdtzXTPYOyfvhEWGxmJc4kFrTT02V3oITAhmxtlDGvsQDXfcD7NYbCAP5bZVMdVh+UTq6fbJQH4sQ5f/kTkPdWKjtHNQ+1F5C3T4xisoVKkKIWBcwabshZn0c0skdWCzj1qsuax/7IeORl09hfbemzVHrRxmLErVbQkEWAtSuK3WuyjyVIyDzufSLtogag1TJeQKXrSzBS+RLPL+1i7pf7dEq0X5UE5t1qL985nttKtVfphQZoNXr0/cw0TcbBBM04aG+GLZWbEZBoHyfzPvAsSeiqWvTSkMpMK88MmFEJoND763H
OIndkH6tDh0n4OAw53ArgSGhtSRV96alX+1YRe+srKO1txrJViUFGLHqkECz0ds+h2K7gWn0t9N/4dPrGPsZo4KvZrbq8BaJMsOsTaqOXqf3iajj9AsnBIaEvPBw9YF9mu7cb3grQLdG9PY34V1Zaz/5gc94/pEati4kcwECipyIDYqSx6QPRnKEokj/V6qxSmnahrxK/14oEZGjevr+v9pWe8GE1sqoLqik57Ccda7cv4KClwiEonEPcWEaMkxHEMEzOE/ZYpA0ZL8L4N8DaFly/BYz3xe32YM0ZHsLIQhLpUJ6xQzwVgVyl/ttK8Jmg5Evqnq7/IvBcSyvNN2e1PA78aHMU8h1ugXsSX0Mu+j+Zi6BetYxkZ7WxkI96mR9lRaLBZTy0/mbajPrVhqyHSLKA/gKEf1t/N1HmfkPkpWVNGSXA/gHIrrBBRt1cJhMDJKGrBvuhEtD5uAwNRgkDdlPAPgQEX0AwGMAfj3OSjyXachGiUY1xM6ZoK30B76UfCeGrLdTHGmYFU5A3+/X9f3ePZtb2CyjG36AizudQCABM/xQNxQycQLaWDbqtb6r2PdyXxCwVCxAiN7qyaDk3F5ikDRkHwPwYUTPw4cB/CGAX4Dl5vispSEbJS6eDHDi8ar0QgdBqLws8oZ/a/9f5QREijYfWQcqJGCGl6fTc6flpd0aHnn5tPI94JG+INUt+ZSXzhRFyDS+xhvoVgGk1E92nfc8XHd4P4qKvj+9r7yOzGnImPksMwccWXx8Ap0lf99pyFZcGrJ0JMMLTOO+35SDlL9ZQuY0ZK08hDF+GsBT8WeXhszBYYowSBqy/01EtyL6jXoFwC8BmPg0ZJMYBnyWM95mwfAuR6ad+rnDIGnIfr5Hm4lNQ3Z+u6J5V2liwWjww72qAIDRgzDNEGi1XMSh5cWu8+0G1RCoZY2ofm/yWkxCQEmLRi0/wk4Z85BeHQO3oO3am+rY+C1AFR697QGg1Z8trJSL1nYLE2PdMMq46klUGk0pH4BpXaBZ/RkTi+htVJ7bZD2olklWZpZovaKq96HuCKQY/Rgs78zOJglzoG6WQRlhdhYyGfmox4YdAqV+mtFRqyIpx7MGIkIpn7MWAs6ByMFhzuGEgIPDnGNi1IFBwcxo+EHPyD6AWW/fSzSDADs12fJnoZDXjVPQbxyBCKlGPmTmN5KNSHUoGiLS9va71bHpKZN/g4GjmHXMkBAAzm7votr0lS/kw8BgoTYsWFlJKViv1LBRqXf6IOCmyw+hZEjr3X6xKdJ1ZULP8DZrXoNqMO+4V9LrcJr0yAAb9btdJ4UDSJaY9u7bZYkv2oZSpNRR+kqOPQcyYIaEAKJf+eSLMdYffQOrze1/uiNy201aAvUxpBpOXLL847iOPJbeiWHebPJQtJ9XL/QlCPqsYyQYbeooJoTz8OIn4TgBB4c5hxMCDg5zjqlVByqNJjYqnSi1DEbTMsRyEqUV4PAbBUS8Xc8MXDgRYue8rG9PoqWhCtURJlSNhUy0AdI1kFTy0HZ+GdfZJpuANBTKhBvfXpQav/5CgPXTE2O8OjGYWiHQDAJs1erpFVOQLxP2X03wCtHTEgaM7bOE3QudOsMUALIeDwyTd9f0f0W/VXX9qEwmCxld6gyNE8jG9fdr7ZcrEI5en5fyFW6fr2FDc2VzcOqAg8OcwwkBB4c5x0SqA0HIqPtBzyXfUNKATyhqTV8yamoE2fRYg49Ruv5vUWfUyMIBaNucIWNnPZTaNmvQoPad94Tkz5H3vFl0L5AwkUJgq1bHTl2LnyVh0iz/hokXz12SHrwgZO1lNj2ZrFn4qA5EXYyFknXAWujyUb8FWu+m7EKaIZ/BmChRVN9lPH6fnIYraEKD2u7g0gIuX12WKuTEbC+YJ1IIMEfx53rW2fPfq9GAmeEr5z7LAm+YUAwfUa+EBkLRlI+gA08I5HPzFelqtkWcg4NDKmZaCPSzim2nBzME0MjqwOKwx7AyP3b3bSLVgSwgAKvlEnKeItcMK+mkKlEkQvBdAnvRw8AMLPshcsvpkYSSRUEY4uJOZSRL90kg66YRJt4gWZITApetLkl5BJdHlEVqkjE7QoCiUF3lQv/hy8OTkKICLQFYWu5W24y672OjUkUYjOJ1NRvwjAtGx6gh9TVq9Pql9zyBI6uLKOZm5jXIhJlWBxwcHNLhhICDw5zDCQEHhzkHTYJ3HBGdB7AL4EJa3SnEQbjzmjbM6rldzcyH1MKJEAIAQESPMfPtez2PYcOd1/Rhls/NBKcOODjMOZwQcHCYc0ySEPj4Xk9gRHDnNX2Y5XPTMDGcgIODw95gklYCDg4Oe4A9FwJEdAcRPUdEJ4jonr2eT78gok8S0TkieipRtp+I7ieiF+L/ryW++834XJ8jovfszazTQURXEtGXiegZInqaiH41Lp/qcyOiEhE9QkRPxuf1X+LyqT6vgcBxwo69+APgAXgRwLUACgCeBHDTXs4pwzm8E8D3AngqUfbfANwTf74HwO/Hn2+Kz7EI4Jr43L29Pocu53UUwPfGn5cBPB/Pf6rPDZH7wlL8OQ/g6wDePu3nNcjfXq8Evg/ACWZ+iZkbAD4L4M49nlNfYOaHAFxSiu8EcG/8+V4A70+Uf5aZ68z8MoATiK7BxIGZzzDzN+PP2wCeAXAFpvzcOMJOfJiP/xhTfl6DYK+FwBUAXkscn4zLph1HmPkMEL1MAA7H5VN5vkR0HMBbEf1qTv25EZFHRE8AOAfgfmaeifPKir0WAiY/z1nerpi68yWiJQB/BeDXmHmrV1VD2USeGzMHzHwrgGMAvo+I3tyj+tScV1bstRA4CeDKxPExALOQHuIsER0FgPj/5+LyqTpfIsojEgCfYua/jotn4twAgJk3ADwI4A7M0Hn1i70WAo8CuJ6IriGiAoC7AHxxj+c0DHwRwN3x57sBfCFRfhcRFYnoGgDXA3hkD+aXCoqicfwZgGeY+Y8SX031uRHRISLaF38uA/hRAM9iys9rIOw1MwngvYiY5xcB/PZezyfD/D8D4AyAJqJfjQ8COADgAQAvxP/fn6j/2/G5PgfgJ/Z6/j3O618gWvZ+C8AT8d97p/3cANwC4PH4vJ4C8J/j8qk+r0H+nMWgg8OcY6/VAQcHhz2GEwIODnMOJwQcHOYcTgg4OMw5nBBwcJhzOCHg4DDncELAwWHO4YSAg8Oc4/8DjkZ7KWvpRd8AAAAASUVORK5CYII=\n"},"metadata":{"needs_background":"light"}}]},{"cell_type":"markdown","source":"Success! Our upgraded reset function makes the environment now start from the start of the normal game phase, meaning the action space can be consistently the same throughout the game.","metadata":{}},{"cell_type":"markdown","source":"## 4. 
Training with RL\n\nIn the previous tutorial, we saw how to train an agent with SB3 in single-agent environments. Handling true multi-agent play via training separate or shared policies to control all agents requires a few extra things, so instead, for the purpose of this tutorial, we will treat Lux S2 like a single-agent environment by training a policy for one team and letting the other team simply do nothing.\n\nMoreover, we want to define our own reward function to encourage our robots to seek ice, dig it, and return to a factory so it can generate water and survive longer. To do all of this, we will just create a custom environment wrapper.\n\n\n","metadata":{}},{"cell_type":"code","source":"import copy\nclass CustomEnvWrapper(gym.Wrapper):\n    def __init__(self, env: gym.Env) -> None:\n        \"\"\"\n        Adds a custom reward and turns the LuxAI_S2 environment into a single-agent environment for easy training\n        \"\"\"\n        super().__init__(env)\n        self.prev_step_metrics = None\n\n    def step(self, action):\n        agent = \"player_0\"\n        opp_agent = \"player_1\"\n\n        opp_factories = self.env.state.factories[opp_agent]\n        for k in opp_factories.keys():\n            factory = opp_factories[k]\n            # set enemy factories to have 1000 water to keep them alive the whole game and treat the game as single-agent\n            factory.cargo.water = 1000\n\n        # submit actions for just one agent to make it single-agent\n        # and save single-agent versions of the data below\n        action = {agent: action}\n        obs, _, done, info = self.env.step(action)\n        obs = obs[agent]\n        done = done[agent]\n\n        # we collect stats on teams here. These are useful stats that can be used to help generate reward functions\n        stats: StatsStateDict = self.env.state.stats[agent]\n\n        info = dict()\n        metrics = dict()\n        metrics[\"ice_dug\"] = (\n            stats[\"generation\"][\"ice\"][\"HEAVY\"] + stats[\"generation\"][\"ice\"][\"LIGHT\"]\n        )\n        metrics[\"water_produced\"] = stats[\"generation\"][\"water\"]\n\n        # we save these two to see how often the agent updates robot action queues and how often it has enough\n        # power to do so and succeed (less frequent updates = more power is saved)\n        metrics[\"action_queue_updates_success\"] = stats[\"action_queue_updates_success\"]\n        metrics[\"action_queue_updates_total\"] = stats[\"action_queue_updates_total\"]\n\n        # we can save the metrics to info so we can use tensorboard to log them and get a glimpse into how our agent is behaving\n        info[\"metrics\"] = metrics\n\n        reward = 0\n        if self.prev_step_metrics is not None:\n            # we check how much ice and water is produced and reward the agent for generating both\n            ice_dug_this_step = metrics[\"ice_dug\"] - self.prev_step_metrics[\"ice_dug\"]\n            water_produced_this_step = (\n                metrics[\"water_produced\"] - self.prev_step_metrics[\"water_produced\"]\n            )\n            # we reward water production more as it is the most important resource for survival\n            reward = ice_dug_this_step / 100 + water_produced_this_step\n\n        self.prev_step_metrics = copy.deepcopy(metrics)\n        return obs, reward, done, info\n\n    def reset(self, **kwargs):\n        obs = self.env.reset(**kwargs)[\"player_0\"]\n        self.prev_step_metrics = None\n        return obs","metadata":{"execution":{"iopub.status.busy":"2023-02-01T00:52:27.477157Z","iopub.execute_input":"2023-02-01T00:52:27.477630Z","iopub.status.idle":"2023-02-01T00:52:27.491772Z","shell.execute_reply.started":"2023-02-01T00:52:27.477591Z","shell.execute_reply":"2023-02-01T00:52:27.490832Z"},"trusted":true},"execution_count":13,"outputs":[]},{"cell_type":"markdown","source":"### 3.1 Defining the Environment and using 
Wrappers","metadata":{}},{"cell_type":"markdown","source":"Next, we will define a `make_env` function and use it with SB3 to create multiple environments in parallel that scale with the number of CPU cores you have. A future tutorial will show a variant that creates a single jax-powered environment to achieve the same functionality but scaling with GPU.\n\nWe will use the SB3Wrapper, the controller and observation wrapper we defined, and the custom env wrapper as well. These put together will give us an environment that resets to the start of the normal game phase, has a consistent and simplified observation and action space, and contains our reward function.","metadata":{}},{"cell_type":"code","source":"from stable_baselines3.common.vec_env import SubprocVecEnv\nfrom stable_baselines3.common.monitor import Monitor\nfrom gym.wrappers import TimeLimit\ndef make_env(env_id: str, rank: int, seed: int = 0, max_episode_steps=200):\n def _init() -> gym.Env:\n # verbose = 0\n # collect_stats=True lets us track stats like total ice dug during an episode to help create reward functions\n # max factories set to 2 for simplification and keeping returns consistent as we survive longer \n # if there are more initial resources\n env = gym.make(env_id, verbose=0, collect_stats=True, MAX_FACTORIES=2)\n\n # Add a SB3 wrapper to make it work with SB3 and simplify the action space with the controller\n # this will remove the bidding phase and factory placement phase. For factory placement we use\n # the provided place_near_random_ice function which will randomly select an ice tile and place a factory near it.\n env = SB3Wrapper(\n env,\n factory_placement_policy=place_near_random_ice,\n controller=SimpleUnitDiscreteController(env.env_cfg),\n )\n \n # changes observation to include a few simple features\n env = SimpleUnitObservationWrapper(\n env\n )\n \n # convert to single agent, adds our reward\n env = CustomEnvWrapper(env) \n \n # Add a timelimit to the environment, which can truncate episodes, speed up training\n env = TimeLimit(\n env, max_episode_steps=max_episode_steps\n )\n env = Monitor(env) # for SB3 to allow it to record metrics\n env.reset(seed=seed + rank)\n set_random_seed(seed)\n return env\n\n return _init","metadata":{"execution":{"iopub.status.busy":"2023-02-01T00:52:30.252699Z","iopub.execute_input":"2023-02-01T00:52:30.253701Z","iopub.status.idle":"2023-02-01T00:52:30.262878Z","shell.execute_reply.started":"2023-02-01T00:52:30.253661Z","shell.execute_reply":"2023-02-01T00:52:30.261747Z"},"trusted":true},"execution_count":14,"outputs":[]},{"cell_type":"markdown","source":"Next we will define a useful callback function to log some of the custom metrics we defined earlier in the CustomEnvWrapper","metadata":{}},{"cell_type":"code","source":"from stable_baselines3.common.callbacks import BaseCallback, EvalCallback\nclass TensorboardCallback(BaseCallback):\n def __init__(self, tag: str, verbose=0):\n super().__init__(verbose)\n self.tag = tag\n\n def _on_step(self) -> bool:\n c = 0\n\n for i, done in enumerate(self.locals[\"dones\"]):\n if done:\n info = self.locals[\"infos\"][i]\n c += 1\n for k in info[\"metrics\"]:\n stat = info[\"metrics\"][k]\n self.logger.record_mean(f\"{self.tag}/{k}\", stat)\n return 
True","metadata":{"execution":{"iopub.status.busy":"2023-02-01T00:52:31.076189Z","iopub.execute_input":"2023-02-01T00:52:31.076633Z","iopub.status.idle":"2023-02-01T00:52:31.084739Z","shell.execute_reply.started":"2023-02-01T00:52:31.076596Z","shell.execute_reply":"2023-02-01T00:52:31.083505Z"},"trusted":true},"execution_count":15,"outputs":[]},{"cell_type":"markdown","source":"### 3.2 Training Setup\n\nNow we can prepare for training by creating training and evaluation environments, as well as defining our algorithm and model.","metadata":{}},{"cell_type":"code","source":"import os.path as osp\nfrom stable_baselines3.common.utils import set_random_seed\nfrom stable_baselines3.ppo import PPO\n\nset_random_seed(42)\nlog_path = \"logs/exp_1\"\nnum_envs = 4\n\n# set max episode steps to 200 for training environments to train faster\nenv = SubprocVecEnv([make_env(\"LuxAI_S2-v0\", i, max_episode_steps=200) for i in range(num_envs)])\nenv.reset()\n# set max episode steps to 1000 to match original environment\neval_env = SubprocVecEnv([make_env(\"LuxAI_S2-v0\", i, max_episode_steps=1000) for i in range(4)])\neval_env.reset()\nrollout_steps = 4000\npolicy_kwargs = dict(net_arch=(128, 128))\nmodel = PPO(\n \"MlpPolicy\",\n env,\n n_steps=rollout_steps // num_envs,\n batch_size=800,\n learning_rate=3e-4,\n policy_kwargs=policy_kwargs,\n verbose=1,\n n_epochs=2,\n target_kl=0.05,\n gamma=0.99,\n tensorboard_log=osp.join(log_path),\n)\n\neval_callback = EvalCallback(\n eval_env,\n best_model_save_path=osp.join(log_path, \"models\"),\n log_path=osp.join(log_path, \"eval_logs\"),\n eval_freq=24_000,\n deterministic=False,\n render=False,\n n_eval_episodes=5,\n)","metadata":{"execution":{"iopub.status.busy":"2023-02-01T00:52:31.757402Z","iopub.execute_input":"2023-02-01T00:52:31.758648Z","iopub.status.idle":"2023-02-01T00:52:41.699282Z","shell.execute_reply.started":"2023-02-01T00:52:31.758590Z","shell.execute_reply":"2023-02-01T00:52:41.698356Z"},"trusted":true},"execution_count":16,"outputs":[{"name":"stdout","text":"Using cpu device\n","output_type":"stream"}]},{"cell_type":"markdown","source":"With our callback functions and model defined, we can now begin training","metadata":{}},{"cell_type":"code","source":"# %load_ext tensorboard\n%tensorboard --logdir logs","metadata":{"execution":{"iopub.status.busy":"2023-02-01T00:50:48.544721Z","iopub.execute_input":"2023-02-01T00:50:48.545111Z","iopub.status.idle":"2023-02-01T00:50:48.553940Z","shell.execute_reply.started":"2023-02-01T00:50:48.545072Z","shell.execute_reply":"2023-02-01T00:50:48.551693Z"},"trusted":true},"execution_count":11,"outputs":[{"name":"stderr","text":"UsageError: Line magic function `%tensorboard` not found.\n","output_type":"stream"}]},{"cell_type":"code","source":"total_timesteps = 10_000_000\nmodel.learn(\n total_timesteps,\n callback=[TensorboardCallback(tag=\"train_metrics\"), eval_callback],\n)\nmodel.save(osp.join(log_path, \"models/latest_model\"))","metadata":{"execution":{"iopub.status.busy":"2023-02-01T00:52:41.700767Z","iopub.execute_input":"2023-02-01T00:52:41.701709Z","iopub.status.idle":"2023-02-01T01:26:07.717095Z","shell.execute_reply.started":"2023-02-01T00:52:41.701659Z","shell.execute_reply":"2023-02-01T01:26:07.715570Z"},"scrolled":true,"trusted":true},"execution_count":17,"outputs":[{"name":"stdout","text":"Logging to logs/exp_1/PPO_2\n----------------------------------------------\n| rollout/ | |\n| ep_len_mean | 200 |\n| ep_rew_mean | 0 |\n| time/ | |\n| fps | 957 |\n| iterations | 1 |\n| time_elapsed | 4 |\n| 
total_timesteps | 4000 |\n| train_metrics/ | |\n| action_queue_updates_success | 145 |\n| action_queue_updates_total | 177 |\n| ice_dug | 0 |\n| water_produced | 0 |\n----------------------------------------------\n---------------------------------------------------\n| rollout/ | |\n| ep_len_mean | 200 |\n| ep_rew_mean | 0.02 |\n| time/ | |\n| fps | 958 |\n| iterations | 2 |\n| time_elapsed | 8 |\n| total_timesteps | 8000 |\n| train/ | |\n| approx_kl | 0.00025716395 |\n| clip_fraction | 0 |\n| clip_range | 0.2 |\n| entropy_loss | -2.48 |\n| explained_variance | 0.843 |\n| learning_rate | 0.0003 |\n| loss | -0.000655 |\n| n_updates | 2 |\n| policy_gradient_loss | -0.000534 |\n| value_loss | 0.000729 |\n| train_metrics/ | |\n| action_queue_updates_success | 146 |\n| action_queue_updates_total | 176 |\n| ice_dug | 4 |\n| water_produced | 0 |\n---------------------------------------------------\n---------------------------------------------------\n| rollout/ | |\n| ep_len_mean | 200 |\n| ep_rew_mean | 0.0133 |\n| time/ | |\n| fps | 974 |\n| iterations | 3 |\n| time_elapsed | 12 |\n| total_timesteps | 12000 |\n| train/ | |\n| approx_kl | 9.9918936e-05 |\n| clip_fraction | 0 |\n| clip_range | 0.2 |\n| entropy_loss | -2.48 |\n| explained_variance | 0.363 |\n| learning_rate | 0.0003 |\n| loss | -0.00112 |\n| n_updates | 4 |\n| policy_gradient_loss | -0.000431 |\n| value_loss | 0.000874 |\n| train_metrics/ | |\n| action_queue_updates_success | 138 |\n| action_queue_updates_total | 177 |\n| ice_dug | 0 |\n| water_produced | 0 |\n---------------------------------------------------\n--------------------------------------------------\n| rollout/ | |\n| ep_len_mean | 200 |\n| ep_rew_mean | 0.075 |\n| time/ | |\n| fps | 971 |\n| iterations | 4 |\n| time_elapsed | 16 |\n| total_timesteps | 16000 |\n| train/ | |\n| approx_kl | 0.0001520467 |\n| clip_fraction | 0 |\n| clip_range | 0.2 |\n| entropy_loss | -2.48 |\n| explained_variance | 0.945 |\n| learning_rate | 0.0003 |\n| loss | -0.00105 |\n| n_updates | 6 |\n| policy_gradient_loss | -0.000384 |\n| value_loss | 2.93e-05 |\n| train_metrics/ | |\n| action_queue_updates_success | 139 |\n| action_queue_updates_total | 177 |\n| ice_dug | 1 |\n| water_produced | 0.25 |\n--------------------------------------------------\n---------------------------------------------------\n| rollout/ | |\n| ep_len_mean | 200 |\n| ep_rew_mean | 0.06 |\n| time/ | |\n| fps | 962 |\n| iterations | 5 |\n| time_elapsed | 20 |\n| total_timesteps | 20000 |\n| train/ | |\n| approx_kl | 0.00014432374 |\n| clip_fraction | 0 |\n| clip_range | 0.2 |\n| entropy_loss | -2.48 |\n| explained_variance | 0.000437 |\n| learning_rate | 0.0003 |\n| loss | 0.0395 |\n| n_updates | 8 |\n| policy_gradient_loss | -0.00028 |\n| value_loss | 0.0566 |\n| train_metrics/ | |\n| action_queue_updates_success | 140 |\n| action_queue_updates_total | 177 |\n| ice_dug | 0 |\n| water_produced | 0 |\n---------------------------------------------------\n--------------------------------------------------\n| rollout/ | |\n| ep_len_mean | 200 |\n| ep_rew_mean | 0.06 |\n| time/ | |\n| fps | 967 |\n| iterations | 6 |\n| time_elapsed | 24 |\n| total_timesteps | 24000 |\n| train/ | |\n| approx_kl | 7.736915e-05 |\n| clip_fraction | 0 |\n| clip_range | 0.2 |\n| entropy_loss | -2.48 |\n| explained_variance | 0.926 |\n| learning_rate | 0.0003 |\n| loss | -0.00122 |\n| n_updates | 10 |\n| policy_gradient_loss | -0.000353 |\n| value_loss | 0.000268 |\n| train_metrics/ | |\n| action_queue_updates_success | 149 |\n| 
action_queue_updates_total | 177 |\n| ice_dug | 0 |\n| water_produced | 0 |\n--------------------------------------------------\n---------------------------------------------------\n| rollout/ | |\n| ep_len_mean | 200 |\n| ep_rew_mean | 0.052 |\n| time/ | |\n| fps | 966 |\n| iterations | 7 |\n| time_elapsed | 28 |\n| total_timesteps | 28000 |\n| train/ | |\n| approx_kl | 0.00023715764 |\n| clip_fraction | 0 |\n| clip_range | 0.2 |\n| entropy_loss | -2.48 |\n| explained_variance | 0.824 |\n| learning_rate | 0.0003 |\n| loss | -0.00399 |\n| n_updates | 12 |\n| policy_gradient_loss | -0.00151 |\n| value_loss | 5.99e-05 |\n| train_metrics/ | |\n| action_queue_updates_success | 145 |\n| action_queue_updates_total | 177 |\n| ice_dug | 0 |\n| water_produced | 0 |\n---------------------------------------------------\n--------------------------------------------------\n| rollout/ | |\n| ep_len_mean | 200 |\n| ep_rew_mean | 0.056 |\n| time/ | |\n| fps | 969 |\n| iterations | 8 |\n| time_elapsed | 33 |\n| total_timesteps | 32000 |\n| train/ | |\n| approx_kl | 0.0005054677 |\n| clip_fraction | 0 |\n| clip_range | 0.2 |\n| entropy_loss | -2.48 |\n| explained_variance | 0.924 |\n| learning_rate | 0.0003 |\n| loss | -0.00543 |\n| n_updates | 14 |\n| policy_gradient_loss | -0.00253 |\n| value_loss | 1.45e-05 |\n| train_metrics/ | |\n| action_queue_updates_success | 138 |\n| action_queue_updates_total | 175 |\n| ice_dug | 2 |\n| water_produced | 0 |\n--------------------------------------------------\n---------------------------------------------------\n| rollout/ | |\n| ep_len_mean | 200 |\n| ep_rew_mean | 0.008 |\n| time/ | |\n| fps | 976 |\n| iterations | 9 |\n| time_elapsed | 36 |\n| total_timesteps | 36000 |\n| train/ | |\n| approx_kl | 0.00033363188 |\n| clip_fraction | 0 |\n| clip_range | 0.2 |\n| entropy_loss | -2.48 |\n| explained_variance | 0.33 |\n| learning_rate | 0.0003 |\n| loss | -0.00186 |\n| n_updates | 16 |\n| policy_gradient_loss | -0.000427 |\n| value_loss | 0.000187 |\n| train_metrics/ | |\n| action_queue_updates_success | 138 |\n| action_queue_updates_total | 175 |\n| ice_dug | 2 |\n| water_produced | 0 |\n---------------------------------------------------\n---------------------------------------------------\n| rollout/ | |\n| ep_len_mean | 200 |\n| ep_rew_mean | 0.008 |\n| time/ | |\n| fps | 974 |\n| iterations | 10 |\n| time_elapsed | 41 |\n| total_timesteps | 40000 |\n| train/ | |\n| approx_kl | 0.00030649855 |\n| clip_fraction | 0 |\n| clip_range | 0.2 |\n| entropy_loss | -2.48 |\n| explained_variance | 0.2 |\n| learning_rate | 0.0003 |\n| loss | -0.0022 |\n| n_updates | 18 |\n| policy_gradient_loss | -0.000971 |\n| value_loss | 0.000312 |\n| train_metrics/ | |\n| action_queue_updates_success | 142 |\n| action_queue_updates_total | 175 |\n| ice_dug | 0 |\n| water_produced | 0 |\n---------------------------------------------------\n-------------------------------------------------\n| rollout/ | |\n| ep_len_mean | 200 |\n| ep_rew_mean | 0.01 |\n| time/ | |\n| fps | 975 |\n| iterations | 11 |\n| time_elapsed | 45 |\n| total_timesteps | 44000 |\n| train/ | |\n| approx_kl | 0.000877353 |\n| clip_fraction | 0 |\n| clip_range | 0.2 |\n| entropy_loss | -2.47 |\n| explained_variance | 0.931 |\n| learning_rate | 0.0003 |\n| loss | -0.00712 |\n| n_updates | 20 |\n| policy_gradient_loss | -0.00295 |\n| value_loss | 6.81e-06 |\n| train_metrics/ | |\n| action_queue_updates_success | 132 |\n| action_queue_updates_total | 174 |\n| ice_dug | 1 |\n| water_produced | 0 
|\n-------------------------------------------------\n--------------------------------------------------\n| rollout/ | |\n| ep_len_mean | 200 |\n| ep_rew_mean | 0.01 |\n| time/ | |\n| fps | 977 |\n| iterations | 12 |\n| time_elapsed | 49 |\n| total_timesteps | 48000 |\n| train/ | |\n| approx_kl | 0.0011578674 |\n| clip_fraction | 0 |\n| clip_range | 0.2 |\n| entropy_loss | -2.46 |\n| explained_variance | 0.371 |\n| learning_rate | 0.0003 |\n| loss | -0.00181 |\n| n_updates | 22 |\n| policy_gradient_loss | -0.000925 |\n| value_loss | 7.56e-05 |\n| train_metrics/ | |\n| action_queue_updates_success | 140 |\n| action_queue_updates_total | 174 |\n| ice_dug | 0 |\n| water_produced | 0 |\n--------------------------------------------------\n-------------------------------------------------\n| rollout/ | |\n| ep_len_mean | 200 |\n| ep_rew_mean | 0.006 |\n| time/ | |\n| fps | 971 |\n| iterations | 13 |\n| time_elapsed | 53 |\n| total_timesteps | 52000 |\n| train/ | |\n| approx_kl | 0.002232566 |\n| clip_fraction | 0.003 |\n| clip_range | 0.2 |\n| entropy_loss | -2.44 |\n| explained_variance | 0.898 |\n| learning_rate | 0.0003 |\n| loss | -0.0102 |\n| n_updates | 24 |\n| policy_gradient_loss | -0.00382 |\n| value_loss | 5.88e-06 |\n| train_metrics/ | |\n| action_queue_updates_success | 132 |\n| action_queue_updates_total | 175 |\n| ice_dug | 0 |\n| water_produced | 0 |\n-------------------------------------------------\n------------------------------------------------\n| rollout/ | |\n| ep_len_mean | 200 |\n| ep_rew_mean | 0.002 |\n| time/ | |\n| fps | 970 |\n| iterations | 14 |\n| time_elapsed | 57 |\n| total_timesteps | 56000 |\n| train/ | |\n| approx_kl | 0.00616081 |\n| clip_fraction | 0.0591 |\n| clip_range | 0.2 |\n| entropy_loss | -2.4 |\n| explained_variance | 0.908 |\n| learning_rate | 0.0003 |\n| loss | -0.00325 |\n| n_updates | 26 |\n| policy_gradient_loss | -0.00513 |\n| value_loss | 3.88e-06 |\n| train_metrics/ | |\n| action_queue_updates_success | 121 |\n| action_queue_updates_total | 171 |\n| ice_dug | 0 |\n| water_produced | 0 |\n------------------------------------------------\n--------------------------------------------------\n| rollout/ | |\n| ep_len_mean | 200 |\n| ep_rew_mean | 0.004 |\n| time/ | |\n| fps | 973 |\n| iterations | 15 |\n| time_elapsed | 61 |\n| total_timesteps | 60000 |\n| train/ | |\n| approx_kl | 0.0025576144 |\n| clip_fraction | 0 |\n| clip_range | 0.2 |\n| entropy_loss | -2.36 |\n| explained_variance | 0.844 |\n| learning_rate | 0.0003 |\n| loss | -0.00746 |\n| n_updates | 28 |\n| policy_gradient_loss | -0.00235 |\n| value_loss | 4e-06 |\n| train_metrics/ | |\n| action_queue_updates_success | 118 |\n| action_queue_updates_total | 168 |\n| ice_dug | 1 |\n| water_produced | 0 |\n--------------------------------------------------\n--------------------------------------------------\n| rollout/ | |\n| ep_len_mean | 200 |\n| ep_rew_mean | 0.056 |\n| time/ | |\n| fps | 974 |\n| iterations | 16 |\n| time_elapsed | 65 |\n| total_timesteps | 64000 |\n| train/ | |\n| approx_kl | 0.0026448176 |\n| clip_fraction | 0 |\n| clip_range | 0.2 |\n| entropy_loss | -2.29 |\n| explained_variance | 0.163 |\n| learning_rate | 0.0003 |\n| loss | 0.00305 |\n| n_updates | 30 |\n| policy_gradient_loss | 0.000345 |\n| value_loss | 8.15e-05 |\n| train_metrics/ | |\n| action_queue_updates_success | 120 |\n| action_queue_updates_total | 166 |\n| ice_dug | 2 |\n| water_produced | 0.25 
|\n--------------------------------------------------\n---------------------------------------------------\n| rollout/ | |\n| ep_len_mean | 200 |\n| ep_rew_mean | 0.056 |\n| time/ | |\n| fps | 973 |\n| iterations | 17 |\n| time_elapsed | 69 |\n| total_timesteps | 68000 |\n| train/ | |\n| approx_kl | 0.00060837384 |\n| clip_fraction | 0 |\n| clip_range | 0.2 |\n| entropy_loss | -2.29 |\n| explained_variance | 0.000745 |\n| learning_rate | 0.0003 |\n| loss | 0.0319 |\n| n_updates | 32 |\n| policy_gradient_loss | -0.00047 |\n| value_loss | 0.0473 |\n| train_metrics/ | |\n| action_queue_updates_success | 118 |\n| action_queue_updates_total | 164 |\n| ice_dug | 0 |\n| water_produced | 0 |\n---------------------------------------------------\n--------------------------------------------------\n| rollout/ | |\n| ep_len_mean | 200 |\n| ep_rew_mean | 0.056 |\n| time/ | |\n| fps | 974 |\n| iterations | 18 |\n| time_elapsed | 73 |\n| total_timesteps | 72000 |\n| train/ | |\n| approx_kl | 0.0003276844 |\n| clip_fraction | 0 |\n| clip_range | 0.2 |\n| entropy_loss | -2.24 |\n| explained_variance | 0.891 |\n| learning_rate | 0.0003 |\n| loss | -0.000166 |\n| n_updates | 34 |\n| policy_gradient_loss | -3.8e-05 |\n| value_loss | 0.000133 |\n| train_metrics/ | |\n| action_queue_updates_success | 117 |\n| action_queue_updates_total | 165 |\n| ice_dug | 0 |\n| water_produced | 0 |\n--------------------------------------------------\n--------------------------------------------------\n| rollout/ | |\n| ep_len_mean | 200 |\n| ep_rew_mean | 0.056 |\n| time/ | |\n| fps | 974 |\n| iterations | 19 |\n| time_elapsed | 77 |\n| total_timesteps | 76000 |\n| train/ | |\n| approx_kl | 0.0017416099 |\n| clip_fraction | 0 |\n| clip_range | 0.2 |\n| entropy_loss | -2.31 |\n| explained_variance | 0.722 |\n| learning_rate | 0.0003 |\n| loss | -0.00485 |\n| n_updates | 36 |\n| policy_gradient_loss | -0.00195 |\n| value_loss | 6.42e-05 |\n| train_metrics/ | |\n| action_queue_updates_success | 127 |\n| action_queue_updates_total | 167 |\n| ice_dug | 0 |\n| water_produced | 0 |\n--------------------------------------------------\n-------------------------------------------------\n| rollout/ | |\n| ep_len_mean | 200 |\n| ep_rew_mean | 0.054 |\n| time/ | |\n| fps | 974 |\n| iterations | 20 |\n| time_elapsed | 82 |\n| total_timesteps | 80000 |\n| train/ | |\n| approx_kl | 0.003034997 |\n| clip_fraction | 0 |\n| clip_range | 0.2 |\n| entropy_loss | -2.32 |\n| explained_variance | 0.86 |\n| learning_rate | 0.0003 |\n| loss | -0.00174 |\n| n_updates | 38 |\n| policy_gradient_loss | -0.00257 |\n| value_loss | 2.84e-05 |\n| train_metrics/ | |\n| action_queue_updates_success | 125 |\n| action_queue_updates_total | 168 |\n| ice_dug | 0 |\n| water_produced | 0 |\n-------------------------------------------------\n--------------------------------------------------\n| rollout/ | |\n| ep_len_mean | 200 |\n| ep_rew_mean | 0.002 |\n| time/ | |\n| fps | 969 |\n| iterations | 21 |\n| time_elapsed | 86 |\n| total_timesteps | 84000 |\n| train/ | |\n| approx_kl | 0.0010488753 |\n| clip_fraction | 0 |\n| clip_range | 0.2 |\n| entropy_loss | -2.37 |\n| explained_variance | 0.903 |\n| learning_rate | 0.0003 |\n| loss | 0.0017 |\n| n_updates | 40 |\n| policy_gradient_loss | 0.00017 |\n| value_loss | 1.63e-05 |\n| train_metrics/ | |\n| action_queue_updates_success | 130 |\n| action_queue_updates_total | 169 |\n| ice_dug | 1 |\n| water_produced | 0 |\n--------------------------------------------------\n--------------------------------------------------\n| 
rollout/ | |
| ep_len_mean | 200 |
| ep_rew_mean | 0.002 |
| time/ | |
| fps | 971 |
| iterations | 22 |
| time_elapsed | 90 |
| total_timesteps | 88000 |
| train/ | |
| approx_kl | 0.0002110318 |
| clip_fraction | 0 |
| clip_range | 0.2 |
| entropy_loss | -2.37 |
| explained_variance | 0.491 |
| learning_rate | 0.0003 |
| loss | -0.000476 |
| n_updates | 42 |
| policy_gradient_loss | -0.000178 |
| value_loss | 8.75e-05 |
| train_metrics/ | |
| action_queue_updates_success | 135 |
| action_queue_updates_total | 169 |
| ice_dug | 0 |
| water_produced | 0 |
--------------------------------------------------
...
Eval num_timesteps=96000, episode_reward=0.04 +/- 0.08
Episode length: 301.00 +/- 0.00
New best mean reward!
...
Eval num_timesteps=192000, episode_reward=13.16 +/- 20.69
Episode length: 313.00 +/- 19.39
New best mean reward!
...
Eval num_timesteps=288000, episode_reward=0.36 +/- 0.72
Episode length: 301.00 +/- 0.00
...
Eval num_timesteps=384000, episode_reward=36.56 +/- 73.02
Episode length: 336.00 +/- 70.00
New best mean reward!
...
Eval num_timesteps=480000, episode_reward=13.28 +/- 19.98
Episode length: 312.00 +/- 19.60
...
--------------------------------------------------
| rollout/ | |
| ep_len_mean | 200 |
| ep_rew_mean | 21.4 |
| time/ | |
| fps | 942 |
| iterations | 125 |
| time_elapsed | 530 |
| total_timesteps | 500000 |
| train/ | |
| approx_kl | 0.0002800067 |
| clip_fraction | 0 |
| clip_range | 0.2 |
| entropy_loss | -1.54 |
| explained_variance | 0.0908 |
| learning_rate | 0.0003 |
| loss | 61.6 |
| n_updates | 248 |
| policy_gradient_loss | -0.000172 |
| value_loss | 114 |
| train_metrics/ | |
| action_queue_updates_success | 135 |
| action_queue_updates_total | 149 |
| ice_dug | 62 |
| water_produced | 8 |
--------------------------------------------------
...
---------------------------------------------------
| rollout/ | |
| ep_len_mean | 200 |
| ep_rew_mean | 18.9 |
| time/ | |
| fps | 943 |
| iterations | 127 |
| time_elapsed | 538 |
| total_timesteps | 508000 |
| train/ | |
| approx_kl | 0.00052950566 |
| clip_fraction | 0 |
| clip_range | 0.2 |
| entropy_loss | -1.58 |
| explained_variance | 0.168 |
| learning_rate | 0.0003 |
| loss | 18.8 |
| n_updates | 252 |
| policy_gradient_loss | 4.88e-05 |
| value_loss | 24.3 |
| train_metrics/ | |
| action_queue_updates_success | 133 |
| 
action_queue_updates_total | 144 |\n| ice_dug | 40 |\n| water_produced | 2.75 |\n---------------------------------------------------\n--------------------------------------------------\n| rollout/ | |\n| ep_len_mean | 200 |\n| ep_rew_mean | 20.1 |\n| time/ | |\n| fps | 943 |\n| iterations | 128 |\n| time_elapsed | 542 |\n| total_timesteps | 512000 |\n| train/ | |\n| approx_kl | 0.0003762257 |\n| clip_fraction | 0 |\n| clip_range | 0.2 |\n| entropy_loss | -1.57 |\n| explained_variance | 0.44 |\n| learning_rate | 0.0003 |\n| loss | 1.53 |\n| n_updates | 254 |\n| policy_gradient_loss | 0.000105 |\n| value_loss | 3.49 |\n| train_metrics/ | |\n| action_queue_updates_success | 132 |\n| action_queue_updates_total | 141 |\n| ice_dug | 127 |\n| water_produced | 25.2 |\n--------------------------------------------------\n---------------------------------------------------\n| rollout/ | |\n| ep_len_mean | 200 |\n| ep_rew_mean | 14 |\n| time/ | |\n| fps | 944 |\n| iterations | 129 |\n| time_elapsed | 546 |\n| total_timesteps | 516000 |\n| train/ | |\n| approx_kl | 5.9887097e-05 |\n| clip_fraction | 0 |\n| clip_range | 0.2 |\n| entropy_loss | -1.61 |\n| explained_variance | 0.102 |\n| learning_rate | 0.0003 |\n| loss | 23.8 |\n| n_updates | 256 |\n| policy_gradient_loss | 0.000126 |\n| value_loss | 57.8 |\n| train_metrics/ | |\n| action_queue_updates_success | 137 |\n| action_queue_updates_total | 144 |\n| ice_dug | 140 |\n| water_produced | 16.2 |\n---------------------------------------------------\n---------------------------------------------------\n| rollout/ | |\n| ep_len_mean | 200 |\n| ep_rew_mean | 20.2 |\n| time/ | |\n| fps | 944 |\n| iterations | 130 |\n| time_elapsed | 550 |\n| total_timesteps | 520000 |\n| train/ | |\n| approx_kl | 0.00017327846 |\n| clip_fraction | 0 |\n| clip_range | 0.2 |\n| entropy_loss | -1.58 |\n| explained_variance | 0.131 |\n| learning_rate | 0.0003 |\n| loss | 20.6 |\n| n_updates | 258 |\n| policy_gradient_loss | -0.000129 |\n| value_loss | 34.4 |\n| train_metrics/ | |\n| action_queue_updates_success | 137 |\n| action_queue_updates_total | 149 |\n| ice_dug | 219 |\n| water_produced | 37.8 |\n---------------------------------------------------\n---------------------------------------------------\n| rollout/ | |\n| ep_len_mean | 200 |\n| ep_rew_mean | 20.2 |\n| time/ | |\n| fps | 944 |\n| iterations | 131 |\n| time_elapsed | 554 |\n| total_timesteps | 524000 |\n| train/ | |\n| approx_kl | 0.00021337035 |\n| clip_fraction | 0 |\n| clip_range | 0.2 |\n| entropy_loss | -1.62 |\n| explained_variance | 0.118 |\n| learning_rate | 0.0003 |\n| loss | 35.5 |\n| n_updates | 260 |\n| policy_gradient_loss | -7.44e-05 |\n| value_loss | 66.9 |\n| train_metrics/ | |\n| action_queue_updates_success | 130 |\n| action_queue_updates_total | 144 |\n| ice_dug | 85 |\n| water_produced | 13 |\n---------------------------------------------------\n--------------------------------------------------\n| rollout/ | |\n| ep_len_mean | 200 |\n| ep_rew_mean | 26.6 |\n| time/ | |\n| fps | 944 |\n| iterations | 132 |\n| time_elapsed | 559 |\n| total_timesteps | 528000 |\n| train/ | |\n| approx_kl | 2.789703e-05 |\n| clip_fraction | 0 |\n| clip_range | 0.2 |\n| entropy_loss | -1.61 |\n| explained_variance | 0.151 |\n| learning_rate | 0.0003 |\n| loss | 14.3 |\n| n_updates | 262 |\n| policy_gradient_loss | -1.9e-05 |\n| value_loss | 30.7 |\n| train_metrics/ | |\n| action_queue_updates_success | 136 |\n| action_queue_updates_total | 146 |\n| ice_dug | 154 |\n| water_produced | 33.5 
|\n--------------------------------------------------\n---------------------------------------------------\n| rollout/ | |\n| ep_len_mean | 200 |\n| ep_rew_mean | 22.8 |\n| time/ | |\n| fps | 944 |\n| iterations | 133 |\n| time_elapsed | 563 |\n| total_timesteps | 532000 |\n| train/ | |\n| approx_kl | 4.4058357e-05 |\n| clip_fraction | 0 |\n| clip_range | 0.2 |\n| entropy_loss | -1.57 |\n| explained_variance | 0.109 |\n| learning_rate | 0.0003 |\n| loss | 38.5 |\n| n_updates | 264 |\n| policy_gradient_loss | -4e-05 |\n| value_loss | 76 |\n| train_metrics/ | |\n| action_queue_updates_success | 135 |\n| action_queue_updates_total | 154 |\n| ice_dug | 71 |\n| water_produced | 6.75 |\n---------------------------------------------------\n---------------------------------------------------\n| rollout/ | |\n| ep_len_mean | 200 |\n| ep_rew_mean | 28.2 |\n| time/ | |\n| fps | 944 |\n| iterations | 134 |\n| time_elapsed | 567 |\n| total_timesteps | 536000 |\n| train/ | |\n| approx_kl | 0.00013034629 |\n| clip_fraction | 0 |\n| clip_range | 0.2 |\n| entropy_loss | -1.68 |\n| explained_variance | 0.242 |\n| learning_rate | 0.0003 |\n| loss | 6.84 |\n| n_updates | 266 |\n| policy_gradient_loss | -2.43e-05 |\n| value_loss | 12.1 |\n| train_metrics/ | |\n| action_queue_updates_success | 134 |\n| action_queue_updates_total | 153 |\n| ice_dug | 223 |\n| water_produced | 42.2 |\n---------------------------------------------------\n---------------------------------------------------\n| rollout/ | |\n| ep_len_mean | 200 |\n| ep_rew_mean | 21.5 |\n| time/ | |\n| fps | 944 |\n| iterations | 135 |\n| time_elapsed | 571 |\n| total_timesteps | 540000 |\n| train/ | |\n| approx_kl | 0.00014052202 |\n| clip_fraction | 0 |\n| clip_range | 0.2 |\n| entropy_loss | -1.72 |\n| explained_variance | 0.17 |\n| learning_rate | 0.0003 |\n| loss | 49.4 |\n| n_updates | 268 |\n| policy_gradient_loss | 0.000323 |\n| value_loss | 97.1 |\n| train_metrics/ | |\n| action_queue_updates_success | 132 |\n| action_queue_updates_total | 151 |\n| ice_dug | 27 |\n| water_produced | 6.5 |\n---------------------------------------------------\n--------------------------------------------------\n| rollout/ | |\n| ep_len_mean | 200 |\n| ep_rew_mean | 21.5 |\n| time/ | |\n| fps | 945 |\n| iterations | 136 |\n| time_elapsed | 575 |\n| total_timesteps | 544000 |\n| train/ | |\n| approx_kl | 9.267153e-05 |\n| clip_fraction | 0 |\n| clip_range | 0.2 |\n| entropy_loss | -1.67 |\n| explained_variance | 0.224 |\n| learning_rate | 0.0003 |\n| loss | 6.89 |\n| n_updates | 270 |\n| policy_gradient_loss | 6.66e-05 |\n| value_loss | 17.9 |\n| train_metrics/ | |\n| action_queue_updates_success | 136 |\n| action_queue_updates_total | 148 |\n| ice_dug | 98 |\n| water_produced | 13 |\n--------------------------------------------------\n---------------------------------------------------\n| rollout/ | |\n| ep_len_mean | 200 |\n| ep_rew_mean | 15.8 |\n| time/ | |\n| fps | 945 |\n| iterations | 137 |\n| time_elapsed | 579 |\n| total_timesteps | 548000 |\n| train/ | |\n| approx_kl | 0.00077834737 |\n| clip_fraction | 0.000125 |\n| clip_range | 0.2 |\n| entropy_loss | -1.66 |\n| explained_variance | 0.203 |\n| learning_rate | 0.0003 |\n| loss | 14.7 |\n| n_updates | 272 |\n| policy_gradient_loss | -0.000113 |\n| value_loss | 30.4 |\n| train_metrics/ | |\n| action_queue_updates_success | 138 |\n| action_queue_updates_total | 149 |\n| ice_dug | 31 |\n| water_produced | 5.75 
|\n---------------------------------------------------\n--------------------------------------------------\n| rollout/ | |\n| ep_len_mean | 200 |\n| ep_rew_mean | 15.7 |\n| time/ | |\n| fps | 945 |\n| iterations | 138 |\n| time_elapsed | 583 |\n| total_timesteps | 552000 |\n| train/ | |\n| approx_kl | 0.0006762225 |\n| clip_fraction | 0 |\n| clip_range | 0.2 |\n| entropy_loss | -1.68 |\n| explained_variance | 0.311 |\n| learning_rate | 0.0003 |\n| loss | 6.78 |\n| n_updates | 274 |\n| policy_gradient_loss | -6.98e-05 |\n| value_loss | 11.1 |\n| train_metrics/ | |\n| action_queue_updates_success | 135 |\n| action_queue_updates_total | 159 |\n| ice_dug | 57 |\n| water_produced | 6.75 |\n--------------------------------------------------\n--------------------------------------------------\n| rollout/ | |\n| ep_len_mean | 200 |\n| ep_rew_mean | 10.1 |\n| time/ | |\n| fps | 946 |\n| iterations | 139 |\n| time_elapsed | 587 |\n| total_timesteps | 556000 |\n| train/ | |\n| approx_kl | 0.0012264508 |\n| clip_fraction | 0 |\n| clip_range | 0.2 |\n| entropy_loss | -1.88 |\n| explained_variance | 0.308 |\n| learning_rate | 0.0003 |\n| loss | 8.9 |\n| n_updates | 276 |\n| policy_gradient_loss | -0.000155 |\n| value_loss | 13.1 |\n| train_metrics/ | |\n| action_queue_updates_success | 131 |\n| action_queue_updates_total | 153 |\n| ice_dug | 91 |\n| water_produced | 15.5 |\n--------------------------------------------------\n--------------------------------------------------\n| rollout/ | |\n| ep_len_mean | 200 |\n| ep_rew_mean | 14 |\n| time/ | |\n| fps | 946 |\n| iterations | 140 |\n| time_elapsed | 591 |\n| total_timesteps | 560000 |\n| train/ | |\n| approx_kl | 7.645832e-05 |\n| clip_fraction | 0 |\n| clip_range | 0.2 |\n| entropy_loss | -1.85 |\n| explained_variance | 0.319 |\n| learning_rate | 0.0003 |\n| loss | 12.5 |\n| n_updates | 278 |\n| policy_gradient_loss | 4.77e-05 |\n| value_loss | 28.9 |\n| train_metrics/ | |\n| action_queue_updates_success | 143 |\n| action_queue_updates_total | 154 |\n| ice_dug | 151 |\n| water_produced | 24.5 |\n--------------------------------------------------\n---------------------------------------------------\n| rollout/ | |\n| ep_len_mean | 200 |\n| ep_rew_mean | 15.3 |\n| time/ | |\n| fps | 946 |\n| iterations | 141 |\n| time_elapsed | 595 |\n| total_timesteps | 564000 |\n| train/ | |\n| approx_kl | 0.00021975087 |\n| clip_fraction | 0 |\n| clip_range | 0.2 |\n| entropy_loss | -1.73 |\n| explained_variance | 0.214 |\n| learning_rate | 0.0003 |\n| loss | 15.2 |\n| n_updates | 280 |\n| policy_gradient_loss | -0.000121 |\n| value_loss | 39.6 |\n| train_metrics/ | |\n| action_queue_updates_success | 138 |\n| action_queue_updates_total | 150 |\n| ice_dug | 104 |\n| water_produced | 19.8 |\n---------------------------------------------------\n---------------------------------------------------\n| rollout/ | |\n| ep_len_mean | 200 |\n| ep_rew_mean | 17.6 |\n| time/ | |\n| fps | 946 |\n| iterations | 142 |\n| time_elapsed | 599 |\n| total_timesteps | 568000 |\n| train/ | |\n| approx_kl | 0.00017151487 |\n| clip_fraction | 0 |\n| clip_range | 0.2 |\n| entropy_loss | -1.72 |\n| explained_variance | 0.261 |\n| learning_rate | 0.0003 |\n| loss | 26.1 |\n| n_updates | 282 |\n| policy_gradient_loss | -0.000131 |\n| value_loss | 42.6 |\n| train_metrics/ | |\n| action_queue_updates_success | 133 |\n| action_queue_updates_total | 151 |\n| ice_dug | 116 |\n| water_produced | 16.2 
|\n---------------------------------------------------\n---------------------------------------------------\n| rollout/ | |\n| ep_len_mean | 200 |\n| ep_rew_mean | 19.1 |\n| time/ | |\n| fps | 947 |\n| iterations | 143 |\n| time_elapsed | 603 |\n| total_timesteps | 572000 |\n| train/ | |\n| approx_kl | 0.00019878987 |\n| clip_fraction | 0 |\n| clip_range | 0.2 |\n| entropy_loss | -1.78 |\n| explained_variance | 0.23 |\n| learning_rate | 0.0003 |\n| loss | 19.2 |\n| n_updates | 284 |\n| policy_gradient_loss | -0.000131 |\n| value_loss | 35.5 |\n| train_metrics/ | |\n| action_queue_updates_success | 135 |\n| action_queue_updates_total | 154 |\n| ice_dug | 104 |\n| water_produced | 14 |\n---------------------------------------------------\nEval num_timesteps=576000, episode_reward=0.00 +/- 0.00\nEpisode length: 301.00 +/- 0.00\n---------------------------------------------------\n| eval/ | |\n| mean_ep_length | 301 |\n| mean_reward | 0 |\n| time/ | |\n| total_timesteps | 576000 |\n| train/ | |\n| approx_kl | 0.00042357476 |\n| clip_fraction | 0 |\n| clip_range | 0.2 |\n| entropy_loss | -1.77 |\n| explained_variance | 0.296 |\n| learning_rate | 0.0003 |\n| loss | 13.4 |\n| n_updates | 286 |\n| policy_gradient_loss | -0.000252 |\n| value_loss | 31.2 |\n| train_metrics/ | |\n| action_queue_updates_success | 128 |\n| action_queue_updates_total | 152 |\n| ice_dug | 94 |\n| water_produced | 8.5 |\n---------------------------------------------------\n---------------------------------\n| rollout/ | |\n| ep_len_mean | 200 |\n| ep_rew_mean | 17.7 |\n| time/ | |\n| fps | 944 |\n| iterations | 144 |\n| time_elapsed | 609 |\n| total_timesteps | 576000 |\n---------------------------------\n--------------------------------------------------\n| rollout/ | |\n| ep_len_mean | 200 |\n| ep_rew_mean | 15.6 |\n| time/ | |\n| fps | 944 |\n| iterations | 145 |\n| time_elapsed | 614 |\n| total_timesteps | 580000 |\n| train/ | |\n| approx_kl | 0.0014275485 |\n| clip_fraction | 0 |\n| clip_range | 0.2 |\n| entropy_loss | -1.85 |\n| explained_variance | 0.499 |\n| learning_rate | 0.0003 |\n| loss | 6.3 |\n| n_updates | 288 |\n| policy_gradient_loss | -0.000848 |\n| value_loss | 10.7 |\n| train_metrics/ | |\n| action_queue_updates_success | 133 |\n| action_queue_updates_total | 145 |\n| ice_dug | 91 |\n| water_produced | 14.5 |\n--------------------------------------------------\n--------------------------------------------------\n| rollout/ | |\n| ep_len_mean | 200 |\n| ep_rew_mean | 16 |\n| time/ | |\n| fps | 944 |\n| iterations | 146 |\n| time_elapsed | 618 |\n| total_timesteps | 584000 |\n| train/ | |\n| approx_kl | 0.0005172007 |\n| clip_fraction | 0 |\n| clip_range | 0.2 |\n| entropy_loss | -1.65 |\n| explained_variance | 0.301 |\n| learning_rate | 0.0003 |\n| loss | 18.3 |\n| n_updates | 290 |\n| policy_gradient_loss | -0.000417 |\n| value_loss | 28.7 |\n| train_metrics/ | |\n| action_queue_updates_success | 139 |\n| action_queue_updates_total | 155 |\n| ice_dug | 188 |\n| water_produced | 21 |\n--------------------------------------------------\n--------------------------------------------------\n| rollout/ | |\n| ep_len_mean | 200 |\n| ep_rew_mean | 18.6 |\n| time/ | |\n| fps | 944 |\n| iterations | 147 |\n| time_elapsed | 622 |\n| total_timesteps | 588000 |\n| train/ | |\n| approx_kl | 0.0008119313 |\n| clip_fraction | 0 |\n| clip_range | 0.2 |\n| entropy_loss | -1.71 |\n| explained_variance | 0.279 |\n| learning_rate | 0.0003 |\n| loss | 24.7 |\n| n_updates | 292 |\n| policy_gradient_loss | -0.000112 |\n| 
value_loss | 44.2 |\n| train_metrics/ | |\n| action_queue_updates_success | 131 |\n| action_queue_updates_total | 152 |\n| ice_dug | 130 |\n| water_produced | 28.8 |\n--------------------------------------------------\n--------------------------------------------------\n| rollout/ | |\n| ep_len_mean | 200 |\n| ep_rew_mean | 17.2 |\n| time/ | |\n| fps | 945 |\n| iterations | 148 |\n| time_elapsed | 626 |\n| total_timesteps | 592000 |\n| train/ | |\n| approx_kl | 0.0005473661 |\n| clip_fraction | 0 |\n| clip_range | 0.2 |\n| entropy_loss | -1.74 |\n| explained_variance | 0.247 |\n| learning_rate | 0.0003 |\n| loss | 31.2 |\n| n_updates | 294 |\n| policy_gradient_loss | -0.000323 |\n| value_loss | 56.6 |\n| train_metrics/ | |\n| action_queue_updates_success | 130 |\n| action_queue_updates_total | 151 |\n| ice_dug | 40 |\n| water_produced | 7.75 |\n--------------------------------------------------\n---------------------------------------------------\n| rollout/ | |\n| ep_len_mean | 200 |\n| ep_rew_mean | 19.7 |\n| time/ | |\n| fps | 945 |\n| iterations | 149 |\n| time_elapsed | 630 |\n| total_timesteps | 596000 |\n| train/ | |\n| approx_kl | 0.00036622933 |\n| clip_fraction | 0 |\n| clip_range | 0.2 |\n| entropy_loss | -1.72 |\n| explained_variance | 0.402 |\n| learning_rate | 0.0003 |\n| loss | 7.83 |\n| n_updates | 296 |\n| policy_gradient_loss | 0.000144 |\n| value_loss | 20 |\n| train_metrics/ | |\n| action_queue_updates_success | 136 |\n| action_queue_updates_total | 156 |\n| ice_dug | 136 |\n| water_produced | 20.5 |\n---------------------------------------------------\n--------------------------------------------------\n| rollout/ | |\n| ep_len_mean | 200 |\n| ep_rew_mean | 21 |\n| time/ | |\n| fps | 945 |\n| iterations | 150 |\n| time_elapsed | 634 |\n| total_timesteps | 600000 |\n| train/ | |\n| approx_kl | 0.0002547471 |\n| clip_fraction | 0 |\n| clip_range | 0.2 |\n| entropy_loss | -1.77 |\n| explained_variance | 0.301 |\n| learning_rate | 0.0003 |\n| loss | 20.6 |\n| n_updates | 298 |\n| policy_gradient_loss | 0.000133 |\n| value_loss | 50.2 |\n| train_metrics/ | |\n| action_queue_updates_success | 132 |\n| action_queue_updates_total | 154 |\n| ice_dug | 116 |\n| water_produced | 21 |\n--------------------------------------------------\n---------------------------------------------------\n| rollout/ | |\n| ep_len_mean | 200 |\n| ep_rew_mean | 21.7 |\n| time/ | |\n| fps | 946 |\n| iterations | 151 |\n| time_elapsed | 638 |\n| total_timesteps | 604000 |\n| train/ | |\n| approx_kl | 0.00018713754 |\n| clip_fraction | 0 |\n| clip_range | 0.2 |\n| entropy_loss | -1.78 |\n| explained_variance | 0.31 |\n| learning_rate | 0.0003 |\n| loss | 15.9 |\n| n_updates | 300 |\n| policy_gradient_loss | -5.62e-05 |\n| value_loss | 49.7 |\n| train_metrics/ | |\n| action_queue_updates_success | 128 |\n| action_queue_updates_total | 149 |\n| ice_dug | 121 |\n| water_produced | 25 |\n---------------------------------------------------\n---------------------------------------------------\n| rollout/ | |\n| ep_len_mean | 200 |\n| ep_rew_mean | 20.2 |\n| time/ | |\n| fps | 946 |\n| iterations | 152 |\n| time_elapsed | 642 |\n| total_timesteps | 608000 |\n| train/ | |\n| approx_kl | 0.00012084496 |\n| clip_fraction | 0 |\n| clip_range | 0.2 |\n| entropy_loss | -1.71 |\n| explained_variance | 0.288 |\n| learning_rate | 0.0003 |\n| loss | 44.4 |\n| n_updates | 302 |\n| policy_gradient_loss | -9.99e-05 |\n| value_loss | 68.1 |\n| train_metrics/ | |\n| action_queue_updates_success | 129 |\n| 
action_queue_updates_total | 145 |\n| ice_dug | 186 |\n| water_produced | 20.8 |\n---------------------------------------------------\n---------------------------------------------------\n| rollout/ | |\n| ep_len_mean | 200 |\n| ep_rew_mean | 21.9 |\n| time/ | |\n| fps | 946 |\n| iterations | 153 |\n| time_elapsed | 646 |\n| total_timesteps | 612000 |\n| train/ | |\n| approx_kl | 0.00012577753 |\n| clip_fraction | 0 |\n| clip_range | 0.2 |\n| entropy_loss | -1.65 |\n| explained_variance | 0.36 |\n| learning_rate | 0.0003 |\n| loss | 20.3 |\n| n_updates | 304 |\n| policy_gradient_loss | 1.96e-05 |\n| value_loss | 39.4 |\n| train_metrics/ | |\n| action_queue_updates_success | 134 |\n| action_queue_updates_total | 157 |\n| ice_dug | 146 |\n| water_produced | 15.2 |\n---------------------------------------------------\n---------------------------------------------------\n| rollout/ | |\n| ep_len_mean | 200 |\n| ep_rew_mean | 21.8 |\n| time/ | |\n| fps | 947 |\n| iterations | 154 |\n| time_elapsed | 650 |\n| total_timesteps | 616000 |\n| train/ | |\n| approx_kl | 0.00021348354 |\n| clip_fraction | 0 |\n| clip_range | 0.2 |\n| entropy_loss | -1.78 |\n| explained_variance | 0.4 |\n| learning_rate | 0.0003 |\n| loss | 14.2 |\n| n_updates | 306 |\n| policy_gradient_loss | 0.000114 |\n| value_loss | 29.8 |\n| train_metrics/ | |\n| action_queue_updates_success | 130 |\n| action_queue_updates_total | 148 |\n| ice_dug | 140 |\n| water_produced | 19.8 |\n---------------------------------------------------\n--------------------------------------------------\n| rollout/ | |\n| ep_len_mean | 200 |\n| ep_rew_mean | 20.4 |\n| time/ | |\n| fps | 947 |\n| iterations | 155 |\n| time_elapsed | 654 |\n| total_timesteps | 620000 |\n| train/ | |\n| approx_kl | 0.0011422962 |\n| clip_fraction | 0 |\n| clip_range | 0.2 |\n| entropy_loss | -1.71 |\n| explained_variance | 0.439 |\n| learning_rate | 0.0003 |\n| loss | 13.3 |\n| n_updates | 308 |\n| policy_gradient_loss | -0.000333 |\n| value_loss | 30.4 |\n| train_metrics/ | |\n| action_queue_updates_success | 130 |\n| action_queue_updates_total | 156 |\n| ice_dug | 86 |\n| water_produced | 14.5 |\n--------------------------------------------------\n--------------------------------------------------\n| rollout/ | |\n| ep_len_mean | 200 |\n| ep_rew_mean | 17.4 |\n| time/ | |\n| fps | 946 |\n| iterations | 156 |\n| time_elapsed | 658 |\n| total_timesteps | 624000 |\n| train/ | |\n| approx_kl | 0.0018914065 |\n| clip_fraction | 0 |\n| clip_range | 0.2 |\n| entropy_loss | -1.86 |\n| explained_variance | 0.511 |\n| learning_rate | 0.0003 |\n| loss | 14.6 |\n| n_updates | 310 |\n| policy_gradient_loss | 1.37e-05 |\n| value_loss | 25.4 |\n| train_metrics/ | |\n| action_queue_updates_success | 131 |\n| action_queue_updates_total | 157 |\n| ice_dug | 51 |\n| water_produced | 10.8 |\n--------------------------------------------------\n---------------------------------------------------\n| rollout/ | |\n| ep_len_mean | 200 |\n| ep_rew_mean | 17.9 |\n| time/ | |\n| fps | 947 |\n| iterations | 157 |\n| time_elapsed | 662 |\n| total_timesteps | 628000 |\n| train/ | |\n| approx_kl | 0.00040925355 |\n| clip_fraction | 0 |\n| clip_range | 0.2 |\n| entropy_loss | -1.92 |\n| explained_variance | 0.603 |\n| learning_rate | 0.0003 |\n| loss | 6.77 |\n| n_updates | 312 |\n| policy_gradient_loss | -0.000366 |\n| value_loss | 15.7 |\n| train_metrics/ | |\n| action_queue_updates_success | 136 |\n| action_queue_updates_total | 153 |\n| ice_dug | 132 |\n| water_produced | 23.8 
|\n---------------------------------------------------\n--------------------------------------------------\n| rollout/ | |\n| ep_len_mean | 200 |\n| ep_rew_mean | 18.6 |\n| time/ | |\n| fps | 947 |\n| iterations | 158 |\n| time_elapsed | 667 |\n| total_timesteps | 632000 |\n| train/ | |\n| approx_kl | 0.0014935824 |\n| clip_fraction | 0 |\n| clip_range | 0.2 |\n| entropy_loss | -1.73 |\n| explained_variance | 0.427 |\n| learning_rate | 0.0003 |\n| loss | 21.4 |\n| n_updates | 314 |\n| policy_gradient_loss | -0.000123 |\n| value_loss | 50 |\n| train_metrics/ | |\n| action_queue_updates_success | 134 |\n| action_queue_updates_total | 151 |\n| ice_dug | 114 |\n| water_produced | 19 |\n--------------------------------------------------\n---------------------------------------------------\n| rollout/ | |\n| ep_len_mean | 200 |\n| ep_rew_mean | 15.2 |\n| time/ | |\n| fps | 947 |\n| iterations | 159 |\n| time_elapsed | 671 |\n| total_timesteps | 636000 |\n| train/ | |\n| approx_kl | 5.0288927e-05 |\n| clip_fraction | 0 |\n| clip_range | 0.2 |\n| entropy_loss | -1.64 |\n| explained_variance | 0.321 |\n| learning_rate | 0.0003 |\n| loss | 20.3 |\n| n_updates | 316 |\n| policy_gradient_loss | 1.61e-05 |\n| value_loss | 37.5 |\n| train_metrics/ | |\n| action_queue_updates_success | 128 |\n| action_queue_updates_total | 140 |\n| ice_dug | 23 |\n| water_produced | 4 |\n---------------------------------------------------\n---------------------------------------------------\n| rollout/ | |\n| ep_len_mean | 200 |\n| ep_rew_mean | 15.5 |\n| time/ | |\n| fps | 947 |\n| iterations | 160 |\n| time_elapsed | 675 |\n| total_timesteps | 640000 |\n| train/ | |\n| approx_kl | 0.00052962865 |\n| clip_fraction | 0 |\n| clip_range | 0.2 |\n| entropy_loss | -1.59 |\n| explained_variance | 0.564 |\n| learning_rate | 0.0003 |\n| loss | 2.88 |\n| n_updates | 318 |\n| policy_gradient_loss | -0.00019 |\n| value_loss | 10.1 |\n| train_metrics/ | |\n| action_queue_updates_success | 131 |\n| action_queue_updates_total | 147 |\n| ice_dug | 88 |\n| water_produced | 15.8 |\n---------------------------------------------------\n---------------------------------------------------\n| rollout/ | |\n| ep_len_mean | 200 |\n| ep_rew_mean | 15.9 |\n| time/ | |\n| fps | 947 |\n| iterations | 161 |\n| time_elapsed | 679 |\n| total_timesteps | 644000 |\n| train/ | |\n| approx_kl | 0.00015061807 |\n| clip_fraction | 0 |\n| clip_range | 0.2 |\n| entropy_loss | -1.69 |\n| explained_variance | 0.367 |\n| learning_rate | 0.0003 |\n| loss | 20.5 |\n| n_updates | 320 |\n| policy_gradient_loss | -0.000371 |\n| value_loss | 36.3 |\n| train_metrics/ | |\n| action_queue_updates_success | 131 |\n| action_queue_updates_total | 143 |\n| ice_dug | 76 |\n| water_produced | 12.5 |\n---------------------------------------------------\n---------------------------------------------------\n| rollout/ | |\n| ep_len_mean | 200 |\n| ep_rew_mean | 14.2 |\n| time/ | |\n| fps | 947 |\n| iterations | 162 |\n| time_elapsed | 683 |\n| total_timesteps | 648000 |\n| train/ | |\n| approx_kl | 0.00020718158 |\n| clip_fraction | 0 |\n| clip_range | 0.2 |\n| entropy_loss | -1.58 |\n| explained_variance | 0.339 |\n| learning_rate | 0.0003 |\n| loss | 12.1 |\n| n_updates | 322 |\n| policy_gradient_loss | -0.000387 |\n| value_loss | 31 |\n| train_metrics/ | |\n| action_queue_updates_success | 123 |\n| action_queue_updates_total | 144 |\n| ice_dug | 75 |\n| water_produced | 15.7 
|\n---------------------------------------------------\n---------------------------------------------------\n| rollout/ | |\n| ep_len_mean | 200 |\n| ep_rew_mean | 11.7 |\n| time/ | |\n| fps | 947 |\n| iterations | 163 |\n| time_elapsed | 687 |\n| total_timesteps | 652000 |\n| train/ | |\n| approx_kl | 0.00020122866 |\n| clip_fraction | 0 |\n| clip_range | 0.2 |\n| entropy_loss | -1.69 |\n| explained_variance | 0.43 |\n| learning_rate | 0.0003 |\n| loss | 17.4 |\n| n_updates | 324 |\n| policy_gradient_loss | -0.000158 |\n| value_loss | 31 |\n| train_metrics/ | |\n| action_queue_updates_success | 123 |\n| action_queue_updates_total | 139 |\n| ice_dug | 55 |\n| water_produced | 7.25 |\n---------------------------------------------------\n---------------------------------------------------\n| rollout/ | |\n| ep_len_mean | 200 |\n| ep_rew_mean | 15.7 |\n| time/ | |\n| fps | 948 |\n| iterations | 164 |\n| time_elapsed | 691 |\n| total_timesteps | 656000 |\n| train/ | |\n| approx_kl | 0.00018896077 |\n| clip_fraction | 0 |\n| clip_range | 0.2 |\n| entropy_loss | -1.63 |\n| explained_variance | 0.513 |\n| learning_rate | 0.0003 |\n| loss | 5.26 |\n| n_updates | 326 |\n| policy_gradient_loss | 2.01e-05 |\n| value_loss | 10.5 |\n| train_metrics/ | |\n| action_queue_updates_success | 130 |\n| action_queue_updates_total | 146 |\n| ice_dug | 159 |\n| water_produced | 22.5 |\n---------------------------------------------------\n---------------------------------------------------\n| rollout/ | |\n| ep_len_mean | 200 |\n| ep_rew_mean | 16.6 |\n| time/ | |\n| fps | 948 |\n| iterations | 165 |\n| time_elapsed | 695 |\n| total_timesteps | 660000 |\n| train/ | |\n| approx_kl | 0.00017486085 |\n| clip_fraction | 0 |\n| clip_range | 0.2 |\n| entropy_loss | -1.67 |\n| explained_variance | 0.266 |\n| learning_rate | 0.0003 |\n| loss | 34.5 |\n| n_updates | 328 |\n| policy_gradient_loss | -0.000219 |\n| value_loss | 62.9 |\n| train_metrics/ | |\n| action_queue_updates_success | 125 |\n| action_queue_updates_total | 141 |\n| ice_dug | 85 |\n| water_produced | 20.2 |\n---------------------------------------------------\n-------------------------------------------------\n| rollout/ | |\n| ep_len_mean | 200 |\n| ep_rew_mean | 17 |\n| time/ | |\n| fps | 949 |\n| iterations | 166 |\n| time_elapsed | 699 |\n| total_timesteps | 664000 |\n| train/ | |\n| approx_kl | 0.001538135 |\n| clip_fraction | 0 |\n| clip_range | 0.2 |\n| entropy_loss | -1.6 |\n| explained_variance | 0.223 |\n| learning_rate | 0.0003 |\n| loss | 33.3 |\n| n_updates | 330 |\n| policy_gradient_loss | -0.000144 |\n| value_loss | 55.4 |\n| train_metrics/ | |\n| action_queue_updates_success | 130 |\n| action_queue_updates_total | 141 |\n| ice_dug | 81 |\n| water_produced | 14.8 |\n-------------------------------------------------\n---------------------------------------------------\n| rollout/ | |\n| ep_len_mean | 200 |\n| ep_rew_mean | 18.1 |\n| time/ | |\n| fps | 949 |\n| iterations | 167 |\n| time_elapsed | 703 |\n| total_timesteps | 668000 |\n| train/ | |\n| approx_kl | 0.00019238265 |\n| clip_fraction | 0 |\n| clip_range | 0.2 |\n| entropy_loss | -1.55 |\n| explained_variance | 0.306 |\n| learning_rate | 0.0003 |\n| loss | 20.8 |\n| n_updates | 332 |\n| policy_gradient_loss | -0.000152 |\n| value_loss | 33.4 |\n| train_metrics/ | |\n| action_queue_updates_success | 127 |\n| action_queue_updates_total | 142 |\n| ice_dug | 118 |\n| water_produced | 21 |\n---------------------------------------------------\nEval num_timesteps=672000, episode_reward=5.24 
+/- 8.85\nEpisode length: 305.00 +/- 8.00\n---------------------------------------------------\n| eval/ | |\n| mean_ep_length | 305 |\n| mean_reward | 5.24 |\n| time/ | |\n| total_timesteps | 672000 |\n| train/ | |\n| approx_kl | 0.00022197426 |\n| clip_fraction | 0 |\n| clip_range | 0.2 |\n| entropy_loss | -1.64 |\n| explained_variance | 0.314 |\n| learning_rate | 0.0003 |\n| loss | 21.6 |\n| n_updates | 334 |\n| policy_gradient_loss | -0.000167 |\n| value_loss | 38 |\n| train_metrics/ | |\n| action_queue_updates_success | 121 |\n| action_queue_updates_total | 135 |\n| ice_dug | 82 |\n| water_produced | 13.5 |\n---------------------------------------------------\n---------------------------------\n| rollout/ | |\n| ep_len_mean | 200 |\n| ep_rew_mean | 19.5 |\n| time/ | |\n| fps | 946 |\n| iterations | 168 |\n| time_elapsed | 710 |\n| total_timesteps | 672000 |\n---------------------------------\n--------------------------------------------------\n| rollout/ | |\n| ep_len_mean | 200 |\n| ep_rew_mean | 19.8 |\n| time/ | |\n| fps | 946 |\n| iterations | 169 |\n| time_elapsed | 714 |\n| total_timesteps | 676000 |\n| train/ | |\n| approx_kl | 0.0006194165 |\n| clip_fraction | 0 |\n| clip_range | 0.2 |\n| entropy_loss | -1.58 |\n| explained_variance | 0.306 |\n| learning_rate | 0.0003 |\n| loss | 15.8 |\n| n_updates | 336 |\n| policy_gradient_loss | -2.08e-05 |\n| value_loss | 30.4 |\n| train_metrics/ | |\n| action_queue_updates_success | 135 |\n| action_queue_updates_total | 145 |\n| ice_dug | 122 |\n| water_produced | 24.5 |\n--------------------------------------------------\n---------------------------------------------------\n| rollout/ | |\n| ep_len_mean | 200 |\n| ep_rew_mean | 19.6 |\n| time/ | |\n| fps | 946 |\n| iterations | 170 |\n| time_elapsed | 718 |\n| total_timesteps | 680000 |\n| train/ | |\n| approx_kl | 0.00012223788 |\n| clip_fraction | 0 |\n| clip_range | 0.2 |\n| entropy_loss | -1.6 |\n| explained_variance | 0.286 |\n| learning_rate | 0.0003 |\n| loss | 26.5 |\n| n_updates | 338 |\n| policy_gradient_loss | -0.000216 |\n| value_loss | 45.9 |\n| train_metrics/ | |\n| action_queue_updates_success | 133 |\n| action_queue_updates_total | 141 |\n| ice_dug | 108 |\n| water_produced | 19.3 |\n---------------------------------------------------\n----------------------------------------------------\n| rollout/ | |\n| ep_len_mean | 200 |\n| ep_rew_mean | 25.4 |\n| time/ | |\n| fps | 946 |\n| iterations | 171 |\n| time_elapsed | 722 |\n| total_timesteps | 684000 |\n| train/ | |\n| approx_kl | 0.000103289916 |\n| clip_fraction | 0 |\n| clip_range | 0.2 |\n| entropy_loss | -1.56 |\n| explained_variance | 0.256 |\n| learning_rate | 0.0003 |\n| loss | 18.5 |\n| n_updates | 340 |\n| policy_gradient_loss | -0.000325 |\n| value_loss | 36.8 |\n| train_metrics/ | |\n| action_queue_updates_success | 136 |\n| action_queue_updates_total | 147 |\n| ice_dug | 207 |\n| water_produced | 42.2 |\n----------------------------------------------------\n---------------------------------------------------\n| rollout/ | |\n| ep_len_mean | 200 |\n| ep_rew_mean | 23.6 |\n| time/ | |\n| fps | 946 |\n| iterations | 172 |\n| time_elapsed | 726 |\n| total_timesteps | 688000 |\n| train/ | |\n| approx_kl | 0.00017056346 |\n| clip_fraction | 0 |\n| clip_range | 0.2 |\n| entropy_loss | -1.59 |\n| explained_variance | 0.269 |\n| learning_rate | 0.0003 |\n| loss | 41.8 |\n| n_updates | 342 |\n| policy_gradient_loss | 7.64e-05 |\n| value_loss | 81.6 |\n| train_metrics/ | |\n| action_queue_updates_success | 130 |\n| 
action_queue_updates_total | 138 |\n| ice_dug | 62 |\n| water_produced | 12.8 |\n---------------------------------------------------\n---------------------------------------------------\n| rollout/ | |\n| ep_len_mean | 200 |\n| ep_rew_mean | 21.5 |\n| time/ | |\n| fps | 947 |\n| iterations | 173 |\n| time_elapsed | 730 |\n| total_timesteps | 692000 |\n| train/ | |\n| approx_kl | 2.4299025e-05 |\n| clip_fraction | 0 |\n| clip_range | 0.2 |\n| entropy_loss | -1.48 |\n| explained_variance | 0.315 |\n| learning_rate | 0.0003 |\n| loss | 15.8 |\n| n_updates | 344 |\n| policy_gradient_loss | 3.53e-05 |\n| value_loss | 27 |\n| train_metrics/ | |\n| action_queue_updates_success | 135 |\n| action_queue_updates_total | 149 |\n| ice_dug | 25 |\n| water_produced | 3.75 |\n---------------------------------------------------\n--------------------------------------------------\n| rollout/ | |\n| ep_len_mean | 200 |\n| ep_rew_mean | 23.4 |\n| time/ | |\n| fps | 947 |\n| iterations | 174 |\n| time_elapsed | 734 |\n| total_timesteps | 696000 |\n| train/ | |\n| approx_kl | 0.0011129864 |\n| clip_fraction | 0 |\n| clip_range | 0.2 |\n| entropy_loss | -1.7 |\n| explained_variance | 0.577 |\n| learning_rate | 0.0003 |\n| loss | 2.59 |\n| n_updates | 346 |\n| policy_gradient_loss | -0.000458 |\n| value_loss | 6.08 |\n| train_metrics/ | |\n| action_queue_updates_success | 136 |\n| action_queue_updates_total | 150 |\n| ice_dug | 192 |\n| water_produced | 33.2 |\n--------------------------------------------------\n--------------------------------------------------\n| rollout/ | |\n| ep_len_mean | 200 |\n| ep_rew_mean | 22 |\n| time/ | |\n| fps | 947 |\n| iterations | 175 |\n| time_elapsed | 738 |\n| total_timesteps | 700000 |\n| train/ | |\n| approx_kl | 0.0003357281 |\n| clip_fraction | 0 |\n| clip_range | 0.2 |\n| entropy_loss | -1.7 |\n| explained_variance | 0.276 |\n| learning_rate | 0.0003 |\n| loss | 22.8 |\n| n_updates | 348 |\n| policy_gradient_loss | -0.000153 |\n| value_loss | 72 |\n| train_metrics/ | |\n| action_queue_updates_success | 132 |\n| action_queue_updates_total | 154 |\n| ice_dug | 109 |\n| water_produced | 12 |\n--------------------------------------------------\n---------------------------------------------------\n| rollout/ | |\n| ep_len_mean | 200 |\n| ep_rew_mean | 15.2 |\n| time/ | |\n| fps | 947 |\n| iterations | 176 |\n| time_elapsed | 742 |\n| total_timesteps | 704000 |\n| train/ | |\n| approx_kl | 0.00026154923 |\n| clip_fraction | 0 |\n| clip_range | 0.2 |\n| entropy_loss | -1.8 |\n| explained_variance | 0.318 |\n| learning_rate | 0.0003 |\n| loss | 11.9 |\n| n_updates | 350 |\n| policy_gradient_loss | -0.000216 |\n| value_loss | 28.6 |\n| train_metrics/ | |\n| action_queue_updates_success | 131 |\n| action_queue_updates_total | 149 |\n| ice_dug | 49 |\n| water_produced | 9.75 |\n---------------------------------------------------\n---------------------------------------------------\n| rollout/ | |\n| ep_len_mean | 200 |\n| ep_rew_mean | 15 |\n| time/ | |\n| fps | 947 |\n| iterations | 177 |\n| time_elapsed | 746 |\n| total_timesteps | 708000 |\n| train/ | |\n| approx_kl | 0.00026825207 |\n| clip_fraction | 0 |\n| clip_range | 0.2 |\n| entropy_loss | -1.74 |\n| explained_variance | 0.428 |\n| learning_rate | 0.0003 |\n| loss | 7.13 |\n| n_updates | 352 |\n| policy_gradient_loss | 0.00012 |\n| value_loss | 12.8 |\n| train_metrics/ | |\n| action_queue_updates_success | 138 |\n| action_queue_updates_total | 155 |\n| ice_dug | 70 |\n| water_produced | 12 
|\n---------------------------------------------------\n---------------------------------------------------\n| rollout/ | |\n| ep_len_mean | 200 |\n| ep_rew_mean | 18.8 |\n| time/ | |\n| fps | 948 |\n| iterations | 178 |\n| time_elapsed | 750 |\n| total_timesteps | 712000 |\n| train/ | |\n| approx_kl | 0.00066508556 |\n| clip_fraction | 0.00137 |\n| clip_range | 0.2 |\n| entropy_loss | -1.76 |\n| explained_variance | 0.359 |\n| learning_rate | 0.0003 |\n| loss | 14.6 |\n| n_updates | 354 |\n| policy_gradient_loss | 0.000122 |\n| value_loss | 22.6 |\n| train_metrics/ | |\n| action_queue_updates_success | 135 |\n| action_queue_updates_total | 158 |\n| ice_dug | 117 |\n| water_produced | 21.7 |\n---------------------------------------------------\n---------------------------------------------------\n| rollout/ | |\n| ep_len_mean | 200 |\n| ep_rew_mean | 16.3 |\n| time/ | |\n| fps | 948 |\n| iterations | 179 |\n| time_elapsed | 754 |\n| total_timesteps | 716000 |\n| train/ | |\n| approx_kl | 0.00093234255 |\n| clip_fraction | 0 |\n| clip_range | 0.2 |\n| entropy_loss | -1.88 |\n| explained_variance | 0.368 |\n| learning_rate | 0.0003 |\n| loss | 20.5 |\n| n_updates | 356 |\n| policy_gradient_loss | 0.000118 |\n| value_loss | 42.4 |\n| train_metrics/ | |\n| action_queue_updates_success | 136 |\n| action_queue_updates_total | 154 |\n| ice_dug | 109 |\n| water_produced | 21.5 |\n---------------------------------------------------\n--------------------------------------------------\n| rollout/ | |\n| ep_len_mean | 200 |\n| ep_rew_mean | 14.7 |\n| time/ | |\n| fps | 948 |\n| iterations | 180 |\n| time_elapsed | 758 |\n| total_timesteps | 720000 |\n| train/ | |\n| approx_kl | 0.0011250575 |\n| clip_fraction | 0.00075 |\n| clip_range | 0.2 |\n| entropy_loss | -1.79 |\n| explained_variance | 0.31 |\n| learning_rate | 0.0003 |\n| loss | 22 |\n| n_updates | 358 |\n| policy_gradient_loss | -0.000466 |\n| value_loss | 54.6 |\n| train_metrics/ | |\n| action_queue_updates_success | 133 |\n| action_queue_updates_total | 157 |\n| ice_dug | 28 |\n| water_produced | 5 |\n--------------------------------------------------\n--------------------------------------------------\n| rollout/ | |\n| ep_len_mean | 200 |\n| ep_rew_mean | 19.4 |\n| time/ | |\n| fps | 949 |\n| iterations | 181 |\n| time_elapsed | 762 |\n| total_timesteps | 724000 |\n| train/ | |\n| approx_kl | 0.0038592597 |\n| clip_fraction | 0.0161 |\n| clip_range | 0.2 |\n| entropy_loss | -1.93 |\n| explained_variance | 0.741 |\n| learning_rate | 0.0003 |\n| loss | 2.37 |\n| n_updates | 360 |\n| policy_gradient_loss | -0.000127 |\n| value_loss | 5.41 |\n| train_metrics/ | |\n| action_queue_updates_success | 147 |\n| action_queue_updates_total | 152 |\n| ice_dug | 152 |\n| water_produced | 31.8 |\n--------------------------------------------------\n---------------------------------------------------\n| rollout/ | |\n| ep_len_mean | 200 |\n| ep_rew_mean | 20.3 |\n| time/ | |\n| fps | 949 |\n| iterations | 182 |\n| time_elapsed | 766 |\n| total_timesteps | 728000 |\n| train/ | |\n| approx_kl | 0.00011359898 |\n| clip_fraction | 0 |\n| clip_range | 0.2 |\n| entropy_loss | -1.68 |\n| explained_variance | 0.308 |\n| learning_rate | 0.0003 |\n| loss | 37.3 |\n| n_updates | 362 |\n| policy_gradient_loss | -4.03e-05 |\n| value_loss | 59.5 |\n| train_metrics/ | |\n| action_queue_updates_success | 147 |\n| action_queue_updates_total | 162 |\n| ice_dug | 128 |\n| water_produced | 16.2 
|\n---------------------------------------------------\n---------------------------------------------------\n| rollout/ | |\n| ep_len_mean | 200 |\n| ep_rew_mean | 18.1 |\n| time/ | |\n| fps | 949 |\n| iterations | 183 |\n| time_elapsed | 771 |\n| total_timesteps | 732000 |\n| train/ | |\n| approx_kl | 6.9310365e-05 |\n| clip_fraction | 0 |\n| clip_range | 0.2 |\n| entropy_loss | -1.83 |\n| explained_variance | 0.397 |\n| learning_rate | 0.0003 |\n| loss | 16.5 |\n| n_updates | 364 |\n| policy_gradient_loss | 7.25e-06 |\n| value_loss | 33.1 |\n| train_metrics/ | |\n| action_queue_updates_success | 143 |\n| action_queue_updates_total | 156 |\n| ice_dug | 70 |\n| water_produced | 11 |\n---------------------------------------------------\n--------------------------------------------------\n| rollout/ | |\n| ep_len_mean | 200 |\n| ep_rew_mean | 21.9 |\n| time/ | |\n| fps | 949 |\n| iterations | 184 |\n| time_elapsed | 774 |\n| total_timesteps | 736000 |\n| train/ | |\n| approx_kl | 0.0009790689 |\n| clip_fraction | 0 |\n| clip_range | 0.2 |\n| entropy_loss | -1.8 |\n| explained_variance | 0.48 |\n| learning_rate | 0.0003 |\n| loss | 10.1 |\n| n_updates | 366 |\n| policy_gradient_loss | 0.000889 |\n| value_loss | 18.9 |\n| train_metrics/ | |\n| action_queue_updates_success | 141 |\n| action_queue_updates_total | 161 |\n| ice_dug | 238 |\n| water_produced | 39.5 |\n--------------------------------------------------\n--------------------------------------------------\n| rollout/ | |\n| ep_len_mean | 200 |\n| ep_rew_mean | 23.2 |\n| time/ | |\n| fps | 949 |\n| iterations | 185 |\n| time_elapsed | 779 |\n| total_timesteps | 740000 |\n| train/ | |\n| approx_kl | 0.0015198134 |\n| clip_fraction | 0 |\n| clip_range | 0.2 |\n| entropy_loss | -1.88 |\n| explained_variance | 0.389 |\n| learning_rate | 0.0003 |\n| loss | 43.4 |\n| n_updates | 368 |\n| policy_gradient_loss | -0.000326 |\n| value_loss | 72.3 |\n| train_metrics/ | |\n| action_queue_updates_success | 137 |\n| action_queue_updates_total | 160 |\n| ice_dug | 138 |\n| water_produced | 10.2 |\n--------------------------------------------------\n---------------------------------------------------\n| rollout/ | |\n| ep_len_mean | 200 |\n| ep_rew_mean | 19 |\n| time/ | |\n| fps | 949 |\n| iterations | 186 |\n| time_elapsed | 783 |\n| total_timesteps | 744000 |\n| train/ | |\n| approx_kl | 0.00016919918 |\n| clip_fraction | 0 |\n| clip_range | 0.2 |\n| entropy_loss | -1.91 |\n| explained_variance | 0.623 |\n| learning_rate | 0.0003 |\n| loss | 7.58 |\n| n_updates | 370 |\n| policy_gradient_loss | -0.000173 |\n| value_loss | 15.4 |\n| train_metrics/ | |\n| action_queue_updates_success | 137 |\n| action_queue_updates_total | 163 |\n| ice_dug | 68 |\n| water_produced | 11.5 |\n---------------------------------------------------\n--------------------------------------------------\n| rollout/ | |\n| ep_len_mean | 200 |\n| ep_rew_mean | 19.8 |\n| time/ | |\n| fps | 949 |\n| iterations | 187 |\n| time_elapsed | 787 |\n| total_timesteps | 748000 |\n| train/ | |\n| approx_kl | 0.0016052086 |\n| clip_fraction | 0 |\n| clip_range | 0.2 |\n| entropy_loss | -1.98 |\n| explained_variance | 0.625 |\n| learning_rate | 0.0003 |\n| loss | 10.4 |\n| n_updates | 372 |\n| policy_gradient_loss | -3.54e-05 |\n| value_loss | 18.5 |\n| train_metrics/ | |\n| action_queue_updates_success | 136 |\n| action_queue_updates_total | 154 |\n| ice_dug | 102 |\n| water_produced | 20.5 
|\n--------------------------------------------------\n-------------------------------------------------\n| rollout/ | |\n| ep_len_mean | 200 |\n| ep_rew_mean | 19.8 |\n| time/ | |\n| fps | 950 |\n| iterations | 188 |\n| time_elapsed | 791 |\n| total_timesteps | 752000 |\n| train/ | |\n| approx_kl | 0.000202163 |\n| clip_fraction | 0 |\n| clip_range | 0.2 |\n| entropy_loss | -1.83 |\n| explained_variance | 0.482 |\n| learning_rate | 0.0003 |\n| loss | 15.2 |\n| n_updates | 374 |\n| policy_gradient_loss | 0.000234 |\n| value_loss | 35.6 |\n| train_metrics/ | |\n| action_queue_updates_success | 138 |\n| action_queue_updates_total | 162 |\n| ice_dug | 79 |\n| water_produced | 10.8 |\n-------------------------------------------------\n--------------------------------------------------\n| rollout/ | |\n| ep_len_mean | 200 |\n| ep_rew_mean | 13.8 |\n| time/ | |\n| fps | 950 |\n| iterations | 189 |\n| time_elapsed | 795 |\n| total_timesteps | 756000 |\n| train/ | |\n| approx_kl | 0.0021645874 |\n| clip_fraction | 0.000375 |\n| clip_range | 0.2 |\n| entropy_loss | -1.96 |\n| explained_variance | 0.524 |\n| learning_rate | 0.0003 |\n| loss | 7.79 |\n| n_updates | 376 |\n| policy_gradient_loss | 3.79e-05 |\n| value_loss | 23.4 |\n| train_metrics/ | |\n| action_queue_updates_success | 137 |\n| action_queue_updates_total | 154 |\n| ice_dug | 66 |\n| water_produced | 11.5 |\n--------------------------------------------------\n---------------------------------------------------\n| rollout/ | |\n| ep_len_mean | 200 |\n| ep_rew_mean | 13.4 |\n| time/ | |\n| fps | 950 |\n| iterations | 190 |\n| time_elapsed | 799 |\n| total_timesteps | 760000 |\n| train/ | |\n| approx_kl | 0.00049861905 |\n| clip_fraction | 0 |\n| clip_range | 0.2 |\n| entropy_loss | -1.86 |\n| explained_variance | 0.602 |\n| learning_rate | 0.0003 |\n| loss | 11.7 |\n| n_updates | 378 |\n| policy_gradient_loss | -4.28e-05 |\n| value_loss | 21.5 |\n| train_metrics/ | |\n| action_queue_updates_success | 142 |\n| action_queue_updates_total | 157 |\n| ice_dug | 40 |\n| water_produced | 9 |\n---------------------------------------------------\n---------------------------------------------------\n| rollout/ | |\n| ep_len_mean | 200 |\n| ep_rew_mean | 17.1 |\n| time/ | |\n| fps | 950 |\n| iterations | 191 |\n| time_elapsed | 803 |\n| total_timesteps | 764000 |\n| train/ | |\n| approx_kl | 0.00056018506 |\n| clip_fraction | 0 |\n| clip_range | 0.2 |\n| entropy_loss | -1.83 |\n| explained_variance | 0.633 |\n| learning_rate | 0.0003 |\n| loss | 11 |\n| n_updates | 380 |\n| policy_gradient_loss | -0.000282 |\n| value_loss | 17.5 |\n| train_metrics/ | |\n| action_queue_updates_success | 138 |\n| action_queue_updates_total | 156 |\n| ice_dug | 139 |\n| water_produced | 29.2 |\n---------------------------------------------------\nEval num_timesteps=768000, episode_reward=0.00 +/- 0.00\nEpisode length: 301.00 +/- 0.00\n--------------------------------------------------\n| eval/ | |\n| mean_ep_length | 301 |\n| mean_reward | 0 |\n| time/ | |\n| total_timesteps | 768000 |\n| train/ | |\n| approx_kl | 0.0015369231 |\n| clip_fraction | 0.00262 |\n| clip_range | 0.2 |\n| entropy_loss | -1.87 |\n| explained_variance | 0.458 |\n| learning_rate | 0.0003 |\n| loss | 32 |\n| n_updates | 382 |\n| policy_gradient_loss | -0.000529 |\n| value_loss | 57.6 |\n| train_metrics/ | |\n| action_queue_updates_success | 133 |\n| action_queue_updates_total | 146 |\n| ice_dug | 158 |\n| water_produced | 29.2 
PPO training log, iterations 192–297 (≈768,000–1,188,000 total timesteps), summarized. Throughput held at roughly 950 fps with 200-step rollout episodes; ep_rew_mean fluctuated between about 7 and 41 over this span; learning_rate stayed at 0.0003 and clip_range at 0.2 throughout. The per-rollout train_metrics columns (action_queue_updates_success, action_queue_updates_total, ice_dug, water_produced) varied widely from rollout to rollout, with ice_dug ranging from near 0 to about 300 and water_produced from 0 to about 56. Periodic evaluation results over this span:

Eval num_timesteps=864000, episode_reward=92.56 +/- 51.51
Episode length: 389.00 +/- 49.05
New best mean reward!
Eval num_timesteps=960000, episode_reward=37.60 +/- 75.00
Episode length: 337.00 +/- 72.00
Eval num_timesteps=1056000, episode_reward=26.40 +/- 52.80
Episode length: 326.00 +/- 50.00
Eval num_timesteps=1152000, episode_reward=1.08 +/- 2.16
Episode length: 302.00 +/- 2.00
ice_dug | 104 |\n| water_produced | 22.2 |\n---------------------------------------------------\n--------------------------------------------------\n| rollout/ | |\n| ep_len_mean | 200 |\n| ep_rew_mean | 18.7 |\n| time/ | |\n| fps | 953 |\n| iterations | 298 |\n| time_elapsed | 1249 |\n| total_timesteps | 1192000 |\n| train/ | |\n| approx_kl | 0.0004766675 |\n| clip_fraction | 0 |\n| clip_range | 0.2 |\n| entropy_loss | -2.19 |\n| explained_variance | 0.684 |\n| learning_rate | 0.0003 |\n| loss | 29.4 |\n| n_updates | 594 |\n| policy_gradient_loss | 9.84e-05 |\n| value_loss | 59.2 |\n| train_metrics/ | |\n| action_queue_updates_success | 115 |\n| action_queue_updates_total | 155 |\n| ice_dug | 34 |\n| water_produced | 7.5 |\n--------------------------------------------------\n---------------------------------------------------\n| rollout/ | |\n| ep_len_mean | 200 |\n| ep_rew_mean | 13.3 |\n| time/ | |\n| fps | 953 |\n| iterations | 299 |\n| time_elapsed | 1253 |\n| total_timesteps | 1196000 |\n| train/ | |\n| approx_kl | 0.00021486692 |\n| clip_fraction | 0 |\n| clip_range | 0.2 |\n| entropy_loss | -2.05 |\n| explained_variance | 0.82 |\n| learning_rate | 0.0003 |\n| loss | 9.98 |\n| n_updates | 596 |\n| policy_gradient_loss | -3.63e-05 |\n| value_loss | 18.1 |\n| train_metrics/ | |\n| action_queue_updates_success | 123 |\n| action_queue_updates_total | 169 |\n| ice_dug | 8 |\n| water_produced | 0 |\n---------------------------------------------------\n--------------------------------------------------\n| rollout/ | |\n| ep_len_mean | 200 |\n| ep_rew_mean | 13.3 |\n| time/ | |\n| fps | 954 |\n| iterations | 300 |\n| time_elapsed | 1257 |\n| total_timesteps | 1200000 |\n| train/ | |\n| approx_kl | 0.0004866078 |\n| clip_fraction | 0.00025 |\n| clip_range | 0.2 |\n| entropy_loss | -2.23 |\n| explained_variance | 0.86 |\n| learning_rate | 0.0003 |\n| loss | 1.61 |\n| n_updates | 598 |\n| policy_gradient_loss | -0.000574 |\n| value_loss | 4.02 |\n| train_metrics/ | |\n| action_queue_updates_success | 134 |\n| action_queue_updates_total | 161 |\n| ice_dug | 120 |\n| water_produced | 22.5 |\n--------------------------------------------------\n---------------------------------------------------\n| rollout/ | |\n| ep_len_mean | 200 |\n| ep_rew_mean | 12.5 |\n| time/ | |\n| fps | 954 |\n| iterations | 301 |\n| time_elapsed | 1261 |\n| total_timesteps | 1204000 |\n| train/ | |\n| approx_kl | 0.00045920373 |\n| clip_fraction | 0 |\n| clip_range | 0.2 |\n| entropy_loss | -1.96 |\n| explained_variance | 0.687 |\n| learning_rate | 0.0003 |\n| loss | 22.6 |\n| n_updates | 600 |\n| policy_gradient_loss | -0.000318 |\n| value_loss | 49.9 |\n| train_metrics/ | |\n| action_queue_updates_success | 128 |\n| action_queue_updates_total | 162 |\n| ice_dug | 30 |\n| water_produced | 7.25 |\n---------------------------------------------------\n--------------------------------------------------\n| rollout/ | |\n| ep_len_mean | 200 |\n| ep_rew_mean | 8.25 |\n| time/ | |\n| fps | 954 |\n| iterations | 302 |\n| time_elapsed | 1265 |\n| total_timesteps | 1208000 |\n| train/ | |\n| approx_kl | 0.0005177533 |\n| clip_fraction | 0 |\n| clip_range | 0.2 |\n| entropy_loss | -2.07 |\n| explained_variance | 0.764 |\n| learning_rate | 0.0003 |\n| loss | 8.45 |\n| n_updates | 602 |\n| policy_gradient_loss | -0.000422 |\n| value_loss | 18.4 |\n| train_metrics/ | |\n| action_queue_updates_success | 131 |\n| action_queue_updates_total | 168 |\n| ice_dug | 10 |\n| water_produced | 2 
|\n--------------------------------------------------\n---------------------------------------------------\n| rollout/ | |\n| ep_len_mean | 200 |\n| ep_rew_mean | 7.44 |\n| time/ | |\n| fps | 954 |\n| iterations | 303 |\n| time_elapsed | 1269 |\n| total_timesteps | 1212000 |\n| train/ | |\n| approx_kl | 0.00035707653 |\n| clip_fraction | 0 |\n| clip_range | 0.2 |\n| entropy_loss | -2.2 |\n| explained_variance | 0.845 |\n| learning_rate | 0.0003 |\n| loss | 3.43 |\n| n_updates | 604 |\n| policy_gradient_loss | -4.12e-05 |\n| value_loss | 6.32 |\n| train_metrics/ | |\n| action_queue_updates_success | 127 |\n| action_queue_updates_total | 168 |\n| ice_dug | 26 |\n| water_produced | 3.5 |\n---------------------------------------------------\n--------------------------------------------------\n| rollout/ | |\n| ep_len_mean | 200 |\n| ep_rew_mean | 12.4 |\n| time/ | |\n| fps | 954 |\n| iterations | 304 |\n| time_elapsed | 1273 |\n| total_timesteps | 1216000 |\n| train/ | |\n| approx_kl | 9.287884e-05 |\n| clip_fraction | 0 |\n| clip_range | 0.2 |\n| entropy_loss | -2.2 |\n| explained_variance | 0.824 |\n| learning_rate | 0.0003 |\n| loss | 3.74 |\n| n_updates | 606 |\n| policy_gradient_loss | -0.000353 |\n| value_loss | 7.66 |\n| train_metrics/ | |\n| action_queue_updates_success | 140 |\n| action_queue_updates_total | 162 |\n| ice_dug | 134 |\n| water_produced | 23.5 |\n--------------------------------------------------\n---------------------------------------------------\n| rollout/ | |\n| ep_len_mean | 200 |\n| ep_rew_mean | 10.3 |\n| time/ | |\n| fps | 954 |\n| iterations | 305 |\n| time_elapsed | 1277 |\n| total_timesteps | 1220000 |\n| train/ | |\n| approx_kl | 0.00015605132 |\n| clip_fraction | 0 |\n| clip_range | 0.2 |\n| entropy_loss | -2.04 |\n| explained_variance | 0.596 |\n| learning_rate | 0.0003 |\n| loss | 34.3 |\n| n_updates | 608 |\n| policy_gradient_loss | -0.000346 |\n| value_loss | 75 |\n| train_metrics/ | |\n| action_queue_updates_success | 134 |\n| action_queue_updates_total | 169 |\n| ice_dug | 53 |\n| water_produced | 12.5 |\n---------------------------------------------------\n--------------------------------------------------\n| rollout/ | |\n| ep_len_mean | 200 |\n| ep_rew_mean | 13.8 |\n| time/ | |\n| fps | 954 |\n| iterations | 306 |\n| time_elapsed | 1281 |\n| total_timesteps | 1224000 |\n| train/ | |\n| approx_kl | 0.0002167614 |\n| clip_fraction | 0 |\n| clip_range | 0.2 |\n| entropy_loss | -2.19 |\n| explained_variance | 0.684 |\n| learning_rate | 0.0003 |\n| loss | 16.4 |\n| n_updates | 610 |\n| policy_gradient_loss | 0.000101 |\n| value_loss | 30.7 |\n| train_metrics/ | |\n| action_queue_updates_success | 134 |\n| action_queue_updates_total | 170 |\n| ice_dug | 97 |\n| water_produced | 24.2 |\n--------------------------------------------------\n--------------------------------------------------\n| rollout/ | |\n| ep_len_mean | 200 |\n| ep_rew_mean | 16.7 |\n| time/ | |\n| fps | 955 |\n| iterations | 307 |\n| time_elapsed | 1285 |\n| total_timesteps | 1228000 |\n| train/ | |\n| approx_kl | 0.0002460958 |\n| clip_fraction | 0 |\n| clip_range | 0.2 |\n| entropy_loss | -2.2 |\n| explained_variance | 0.631 |\n| learning_rate | 0.0003 |\n| loss | 26.6 |\n| n_updates | 612 |\n| policy_gradient_loss | 0.000139 |\n| value_loss | 66.8 |\n| train_metrics/ | |\n| action_queue_updates_success | 144 |\n| action_queue_updates_total | 167 |\n| ice_dug | 86 |\n| water_produced | 16 
|\n--------------------------------------------------\n---------------------------------------------------\n| rollout/ | |\n| ep_len_mean | 200 |\n| ep_rew_mean | 18.8 |\n| time/ | |\n| fps | 955 |\n| iterations | 308 |\n| time_elapsed | 1289 |\n| total_timesteps | 1232000 |\n| train/ | |\n| approx_kl | 0.00046287593 |\n| clip_fraction | 0 |\n| clip_range | 0.2 |\n| entropy_loss | -2.04 |\n| explained_variance | 0.68 |\n| learning_rate | 0.0003 |\n| loss | 12.7 |\n| n_updates | 614 |\n| policy_gradient_loss | 0.000273 |\n| value_loss | 31 |\n| train_metrics/ | |\n| action_queue_updates_success | 136 |\n| action_queue_updates_total | 167 |\n| ice_dug | 60 |\n| water_produced | 13.2 |\n---------------------------------------------------\n--------------------------------------------------\n| rollout/ | |\n| ep_len_mean | 200 |\n| ep_rew_mean | 17.3 |\n| time/ | |\n| fps | 955 |\n| iterations | 309 |\n| time_elapsed | 1293 |\n| total_timesteps | 1236000 |\n| train/ | |\n| approx_kl | 0.0006344596 |\n| clip_fraction | 0 |\n| clip_range | 0.2 |\n| entropy_loss | -2.16 |\n| explained_variance | 0.655 |\n| learning_rate | 0.0003 |\n| loss | 33.3 |\n| n_updates | 616 |\n| policy_gradient_loss | -0.000188 |\n| value_loss | 44.2 |\n| train_metrics/ | |\n| action_queue_updates_success | 127 |\n| action_queue_updates_total | 170 |\n| ice_dug | 71 |\n| water_produced | 17 |\n--------------------------------------------------\n--------------------------------------------------\n| rollout/ | |\n| ep_len_mean | 200 |\n| ep_rew_mean | 18.2 |\n| time/ | |\n| fps | 955 |\n| iterations | 310 |\n| time_elapsed | 1297 |\n| total_timesteps | 1240000 |\n| train/ | |\n| approx_kl | 0.0002316423 |\n| clip_fraction | 0 |\n| clip_range | 0.2 |\n| entropy_loss | -2.2 |\n| explained_variance | 0.739 |\n| learning_rate | 0.0003 |\n| loss | 9.85 |\n| n_updates | 618 |\n| policy_gradient_loss | 7.57e-06 |\n| value_loss | 30.1 |\n| train_metrics/ | |\n| action_queue_updates_success | 137 |\n| action_queue_updates_total | 165 |\n| ice_dug | 115 |\n| water_produced | 16.2 |\n--------------------------------------------------\n--------------------------------------------------\n| rollout/ | |\n| ep_len_mean | 200 |\n| ep_rew_mean | 18 |\n| time/ | |\n| fps | 955 |\n| iterations | 311 |\n| time_elapsed | 1301 |\n| total_timesteps | 1244000 |\n| train/ | |\n| approx_kl | 0.0009554931 |\n| clip_fraction | 0 |\n| clip_range | 0.2 |\n| entropy_loss | -2.09 |\n| explained_variance | 0.669 |\n| learning_rate | 0.0003 |\n| loss | 22.2 |\n| n_updates | 620 |\n| policy_gradient_loss | 0.000797 |\n| value_loss | 44.9 |\n| train_metrics/ | |\n| action_queue_updates_success | 136 |\n| action_queue_updates_total | 164 |\n| ice_dug | 103 |\n| water_produced | 23 |\n--------------------------------------------------\nEval num_timesteps=1248000, episode_reward=1.88 +/- 3.76\nEpisode length: 301.00 +/- 0.00\n---------------------------------------------------\n| eval/ | |\n| mean_ep_length | 301 |\n| mean_reward | 1.88 |\n| time/ | |\n| total_timesteps | 1248000 |\n| train/ | |\n| approx_kl | 0.00045636072 |\n| clip_fraction | 0 |\n| clip_range | 0.2 |\n| entropy_loss | -2.06 |\n| explained_variance | 0.676 |\n| learning_rate | 0.0003 |\n| loss | 35.6 |\n| n_updates | 622 |\n| policy_gradient_loss | -0.000332 |\n| value_loss | 53.5 |\n| train_metrics/ | |\n| action_queue_updates_success | 132 |\n| action_queue_updates_total | 156 |\n| ice_dug | 21 |\n| water_produced | 2.25 
|\n---------------------------------------------------\n---------------------------------\n| rollout/ | |\n| ep_len_mean | 200 |\n| ep_rew_mean | 15.1 |\n| time/ | |\n| fps | 954 |\n| iterations | 312 |\n| time_elapsed | 1307 |\n| total_timesteps | 1248000 |\n---------------------------------\n-------------------------------------------------\n| rollout/ | |\n| ep_len_mean | 200 |\n| ep_rew_mean | 16.8 |\n| time/ | |\n| fps | 954 |\n| iterations | 313 |\n| time_elapsed | 1311 |\n| total_timesteps | 1252000 |\n| train/ | |\n| approx_kl | 0.002095736 |\n| clip_fraction | 0.00188 |\n| clip_range | 0.2 |\n| entropy_loss | -2 |\n| explained_variance | 0.841 |\n| learning_rate | 0.0003 |\n| loss | 4.02 |\n| n_updates | 624 |\n| policy_gradient_loss | -0.000134 |\n| value_loss | 9 |\n| train_metrics/ | |\n| action_queue_updates_success | 135 |\n| action_queue_updates_total | 159 |\n| ice_dug | 136 |\n| water_produced | 21 |\n-------------------------------------------------\n--------------------------------------------------\n| rollout/ | |\n| ep_len_mean | 200 |\n| ep_rew_mean | 13.2 |\n| time/ | |\n| fps | 954 |\n| iterations | 314 |\n| time_elapsed | 1315 |\n| total_timesteps | 1256000 |\n| train/ | |\n| approx_kl | 0.0011950241 |\n| clip_fraction | 0 |\n| clip_range | 0.2 |\n| entropy_loss | -1.98 |\n| explained_variance | 0.691 |\n| learning_rate | 0.0003 |\n| loss | 21.5 |\n| n_updates | 626 |\n| policy_gradient_loss | -0.000476 |\n| value_loss | 52.5 |\n| train_metrics/ | |\n| action_queue_updates_success | 130 |\n| action_queue_updates_total | 175 |\n| ice_dug | 0 |\n| water_produced | 0 |\n--------------------------------------------------\n---------------------------------------------------\n| rollout/ | |\n| ep_len_mean | 200 |\n| ep_rew_mean | 10.5 |\n| time/ | |\n| fps | 954 |\n| iterations | 315 |\n| time_elapsed | 1319 |\n| total_timesteps | 1260000 |\n| train/ | |\n| approx_kl | 0.00046619904 |\n| clip_fraction | 0 |\n| clip_range | 0.2 |\n| entropy_loss | -2.32 |\n| explained_variance | 0.849 |\n| learning_rate | 0.0003 |\n| loss | 0.98 |\n| n_updates | 628 |\n| policy_gradient_loss | -0.000144 |\n| value_loss | 2.52 |\n| train_metrics/ | |\n| action_queue_updates_success | 127 |\n| action_queue_updates_total | 177 |\n| ice_dug | 16 |\n| water_produced | 3.25 |\n---------------------------------------------------\n---------------------------------------------------\n| rollout/ | |\n| ep_len_mean | 200 |\n| ep_rew_mean | 5.86 |\n| time/ | |\n| fps | 954 |\n| iterations | 316 |\n| time_elapsed | 1323 |\n| total_timesteps | 1264000 |\n| train/ | |\n| approx_kl | 0.00029291006 |\n| clip_fraction | 0 |\n| clip_range | 0.2 |\n| entropy_loss | -2.28 |\n| explained_variance | 0.828 |\n| learning_rate | 0.0003 |\n| loss | 2.18 |\n| n_updates | 630 |\n| policy_gradient_loss | -0.000677 |\n| value_loss | 6.11 |\n| train_metrics/ | |\n| action_queue_updates_success | 132 |\n| action_queue_updates_total | 171 |\n| ice_dug | 5 |\n| water_produced | 1 |\n---------------------------------------------------\n--------------------------------------------------\n| rollout/ | |\n| ep_len_mean | 200 |\n| ep_rew_mean | 7.63 |\n| time/ | |\n| fps | 954 |\n| iterations | 317 |\n| time_elapsed | 1327 |\n| total_timesteps | 1268000 |\n| train/ | |\n| approx_kl | 0.0011448052 |\n| clip_fraction | 0.00225 |\n| clip_range | 0.2 |\n| entropy_loss | -2.23 |\n| explained_variance | 0.843 |\n| learning_rate | 0.0003 |\n| loss | 1.62 |\n| n_updates | 632 |\n| policy_gradient_loss | 0.000466 |\n| value_loss | 3.45 
|\n| train_metrics/ | |\n| action_queue_updates_success | 139 |\n| action_queue_updates_total | 176 |\n| ice_dug | 56 |\n| water_produced | 10.8 |\n--------------------------------------------------\n---------------------------------------------------\n| rollout/ | |\n| ep_len_mean | 200 |\n| ep_rew_mean | 4.98 |\n| time/ | |\n| fps | 954 |\n| iterations | 318 |\n| time_elapsed | 1332 |\n| total_timesteps | 1272000 |\n| train/ | |\n| approx_kl | 0.00020211484 |\n| clip_fraction | 0 |\n| clip_range | 0.2 |\n| entropy_loss | -2.23 |\n| explained_variance | 0.684 |\n| learning_rate | 0.0003 |\n| loss | 12.6 |\n| n_updates | 634 |\n| policy_gradient_loss | -0.000292 |\n| value_loss | 24 |\n| train_metrics/ | |\n| action_queue_updates_success | 133 |\n| action_queue_updates_total | 172 |\n| ice_dug | 36 |\n| water_produced | 8.75 |\n---------------------------------------------------\n---------------------------------------------------\n| rollout/ | |\n| ep_len_mean | 200 |\n| ep_rew_mean | 6.6 |\n| time/ | |\n| fps | 955 |\n| iterations | 319 |\n| time_elapsed | 1336 |\n| total_timesteps | 1276000 |\n| train/ | |\n| approx_kl | 0.00010213704 |\n| clip_fraction | 0 |\n| clip_range | 0.2 |\n| entropy_loss | -2.19 |\n| explained_variance | 0.706 |\n| learning_rate | 0.0003 |\n| loss | 10.6 |\n| n_updates | 636 |\n| policy_gradient_loss | -0.000202 |\n| value_loss | 21.5 |\n| train_metrics/ | |\n| action_queue_updates_success | 133 |\n| action_queue_updates_total | 173 |\n| ice_dug | 38 |\n| water_produced | 7.75 |\n---------------------------------------------------\n---------------------------------------------------\n| rollout/ | |\n| ep_len_mean | 200 |\n| ep_rew_mean | 5.92 |\n| time/ | |\n| fps | 955 |\n| iterations | 320 |\n| time_elapsed | 1340 |\n| total_timesteps | 1280000 |\n| train/ | |\n| approx_kl | 0.00031237624 |\n| clip_fraction | 0 |\n| clip_range | 0.2 |\n| entropy_loss | -2.22 |\n| explained_variance | 0.772 |\n| learning_rate | 0.0003 |\n| loss | 6.48 |\n| n_updates | 638 |\n| policy_gradient_loss | 0.000327 |\n| value_loss | 11 |\n| train_metrics/ | |\n| action_queue_updates_success | 133 |\n| action_queue_updates_total | 172 |\n| ice_dug | 0 |\n| water_produced | 0 |\n---------------------------------------------------\n--------------------------------------------------\n| rollout/ | |\n| ep_len_mean | 200 |\n| ep_rew_mean | 5.71 |\n| time/ | |\n| fps | 955 |\n| iterations | 321 |\n| time_elapsed | 1344 |\n| total_timesteps | 1284000 |\n| train/ | |\n| approx_kl | 0.0011285143 |\n| clip_fraction | 0.000625 |\n| clip_range | 0.2 |\n| entropy_loss | -2.17 |\n| explained_variance | 0.861 |\n| learning_rate | 0.0003 |\n| loss | 1.23 |\n| n_updates | 640 |\n| policy_gradient_loss | 0.000108 |\n| value_loss | 2.8 |\n| train_metrics/ | |\n| action_queue_updates_success | 131 |\n| action_queue_updates_total | 177 |\n| ice_dug | 0 |\n| water_produced | 0 |\n--------------------------------------------------\n--------------------------------------------------\n| rollout/ | |\n| ep_len_mean | 200 |\n| ep_rew_mean | 5.71 |\n| time/ | |\n| fps | 955 |\n| iterations | 322 |\n| time_elapsed | 1348 |\n| total_timesteps | 1288000 |\n| train/ | |\n| approx_kl | 0.0006486765 |\n| clip_fraction | 0.0015 |\n| clip_range | 0.2 |\n| entropy_loss | -2.34 |\n| explained_variance | 0.85 |\n| learning_rate | 0.0003 |\n| loss | 0.279 |\n| n_updates | 642 |\n| policy_gradient_loss | 0.000494 |\n| value_loss | 0.772 |\n| train_metrics/ | |\n| action_queue_updates_success | 146 |\n| 
action_queue_updates_total | 179 |\n| ice_dug | 58 |\n| water_produced | 10.8 |\n--------------------------------------------------\n--------------------------------------------------\n| rollout/ | |\n| ep_len_mean | 200 |\n| ep_rew_mean | 5.09 |\n| time/ | |\n| fps | 955 |\n| iterations | 323 |\n| time_elapsed | 1352 |\n| total_timesteps | 1292000 |\n| train/ | |\n| approx_kl | 0.0002622258 |\n| clip_fraction | 0 |\n| clip_range | 0.2 |\n| entropy_loss | -2.25 |\n| explained_variance | 0.713 |\n| learning_rate | 0.0003 |\n| loss | 8.06 |\n| n_updates | 644 |\n| policy_gradient_loss | -0.000331 |\n| value_loss | 16.8 |\n| train_metrics/ | |\n| action_queue_updates_success | 137 |\n| action_queue_updates_total | 177 |\n| ice_dug | 25 |\n| water_produced | 5.75 |\n--------------------------------------------------\n---------------------------------------------------\n| rollout/ | |\n| ep_len_mean | 200 |\n| ep_rew_mean | 4.19 |\n| time/ | |\n| fps | 955 |\n| iterations | 324 |\n| time_elapsed | 1356 |\n| total_timesteps | 1296000 |\n| train/ | |\n| approx_kl | 0.00036467708 |\n| clip_fraction | 0 |\n| clip_range | 0.2 |\n| entropy_loss | -2.29 |\n| explained_variance | 0.688 |\n| learning_rate | 0.0003 |\n| loss | 6.82 |\n| n_updates | 646 |\n| policy_gradient_loss | 0.000228 |\n| value_loss | 15 |\n| train_metrics/ | |\n| action_queue_updates_success | 144 |\n| action_queue_updates_total | 174 |\n| ice_dug | 14 |\n| water_produced | 3.5 |\n---------------------------------------------------\n--------------------------------------------------\n| rollout/ | |\n| ep_len_mean | 200 |\n| ep_rew_mean | 4.67 |\n| time/ | |\n| fps | 955 |\n| iterations | 325 |\n| time_elapsed | 1360 |\n| total_timesteps | 1300000 |\n| train/ | |\n| approx_kl | 0.0012498202 |\n| clip_fraction | 0.00188 |\n| clip_range | 0.2 |\n| entropy_loss | -2.27 |\n| explained_variance | 0.745 |\n| learning_rate | 0.0003 |\n| loss | 5.83 |\n| n_updates | 648 |\n| policy_gradient_loss | 0.000499 |\n| value_loss | 7.78 |\n| train_metrics/ | |\n| action_queue_updates_success | 135 |\n| action_queue_updates_total | 176 |\n| ice_dug | 12 |\n| water_produced | 2.25 |\n--------------------------------------------------\n---------------------------------------------------\n| rollout/ | |\n| ep_len_mean | 200 |\n| ep_rew_mean | 6.79 |\n| time/ | |\n| fps | 955 |\n| iterations | 326 |\n| time_elapsed | 1364 |\n| total_timesteps | 1304000 |\n| train/ | |\n| approx_kl | 0.00040356535 |\n| clip_fraction | 0 |\n| clip_range | 0.2 |\n| entropy_loss | -2.33 |\n| explained_variance | 0.785 |\n| learning_rate | 0.0003 |\n| loss | 1.41 |\n| n_updates | 650 |\n| policy_gradient_loss | -0.000147 |\n| value_loss | 2.61 |\n| train_metrics/ | |\n| action_queue_updates_success | 147 |\n| action_queue_updates_total | 174 |\n| ice_dug | 61 |\n| water_produced | 10 |\n---------------------------------------------------\n---------------------------------------------------\n| rollout/ | |\n| ep_len_mean | 200 |\n| ep_rew_mean | 4.94 |\n| time/ | |\n| fps | 955 |\n| iterations | 327 |\n| time_elapsed | 1368 |\n| total_timesteps | 1308000 |\n| train/ | |\n| approx_kl | 0.00036326153 |\n| clip_fraction | 0 |\n| clip_range | 0.2 |\n| entropy_loss | -2.25 |\n| explained_variance | 0.609 |\n| learning_rate | 0.0003 |\n| loss | 15.5 |\n| n_updates | 652 |\n| policy_gradient_loss | -0.00071 |\n| value_loss | 23 |\n| train_metrics/ | |\n| action_queue_updates_success | 145 |\n| action_queue_updates_total | 176 |\n| ice_dug | 8 |\n| water_produced | 2 
|\n---------------------------------------------------\n---------------------------------------------------\n| rollout/ | |\n| ep_len_mean | 200 |\n| ep_rew_mean | 5.37 |\n| time/ | |\n| fps | 955 |\n| iterations | 328 |\n| time_elapsed | 1373 |\n| total_timesteps | 1312000 |\n| train/ | |\n| approx_kl | 0.00085185573 |\n| clip_fraction | 0 |\n| clip_range | 0.2 |\n| entropy_loss | -2.27 |\n| explained_variance | 0.75 |\n| learning_rate | 0.0003 |\n| loss | 2.36 |\n| n_updates | 654 |\n| policy_gradient_loss | -0.000123 |\n| value_loss | 4.03 |\n| train_metrics/ | |\n| action_queue_updates_success | 139 |\n| action_queue_updates_total | 177 |\n| ice_dug | 38 |\n| water_produced | 7.75 |\n---------------------------------------------------\n---------------------------------------------------\n| rollout/ | |\n| ep_len_mean | 200 |\n| ep_rew_mean | 4.69 |\n| time/ | |\n| fps | 955 |\n| iterations | 329 |\n| time_elapsed | 1377 |\n| total_timesteps | 1316000 |\n| train/ | |\n| approx_kl | 0.00029530743 |\n| clip_fraction | 0 |\n| clip_range | 0.2 |\n| entropy_loss | -2.31 |\n| explained_variance | 0.666 |\n| learning_rate | 0.0003 |\n| loss | 6.53 |\n| n_updates | 656 |\n| policy_gradient_loss | 0.000131 |\n| value_loss | 19.9 |\n| train_metrics/ | |\n| action_queue_updates_success | 142 |\n| action_queue_updates_total | 176 |\n| ice_dug | 1 |\n| water_produced | 0.25 |\n---------------------------------------------------\n---------------------------------------------------\n| rollout/ | |\n| ep_len_mean | 200 |\n| ep_rew_mean | 8.1 |\n| time/ | |\n| fps | 955 |\n| iterations | 330 |\n| time_elapsed | 1381 |\n| total_timesteps | 1320000 |\n| train/ | |\n| approx_kl | 0.00031200075 |\n| clip_fraction | 0 |\n| clip_range | 0.2 |\n| entropy_loss | -2.27 |\n| explained_variance | 0.901 |\n| learning_rate | 0.0003 |\n| loss | 0.397 |\n| n_updates | 658 |\n| policy_gradient_loss | 0.00057 |\n| value_loss | 1.23 |\n| train_metrics/ | |\n| action_queue_updates_success | 144 |\n| action_queue_updates_total | 173 |\n| ice_dug | 94 |\n| water_produced | 18.5 |\n---------------------------------------------------\n--------------------------------------------------\n| rollout/ | |\n| ep_len_mean | 200 |\n| ep_rew_mean | 6.09 |\n| time/ | |\n| fps | 955 |\n| iterations | 331 |\n| time_elapsed | 1385 |\n| total_timesteps | 1324000 |\n| train/ | |\n| approx_kl | 0.0013709879 |\n| clip_fraction | 0 |\n| clip_range | 0.2 |\n| entropy_loss | -2.26 |\n| explained_variance | 0.67 |\n| learning_rate | 0.0003 |\n| loss | 20.4 |\n| n_updates | 660 |\n| policy_gradient_loss | -0.000149 |\n| value_loss | 41.5 |\n| train_metrics/ | |\n| action_queue_updates_success | 142 |\n| action_queue_updates_total | 177 |\n| ice_dug | 4 |\n| water_produced | 0.5 |\n--------------------------------------------------\n---------------------------------------------------\n| rollout/ | |\n| ep_len_mean | 200 |\n| ep_rew_mean | 7.66 |\n| time/ | |\n| fps | 956 |\n| iterations | 332 |\n| time_elapsed | 1389 |\n| total_timesteps | 1328000 |\n| train/ | |\n| approx_kl | 0.00023335408 |\n| clip_fraction | 0 |\n| clip_range | 0.2 |\n| entropy_loss | -2.33 |\n| explained_variance | 0.79 |\n| learning_rate | 0.0003 |\n| loss | 0.576 |\n| n_updates | 662 |\n| policy_gradient_loss | 0.000437 |\n| value_loss | 1.34 |\n| train_metrics/ | |\n| action_queue_updates_success | 147 |\n| action_queue_updates_total | 178 |\n| ice_dug | 41 |\n| water_produced | 9.5 
|\n---------------------------------------------------\n---------------------------------------------------\n| rollout/ | |\n| ep_len_mean | 200 |\n| ep_rew_mean | 6.24 |\n| time/ | |\n| fps | 956 |\n| iterations | 333 |\n| time_elapsed | 1393 |\n| total_timesteps | 1332000 |\n| train/ | |\n| approx_kl | 0.00015113055 |\n| clip_fraction | 0 |\n| clip_range | 0.2 |\n| entropy_loss | -2.23 |\n| explained_variance | 0.648 |\n| learning_rate | 0.0003 |\n| loss | 8.93 |\n| n_updates | 664 |\n| policy_gradient_loss | 1.84e-05 |\n| value_loss | 22.9 |\n| train_metrics/ | |\n| action_queue_updates_success | 139 |\n| action_queue_updates_total | 178 |\n| ice_dug | 7 |\n| water_produced | 1 |\n---------------------------------------------------\n---------------------------------------------------\n| rollout/ | |\n| ep_len_mean | 200 |\n| ep_rew_mean | 7.13 |\n| time/ | |\n| fps | 956 |\n| iterations | 334 |\n| time_elapsed | 1397 |\n| total_timesteps | 1336000 |\n| train/ | |\n| approx_kl | 0.00029954236 |\n| clip_fraction | 0 |\n| clip_range | 0.2 |\n| entropy_loss | -2.29 |\n| explained_variance | 0.837 |\n| learning_rate | 0.0003 |\n| loss | 0.535 |\n| n_updates | 666 |\n| policy_gradient_loss | -0.000175 |\n| value_loss | 1.15 |\n| train_metrics/ | |\n| action_queue_updates_success | 147 |\n| action_queue_updates_total | 172 |\n| ice_dug | 20 |\n| water_produced | 4.5 |\n---------------------------------------------------\n---------------------------------------------------\n| rollout/ | |\n| ep_len_mean | 200 |\n| ep_rew_mean | 4.35 |\n| time/ | |\n| fps | 956 |\n| iterations | 335 |\n| time_elapsed | 1401 |\n| total_timesteps | 1340000 |\n| train/ | |\n| approx_kl | 0.00051399204 |\n| clip_fraction | 0 |\n| clip_range | 0.2 |\n| entropy_loss | -2.27 |\n| explained_variance | 0.714 |\n| learning_rate | 0.0003 |\n| loss | 3.04 |\n| n_updates | 668 |\n| policy_gradient_loss | 0.000122 |\n| value_loss | 6.67 |\n| train_metrics/ | |\n| action_queue_updates_success | 150 |\n| action_queue_updates_total | 175 |\n| ice_dug | 29 |\n| water_produced | 5.25 |\n---------------------------------------------------\nEval num_timesteps=1344000, episode_reward=0.00 +/- 0.00\nEpisode length: 301.00 +/- 0.00\n---------------------------------------------------\n| eval/ | |\n| mean_ep_length | 301 |\n| mean_reward | 0 |\n| time/ | |\n| total_timesteps | 1344000 |\n| train/ | |\n| approx_kl | 0.00013514272 |\n| clip_fraction | 0 |\n| clip_range | 0.2 |\n| entropy_loss | -2.28 |\n| explained_variance | 0.712 |\n| learning_rate | 0.0003 |\n| loss | 3.1 |\n| n_updates | 670 |\n| policy_gradient_loss | -6.45e-05 |\n| value_loss | 7.63 |\n| train_metrics/ | |\n| action_queue_updates_success | 145 |\n| action_queue_updates_total | 175 |\n| ice_dug | 11 |\n| water_produced | 1.5 |\n---------------------------------------------------\n---------------------------------\n| rollout/ | |\n| ep_len_mean | 200 |\n| ep_rew_mean | 4.57 |\n| time/ | |\n| fps | 954 |\n| iterations | 336 |\n| time_elapsed | 1407 |\n| total_timesteps | 1344000 |\n---------------------------------\n--------------------------------------------------\n| rollout/ | |\n| ep_len_mean | 200 |\n| ep_rew_mean | 2.96 |\n| time/ | |\n| fps | 954 |\n| iterations | 337 |\n| time_elapsed | 1411 |\n| total_timesteps | 1348000 |\n| train/ | |\n| approx_kl | 0.0007206025 |\n| clip_fraction | 0 |\n| clip_range | 0.2 |\n| entropy_loss | -2.27 |\n| explained_variance | 0.771 |\n| learning_rate | 0.0003 |\n| loss | 1.27 |\n| n_updates | 672 |\n| policy_gradient_loss | 
0.00075 |\n| value_loss | 2.98 |\n| train_metrics/ | |\n| action_queue_updates_success | 151 |\n| action_queue_updates_total | 176 |\n| ice_dug | 11 |\n| water_produced | 1.75 |\n--------------------------------------------------\n---------------------------------------------------\n| rollout/ | |\n| ep_len_mean | 200 |\n| ep_rew_mean | 2.74 |\n| time/ | |\n| fps | 954 |\n| iterations | 338 |\n| time_elapsed | 1415 |\n| total_timesteps | 1352000 |\n| train/ | |\n| approx_kl | 0.00046364925 |\n| clip_fraction | 0 |\n| clip_range | 0.2 |\n| entropy_loss | -2.33 |\n| explained_variance | 0.706 |\n| learning_rate | 0.0003 |\n| loss | 1.01 |\n| n_updates | 674 |\n| policy_gradient_loss | 0.000686 |\n| value_loss | 2.67 |\n| train_metrics/ | |\n| action_queue_updates_success | 148 |\n| action_queue_updates_total | 176 |\n| ice_dug | 0 |\n| water_produced | 0 |\n---------------------------------------------------\n---------------------------------------------------\n| rollout/ | |\n| ep_len_mean | 200 |\n| ep_rew_mean | 1.97 |\n| time/ | |\n| fps | 955 |\n| iterations | 339 |\n| time_elapsed | 1419 |\n| total_timesteps | 1356000 |\n| train/ | |\n| approx_kl | 0.00054471765 |\n| clip_fraction | 0 |\n| clip_range | 0.2 |\n| entropy_loss | -2.34 |\n| explained_variance | 0.786 |\n| learning_rate | 0.0003 |\n| loss | 0.183 |\n| n_updates | 676 |\n| policy_gradient_loss | -0.000361 |\n| value_loss | 0.426 |\n| train_metrics/ | |\n| action_queue_updates_success | 157 |\n| action_queue_updates_total | 178 |\n| ice_dug | 8 |\n| water_produced | 0.75 |\n---------------------------------------------------\n---------------------------------------------------\n| rollout/ | |\n| ep_len_mean | 200 |\n| ep_rew_mean | 4.75 |\n| time/ | |\n| fps | 955 |\n| iterations | 340 |\n| time_elapsed | 1423 |\n| total_timesteps | 1360000 |\n| train/ | |\n| approx_kl | 0.00054264773 |\n| clip_fraction | 0 |\n| clip_range | 0.2 |\n| entropy_loss | -2.34 |\n| explained_variance | 0.456 |\n| learning_rate | 0.0003 |\n| loss | 0.568 |\n| n_updates | 678 |\n| policy_gradient_loss | -2.82e-05 |\n| value_loss | 0.885 |\n| train_metrics/ | |\n| action_queue_updates_success | 159 |\n| action_queue_updates_total | 174 |\n| ice_dug | 95 |\n| water_produced | 18.5 |\n---------------------------------------------------\n--------------------------------------------------\n| rollout/ | |\n| ep_len_mean | 200 |\n| ep_rew_mean | 7 |\n| time/ | |\n| fps | 955 |\n| iterations | 341 |\n| time_elapsed | 1427 |\n| total_timesteps | 1364000 |\n| train/ | |\n| approx_kl | 0.0038056516 |\n| clip_fraction | 0.0174 |\n| clip_range | 0.2 |\n| entropy_loss | -2.28 |\n| explained_variance | 0.579 |\n| learning_rate | 0.0003 |\n| loss | 13.9 |\n| n_updates | 680 |\n| policy_gradient_loss | -0.00272 |\n| value_loss | 30.5 |\n| train_metrics/ | |\n| action_queue_updates_success | 153 |\n| action_queue_updates_total | 169 |\n| ice_dug | 63 |\n| water_produced | 12.3 |\n--------------------------------------------------\n---------------------------------------------------\n| rollout/ | |\n| ep_len_mean | 200 |\n| ep_rew_mean | 6.79 |\n| time/ | |\n| fps | 955 |\n| iterations | 342 |\n| time_elapsed | 1431 |\n| total_timesteps | 1368000 |\n| train/ | |\n| approx_kl | 0.00065781863 |\n| clip_fraction | 0 |\n| clip_range | 0.2 |\n| entropy_loss | -2.22 |\n| explained_variance | 0.547 |\n| learning_rate | 0.0003 |\n| loss | 12.9 |\n| n_updates | 682 |\n| policy_gradient_loss | -0.00121 |\n| value_loss | 29.9 |\n| train_metrics/ | |\n| 
action_queue_updates_success | 155 |\n| action_queue_updates_total | 174 |\n| ice_dug | 4 |\n| water_produced | 0.75 |\n---------------------------------------------------\n--------------------------------------------------\n| rollout/ | |\n| ep_len_mean | 200 |\n| ep_rew_mean | 7.12 |\n| time/ | |\n| fps | 955 |\n| iterations | 343 |\n| time_elapsed | 1435 |\n| total_timesteps | 1372000 |\n| train/ | |\n| approx_kl | 0.0011751932 |\n| clip_fraction | 0 |\n| clip_range | 0.2 |\n| entropy_loss | -2.3 |\n| explained_variance | 0.753 |\n| learning_rate | 0.0003 |\n| loss | 0.363 |\n| n_updates | 684 |\n| policy_gradient_loss | -6.03e-05 |\n| value_loss | 0.897 |\n| train_metrics/ | |\n| action_queue_updates_success | 159 |\n| action_queue_updates_total | 172 |\n| ice_dug | 15 |\n| water_produced | 1.5 |\n--------------------------------------------------\n------------------------------------------------\n| rollout/ | |\n| ep_len_mean | 200 |\n| ep_rew_mean | 7.11 |\n| time/ | |\n| fps | 955 |\n| iterations | 344 |\n| time_elapsed | 1439 |\n| total_timesteps | 1376000 |\n| train/ | |\n| approx_kl | 0.00062722 |\n| clip_fraction | 0 |\n| clip_range | 0.2 |\n| entropy_loss | -2.28 |\n| explained_variance | 0.51 |\n| learning_rate | 0.0003 |\n| loss | 1.68 |\n| n_updates | 686 |\n| policy_gradient_loss | 0.000408 |\n| value_loss | 2.47 |\n| train_metrics/ | |\n| action_queue_updates_success | 150 |\n| action_queue_updates_total | 172 |\n| ice_dug | 4 |\n| water_produced | 0.75 |\n------------------------------------------------\n---------------------------------------------------\n| rollout/ | |\n| ep_len_mean | 200 |\n| ep_rew_mean | 3.44 |\n| time/ | |\n| fps | 955 |\n| iterations | 345 |\n| time_elapsed | 1443 |\n| total_timesteps | 1380000 |\n| train/ | |\n| approx_kl | 0.00057513604 |\n| clip_fraction | 0 |\n| clip_range | 0.2 |\n| entropy_loss | -2.29 |\n| explained_variance | 0.758 |\n| learning_rate | 0.0003 |\n| loss | 0.232 |\n| n_updates | 688 |\n| policy_gradient_loss | 0.00033 |\n| value_loss | 0.693 |\n| train_metrics/ | |\n| action_queue_updates_success | 160 |\n| action_queue_updates_total | 172 |\n| ice_dug | 11 |\n| water_produced | 1 |\n---------------------------------------------------\n---------------------------------------------------\n| rollout/ | |\n| ep_len_mean | 200 |\n| ep_rew_mean | 2.13 |\n| time/ | |\n| fps | 955 |\n| iterations | 346 |\n| time_elapsed | 1447 |\n| total_timesteps | 1384000 |\n| train/ | |\n| approx_kl | 0.00044133878 |\n| clip_fraction | 0 |\n| clip_range | 0.2 |\n| entropy_loss | -2.29 |\n| explained_variance | 0.638 |\n| learning_rate | 0.0003 |\n| loss | 0.76 |\n| n_updates | 690 |\n| policy_gradient_loss | 0.000335 |\n| value_loss | 1.4 |\n| train_metrics/ | |\n| action_queue_updates_success | 160 |\n| action_queue_updates_total | 173 |\n| ice_dug | 32 |\n| water_produced | 6 |\n---------------------------------------------------\n---------------------------------------------------\n| rollout/ | |\n| ep_len_mean | 200 |\n| ep_rew_mean | 2 |\n| time/ | |\n| fps | 956 |\n| iterations | 347 |\n| time_elapsed | 1451 |\n| total_timesteps | 1388000 |\n| train/ | |\n| approx_kl | 0.00040451757 |\n| clip_fraction | 0 |\n| clip_range | 0.2 |\n| entropy_loss | -2.29 |\n| explained_variance | 0.724 |\n| learning_rate | 0.0003 |\n| loss | 3.55 |\n| n_updates | 692 |\n| policy_gradient_loss | -0.00046 |\n| value_loss | 7.14 |\n| train_metrics/ | |\n| action_queue_updates_success | 161 |\n| action_queue_updates_total | 177 |\n| ice_dug | 13 |\n| 
water_produced | 0 |\n---------------------------------------------------\n---------------------------------------------------\n| rollout/ | |\n| ep_len_mean | 200 |\n| ep_rew_mean | 2.39 |\n| time/ | |\n| fps | 956 |\n| iterations | 348 |\n| time_elapsed | 1455 |\n| total_timesteps | 1392000 |\n| train/ | |\n| approx_kl | 0.00014301736 |\n| clip_fraction | 0 |\n| clip_range | 0.2 |\n| entropy_loss | -2.32 |\n| explained_variance | 0.635 |\n| learning_rate | 0.0003 |\n| loss | 2 |\n| n_updates | 694 |\n| policy_gradient_loss | -0.000138 |\n| value_loss | 2.84 |\n| train_metrics/ | |\n| action_queue_updates_success | 155 |\n| action_queue_updates_total | 173 |\n| ice_dug | 37 |\n| water_produced | 3.25 |\n---------------------------------------------------\n---------------------------------------------------\n| rollout/ | |\n| ep_len_mean | 200 |\n| ep_rew_mean | 3.02 |\n| time/ | |\n| fps | 955 |\n| iterations | 349 |\n| time_elapsed | 1460 |\n| total_timesteps | 1396000 |\n| train/ | |\n| approx_kl | 0.00038259962 |\n| clip_fraction | 0 |\n| clip_range | 0.2 |\n| entropy_loss | -2.28 |\n| explained_variance | 0.633 |\n| learning_rate | 0.0003 |\n| loss | 2.54 |\n| n_updates | 696 |\n| policy_gradient_loss | -0.00137 |\n| value_loss | 6.75 |\n| train_metrics/ | |\n| action_queue_updates_success | 152 |\n| action_queue_updates_total | 173 |\n| ice_dug | 40 |\n| water_produced | 3.5 |\n---------------------------------------------------\n---------------------------------------------------\n| rollout/ | |\n| ep_len_mean | 200 |\n| ep_rew_mean | 4.15 |\n| time/ | |\n| fps | 956 |\n| iterations | 350 |\n| time_elapsed | 1464 |\n| total_timesteps | 1400000 |\n| train/ | |\n| approx_kl | 0.00021880404 |\n| clip_fraction | 0 |\n| clip_range | 0.2 |\n| entropy_loss | -2.24 |\n| explained_variance | 0.683 |\n| learning_rate | 0.0003 |\n| loss | 6.58 |\n| n_updates | 698 |\n| policy_gradient_loss | -0.000548 |\n| value_loss | 9.2 |\n| train_metrics/ | |\n| action_queue_updates_success | 158 |\n| action_queue_updates_total | 176 |\n| ice_dug | 29 |\n| water_produced | 6.5 |\n---------------------------------------------------\n---------------------------------------------------\n| rollout/ | |\n| ep_len_mean | 200 |\n| ep_rew_mean | 6.19 |\n| time/ | |\n| fps | 956 |\n| iterations | 351 |\n| time_elapsed | 1468 |\n| total_timesteps | 1404000 |\n| train/ | |\n| approx_kl | 0.00025216496 |\n| clip_fraction | 0 |\n| clip_range | 0.2 |\n| entropy_loss | -2.25 |\n| explained_variance | 0.586 |\n| learning_rate | 0.0003 |\n| loss | 6.37 |\n| n_updates | 700 |\n| policy_gradient_loss | -0.000681 |\n| value_loss | 18.8 |\n| train_metrics/ | |\n| action_queue_updates_success | 159 |\n| action_queue_updates_total | 173 |\n| ice_dug | 74 |\n| water_produced | 15.8 |\n---------------------------------------------------\n--------------------------------------------------\n| rollout/ | |\n| ep_len_mean | 200 |\n| ep_rew_mean | 11.2 |\n| time/ | |\n| fps | 956 |\n| iterations | 352 |\n| time_elapsed | 1472 |\n| total_timesteps | 1408000 |\n| train/ | |\n| approx_kl | 0.0010115396 |\n| clip_fraction | 0 |\n| clip_range | 0.2 |\n| entropy_loss | -2.23 |\n| explained_variance | 0.577 |\n| learning_rate | 0.0003 |\n| loss | 17.8 |\n| n_updates | 702 |\n| policy_gradient_loss | -0.000462 |\n| value_loss | 49.6 |\n| train_metrics/ | |\n| action_queue_updates_success | 167 |\n| action_queue_updates_total | 172 |\n| ice_dug | 114 |\n| water_produced | 24 
|\n--------------------------------------------------\n---------------------------------------------------\n| rollout/ | |\n| ep_len_mean | 200 |\n| ep_rew_mean | 16.3 |\n| time/ | |\n| fps | 956 |\n| iterations | 353 |\n| time_elapsed | 1476 |\n| total_timesteps | 1412000 |\n| train/ | |\n| approx_kl | 0.00075706496 |\n| clip_fraction | 0 |\n| clip_range | 0.2 |\n| entropy_loss | -2.14 |\n| explained_variance | 0.49 |\n| learning_rate | 0.0003 |\n| loss | 69.6 |\n| n_updates | 704 |\n| policy_gradient_loss | -2.89e-05 |\n| value_loss | 94.5 |\n| train_metrics/ | |\n| action_queue_updates_success | 155 |\n| action_queue_updates_total | 169 |\n| ice_dug | 170 |\n| water_produced | 27.5 |\n---------------------------------------------------\n--------------------------------------------------\n| rollout/ | |\n| ep_len_mean | 200 |\n| ep_rew_mean | 23.7 |\n| time/ | |\n| fps | 956 |\n| iterations | 354 |\n| time_elapsed | 1480 |\n| total_timesteps | 1416000 |\n| train/ | |\n| approx_kl | 0.0032165137 |\n| clip_fraction | 0.0149 |\n| clip_range | 0.2 |\n| entropy_loss | -2.09 |\n| explained_variance | 0.565 |\n| learning_rate | 0.0003 |\n| loss | 34.9 |\n| n_updates | 706 |\n| policy_gradient_loss | -0.000829 |\n| value_loss | 79.1 |\n| train_metrics/ | |\n| action_queue_updates_success | 164 |\n| action_queue_updates_total | 168 |\n| ice_dug | 225 |\n| water_produced | 38.8 |\n--------------------------------------------------\n-------------------------------------------------\n| rollout/ | |\n| ep_len_mean | 200 |\n| ep_rew_mean | 28.4 |\n| time/ | |\n| fps | 956 |\n| iterations | 355 |\n| time_elapsed | 1484 |\n| total_timesteps | 1420000 |\n| train/ | |\n| approx_kl | 0.005030956 |\n| clip_fraction | 0.0268 |\n| clip_range | 0.2 |\n| entropy_loss | -1.96 |\n| explained_variance | 0.577 |\n| learning_rate | 0.0003 |\n| loss | 38.1 |\n| n_updates | 708 |\n| policy_gradient_loss | -0.00144 |\n| value_loss | 72.9 |\n| train_metrics/ | |\n| action_queue_updates_success | 147 |\n| action_queue_updates_total | 161 |\n| ice_dug | 206 |\n| water_produced | 28.2 |\n-------------------------------------------------\n-------------------------------------------------\n| rollout/ | |\n| ep_len_mean | 200 |\n| ep_rew_mean | 42.2 |\n| time/ | |\n| fps | 956 |\n| iterations | 356 |\n| time_elapsed | 1488 |\n| total_timesteps | 1424000 |\n| train/ | |\n| approx_kl | 0.004134828 |\n| clip_fraction | 0.0155 |\n| clip_range | 0.2 |\n| entropy_loss | -1.93 |\n| explained_variance | 0.592 |\n| learning_rate | 0.0003 |\n| loss | 48.7 |\n| n_updates | 710 |\n| policy_gradient_loss | -0.000834 |\n| value_loss | 73.2 |\n| train_metrics/ | |\n| action_queue_updates_success | 146 |\n| action_queue_updates_total | 162 |\n| ice_dug | 412 |\n| water_produced | 81.2 |\n-------------------------------------------------\n--------------------------------------------------\n| rollout/ | |\n| ep_len_mean | 200 |\n| ep_rew_mean | 46.9 |\n| time/ | |\n| fps | 957 |\n| iterations | 357 |\n| time_elapsed | 1492 |\n| total_timesteps | 1428000 |\n| train/ | |\n| approx_kl | 0.0011539629 |\n| clip_fraction | 0.000125 |\n| clip_range | 0.2 |\n| entropy_loss | -1.83 |\n| explained_variance | 0.524 |\n| learning_rate | 0.0003 |\n| loss | 126 |\n| n_updates | 712 |\n| policy_gradient_loss | -0.000592 |\n| value_loss | 331 |\n| train_metrics/ | |\n| action_queue_updates_success | 141 |\n| action_queue_updates_total | 150 |\n| ice_dug | 253 |\n| water_produced | 46 
|\n--------------------------------------------------\n--------------------------------------------------\n| rollout/ | |\n| ep_len_mean | 200 |\n| ep_rew_mean | 49 |\n| time/ | |\n| fps | 957 |\n| iterations | 358 |\n| time_elapsed | 1496 |\n| total_timesteps | 1432000 |\n| train/ | |\n| approx_kl | 0.0038175047 |\n| clip_fraction | 0.0161 |\n| clip_range | 0.2 |\n| entropy_loss | -1.69 |\n| explained_variance | 0.502 |\n| learning_rate | 0.0003 |\n| loss | 102 |\n| n_updates | 714 |\n| policy_gradient_loss | -0.00116 |\n| value_loss | 161 |\n| train_metrics/ | |\n| action_queue_updates_success | 132 |\n| action_queue_updates_total | 142 |\n| ice_dug | 285 |\n| water_produced | 37 |\n--------------------------------------------------\n---------------------------------------------------\n| rollout/ | |\n| ep_len_mean | 200 |\n| ep_rew_mean | 49.8 |\n| time/ | |\n| fps | 957 |\n| iterations | 359 |\n| time_elapsed | 1500 |\n| total_timesteps | 1436000 |\n| train/ | |\n| approx_kl | 0.00055099506 |\n| clip_fraction | 0 |\n| clip_range | 0.2 |\n| entropy_loss | -1.59 |\n| explained_variance | 0.555 |\n| learning_rate | 0.0003 |\n| loss | 65.4 |\n| n_updates | 716 |\n| policy_gradient_loss | -0.000637 |\n| value_loss | 106 |\n| train_metrics/ | |\n| action_queue_updates_success | 139 |\n| action_queue_updates_total | 143 |\n| ice_dug | 209 |\n| water_produced | 42.7 |\n---------------------------------------------------\nEval num_timesteps=1440000, episode_reward=100.96 +/- 131.28\nEpisode length: 398.00 +/- 126.24\n-------------------------------------------------\n| eval/ | |\n| mean_ep_length | 398 |\n| mean_reward | 101 |\n| time/ | |\n| total_timesteps | 1440000 |\n| train/ | |\n| approx_kl | 0.001383229 |\n| clip_fraction | 0.00075 |\n| clip_range | 0.2 |\n| entropy_loss | -1.6 |\n| explained_variance | 0.518 |\n| learning_rate | 0.0003 |\n| loss | 44.7 |\n| n_updates | 718 |\n| policy_gradient_loss | -0.000926 |\n| value_loss | 101 |\n| train_metrics/ | |\n| action_queue_updates_success | 141 |\n| action_queue_updates_total | 151 |\n| ice_dug | 185 |\n| water_produced | 30.8 |\n-------------------------------------------------\nNew best mean reward!\n---------------------------------\n| rollout/ | |\n| ep_len_mean | 200 |\n| ep_rew_mean | 50.2 |\n| time/ | |\n| fps | 955 |\n| iterations | 360 |\n| time_elapsed | 1506 |\n| total_timesteps | 1440000 |\n---------------------------------\n---------------------------------------------------\n| rollout/ | |\n| ep_len_mean | 200 |\n| ep_rew_mean | 51.2 |\n| time/ | |\n| fps | 956 |\n| iterations | 361 |\n| time_elapsed | 1510 |\n| total_timesteps | 1444000 |\n| train/ | |\n| approx_kl | 0.00087729207 |\n| clip_fraction | 0.00363 |\n| clip_range | 0.2 |\n| entropy_loss | -1.76 |\n| explained_variance | 0.538 |\n| learning_rate | 0.0003 |\n| loss | 51 |\n| n_updates | 720 |\n| policy_gradient_loss | -0.000333 |\n| value_loss | 92.6 |\n| train_metrics/ | |\n| action_queue_updates_success | 151 |\n| action_queue_updates_total | 155 |\n| ice_dug | 411 |\n| water_produced | 86.2 |\n---------------------------------------------------\n--------------------------------------------------\n| rollout/ | |\n| ep_len_mean | 200 |\n| ep_rew_mean | 55.3 |\n| time/ | |\n| fps | 956 |\n| iterations | 362 |\n| time_elapsed | 1514 |\n| total_timesteps | 1448000 |\n| train/ | |\n| approx_kl | 0.0047625764 |\n| clip_fraction | 0.0252 |\n| clip_range | 0.2 |\n| entropy_loss | -1.66 |\n| explained_variance | 0.536 |\n| learning_rate | 0.0003 |\n| loss | 138 |\n| 
Each training iteration covers 4,000 timesteps and ends with a Stable Baselines 3 console block of the same shape; the block from the 1,500,000-timestep iteration is representative:

--------------------------------------------------
| rollout/                        |              |
|    ep_len_mean                  | 200          |
|    ep_rew_mean                  | 53.1         |
| time/                           |              |
|    fps                          | 957          |
|    iterations                   | 375          |
|    time_elapsed                 | 1566         |
|    total_timesteps              | 1500000      |
| train/                          |              |
|    approx_kl                    | 0.0036763034 |
|    clip_fraction                | 0.0166       |
|    clip_range                   | 0.2          |
|    entropy_loss                 | -1.67        |
|    explained_variance           | 0.51         |
|    learning_rate                | 0.0003       |
|    loss                         | 42.8         |
|    n_updates                    | 748          |
|    policy_gradient_loss         | -0.000443    |
|    value_loss                   | 82.3         |
| train_metrics/                  |              |
|    action_queue_updates_success | 148          |
|    action_queue_updates_total   | 151          |
|    ice_dug                      | 394          |
|    water_produced               | 93.5         |
--------------------------------------------------

Over this excerpt (iterations 363 through 467, roughly 1.45M to 1.87M total timesteps at ~957 fps), ep_rew_mean on the 200-step training episodes fluctuates between about 35 and 96, and the periodic evaluations report:

Eval num_timesteps=1536000, episode_reward=148.20 +/- 180.83
Episode length: 442.00 +/- 173.76
New best mean reward!

Eval num_timesteps=1632000, episode_reward=133.24 +/- 205.73
Episode length: 428.00 +/- 197.52

Eval num_timesteps=1728000, episode_reward=69.88 +/- 83.42
Episode length: 367.00 +/- 81.08

Eval num_timesteps=1824000, episode_reward=136.60 +/- 165.94
Episode length: 430.00 +/- 158.06

By iteration 467 (1,868,000 total timesteps), ep_rew_mean is around 72 and explained_variance around 0.63.
183 |\n| water_produced | 40.5 |\n--------------------------------------------------\n--------------------------------------------------\n| rollout/ | |\n| ep_len_mean | 200 |\n| ep_rew_mean | 61.5 |\n| time/ | |\n| fps | 958 |\n| iterations | 468 |\n| time_elapsed | 1953 |\n| total_timesteps | 1872000 |\n| train/ | |\n| approx_kl | 0.0033152208 |\n| clip_fraction | 0.0155 |\n| clip_range | 0.2 |\n| entropy_loss | -1.53 |\n| explained_variance | 0.556 |\n| learning_rate | 0.0003 |\n| loss | 84.4 |\n| n_updates | 934 |\n| policy_gradient_loss | 0.000314 |\n| value_loss | 145 |\n| train_metrics/ | |\n| action_queue_updates_success | 149 |\n| action_queue_updates_total | 158 |\n| ice_dug | 268 |\n| water_produced | 59.8 |\n--------------------------------------------------\n--------------------------------------------------\n| rollout/ | |\n| ep_len_mean | 200 |\n| ep_rew_mean | 66.8 |\n| time/ | |\n| fps | 958 |\n| iterations | 469 |\n| time_elapsed | 1958 |\n| total_timesteps | 1876000 |\n| train/ | |\n| approx_kl | 0.0009402173 |\n| clip_fraction | 0 |\n| clip_range | 0.2 |\n| entropy_loss | -1.7 |\n| explained_variance | 0.685 |\n| learning_rate | 0.0003 |\n| loss | 96.5 |\n| n_updates | 936 |\n| policy_gradient_loss | -0.000678 |\n| value_loss | 175 |\n| train_metrics/ | |\n| action_queue_updates_success | 149 |\n| action_queue_updates_total | 161 |\n| ice_dug | 330 |\n| water_produced | 80.5 |\n--------------------------------------------------\n---------------------------------------------------\n| rollout/ | |\n| ep_len_mean | 200 |\n| ep_rew_mean | 72.7 |\n| time/ | |\n| fps | 958 |\n| iterations | 470 |\n| time_elapsed | 1962 |\n| total_timesteps | 1880000 |\n| train/ | |\n| approx_kl | 0.00041009142 |\n| clip_fraction | 0 |\n| clip_range | 0.2 |\n| entropy_loss | -1.65 |\n| explained_variance | 0.613 |\n| learning_rate | 0.0003 |\n| loss | 115 |\n| n_updates | 938 |\n| policy_gradient_loss | 9.32e-05 |\n| value_loss | 241 |\n| train_metrics/ | |\n| action_queue_updates_success | 155 |\n| action_queue_updates_total | 167 |\n| ice_dug | 440 |\n| water_produced | 81 |\n---------------------------------------------------\n---------------------------------------------------\n| rollout/ | |\n| ep_len_mean | 200 |\n| ep_rew_mean | 64.8 |\n| time/ | |\n| fps | 958 |\n| iterations | 471 |\n| time_elapsed | 1966 |\n| total_timesteps | 1884000 |\n| train/ | |\n| approx_kl | 0.00075543846 |\n| clip_fraction | 0.000375 |\n| clip_range | 0.2 |\n| entropy_loss | -1.71 |\n| explained_variance | 0.57 |\n| learning_rate | 0.0003 |\n| loss | 139 |\n| n_updates | 940 |\n| policy_gradient_loss | 3.84e-06 |\n| value_loss | 270 |\n| train_metrics/ | |\n| action_queue_updates_success | 142 |\n| action_queue_updates_total | 148 |\n| ice_dug | 219 |\n| water_produced | 48 |\n---------------------------------------------------\n---------------------------------------------------\n| rollout/ | |\n| ep_len_mean | 200 |\n| ep_rew_mean | 71.4 |\n| time/ | |\n| fps | 958 |\n| iterations | 472 |\n| time_elapsed | 1970 |\n| total_timesteps | 1888000 |\n| train/ | |\n| approx_kl | 0.00024510163 |\n| clip_fraction | 0.000125 |\n| clip_range | 0.2 |\n| entropy_loss | -1.5 |\n| explained_variance | 0.527 |\n| learning_rate | 0.0003 |\n| loss | 74.1 |\n| n_updates | 942 |\n| policy_gradient_loss | 2.64e-05 |\n| value_loss | 146 |\n| train_metrics/ | |\n| action_queue_updates_success | 158 |\n| action_queue_updates_total | 162 |\n| ice_dug | 453 |\n| water_produced | 70.8 
|\n---------------------------------------------------\n---------------------------------------------------\n| rollout/ | |\n| ep_len_mean | 200 |\n| ep_rew_mean | 70 |\n| time/ | |\n| fps | 958 |\n| iterations | 473 |\n| time_elapsed | 1974 |\n| total_timesteps | 1892000 |\n| train/ | |\n| approx_kl | 0.00032139808 |\n| clip_fraction | 0 |\n| clip_range | 0.2 |\n| entropy_loss | -1.58 |\n| explained_variance | 0.562 |\n| learning_rate | 0.0003 |\n| loss | 95.4 |\n| n_updates | 944 |\n| policy_gradient_loss | -8.95e-05 |\n| value_loss | 209 |\n| train_metrics/ | |\n| action_queue_updates_success | 146 |\n| action_queue_updates_total | 157 |\n| ice_dug | 277 |\n| water_produced | 52.7 |\n---------------------------------------------------\n---------------------------------------------------\n| rollout/ | |\n| ep_len_mean | 200 |\n| ep_rew_mean | 70.6 |\n| time/ | |\n| fps | 958 |\n| iterations | 474 |\n| time_elapsed | 1977 |\n| total_timesteps | 1896000 |\n| train/ | |\n| approx_kl | 0.00019400133 |\n| clip_fraction | 0 |\n| clip_range | 0.2 |\n| entropy_loss | -1.61 |\n| explained_variance | 0.568 |\n| learning_rate | 0.0003 |\n| loss | 73.1 |\n| n_updates | 946 |\n| policy_gradient_loss | 4.46e-05 |\n| value_loss | 152 |\n| train_metrics/ | |\n| action_queue_updates_success | 149 |\n| action_queue_updates_total | 153 |\n| ice_dug | 376 |\n| water_produced | 83 |\n---------------------------------------------------\n---------------------------------------------------\n| rollout/ | |\n| ep_len_mean | 200 |\n| ep_rew_mean | 59.4 |\n| time/ | |\n| fps | 958 |\n| iterations | 475 |\n| time_elapsed | 1981 |\n| total_timesteps | 1900000 |\n| train/ | |\n| approx_kl | 0.00017442036 |\n| clip_fraction | 0 |\n| clip_range | 0.2 |\n| entropy_loss | -1.5 |\n| explained_variance | 0.541 |\n| learning_rate | 0.0003 |\n| loss | 124 |\n| n_updates | 948 |\n| policy_gradient_loss | 0.000144 |\n| value_loss | 227 |\n| train_metrics/ | |\n| action_queue_updates_success | 141 |\n| action_queue_updates_total | 153 |\n| ice_dug | 187 |\n| water_produced | 27.2 |\n---------------------------------------------------\n---------------------------------------------------\n| rollout/ | |\n| ep_len_mean | 200 |\n| ep_rew_mean | 60.5 |\n| time/ | |\n| fps | 958 |\n| iterations | 476 |\n| time_elapsed | 1985 |\n| total_timesteps | 1904000 |\n| train/ | |\n| approx_kl | 0.00016221762 |\n| clip_fraction | 0 |\n| clip_range | 0.2 |\n| entropy_loss | -1.59 |\n| explained_variance | 0.573 |\n| learning_rate | 0.0003 |\n| loss | 51.2 |\n| n_updates | 950 |\n| policy_gradient_loss | -4.32e-05 |\n| value_loss | 111 |\n| train_metrics/ | |\n| action_queue_updates_success | 149 |\n| action_queue_updates_total | 152 |\n| ice_dug | 310 |\n| water_produced | 52.8 |\n---------------------------------------------------\n--------------------------------------------------\n| rollout/ | |\n| ep_len_mean | 200 |\n| ep_rew_mean | 64.4 |\n| time/ | |\n| fps | 958 |\n| iterations | 477 |\n| time_elapsed | 1990 |\n| total_timesteps | 1908000 |\n| train/ | |\n| approx_kl | 0.0010714286 |\n| clip_fraction | 0.001 |\n| clip_range | 0.2 |\n| entropy_loss | -1.49 |\n| explained_variance | 0.538 |\n| learning_rate | 0.0003 |\n| loss | 87 |\n| n_updates | 952 |\n| policy_gradient_loss | -0.000912 |\n| value_loss | 190 |\n| train_metrics/ | |\n| action_queue_updates_success | 153 |\n| action_queue_updates_total | 162 |\n| ice_dug | 434 |\n| water_produced | 90.2 
|\n--------------------------------------------------\n-------------------------------------------------\n| rollout/ | |\n| ep_len_mean | 200 |\n| ep_rew_mean | 63.1 |\n| time/ | |\n| fps | 958 |\n| iterations | 478 |\n| time_elapsed | 1994 |\n| total_timesteps | 1912000 |\n| train/ | |\n| approx_kl | 0.005110504 |\n| clip_fraction | 0.0207 |\n| clip_range | 0.2 |\n| entropy_loss | -1.57 |\n| explained_variance | 0.608 |\n| learning_rate | 0.0003 |\n| loss | 124 |\n| n_updates | 954 |\n| policy_gradient_loss | 0.00151 |\n| value_loss | 225 |\n| train_metrics/ | |\n| action_queue_updates_success | 145 |\n| action_queue_updates_total | 152 |\n| ice_dug | 304 |\n| water_produced | 46 |\n-------------------------------------------------\n--------------------------------------------------\n| rollout/ | |\n| ep_len_mean | 200 |\n| ep_rew_mean | 75 |\n| time/ | |\n| fps | 958 |\n| iterations | 479 |\n| time_elapsed | 1998 |\n| total_timesteps | 1916000 |\n| train/ | |\n| approx_kl | 0.0016839042 |\n| clip_fraction | 0.00213 |\n| clip_range | 0.2 |\n| entropy_loss | -1.54 |\n| explained_variance | 0.547 |\n| learning_rate | 0.0003 |\n| loss | 70.8 |\n| n_updates | 956 |\n| policy_gradient_loss | -0.00066 |\n| value_loss | 146 |\n| train_metrics/ | |\n| action_queue_updates_success | 147 |\n| action_queue_updates_total | 151 |\n| ice_dug | 615 |\n| water_produced | 140 |\n--------------------------------------------------\n","output_type":"stream"},{"name":"stderr","text":"Process ForkServerProcess-14:\nTraceback (most recent call last):\n File \"/opt/conda/lib/python3.7/multiprocessing/process.py\", line 297, in _bootstrap\n self.run()\n File \"/opt/conda/lib/python3.7/multiprocessing/process.py\", line 99, in run\n self._target(*self._args, **self._kwargs)\n File \"/opt/conda/lib/python3.7/site-packages/stable_baselines3/common/vec_env/subproc_vec_env.py\", line 30, in _worker\n observation, reward, done, info = env.step(data)\n File \"/opt/conda/lib/python3.7/site-packages/stable_baselines3/common/monitor.py\", line 94, in step\n observation, reward, done, info = self.env.step(action)\n File \"/opt/conda/lib/python3.7/site-packages/gym/wrappers/time_limit.py\", line 18, in step\n observation, reward, done, info = self.env.step(action)\n File \"/tmp/ipykernel_862/2513648194.py\", line 25, in step\nTypeError: 'bool' object is not subscriptable\n","output_type":"stream"},{"traceback":["\u001b[0;31m---------------------------------------------------------------------------\u001b[0m","\u001b[0;31mEOFError\u001b[0m Traceback (most recent call last)","\u001b[0;32m/tmp/ipykernel_862/3377558824.py\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 2\u001b[0m model.learn(\n\u001b[1;32m 3\u001b[0m \u001b[0mtotal_timesteps\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 4\u001b[0;31m \u001b[0mcallback\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mTensorboardCallback\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtag\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m\"train_metrics\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0meval_callback\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 5\u001b[0m )\n\u001b[1;32m 6\u001b[0m \u001b[0mmodel\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msave\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mosp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mjoin\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mlog_path\u001b[0m\u001b[0;34m,\u001b[0m 
\u001b[0;34m\"models/latest_model\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n","\u001b[0;32m/opt/conda/lib/python3.7/site-packages/stable_baselines3/ppo/ppo.py\u001b[0m in \u001b[0;36mlearn\u001b[0;34m(self, total_timesteps, callback, log_interval, tb_log_name, reset_num_timesteps, progress_bar)\u001b[0m\n\u001b[1;32m 311\u001b[0m \u001b[0mtb_log_name\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mtb_log_name\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 312\u001b[0m \u001b[0mreset_num_timesteps\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mreset_num_timesteps\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 313\u001b[0;31m \u001b[0mprogress_bar\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mprogress_bar\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 314\u001b[0m )\n","\u001b[0;32m/opt/conda/lib/python3.7/site-packages/stable_baselines3/common/on_policy_algorithm.py\u001b[0m in \u001b[0;36mlearn\u001b[0;34m(self, total_timesteps, callback, log_interval, tb_log_name, reset_num_timesteps, progress_bar)\u001b[0m\n\u001b[1;32m 246\u001b[0m \u001b[0;32mwhile\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mnum_timesteps\u001b[0m \u001b[0;34m<\u001b[0m \u001b[0mtotal_timesteps\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 247\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 248\u001b[0;31m \u001b[0mcontinue_training\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcollect_rollouts\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0menv\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcallback\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrollout_buffer\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mn_rollout_steps\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mn_steps\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 249\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 250\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mcontinue_training\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mFalse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n","\u001b[0;32m/opt/conda/lib/python3.7/site-packages/stable_baselines3/common/on_policy_algorithm.py\u001b[0m in \u001b[0;36mcollect_rollouts\u001b[0;34m(self, env, callback, rollout_buffer, n_rollout_steps)\u001b[0m\n\u001b[1;32m 179\u001b[0m \u001b[0;31m# Give access to local variables\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 180\u001b[0m \u001b[0mcallback\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mupdate_locals\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mlocals\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 181\u001b[0;31m \u001b[0;32mif\u001b[0m \u001b[0mcallback\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mon_step\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mFalse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 182\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0;32mFalse\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 
183\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n","\u001b[0;32m/opt/conda/lib/python3.7/site-packages/stable_baselines3/common/callbacks.py\u001b[0m in \u001b[0;36mon_step\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 98\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mnum_timesteps\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmodel\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mnum_timesteps\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 99\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 100\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_on_step\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 101\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 102\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mon_training_end\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m->\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n","\u001b[0;32m/opt/conda/lib/python3.7/site-packages/stable_baselines3/common/callbacks.py\u001b[0m in \u001b[0;36m_on_step\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 202\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mcallback\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcallbacks\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 203\u001b[0m \u001b[0;31m# Return False (stop training) if at least one callback returns False\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 204\u001b[0;31m \u001b[0mcontinue_training\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcallback\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mon_step\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0mcontinue_training\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 205\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mcontinue_training\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 206\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n","\u001b[0;32m/opt/conda/lib/python3.7/site-packages/stable_baselines3/common/callbacks.py\u001b[0m in \u001b[0;36mon_step\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 98\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mnum_timesteps\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmodel\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mnum_timesteps\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 99\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 100\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_on_step\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 101\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 102\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mon_training_end\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m->\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n","\u001b[0;32m/opt/conda/lib/python3.7/site-packages/stable_baselines3/common/callbacks.py\u001b[0m in \u001b[0;36m_on_step\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 453\u001b[0m 
\u001b[0mreturn_episode_rewards\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mTrue\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 454\u001b[0m \u001b[0mwarn\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mwarn\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 455\u001b[0;31m \u001b[0mcallback\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_log_success_callback\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 456\u001b[0m )\n\u001b[1;32m 457\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n","\u001b[0;32m/opt/conda/lib/python3.7/site-packages/stable_baselines3/common/evaluation.py\u001b[0m in \u001b[0;36mevaluate_policy\u001b[0;34m(model, env, n_eval_episodes, deterministic, render, callback, reward_threshold, return_episode_rewards, warn)\u001b[0m\n\u001b[1;32m 87\u001b[0m \u001b[0;32mwhile\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mepisode_counts\u001b[0m \u001b[0;34m<\u001b[0m \u001b[0mepisode_count_targets\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0many\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 88\u001b[0m \u001b[0mactions\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mstates\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mmodel\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpredict\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mobservations\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mstate\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mstates\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mepisode_start\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mepisode_starts\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdeterministic\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mdeterministic\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 89\u001b[0;31m \u001b[0mobservations\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mrewards\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdones\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0minfos\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0menv\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstep\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mactions\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 90\u001b[0m \u001b[0mcurrent_rewards\u001b[0m \u001b[0;34m+=\u001b[0m \u001b[0mrewards\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 91\u001b[0m \u001b[0mcurrent_lengths\u001b[0m \u001b[0;34m+=\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n","\u001b[0;32m/opt/conda/lib/python3.7/site-packages/stable_baselines3/common/vec_env/base_vec_env.py\u001b[0m in \u001b[0;36mstep\u001b[0;34m(self, actions)\u001b[0m\n\u001b[1;32m 161\u001b[0m \"\"\"\n\u001b[1;32m 162\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstep_async\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mactions\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 163\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstep_wait\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 164\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 165\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mget_images\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m 
\u001b[0;34m->\u001b[0m \u001b[0mSequence\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mndarray\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n","\u001b[0;32m/opt/conda/lib/python3.7/site-packages/stable_baselines3/common/vec_env/subproc_vec_env.py\u001b[0m in \u001b[0;36mstep_wait\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 119\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 120\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mstep_wait\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m->\u001b[0m \u001b[0mVecEnvStepReturn\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 121\u001b[0;31m \u001b[0mresults\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0mremote\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrecv\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mremote\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mremotes\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 122\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mwaiting\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mFalse\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 123\u001b[0m \u001b[0mobs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mrews\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdones\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0minfos\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mzip\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0mresults\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n","\u001b[0;32m/opt/conda/lib/python3.7/site-packages/stable_baselines3/common/vec_env/subproc_vec_env.py\u001b[0m in \u001b[0;36m\u001b[0;34m(.0)\u001b[0m\n\u001b[1;32m 119\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 120\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mstep_wait\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m->\u001b[0m \u001b[0mVecEnvStepReturn\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 121\u001b[0;31m \u001b[0mresults\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0mremote\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrecv\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mremote\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mremotes\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 122\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mwaiting\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mFalse\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 123\u001b[0m \u001b[0mobs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mrews\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdones\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0minfos\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mzip\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0mresults\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n","\u001b[0;32m/opt/conda/lib/python3.7/multiprocessing/connection.py\u001b[0m in \u001b[0;36mrecv\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 248\u001b[0m 
\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_check_closed\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 249\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_check_readable\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 250\u001b[0;31m \u001b[0mbuf\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_recv_bytes\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 251\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0m_ForkingPickler\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mloads\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mbuf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mgetbuffer\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 252\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n","\u001b[0;32m/opt/conda/lib/python3.7/multiprocessing/connection.py\u001b[0m in \u001b[0;36m_recv_bytes\u001b[0;34m(self, maxsize)\u001b[0m\n\u001b[1;32m 405\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 406\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m_recv_bytes\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmaxsize\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 407\u001b[0;31m \u001b[0mbuf\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_recv\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m4\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 408\u001b[0m \u001b[0msize\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mstruct\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0munpack\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"!i\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mbuf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mgetvalue\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 409\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mmaxsize\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32mNone\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0msize\u001b[0m \u001b[0;34m>\u001b[0m \u001b[0mmaxsize\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n","\u001b[0;32m/opt/conda/lib/python3.7/multiprocessing/connection.py\u001b[0m in \u001b[0;36m_recv\u001b[0;34m(self, size, read)\u001b[0m\n\u001b[1;32m 381\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mn\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;36m0\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 382\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mremaining\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0msize\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 383\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0mEOFError\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 384\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 385\u001b[0m \u001b[0;32mraise\u001b[0m \u001b[0mOSError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"got end of file during 
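The TypeError is raised from code defined in the notebook kernel itself (/tmp/ipykernel_862/2513648194.py, line 25), that is, the custom step written in an earlier cell rather than library code, and the message indicates that something the step subscripts is already a plain bool. Below is a minimal sketch of the per-player contract such a wrapper normally has to respect; the class name SingleAgentView and the exact keys are illustrative assumptions, not code from this notebook or repository.

```python
import gym


class SingleAgentView(gym.Wrapper):
    """Illustrative wrapper giving player_0 a single-agent view of the two-player env.

    Sketch only: the wrapped env is assumed to return dicts keyed by
    "player_0"/"player_1", as the raw LuxAI_S2 environment does.
    """

    def step(self, action):
        obs, reward, done, info = self.env.step({"player_0": action, "player_1": dict()})
        # Each return value is a per-player dict, so index our player out of it.
        # If an inner wrapper has already collapsed `done` to a plain bool, the line
        # below raises exactly: TypeError: 'bool' object is not subscriptable
        return obs["player_0"], reward["player_0"], done["player_0"], info["player_0"]
```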
message\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n","\u001b[0;31mEOFError\u001b[0m: "],"ename":"EOFError","evalue":"","output_type":"error"}]},{"cell_type":"markdown","source":"## Packaging and Submission\n\nWe now have a trained policy. In order to make it submittable to the competition we recommend you use the RL starter kit code (which is already imported into this tutorial if you pulled from GitHub or used Kaggle).","metadata":{}},{"cell_type":"code","source":"","metadata":{},"execution_count":null,"outputs":[]}]} \ No newline at end of file diff --git a/luxai_s2/luxai_s2/env.py b/luxai_s2/luxai_s2/env.py index 39a5df94..338f3da7 100644 --- a/luxai_s2/luxai_s2/env.py +++ b/luxai_s2/luxai_s2/env.py @@ -1097,6 +1097,5 @@ def raw_env() -> LuxAI_S2: gym.register( id="LuxAI_S2-v0", - entry_point="luxai_s2.env:LuxAI_S2", - max_episode_steps=1000, + entry_point="luxai_s2.env:LuxAI_S2" ) diff --git a/luxai_s2/luxai_s2/utils/heuristics/bidding.py b/luxai_s2/luxai_s2/utils/heuristics/bidding.py index e69de29b..68c036fe 100644 --- a/luxai_s2/luxai_s2/utils/heuristics/bidding.py +++ b/luxai_s2/luxai_s2/utils/heuristics/bidding.py @@ -0,0 +1,7 @@ +from luxai_s2.unit import BidActionType +from luxai_s2.state import ObservationStateDict +def zero_bid(player, obs: ObservationStateDict) -> BidActionType: + faction = "AlphaStrike" + if player == "player_1": + faction = "MotherMars" + return dict(bid=0, faction=faction) \ No newline at end of file diff --git a/luxai_s2/luxai_s2/utils/heuristics/factory_placement.py b/luxai_s2/luxai_s2/utils/heuristics/factory_placement.py index c3467aee..4f940625 100644 --- a/luxai_s2/luxai_s2/utils/heuristics/factory_placement.py +++ b/luxai_s2/luxai_s2/utils/heuristics/factory_placement.py @@ -1,7 +1,15 @@ import numpy as np - +from luxai_s2.unit import FactoryPlacementActionType from luxai_s2.state import ObservationStateDict +def random_factory_placement(player, obs: ObservationStateDict) -> FactoryPlacementActionType: + """ + This policy places factories with 150 water and metal at random locations + """ + # we will spawn our factory in a random location with 150 metal and water if it is our turn to place + potential_spawns = np.array(list(zip(*np.where(obs["board"]["valid_spawns_mask"] == 1)))) + spawn_loc = potential_spawns[np.random.randint(0, len(potential_spawns))] + return dict(spawn=spawn_loc, metal=150, water=150) def place_near_random_ice(player, obs: ObservationStateDict): if obs["teams"][player]["metal"] == 0: diff --git a/luxai_s2/luxai_s2/wrappers/__init__.py b/luxai_s2/luxai_s2/wrappers/__init__.py index 9b072569..370b654f 100644 --- a/luxai_s2/luxai_s2/wrappers/__init__.py +++ b/luxai_s2/luxai_s2/wrappers/__init__.py @@ -1,3 +1,2 @@ -from .controllers import SimpleDiscreteController, SimpleSingleUnitDiscreteController -from .obs_wrappers import ImageObservationWrapper, SingleUnitObservationWrapper +from .controllers import Controller from .sb3 import SB3Wrapper diff --git a/luxai_s2/luxai_s2/wrappers/controllers.py b/luxai_s2/luxai_s2/wrappers/controllers.py index 608860eb..d463bfca 100644 --- a/luxai_s2/luxai_s2/wrappers/controllers.py +++ b/luxai_s2/luxai_s2/wrappers/controllers.py @@ -1,242 +1,22 @@ -from typing import Dict +from typing import Dict, Any -import numpy as np import numpy.typing as npt from gym import spaces -from luxai_s2.actions import format_action_vec -from luxai_s2.config import EnvConfig -from luxai_s2.state import ObservationStateDict - - class Controller: def __init__(self, 
action_space: spaces.Space) -> None: self.action_space = action_space def action_to_lux_action( - self, agent: str, obs: Dict[str, ObservationStateDict], action: npt.NDArray + self, agent: str, obs: Dict[str, Any], action: npt.NDArray ): """ Takes as input the current "raw observation" and the parameterized action and returns an action formatted for the Lux env """ raise NotImplementedError() - - -class SimpleSingleUnitDiscreteController(Controller): - def __init__(self, env_cfg: EnvConfig) -> None: - """ - A simple controller that controls only the heavy unit that will get spawned. This assumes for whichever environment wrapper you use - you have defined a policy to generate the first factory action - - For the heavy unit - - 4 cardinal direction movement (4 dims) - - a move center no-op action (1 dim) - - transfer action just for transferring ice in 4 cardinal directions or center (5) - - pickup action for each resource (5 dims) - - dig action (1 dim) - - It does not include - - self destruct action - - recharge action - - planning (via actions executing multiple times or repeating actions) - - factory actions - - transferring power or resources other than ice + def action_masks(self, agent: str, obs: Dict[str, Any]): """ - self.env_cfg = env_cfg - self.move_act_dims = 5 - self.transfer_act_dims = 5 # 5 * 5 - self.pickup_act_dims = 5 - self.dig_act_dims = 1 - - self.move_dim_high = self.move_act_dims - self.transfer_dim_high = self.move_dim_high + self.transfer_act_dims - self.pickup_dim_high = self.transfer_dim_high + self.pickup_act_dims - self.dig_dim_high = self.pickup_dim_high + self.dig_act_dims - - total_act_dims = self.dig_dim_high - # action_space = spaces.Box(0, 1, shape=(total_act_dims,)) - action_space = spaces.Discrete(total_act_dims) - super().__init__(action_space) - - def _is_move_action(self, id): - return id < self.move_dim_high - - def _get_move_action(self, id): - return np.array([0, id, 0, 0, 0, 1]) - - def _is_transfer_action(self, id): - return id < self.transfer_dim_high - - def _get_transfer_action(self, id): - id = id - self.move_dim_high - transfer_dir = id % 5 - # resource_type = id // 5 - return np.array([1, transfer_dir, 0, self.env_cfg.max_transfer_amount, 0, 1]) - - def _is_pickup_action(self, id): - return id < self.pickup_dim_high - - def _get_pickup_action(self, id): - id = id - self.transfer_dim_high - return np.array([2, 0, id % 5, self.env_cfg.max_transfer_amount, 0, 1]) - - def _is_dig_action(self, id): - return id < self.dig_dim_high - - def _get_dig_action(self, id): - return np.array([3, 0, 0, 0, 0, 1]) - - def action_to_lux_action( - self, agent: str, obs: Dict[str, ObservationStateDict], action: npt.NDArray - ): - shared_obs = obs["player_0"] - lux_action = dict() - factories = shared_obs["factories"][agent] - units = shared_obs["units"][agent] - for unit_id in units.keys(): - unit = units[unit_id] - pos = unit["pos"] - unit_related_action = action - choice = action # unit_related_action.argmax() - action_queue = [] - if self._is_move_action(choice): - action_queue = [self._get_move_action(choice)] - elif self._is_transfer_action(choice): - action_queue = [self._get_transfer_action(choice)] - elif self._is_pickup_action(choice): - action_queue = [self._get_pickup_action(choice)] - - elif self._is_dig_action(choice): - action_queue = [self._get_dig_action(choice)] - lux_action[unit_id] = action_queue - # only control the first unit! 
- break - return lux_action - - -class SimpleDiscreteController(Controller): - def __init__(self, env_cfg: EnvConfig) -> None: + Generates a boolean action mask indicating in each discrete dimension whether it would be valid or not """ - A simple controller that uses a discrete action parameterization for Lux AI S2. It includes - - For units - - 4 cardinal direction movement (4 dims) - - a move center no-op action (1 dim) - - transfer action for each combination of the (4 cardinal directions plus center) x (resource type or power) (5*5 = 25 dims) - - pickup action for each resource (5 dims) - - dig action (1 dim) - - For factories - - all actions (build light, heavy, or water) (3 dims) - - - It does not include - - self destruct action - - recharge action - - planning (via actions executing multiple times or repeating actions) - - Sampling from this controller will always result in a valid action, albeit sometimes disastrous - """ - self.env_cfg = env_cfg - self.move_act_dims = 5 - self.transfer_act_dims = 5 * 5 - self.pickup_act_dims = 5 - self.dig_act_dims = 1 - # self.self_destruct_act_dims = 1 - # self.recharge_act_dims = 1 - self.factory_act_dims = 3 # 0 = light, 1 = heavy, 2 = water - - self.move_dim_high = self.move_act_dims - self.transfer_dim_high = self.move_dim_high + self.transfer_act_dims - self.pickup_dim_high = self.transfer_dim_high + self.pickup_act_dims - self.dig_dim_high = self.pickup_dim_high + self.dig_act_dims - - self.factory_dim_high = 3 # self.dig_dim_high + self.factory_act_dims - - total_act_dims = self.factory_dim_high - # action_space = spaces.Discrete(total_act_dims) - action_space = spaces.Box( - 0, 1, shape=(env_cfg.map_size, env_cfg.map_size, total_act_dims) - ) - - super().__init__(action_space) - - # note that all the _is_x_action are meant to be called in a if, elseif... 
cascade/waterfall - # to understand how _get_x_action works to map the parameterization back to the original action space see luxai_s2/actions.py - def _is_move_action(self, id): - return id < self.move_dim_high - - def _get_move_action(self, id): - return np.array([0, id, 0, 0, 0, 1]) - - def _is_transfer_action(self, id): - return id < self.transfer_dim_high - - def _get_transfer_action(self, id): - id = id - self.move_dim_high - transfer_dir = id % 5 - resource_type = id // 5 - return np.array( - [1, transfer_dir, resource_type, self.env_cfg.max_transfer_amount, 0, 1] - ) - - def _is_pickup_action(self, id): - return id < self.pickup_dim_high - - def _get_pickup_action(self, id): - id = id - self.transfer_dim_high - return np.array([2, 0, id % 5, self.env_cfg.max_transfer_amount, 0, 1]) - - def _is_dig_action(self, id): - return id < self.dig_dim_high - - def _get_dig_action(self, id): - return np.array([3, 0, 0, 0, 0, 1]) - - # def _is_self_destruct_action(self, id): - # return id < self.move_act_dims + self.transfer_act_dims + self.self_destruct_dims - # def _get_self_destruct_action(self, id): - # return [2, 0, 0, 0, 0, 1] - - def action_to_lux_action( - self, agent: str, obs: Dict[str, ObservationStateDict], action: npt.NDArray - ): - """ - Generate an action compatible with LuxAI_S2 engine for a single player - """ - shared_obs = obs["player_0"] - lux_action = dict() - factories = shared_obs["factories"][agent] - units = shared_obs["units"][agent] - for unit_id in units.keys(): - unit = units[unit_id] - pos = unit["pos"] - action_here = action[pos[0], pos[1]] - unit_related_action = action_here[ - : -self.factory_act_dims - ] # assuming factory action is always the final few dimensions - choice = unit_related_action.argmax() - action_queue = [] - # if self._is_move_action(choice): - # action_queue = [self._get_move_action(choice)] - # elif self._is_transfer_action(choice): - # action_queue = [self._get_transfer_action(choice)] - # elif self._is_pickup_action(choice): - # action_queue = [self._get_pickup_action(choice)] - # elif self._is_dig_action(choice): - # action_queue = [self._get_dig_action(choice)] - - lux_action[unit_id] = action_queue - - for unit_id in factories.keys(): - factory = factories[unit_id] - pos = factory["pos"] - - action_here = action[pos[0], pos[1]] - factory_related_action = action_here[ - -self.factory_act_dims : - ] # assuming factory action is always the final few dimensions - choice = factory_related_action.argmax() - lux_action[unit_id] = choice - return lux_action + raise NotImplementedError() diff --git a/luxai_s2/luxai_s2/wrappers/lux.py b/luxai_s2/luxai_s2/wrappers/lux.py deleted file mode 100644 index 7822d033..00000000 --- a/luxai_s2/luxai_s2/wrappers/lux.py +++ /dev/null @@ -1,27 +0,0 @@ -""" -Wrappers that allow users to insert heuristics into the environment reset and step functions -""" -from typing import Dict - -import gym -import numpy as np -import numpy.typing as npt -from gym import spaces - -import luxai_s2.env -from luxai_s2.env import LuxAI_S2 -from luxai_s2.state import ObservationStateDict -from luxai_s2.utils import my_turn_to_place_factory -from luxai_s2.wrappers.controllers import ( - Controller, - SimpleDiscreteController, - SimpleSingleUnitDiscreteController, -) - - -class FactoryControlWrapper(gym.Wrapper): - def __init__(self, env: gym.Env) -> None: - super().__init__(env) - - def step(self, action): - return super().step(action) diff --git a/luxai_s2/luxai_s2/wrappers/obs_wrappers.py 
b/luxai_s2/luxai_s2/wrappers/obs_wrappers.py deleted file mode 100644 index 16e08c99..00000000 --- a/luxai_s2/luxai_s2/wrappers/obs_wrappers.py +++ /dev/null @@ -1,195 +0,0 @@ -from typing import Callable, Dict - -import gym -import numpy as np -import numpy.typing as npt -from gym import spaces - -import luxai_s2.env -from luxai_s2.env import LuxAI_S2 -from luxai_s2.state import ObservationStateDict -from luxai_s2.unit import ActionType, BidActionType, FactoryPlacementActionType -from luxai_s2.utils import my_turn_to_place_factory -from luxai_s2.wrappers.controllers import ( - Controller, - SimpleDiscreteController, - SimpleSingleUnitDiscreteController, -) - - -class SingleUnitObservationWrapper(gym.ObservationWrapper): - """ - A state based observation to work with in pair with the SimpleSingleUnitDiscreteController - - It contains info only on the first heavy unit, the first factory you own, and some useful features. If there are no owned heavy units the observation is just zero. - No information about the opponent is included - - - Included features: - - First heavy unit's stats - - Position of closest ice tile - - First factory - - """ - - def __init__(self, env: gym.Env) -> None: - super().__init__(env) - self.observation_space = spaces.Box(-999, 999, shape=(13,)) - - def observation( - self, obs: Dict[str, ObservationStateDict] - ) -> Dict[str, npt.NDArray]: - observation = dict() - shared_obs = obs["player_0"] - ice_map = shared_obs["board"]["ice"] - ice_tile_locations = np.argwhere(ice_map == 1) - for agent in obs.keys(): - factories = shared_obs["factories"][agent] - factory_vec = np.zeros(2) - for k in factories.keys(): - factory = factories[k] - factory_vec = np.array(factory["pos"]) / self.env.state.env_cfg.map_size - break - units = shared_obs["units"][agent] - for k in units.keys(): - unit = units[k] - cargo_space = self.env.state.env_cfg.ROBOTS[ - unit["unit_type"] - ].CARGO_SPACE - battery_cap = self.env.state.env_cfg.ROBOTS[ - unit["unit_type"] - ].BATTERY_CAPACITY - cargo_vec = np.array( - [ - unit["power"] / battery_cap, - unit["cargo"]["ice"] / cargo_space, - unit["cargo"]["ore"] / cargo_space, - unit["cargo"]["water"] / cargo_space, - unit["cargo"]["metal"] / cargo_space, - ] - ) - unit_type = ( - 0 if unit["unit_type"] == "LIGHT" else 1 - ) # note that build actions use 0 to encode Light - - pos = np.array(unit["pos"]) / self.env.state.env_cfg.map_size - unit_vec = np.concatenate( - [pos, [unit_type], cargo_vec, [unit["team_id"]]], axis=-1 - ) - # engineered features - - # compute closest ice tile - ice_tile_distances = np.mean( - (ice_tile_locations - np.array(unit["pos"])) ** 2, 1 - ) - closest_ice_tile = ( - ice_tile_locations[np.argmin(ice_tile_distances)] - / self.env.state.env_cfg.map_size - ) - obs_vec = np.concatenate( - [unit_vec, factory_vec - pos, closest_ice_tile - pos], axis=-1 - ) - - observation[agent] = obs_vec - break - if agent not in observation: - observation[agent] = np.zeros(13) - return observation - - -class ImageObservationWrapper(gym.ObservationWrapper): - def __init__(self, env: gym.Env) -> None: - super().__init__(env) - obs_dims = 23 # see _convert_obs function for how this is computed - self.map_size = self.env.env_cfg.map_size - self.observation_space = spaces.Box( - -999, 999, shape=(self.map_size, self.map_size, obs_dims) - ) - - def observation( - self, obs: Dict[str, ObservationStateDict] - ) -> Dict[str, npt.NDArray]: - shared_obs = obs["player_0"] - unit_mask = np.zeros((self.map_size, self.map_size, 1)) - unit_data = np.zeros( - 
(self.map_size, self.map_size, 9) - ) # power(1) + cargo(4) + unit_type(1) + unit_pos(2) + team(1) - factory_mask = np.zeros_like(unit_mask) - factory_data = np.zeros( - (self.map_size, self.map_size, 8) - ) # power(1) + cargo(4) + factory_pos(2) + team(1) - for agent in obs.keys(): - factories = shared_obs["factories"][agent] - units = shared_obs["units"][agent] - - for unit_id in units.keys(): - unit = units[unit_id] - # we encode everything but unit_id or action queue - cargo_space = self.env.state.env_cfg.ROBOTS[ - unit["unit_type"] - ].CARGO_SPACE - battery_cap = self.env.state.env_cfg.ROBOTS[ - unit["unit_type"] - ].BATTERY_CAPACITY - cargo_vec = np.array( - [ - unit["power"] / battery_cap, - unit["cargo"]["ice"] / cargo_space, - unit["cargo"]["ore"] / cargo_space, - unit["cargo"]["water"] / cargo_space, - unit["cargo"]["metal"] / cargo_space, - ] - ) - unit_type = ( - 0 if unit["unit_type"] == "LIGHT" else 1 - ) # note that build actions use 0 to encode Light - unit_vec = np.concatenate( - [unit["pos"], [unit_type], cargo_vec, [unit["team_id"]]], axis=-1 - ) - unit_vec[:2] /= self.env.state.env_cfg.map_size - - # note that all data is stored as map[x, y] format - unit_data[unit["pos"][0], unit["pos"][1]] = unit_vec - unit_mask[unit["pos"][0], unit["pos"][1]] = 1 - - for unit_id in factories.keys(): - factory = factories[unit_id] - # we encode everything but strain_id or unit_id - cargo_vec = np.array( - [ - factory["power"], - factory["cargo"]["ice"], - factory["cargo"]["ore"], - factory["cargo"]["water"], - factory["cargo"]["metal"], - ] - ) - cargo_vec = cargo_vec * 1 / 1000 - - factory_vec = np.concatenate( - [factory["pos"], cargo_vec, [factory["team_id"]]], axis=-1 - ) - factory_vec[:2] /= self.env.state.env_cfg.map_size - factory_data[factory["pos"][0], factory["pos"][1]] = factory_vec - factory_mask[factory["pos"][0], factory["pos"][1]] = 1 - - image_features = np.concatenate( - [ - np.expand_dims(shared_obs["board"]["lichen"], -1) - / self.env.state.env_cfg.MAX_LICHEN_PER_TILE, - np.expand_dims(shared_obs["board"]["rubble"], -1) - / self.env.state.env_cfg.MAX_RUBBLE, - np.expand_dims(shared_obs["board"]["ice"], -1), - np.expand_dims(shared_obs["board"]["ore"], -1), - unit_mask, - unit_data, - factory_mask, - factory_data, - ], - axis=-1, - ) - - new_obs = dict() - for agent in self.all_agents: - new_obs[agent] = image_features - return new_obs diff --git a/luxai_s2/luxai_s2/wrappers/sb3.py b/luxai_s2/luxai_s2/wrappers/sb3.py index 7e3126f5..9da6965f 100644 --- a/luxai_s2/luxai_s2/wrappers/sb3.py +++ b/luxai_s2/luxai_s2/wrappers/sb3.py @@ -12,8 +12,6 @@ from luxai_s2.utils import my_turn_to_place_factory from luxai_s2.wrappers.controllers import ( Controller, - SimpleDiscreteController, - SimpleSingleUnitDiscreteController, ) @@ -27,9 +25,6 @@ def __init__( factory_placement_policy: Callable[ [str, ObservationStateDict], Dict[str, FactoryPlacementActionType] ] = None, - heuristic_policy: Callable[ - [str, ObservationStateDict], Dict[str, ActionType] - ] = None, controller: Controller = None, ) -> None: """ @@ -37,7 +32,7 @@ def __init__( into a single phase game and places the first two phases (bidding and factory placement) into the env.reset function so that interacting agents directly start generating actions to play the third phase of the game. 
- It's highly recommended to use one of the observation wrappers as well + It also accepts a Controller that translates action's in one action space to a Lux S2 compatible action Parameters ---------- @@ -50,30 +45,19 @@ def __init__( controller : Controller A controller that parameterizes the action space into something more usable and converts parameterized actions to lux actions. See luxai_s2/wrappers/controllers.py for available controllers and how to make your own - - heuristic_policy: Function - A function accepting player: str and obs: ObservationStateDict as input and returns a lux action. This can be provided by the user - to define custom logic or a model to generate actions for any of the units or factories. For any action generate for a unit or factory, it will - override the original action for that unit or factory when the step function is called. By defalt this is None and not used """ gym.Wrapper.__init__(self, env) self.env = env - if controller is None: - controller = SimpleDiscreteController(self.env.state.env_cfg) + + assert controller is not None + + # set our controller and replace the action space self.controller = controller - self.action_space = controller.action_space - obs_dims = 23 # see _convert_obs function for how this is computed - self.map_size = self.env.env_cfg.map_size - self.observation_space = spaces.Box( - -999, 999, shape=(self.map_size, self.map_size, obs_dims) - ) - # The simplified wrapper removes the first two phases of the game by using predefined policies (trained or heuristic) # to handle those two phases during each reset if factory_placement_policy is None: - def factory_placement_policy(player, obs: ObservationStateDict): potential_spawns = np.array( list(zip(*np.where(obs["board"]["valid_spawns_mask"] == 1))) @@ -85,7 +69,6 @@ def factory_placement_policy(player, obs: ObservationStateDict): self.factory_placement_policy = factory_placement_policy if bid_policy is None: - def bid_policy(player, obs: ObservationStateDict): faction = "AlphaStrike" if player == "player_1": @@ -94,42 +77,42 @@ def bid_policy(player, obs: ObservationStateDict): self.bid_policy = bid_policy - self.heuristic_policy = heuristic_policy - self.prev_obs = None - # list of all agents regardless of status - self.all_agents = [] def step(self, action: Dict[str, npt.NDArray]): + + # here, for each agent in the game we translate their action into a Lux S2 action lux_action = dict() - for agent in self.all_agents: + for agent in self.env.agents: if agent in action: lux_action[agent] = self.controller.action_to_lux_action( agent=agent, obs=self.prev_obs, action=action[agent] ) else: lux_action[agent] = dict() - if self.heuristic_policy is not None: - heuristic_lux_action = self.heuristic_policy( - agent, self.prev_obs[agent] - ) - # override keys - for k in heuristic_lux_action: - lux_action[agent][k] = heuristic_lux_action[k] + + # lux_action is now a dict mapping agent name to an action obs, reward, done, info = self.env.step(lux_action) self.prev_obs = obs return obs, reward, done, info def reset(self, **kwargs): + # we upgrade the reset function here + + # we call the original reset function first obs = self.env.reset(**kwargs) - self.all_agents = self.env.agents + + # then use the bid policy to go through the bidding phase action = dict() - for agent in self.all_agents: + for agent in self.env.agents: action[agent] = self.bid_policy(agent, obs[agent]) obs, _, _, _ = self.env.step(action) + + # while real_env_steps < 0, we are in the factory placement phase + # so we use 
the factory placement policy to step through this while self.env.state.real_env_steps < 0: action = dict() - for agent in self.all_agents: + for agent in self.env.agents: if my_turn_to_place_factory( obs["player_0"]["teams"][agent]["place_first"], self.env.state.env_steps, @@ -139,4 +122,5 @@ def reset(self, **kwargs): action[agent] = dict() obs, _, _, _ = self.env.step(action) self.prev_obs = obs + return obs diff --git a/luxai_s2/setup.py b/luxai_s2/setup.py index 7ee32cf3..b2f10539 100644 --- a/luxai_s2/setup.py +++ b/luxai_s2/setup.py @@ -17,7 +17,7 @@ def read(fname): long_description="Code for the Lux AI Challenge Season 2", packages=find_packages(exclude="kits"), entry_points={"console_scripts": ["luxai-s2 = luxai_runner.cli:main"]}, - version="2.1.0", + version="2.1.1", python_requires=">=3.7", install_requires=[ "numpy",
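Taken together, the diff removes the built-in episode limit from the gym registration, trims luxai_s2.wrappers down to a bare Controller interface plus SB3Wrapper (which now requires a controller to be passed explicitly), and adds reusable bidding and factory-placement heuristics that SB3Wrapper's reset() plays out before the main phase. Below is a minimal sketch of how these pieces could compose after this change set; NoopController is a hypothetical stand-in for a real Controller subclass, and the wrapper ordering is an assumption rather than the kit's prescribed setup.

```python
# Sketch only: illustrates the reworked API surface introduced by this diff.
from typing import Any, Dict

import gym
import numpy as np
import numpy.typing as npt
from gym import spaces
from gym.wrappers import TimeLimit

import luxai_s2.env  # importing the module runs gym.register("LuxAI_S2-v0", ...)
from luxai_s2.utils.heuristics.bidding import zero_bid
from luxai_s2.utils.heuristics.factory_placement import random_factory_placement
from luxai_s2.wrappers import Controller, SB3Wrapper


class NoopController(Controller):
    """Smallest possible Controller: a single discrete action that issues no commands."""

    def __init__(self) -> None:
        super().__init__(spaces.Discrete(1))

    def action_to_lux_action(self, agent: str, obs: Dict[str, Any], action: npt.NDArray):
        # translate the (single) parameterized action into a Lux action dict: do nothing
        return dict()

    def action_masks(self, agent: str, obs: Dict[str, Any]):
        # the no-op action is always valid
        return np.ones(1, dtype=bool)


env = gym.make("LuxAI_S2-v0")
# max_episode_steps is no longer baked into the registration, so set a limit explicitly
env = TimeLimit(env, max_episode_steps=1000)
env = SB3Wrapper(
    env,
    bid_policy=zero_bid,
    factory_placement_policy=random_factory_placement,
    controller=NoopController(),  # a controller is now required (assert controller is not None)
)
obs = env.reset()  # bidding and factory placement are resolved inside reset()
```

In practice the RL starter kit's own controller and observation wrappers would take the place of NoopController; the sketch only shows the new call signatures.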