diff --git a/tonic/environments/__init__.py b/tonic/environments/__init__.py
index b375932c..c0f6afb4 100644
--- a/tonic/environments/__init__.py
+++ b/tonic/environments/__init__.py
@@ -1,8 +1,8 @@
-from .builders import Bullet, ControlSuite, Gym
+from .builders import Bullet, ControlSuite, Gym, Unity
 from .distributed import distribute, Parallel, Sequential
 from .wrappers import ActionRescaler, TimeFeature
 
 
 __all__ = [
-    Bullet, ControlSuite, Gym, distribute, Parallel, Sequential,
+    Bullet, ControlSuite, Gym, Unity, distribute, Parallel, Sequential,
     ActionRescaler, TimeFeature]
diff --git a/tonic/environments/builders.py b/tonic/environments/builders.py
index 3adec879..b9cbca0d 100644
--- a/tonic/environments/builders.py
+++ b/tonic/environments/builders.py
@@ -9,6 +9,8 @@
 from tonic.utils import logger
 
 
+worker_index = 0
+
 def gym_environment(*args, **kwargs):
     '''Returns a wrapped Gym environment.'''
 
@@ -17,6 +19,47 @@ def _builder(*args, **kwargs):
 
     return build_environment(_builder, *args, **kwargs)
 
+
+def unity_environment(*args, **kwargs):
+    '''Returns a wrapped Unity environment.
+
+    The builder takes the Unity build name and the first worker id to try.
+    It is handed to build_environment together with the given arguments.
+    '''
+    import random
+
+    from gym_unity.envs import UnityToGymWrapper
+    from mlagents_envs.environment import UnityEnvironment
+    from mlagents_envs.exception import UnityWorkerInUseException
+
+    def _builder(name, start_id, *args, **kwargs):
+        # Try connecting to the Unity3D game instance, starting from the
+        # requested worker id. Extra arguments are accepted but unused.
+        global worker_index
+        worker_index = int(start_id)
+        while True:
+            try:
+                unity_env = UnityEnvironment(
+                    name, no_graphics=True, worker_id=worker_index)
+            except UnityWorkerInUseException:
+                # Move forward by at least 1: a zero step would retry the
+                # worker id that was just reported as in use.
+                worker_index += random.randint(1, 1000)
+            else:
+                break
+
+        print("worker_index", worker_index, flush=True)
+        try:
+            environment = UnityToGymWrapper(unity_env)
+        except Exception:
+            # Do not leave the Unity instance running if wrapping fails.
+            unity_env.close()
+            raise
+        time_limit = 1000  # The time limit is not yet visible from the env
+        return gym.wrappers.TimeLimit(environment, time_limit)
+
+    return build_environment(_builder, *args, **kwargs)
+
 
 def bullet_environment(*args, **kwargs):
     '''Returns a wrapped PyBullet environment.'''
@@ -160,3 +203,4 @@ def render(self, mode='rgb_array', height=None, width=None, camera_id=0):
 Gym = gym_environment
 Bullet = bullet_environment
 ControlSuite = control_suite_environment
+Unity = unity_environment
diff --git a/tonic/environments/distributed.py b/tonic/environments/distributed.py
index 3d9f4f1f..42f7158d 100644
--- a/tonic/environments/distributed.py
+++ b/tonic/environments/distributed.py
@@ -1,7 +1,6 @@
 '''Builders for distributed training.'''
 
 import multiprocessing
-
 import numpy as np
 
 
@@ -97,6 +96,7 @@ def proc(action_pipe, index, seed):
         dummy_environment = self.environment_builder()
         self.observation_space = dummy_environment.observation_space
         self.action_space = dummy_environment.action_space
+        dummy_environment.close()
         del dummy_environment
         self.started = False
 
@@ -159,6 +159,7 @@ def distribute(environment_builder, worker_groups=1, workers_per_group=1):
     '''Distributes workers over parallel and sequential groups.'''
     dummy_environment = environment_builder()
     max_episode_steps = dummy_environment.max_episode_steps
+    dummy_environment.close()
     del dummy_environment
 
     if worker_groups < 2: