Skip to content

Commit

Permalink
Updated environment files (teleportball, teleportrobot, general fixes)
Browse files Browse the repository at this point in the history
  • Loading branch information
flimdejong committed Jan 17, 2025
1 parent ba8f916 commit a224e9f
Show file tree
Hide file tree
Showing 4 changed files with 159 additions and 230 deletions.
195 changes: 95 additions & 100 deletions roboteam_ai/src/rl/env_ray.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,12 @@
sys.path.append(roboteam_path)

# Now import the functions
from roboteam_ai.src.rl.src.sentActionCommand import send_action_command
from roboteam_ai.src.rl.src.sentActionCommand import send_action_command, send_num_attackers
from roboteam_ai.src.rl.src.getState import get_ball_state, get_robot_state, get_referee_state
from roboteam_ai.src.rl.src.teleportBall import teleport_ball
from roboteam_ai.src.rl.src.resetRefereeAPI import reset_referee_state
from roboteam_ai.src.rl.src.changeGameState import start_game
from roboteam_ai.src.rl.src.teleportRobot import teleport_robots

"""
This environment file is in the form of a gymnasium environment.
Expand All @@ -35,20 +36,22 @@ def __init__(self, config=None):
self.config = config or {} # Config placeholder
self.MAX_ROBOTS_US = 10

# Define the number of robots that are present in each grid + ball location
self.robot_grid = np.zeros((4, 2), dtype=int) # left up, right up, left down, right down
self.ball_position = np.zeros(2) # Single row with 2 columns
self.ball_quadrant = 4 # This refers to the center

self.yellow_score = 0 # Initialize score to zero
self.blue_score = 0 # Initialize blue score to zero

# Initialize the observation space
self.observation_space = spaces.Box(low=float('-inf'), high=float('inf'), shape=(15,), dtype=np.float64)
# Initialize the observation space for:
# 44 values (robot positions) + 1 (is_yellow_dribbling) + 2 (ball x,y)
self.observation_space = spaces.Box(
low=float('-inf'),
high=float('inf'),
shape=(47,), # 44 + 1 + 2
dtype=np.float64
)

# Action space: [attackers, defenders]
# Wallers will be automatically calculated
self.action_space = spaces.MultiDiscrete([self.MAX_ROBOTS_US + 1, self.MAX_ROBOTS_US + 1])
# Action space: single discrete value 0-6 representing number of attackers
self.action_space = spaces.Discrete(7) # 0 to 6 attackers possible

# Define the xy coordinates of the designated ball position
self.x = 0
Expand All @@ -69,6 +72,13 @@ def __init__(self, config=None):
# Set first_step flag
self.is_first_step = True

# Add previous dribbling state
self.previous_yellow_dribbling = False
self.step_taken = False

self.last_step_time = 0
self.min_step_interval = 1.5 # Minimum time between steps in seconds

def teleport_ball_with_check(self, x, y):
"""
Verify that ball teleportation completes successfully by attempting multiple times
Expand Down Expand Up @@ -112,7 +122,7 @@ def calculate_reward(self):

# Reward shaping
shaped_reward = 0
if not self.shaped_reward_given and self.is_yellow_dribbling and (self.ball_quadrant == 1 or self.ball_quadrant == 3):
if not self.shaped_reward_given and self.is_yellow_dribbling:
self.shaped_reward_given = True # Set it to true
shaped_reward = 0.1

Expand All @@ -124,115 +134,94 @@ def calculate_reward(self):
reward = goal_scored_reward + shaped_reward

return reward


# def get_observation(self):
# """
# get_observation is meant to get the observation space (kinda like the state)
# """

# # Get the robot grid representation
# self.robot_grid, self.is_yellow_dribbling, self.is_blue_dribbling = get_robot_state() # Matrix of 4 by 2 + 2 booleans
# print(f"Robot grid: {self.robot_grid}")
# print(f"Yellow dribbling: {self.is_yellow_dribbling}, Blue dribbling: {self.is_blue_dribbling}")

# # Get the ball location
# self.ball_position, self.ball_quadrant = get_ball_state() # x,y coordinates, quadrant

# print(f"Ball position: {self.ball_position}, Ball quadrant: {self.ball_quadrant}")

# observation_space = {
# 'robot_positions': self.robot_grid,
# 'ball_position': self.ball_quadrant,
# 'is_yellow_dribbling' : self.is_yellow_dribbling
# }

# robot_positions_flat = self.robot_grid.flatten() # Shape (8,)
# ball_position_flat = np.array([self.ball_quadrant]) # Shape (1,)
# dribbling_flat = np.array([self.is_yellow_dribbling]) # Shape (1,)

# # Combine all parts into a single flat array
# full_observation = np.concatenate([robot_positions_flat, ball_position_flat, dribbling_flat])

# # Pad or reshape `full_observation` to shape [32, 10]
# padded_observation = np.zeros(32 * 10) # Initialize a zero-padded observation
# padded_observation[:full_observation.size] = full_observation # Fill with actual data
# observation_2d = padded_observation.reshape(32, 10) # Reshape to [32, 10]

# return observation_2d, self.calculate_reward()

def get_observation(self):
"""
get_observation is meant to get the observation space (kinda like the state)
Get observation space: robot positions (44) + dribbling (1) + ball position (2)
"""
# Get the robot grid representation
self.robot_grid, self.is_yellow_dribbling, self.is_blue_dribbling = get_robot_state()
# Get the robot positions and dribbling states
robot_positions, self.is_yellow_dribbling, self.is_blue_dribbling = get_robot_state()

# Get the ball location
# Get the ball location (continuous x,y values)
self.ball_position, self.ball_quadrant = get_ball_state()

# Convert and flatten robot positions to float64
robot_positions_flat = self.robot_grid.astype(np.float64).flatten() # 8 elements

# Use ball quadrant for observation
ball_quadrant = np.array([float(self.ball_quadrant)], dtype=np.float64) # 1 element

# Convert dribbling status to float64
is_yellow_dribbling = np.array([float(self.is_yellow_dribbling)], dtype=np.float64) # 1 element
dribbling = np.array([float(self.is_yellow_dribbling)], dtype=np.float64)

# Combine all parts into the observation array with padding
# Combine all parts into the observation array
observation = np.concatenate([
robot_positions_flat, # 8 elements
ball_quadrant, # 1 element
is_yellow_dribbling, # 1 element
np.zeros(5, dtype=np.float64) # 5 elements to reach total of 15
robot_positions, # 44 elements (x,y for each robot)
dribbling, # 1 element
self.ball_position # 2 elements (x,y)
])

# Make sure it's flat
observation = observation.reshape(15,)

return observation

def step(self, action):
"""
The step function is called in every loop the RL agent goes through.
It receives a state, reward and carries out an action
The step function waits for either:
1. A true possession change (lost ball to opponent or gained from opponent)
2. Specific referee commands (8,9)
With rate limiting to prevent too frequent steps
"""

while True:
# Get current state
observation_space = self.get_observation()

# Get referee state
self.yellow_score, self.blue_score, self.stage, self.ref_command, self.x, self.y = get_referee_state()

if self.ref_command in (2,8,9): # NORMAL_START, DIRECT_FREE_YELLOW, DIRECT_FREE_BLUE
if self.is_first_step:
print("First step after reset - waiting for kickoff to finish...")
time.sleep(5)

print("Game is RUNNING, executing action")
attackers, defenders = action
wallers = self.MAX_ROBOTS_US - (attackers + defenders)
send_action_command(num_attacker=attackers, num_defender=defenders, num_waller=wallers)
break

# If HALT, but a goal is scored, we need to reset so we break
# If HALT, and goal is false, we truncate.
# If STOP, we truncate

elif self.ref_command in (0, 1): # HALT or STOP
break # Code will break if it hits HALT or STOP, then it checks if it truncated or a goal was scored.

elif self.ref_command in (16, 17): # Ball placement
if self.ref_command in (16,17): # If there is ball placement
self.teleport_ball_with_check(self.x, self.y)

self.is_first_step = False
observation_space = self.get_observation()
reward = self.calculate_reward()

truncated = self.is_truncated()
done = self.is_terminated()

time.sleep(0.5)

return observation_space, reward, done, truncated, {}
# Reset step_taken flag if ref_command is 2
if self.ref_command == 2:
self.step_taken = False

# Check for true possession change
possession_changed = (
(self.is_yellow_dribbling != self.previous_yellow_dribbling) and
(self.is_blue_dribbling or self.previous_yellow_dribbling)
)

# Check if enough time has passed since last step
current_time = time.time()
time_since_last_step = current_time - self.last_step_time
can_take_step = time_since_last_step >= self.min_step_interval

should_take_step = (
can_take_step and (
possession_changed or
(self.ref_command in (8,9) and not self.step_taken)
)
)

if should_take_step:
print(f"Taking step (time since last: {time_since_last_step:.2f}s)")
# Update last step time
self.last_step_time = current_time

# Execute action
send_num_attackers(action)

# Mark step as taken for this ref state
if self.ref_command in (8,9):
self.step_taken = True

# Update previous possession state
self.previous_yellow_dribbling = self.is_yellow_dribbling

reward = self.calculate_reward()

# Check for termination
truncated = self.is_truncated()
done = self.is_terminated()

return observation_space, reward, done, truncated, {}

time.sleep(0.1)


def is_terminated(self):
"""
Expand Down Expand Up @@ -268,10 +257,11 @@ def reset(self, seed=None, options=None):
Reset the environment to initial state.
"""

# Reset rate limiting variables
self.last_step_time = time.time() # Reset the timestamp

# Reset internal state variables
self.robot_grid = np.zeros((4, 2), dtype=int)
self.ball_position = np.zeros(2)
self.ball_quadrant = 4
self.yellow_score = 0
self.blue_score = 0
self.yellow_yellow_cards = 0
Expand All @@ -281,13 +271,18 @@ def reset(self, seed=None, options=None):
self.shaped_reward_given = False
self.is_yellow_dribbling = False
self.is_blue_dribbling = False
self.previous_yellow_dribbling = False # Add this if not already in init
self.step_taken = False # Reset step taken flag

# Reset physical environment
print("Resetting physical environment...")

# Ensure ball teleport completes
self.teleport_ball_with_check(0,0)

# Teleport the robots
teleport_robots()

# Reset referee state with verification
try:
reset_referee_state()
Expand Down Expand Up @@ -320,7 +315,7 @@ def reset(self, seed=None, options=None):
except Exception as e:
print(f"Error getting initial observation: {str(e)}")
# Provide fallback observation if needed
observation = np.zeros(15, dtype=np.float64)
observation = np.zeros(47, dtype=np.float64)

# Set first_step flag
self.is_first_step = True
Expand Down
36 changes: 15 additions & 21 deletions roboteam_ai/src/rl/src/getState.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,47 +85,41 @@ def get_ball_state():

return ball_position, ball_quadrant

# Function to get the robot state
def get_robot_state():
grid_array = np.zeros((4, 2), dtype=int) # 4 quadrants, 2 columns for yellow/blue counts
# Initialize array for 22 robots (11 per team) with x,y coordinates
robot_positions = np.zeros(44) # [yellow1_x, yellow1_y, ..., blue1_x, blue1_y, ...]
yellow_team_dribbling = False
blue_team_dribbling = False

context = zmq.Context()
socket_world = context.socket(zmq.SUB)
socket_world.setsockopt_string(zmq.SUBSCRIBE, "")

zmq_address = get_zmq_address()
#print(f"Connecting to ZMQ at: {zmq_address}")
socket_world.connect(zmq_address)

try:
message = socket_world.recv()
state = RoboState.FromString(message)
# print(state)

if not len(state.processed_vision_packets):
return grid_array, yellow_team_dribbling, blue_team_dribbling
return robot_positions, yellow_team_dribbling, blue_team_dribbling

world = state.last_seen_world

def get_grid_position(x, y):
if x < 0:
return 0 if y > 0 else 2
else:
return 1 if y > 0 else 3

# Process yellow robots
for bot in world.yellow:
grid_pos = get_grid_position(bot.pos.x, bot.pos.y)
grid_array[grid_pos, 0] += 1
# Process yellow robots (first 11 robots, indices 0-21)
for i, bot in enumerate(world.yellow[:11]):
idx = i * 2
robot_positions[idx] = bot.pos.x
robot_positions[idx + 1] = bot.pos.y
if bot.feedbackInfo.dribbler_sees_ball:
yellow_team_dribbling = True

# Process blue robots
for bot in world.blue:
grid_pos = get_grid_position(bot.pos.x, bot.pos.y)
grid_array[grid_pos, 1] += 1
# Process blue robots (last 11 robots, indices 22-43)
for i, bot in enumerate(world.blue[:11]):
idx = (i + 11) * 2
robot_positions[idx] = bot.pos.x
robot_positions[idx + 1] = bot.pos.y
if bot.feedbackInfo.dribbler_sees_ball:
blue_team_dribbling = True

Expand All @@ -137,7 +131,7 @@ def get_grid_position(x, y):
socket_world.close()
context.term()

return grid_array, yellow_team_dribbling, blue_team_dribbling
return robot_positions, yellow_team_dribbling, blue_team_dribbling

def get_referee_state():
"""
Expand Down
14 changes: 8 additions & 6 deletions roboteam_ai/src/rl/src/teleportBall.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,25 +26,27 @@
def teleport_ball(x, y, z=0.0):
# Create a UDP socket
sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)

# Create the TeleportBall message
teleport = TeleportBall()
teleport.x = x
teleport.y = y
teleport.z = z

# Set velocities to zero
teleport.vx = 0.0
teleport.vy = 0.0
teleport.vz = 0.0

# Create the SimulatorCommand
control = SimulatorControl()
control.teleport_ball.CopyFrom(teleport)
command = SimulatorCommand()
command.control.CopyFrom(control)

# Serialize and send the command
serialized_command = command.SerializeToString()
sock.sendto(serialized_command, ("localhost", SIMULATION_CONTROL_PORT))

#print(f"Sent command to teleport ball to ({x}, {y}, {z})")


# Close the socket
sock.close()

Expand Down
Loading

0 comments on commit a224e9f

Please sign in to comment.