Skip to content

Commit

Permalink
Add experiments for the conjunction case
Browse files Browse the repository at this point in the history
  • Loading branch information
YuqianJiang committed Sep 16, 2020
1 parent b8e430d commit 85663f2
Show file tree
Hide file tree
Showing 5 changed files with 320 additions and 89 deletions.
10 changes: 8 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -34,18 +34,24 @@ The environment `env.py` implements a robot sweeping repeatedly and non-uniforml

### Training

To run the experiment in the first scenario, run this command:
To run the experiment in the "always kitchen" scenario:

```train
python continual_area_sweeping/shield_experiment_region.py
```

To run the experiment in the second scenario, run this command:
To run the experiment in the "always keep human visible" scenario:

```train
python continual_area_sweeping/shield_experiment_person.py
```

To run the experiment in the "always keep human visible and always corridor" scenario:

```train
python continual_area_sweeping/experiment_conjunction.py
```

## Grid World

### Environment
Expand Down
249 changes: 249 additions & 0 deletions continual_area_sweeping/experiment_conjunction.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,249 @@
import pprint

import argparse

import numpy as np
import yaml
import random
import os
import csv
import json

import train
import gridworld

def generate_random_grid(base, num_event_cells, period_range, bound, mode='linear', stack=True,
                         event_region=None, extra_event_region=None):
    """Build a ``gridworld.GridWorld`` with randomly placed event cells.

    Args:
        base: 2-D occupancy array; entries equal to 0 are the free cells.
        num_event_cells: how many of the shuffled candidate cells receive an
            event object. The candidate at index ``num_event_cells`` becomes
            the initial position, so more candidates than this are required.
        period_range: ``(min_period, max_period)``, both inclusive, handed to
            ``random.randint`` once per event cell.
        bound: forwarded unchanged to every ``gridworld.Object``.
        mode: forwarded to ``gridworld.GridWorld``. ``"person"`` additionally
            creates a ``gridworld.Person`` at the initial position and places
            an event object on every candidate cell.
        stack: forwarded to ``gridworld.GridWorld``.
        event_region: optional 2-D array of ``(row, col)`` candidates used
            instead of the free cells of ``base``. It is shuffled in place.
        extra_event_region: forwarded to ``gridworld.GridWorld``; a fresh empty
            list is used when omitted.

    Returns:
        The constructed ``gridworld.GridWorld``.

    Raises:
        IndexError: if there are not at least ``num_event_cells + 1``
            candidate cells.
    """
    # A literal ``[]`` default would be one list object shared by every call
    # (and by every GridWorld built from it); create a new one per call.
    if extra_event_region is None:
        extra_event_region = []

    min_period, max_period = period_range
    free_spaces = np.argwhere(base == 0) if event_region is None else event_region
    np.random.shuffle(free_spaces)

    # Candidates are (row, col); gridworld objects take x=col, y=row.
    cells = [
        gridworld.Object(x=free_spaces[n, 1], y=free_spaces[n, 0],
                         period=random.randint(min_period, max_period), bound=bound)
        for n in range(num_event_cells)
    ]

    # The first candidate not used for an event cell is the start position.
    pos = (free_spaces[num_event_cells, 1], free_spaces[num_event_cells, 0])
    person = None
    if mode == "person":
        person = gridworld.Person((free_spaces[num_event_cells, 1], free_spaces[num_event_cells, 0]))
        # In person mode every candidate cell gets an event object, replacing
        # the list built above.
        # NOTE(review): the indentation of this file was lost; this rebuild is
        # assumed to belong to the person branch -- confirm.
        cells = [
            gridworld.Object(x=free_spaces[n, 1], y=free_spaces[n, 0],
                             period=random.randint(min_period, max_period), bound=bound)
            for n in range(len(free_spaces))
        ]

    gw = gridworld.GridWorld(base, cells, person=person, initialpos=pos, viewable_distance=0, mode=mode,
                             stack=stack, extra_event_region=extra_event_region)
    return gw


if __name__ == '__main__':

    # Command line: an optional YAML config file and the CSV file that
    # receives the evaluation rows.
    parser = argparse.ArgumentParser(description='Shaping Experiment')
    parser.add_argument('-c', '--config', help='Config File', default=None)
    parser.add_argument('-f', '--csv', help='CSV File', default="results.csv")
    args = parser.parse_args()
    if not args.config:
        # Built-in defaults used when no config file is given.
        config = {
            'mode': 'person',
            'bound': 1,
            'average_reward_learning_rate': 0.0001,
            'eval_period': 1000,
            'exploration_sched_timesteps': 10000,
            'strategy_file': 'Example1_Perm_readable.json',
            'replay_buffer_size': 100000
        }
    else:
        with open(args.config, 'r') as f:
            # yaml.load() without an explicit Loader is deprecated since
            # PyYAML 5.1 and a TypeError from 6.0 on; safe_load also refuses
            # to construct arbitrary Python objects. An empty file parses to
            # None, so fall back to an empty mapping for the .get() calls.
            config = yaml.safe_load(f) or {}

    # Print config
    pprint.pprint(config)

# Shield
# Load the shield strategy, if one is configured, into two lookup structures:
#   w_dict            maps an (s, st) state pair to the list of (s, st) pairs
#                     of its successors;
#   following_region  for every st below 225, lists the (row, col) cells
#                     obtained by unravelling each s seen with that st on a
#                     15x15 grid.
# NOTE(review): presumably s is the robot cell index and st the person cell
# index -- confirm against the strategy file format.
strategy_file = config.get("strategy_file", None)
w_dict = None
# Bound up front so that building `strategy = (w_dict, following_region)`
# later does not raise NameError when no strategy file is configured.
following_region = None
if strategy_file:
    w_dict = {}
    following_region = [[] for _ in range(225)]
    with open(os.path.join(os.path.dirname(os.path.realpath(__file__)), strategy_file), 'r') as f:
        strategy = json.load(f)
    for state in strategy.values():
        current = state["State"]
        successors = []
        for successor in state["Successors"]:
            # Successors are stored as ids; the JSON keys are their strings.
            succ_state = strategy[str(successor)]["State"]
            successors.append((succ_state["s"], succ_state["st"]))
        w_dict[(current["s"], current["st"])] = successors
        # Only st values that index one of the 225 grid cells get a region.
        if current["st"] < 225:
            following_region[current["st"]].append(np.unravel_index(current["s"], [15, 15]))


# Visibility
# Read iset.json from the directory of this script and re-key it by integer
# (JSON object keys are always strings).
# NOTE(review): presumably each key is a cell index and each value the cells
# that cannot be seen from it -- confirm against gridworld.
invisibility_file = 'iset.json'
invisibility_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), invisibility_file)
with open(invisibility_path, 'r') as f:
    invisibility = json.load(f)

invisibility_dict = {int(key): value for key, value in invisibility.items()}

with open(args.csv, 'w', newline='') as csvfile:
    # One CSV row per evaluation, plus a marker row before each method.
    fieldnames = ['TYPE', 'ADT', 'DPS', 'TOTALDETECTIONS', 'TOTALSTEPS', 'TOTALEVENTS', 'NUMVISIBLE']
    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
    writer.writeheader()

    # 15x15 occupancy map, 1 = wall. Column 7 is walled above row 6 and below
    # row 8; rows 5 and 9 are walled except for openings at columns 5 and 9.
    img = np.zeros([15, 15])
    img[0:6, 7] = 1
    img[9:15, 7] = 1
    for wall_row in (5, 9):
        img[wall_row, 0:5] = 1
        img[wall_row, 6] = 1
        img[wall_row, 8] = 1
        img[wall_row, 10:15] = 1

    print (img)

    # Rows 6-8 across all 15 columns form the extra event region.
    corridor_cells = [(r, c) for r in range(6, 9) for c in range(15)]
    gw = generate_random_grid(img, 188, (10, 20), config.get('bound', 1),
                              mode=config.get('mode', 'linear'),
                              stack=True, extra_event_region=corridor_cells)

    strategy = (w_dict, following_region)
    gw.invisibility = invisibility_dict

    # RL
    gw.reset()

    eval_period = config.get('eval_period', 20000)

    np.set_printoptions(precision=3, suppress=True, linewidth=150)

    # The evaluation callback and the training runs that follow write to
    # `writer` / `csvfile`, so they must execute while this file is still open.

def sliding_window_eval_fn(env, policy, q_func, vizgrid, num_iters):
    """Print the current statistics of ``env.gw`` and append them to the CSV.

    DPS is computed as the detections gained since ``prev_num_detections``
    divided by the enclosing ``eval_period``. ``policy``, ``q_func`` and
    ``vizgrid`` are accepted but not used. Relies on ``writer``, ``csvfile``
    and ``eval_period`` from the enclosing scope.
    """
    world = env.gw
    adt = world.get_adt()
    dps = (world.num_detections - world.prev_num_detections) / eval_period
    print(num_iters, "ADT: ", adt, "\tDPS: ", dps, "\tDetections: ", world.num_detections,
          "\tTotal Timesteps: ", world.timestep, "\tTotal Events: ", world.total_num_events,
          "\tVisible: ", world.person.viewable_counts)
    row = {
        'TYPE': str(num_iters),
        'ADT': adt,
        'DPS': dps,
        'TOTALDETECTIONS': world.num_detections,
        'TOTALSTEPS': world.timestep,
        'TOTALEVENTS': world.total_num_events,
        'NUMVISIBLE': world.person.viewable_counts,
    }
    writer.writerow(row)
    # Flush after every row so partial results survive an interrupted run.
    csvfile.flush()

def region_distance(pos, region, gw):
    """Return the smallest ``gw.fw.distance`` from ``pos`` to any cell of ``region``.

    Args:
        pos: position handed to ``gw.fw.distance`` as its first argument.
        region: non-empty sequence of ``(row, col)`` cells; each cell is
            swapped to ``(col, row)`` before being handed to the distance
            function.
        gw: object exposing ``fw.distance(a, b)``.

    Returns:
        The minimum distance over all cells of ``region``.

    Raises:
        IndexError: if ``region`` is empty.
    """
    if len(region) == 0:
        # Same exception type the old `region[0]` lookup produced, but with a
        # message that names the actual problem.
        raise IndexError("region_distance() needs a non-empty region")
    # region is row/column, positions are (x, y) = (column, row)
    return min(gw.fw.distance(pos, (cell[1], cell[0])) for cell in region)

# Shaping mask for the conjunction task: one value per action, built from the
# distance to gw.extra_event_region and, while the person is visible, the
# distance to the following region of the person's cell.
# spec is unpacked as (spec_dict, following_region); only following_region is used.
def get_mask_person_shaping(spec, gw, pos, person_pos):
# First term: every action starts at minus the current distance to the extra
# event region.
dist_curr = region_distance(pos, gw.extra_event_region, gw)
phi_mask = np.full(len(gw.actions), -dist_curr)
for action in range(len(gw.actions)):
target = gw.get_target(action, pos)
# Actions whose target fails gw.check_target keep the starting value.
if not gw.check_target(target, pos):
continue
dist_next = region_distance(target, gw.extra_event_region, gw)
# +1 for moving closer to the region, or for staying at distance 0.
if dist_next < dist_curr:
phi_mask[action] += 1
elif dist_curr == 0 and dist_next == 0:
phi_mask[action] += 1

if gw.person_viewable(pos, person_pos): # the person is visible now
spec_dict, following_region = spec
# Flat index of the person's cell: person_pos[1] * grid width + person_pos[0].
ind_person = person_pos[1] * gw.grid.shape[1] + person_pos[0]
# Second term: the same scheme relative to the following region of that cell.
dist_curr = region_distance(pos, following_region[ind_person], gw)
for action in range(len(gw.actions)):
# Applied to every action, including those rejected by check_target below.
phi_mask[action] += -dist_curr
target = gw.get_target(action, pos)
if not gw.check_target(target, pos):
continue
dist_next = region_distance(target, following_region[ind_person], gw)
if dist_next < dist_curr:
phi_mask[action] += 1
# NOTE(review): the indentation of this file was lost, so it is unclear whether
# this check is nested inside the previous `if`. Unnested, it overwrites the
# whole entry (first term included) whenever the target lies in the following
# region -- confirm the intended nesting.
if dist_next == 0:
phi_mask[action] = 0
else:
# Person not visible: subtract 6 from every entry.
# NOTE(review): this branch returns a Python list while the other branch
# returns the numpy array -- confirm that callers accept both.
phi_mask = [phi - 6 for phi in phi_mask]
return phi_mask

def get_mask_person_shielding(spec, gw, pos, person_pos):
    """Return the shield mask that keeps the robot heading for the extra event region.

    An action is allowed (mask value 0) when its target passes
    ``gw.check_target`` and either lies strictly closer to
    ``gw.extra_event_region`` than ``pos`` or stays inside the region (both
    distances 0). Every other action is blocked with ``-inf``.

    Args:
        spec: accepted for interface compatibility with the other mask
            builders; not used.
        gw: grid world exposing ``actions``, ``get_target``, ``check_target``
            and ``extra_event_region``.
        pos: current robot position.
        person_pos: accepted for interface compatibility; not used.

    Returns:
        A float numpy array with one entry per action, each 0 or ``-inf``.

    NOTE: the person-following part of the specification is not enforced
    here. A variant that also required ``(next robot cell, person cell)`` to
    be in ``spec_dict`` while the person is visible existed only as an
    unreachable string literal after the ``return`` and has been removed.
    """
    num_actions = len(gw.actions)
    shield_neginf_mask = np.full(num_actions, -np.inf)

    dist_curr = region_distance(pos, gw.extra_event_region, gw)
    for action in range(num_actions):
        target = gw.get_target(action, pos)
        if not gw.check_target(target, pos):
            continue
        dist_next = region_distance(target, gw.extra_event_region, gw)
        if dist_next < dist_curr or (dist_curr == 0 and dist_next == 0):
            shield_neginf_mask[action] = 0

    return shield_neginf_mask

def get_mask_person_pos(gw, method_type, spec, pos, person_pos):
    """Return the per-action mask for the requested method.

    Args:
        gw: grid world; ``len(gw.actions)`` fixes the mask length.
        method_type: falsy for the baseline (all-zero mask), ``"shielding"``
            or ``"shaping"``.
        spec: forwarded to the shielding / shaping mask builder.
        pos: current robot position.
        person_pos: current person position.

    Returns:
        The mask produced by the selected builder.

    Raises:
        ValueError: if ``method_type`` is truthy but not a known method
            (this previously surfaced as an UnboundLocalError).
    """
    if not method_type:
        return np.zeros(len(gw.actions))
    if method_type == "shielding":
        return get_mask_person_shielding(spec, gw, pos, person_pos)
    if method_type == "shaping":
        return get_mask_person_shaping(spec, gw, pos, person_pos)
    raise ValueError("unknown method_type: {!r}".format(method_type))

# Run the three methods back to back on the same grid world. Each run is
# preceded by a marker row in the CSV and followed by a reset of the world.
for label, method_type in (("Shaping", "shaping"), ("Baseline", None), ("Shielding", "shielding")):
    writer.writerow({'TYPE': label})
    csvfile.flush()
    print(label)
    train.run(config, gw, strategy, method_type, eval_period, sliding_window_eval_fn, get_mask_person_pos)
    gw.reset()

61 changes: 58 additions & 3 deletions continual_area_sweeping/shield_experiment_person.py
Original file line number Diff line number Diff line change
Expand Up @@ -137,22 +137,77 @@ def sliding_window_eval_fn(env, policy, q_func, vizgrid, num_iters):
'NUMVISIBLE': env.gw.person.viewable_counts})
csvfile.flush()

def region_distance(pos, region, gw):
    """Return the smallest ``gw.fw.distance`` from ``pos`` to any cell of ``region``.

    Args:
        pos: position handed to ``gw.fw.distance`` as its first argument.
        region: non-empty sequence of ``(row, col)`` cells; each cell is
            swapped to ``(col, row)`` before being handed to the distance
            function.
        gw: object exposing ``fw.distance(a, b)``.

    Returns:
        The minimum distance over all cells of ``region``.

    Raises:
        IndexError: if ``region`` is empty.
    """
    if len(region) == 0:
        # Same exception type the old `region[0]` lookup produced, but with a
        # message that names the actual problem.
        raise IndexError("region_distance() needs a non-empty region")
    # region is row/column, positions are (x, y) = (column, row)
    return min(gw.fw.distance(pos, (cell[1], cell[0])) for cell in region)

def get_mask_person_shaping(spec, gw, pos, person_pos):
    """Return the shaping mask for keeping the person in view.

    When the person is not visible from ``pos`` every action gets -6.
    Otherwise every action starts at minus the current distance to the
    following region of the person's cell; an action whose target passes
    ``gw.check_target`` gets 0 if the target lies in that region, or +1 if
    the target is merely closer to it.

    ``spec`` is a ``(spec_dict, following_region)`` pair; only the second
    element is used.
    """
    if not gw.person_viewable(pos, person_pos):
        return np.full(len(gw.actions), -6)

    _, regions = spec
    # Flat index of the person's cell: person_pos[1] * grid width + person_pos[0].
    person_index = person_pos[1] * gw.grid.shape[1] + person_pos[0]
    here = region_distance(pos, regions[person_index], gw)

    num_actions = len(gw.actions)
    potentials = np.full(num_actions, -here)
    for action in range(num_actions):
        candidate = gw.get_target(action, pos)
        if gw.check_target(candidate, pos):
            there = region_distance(candidate, regions[person_index], gw)
            if there == 0:
                potentials[action] = 0
            elif there < here:
                potentials[action] += 1
    return potentials

def get_mask_person_shielding(spec, gw, pos, person_pos):
    """Return the shield mask derived from the strategy's allowed state pairs.

    While the person is visible from ``pos``, an action is allowed (mask
    value 0) when ``(flat index of its target, flat index of the person)`` is
    a key of ``spec_dict``; every other action is blocked with ``-inf``. When
    the person is not visible a message is printed and every action stays
    blocked.

    Args:
        spec: ``(spec_dict, following_region)`` pair; only ``spec_dict`` is
            used.
        gw: grid world exposing ``actions``, ``grid``, ``person_viewable`` and
            ``get_target``.
        pos: current robot position.
        person_pos: current person position.

    Returns:
        A float numpy array with one entry per action, each 0 or ``-inf``.
    """
    spec_dict, _ = spec
    # Flat cell index is second coordinate * grid width + first coordinate.
    width = gw.grid.shape[1]
    shield_neginf_mask = np.full(len(gw.actions), -np.inf)
    ind_person = person_pos[1] * width + person_pos[0]

    if not gw.person_viewable(pos, person_pos):
        print("Lost the person while shielding!")
        return shield_neginf_mask

    for action in range(len(gw.actions)):
        target = gw.get_target(action, pos)
        ind_robot_next = target[1] * width + target[0]
        if (ind_robot_next, ind_person) in spec_dict:
            shield_neginf_mask[action] = 0
    return shield_neginf_mask

def get_mask_person_pos(gw, method_type, spec, pos, person_pos):
    """Return the per-action mask for the requested method.

    Args:
        gw: grid world; ``len(gw.actions)`` fixes the mask length.
        method_type: falsy for the baseline (all-zero mask), ``"shielding"``
            or ``"shaping"``.
        spec: forwarded to the shielding / shaping mask builder.
        pos: current robot position.
        person_pos: current person position.

    Returns:
        The mask produced by the selected builder.

    Raises:
        ValueError: if ``method_type`` is truthy but not a known method
            (this previously surfaced as an UnboundLocalError).
    """
    if not method_type:
        return np.zeros(len(gw.actions))
    if method_type == "shielding":
        return get_mask_person_shielding(spec, gw, pos, person_pos)
    if method_type == "shaping":
        return get_mask_person_shaping(spec, gw, pos, person_pos)
    raise ValueError("unknown method_type: {!r}".format(method_type))

# Run the three methods in turn: write a marker row to the CSV, train, then
# reset the grid world.
# NOTE(review): this span comes from a diff view, so every train.run call
# appears twice. Presumably the first line of each pair (without
# get_mask_person_pos) is the removed version and the second its replacement;
# confirm against the committed file -- run as shown, each method would be
# trained twice.
writer.writerow({'TYPE': 'Shaping'})
csvfile.flush()
print("Shaping")
train.run(config, gw, strategy, "shaping", eval_period, sliding_window_eval_fn)
train.run(config, gw, strategy, "shaping", eval_period, sliding_window_eval_fn, get_mask_person_pos)
gw.reset()

# Baseline: method type None, for which get_mask_person_pos returns an
# all-zero mask.
writer.writerow({'TYPE': 'Baseline'})
csvfile.flush()
print("Baseline")
train.run(config, gw, strategy, None, eval_period, sliding_window_eval_fn)
train.run(config, gw, strategy, None, eval_period, sliding_window_eval_fn, get_mask_person_pos)
gw.reset()

writer.writerow({'TYPE': 'Shielding'})
csvfile.flush()
print("Shielding")
train.run(config, gw, strategy, "shielding", eval_period, sliding_window_eval_fn)
train.run(config, gw, strategy, "shielding", eval_period, sliding_window_eval_fn, get_mask_person_pos)
gw.reset()

Loading

0 comments on commit 85663f2

Please sign in to comment.