-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathagentPolicy.py
46 lines (28 loc) · 1.31 KB
/
agentPolicy.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
from sb3_contrib.common.recurrent.policies import RecurrentMultiInputActorCriticPolicy
import torch
class AgentLSTMPolicy(RecurrentMultiInputActorCriticPolicy):
    """Recurrent multi-input policy that only sees a chosen subset of sensors.

    Observation entries whose key is in ``sensor_set`` are passed through
    unchanged; every other entry is replaced with standard-normal noise of the
    same shape before the base-class forward pass runs.
    """

    def __init__(self, *args, sensor_set, **kwargs):
        """Build the base policy and remember which observation keys to keep.

        Args:
            *args: Positional arguments forwarded to the base policy.
            sensor_set: Container of observation keys that stay unmasked
                (required, keyword-only). Only membership tests (``in``) are
                performed on it.
            **kwargs: Keyword arguments forwarded to the base policy.
        """
        super().__init__(*args, **kwargs)
        self.sensor_set = sensor_set

    def forward(self, obs, lstm_states, episode_starts, deterministic=False):
        """Run the base forward pass on a masked copy of ``obs``.

        Args:
            obs: Mapping from observation key to tensor.
            lstm_states: Recurrent state, forwarded unchanged to the base class.
            episode_starts: Episode-start flags, forwarded unchanged.
            deterministic: Forwarded unchanged. Defaults to ``False`` so the
                signature stays call-compatible with the base policy, whose
                callers may omit this argument.

        Returns:
            Whatever the base-class ``forward`` returns for the masked input.
        """
        # NOTE(review): torch.randn_like presumably requires a floating-point
        # tensor; confirm no masked observation entry has an integer dtype.
        # NOTE(review): only forward() applies the mask; any other base-class
        # entry point that consumes observations is not overridden here --
        # confirm that is intended.
        masked_obs = {
            key: value if key in self.sensor_set else torch.randn_like(value)
            for key, value in obs.items()
        }
        return super().forward(masked_obs, lstm_states, episode_starts, deterministic)
class ProbeLSTMPolicy(RecurrentMultiInputActorCriticPolicy):
    """Recurrent multi-input policy whose LSTM state is seeded from ``self.env``.

    When ``episode_starts`` is truthy, the incoming recurrent state is replaced
    by ``self.env.get_final_state()``. Observation entries whose key is not in
    ``sensor_set`` are replaced with standard-normal noise of the same shape
    before the base-class forward pass runs.
    """

    def __init__(self, *args, sensor_set, **kwargs):
        """Build the base policy and remember which observation keys to keep.

        Args:
            *args: Positional arguments forwarded to the base policy.
            sensor_set: Container of observation keys that stay unmasked
                (required, keyword-only). Only membership tests (``in``) are
                performed on it.
            **kwargs: Keyword arguments forwarded to the base policy.
        """
        # Fixed: this previously called the misspelled ``__init_`` and raised
        # AttributeError, so the policy could never be constructed.
        super().__init__(*args, **kwargs)
        self.sensor_set = sensor_set

    def forward(self, obs, lstm_states, episode_starts, deterministic=False):
        """Optionally swap in the env-provided state, mask ``obs``, then forward.

        Args:
            obs: Mapping from observation key to tensor.
            lstm_states: Recurrent state; replaced by
                ``self.env.get_final_state()`` when ``episode_starts`` is truthy.
            episode_starts: Episode-start flags, also forwarded to the base class.
            deterministic: Forwarded unchanged. Defaults to ``False`` so the
                signature stays call-compatible with the base policy, whose
                callers may omit this argument.

        Returns:
            Whatever the base-class ``forward`` returns for the masked input.
        """
        # NOTE(review): truth-testing a tensor with more than one element
        # raises in PyTorch, so this presumably assumes a single environment
        # -- confirm.
        # NOTE(review): ``self.env`` is not assigned anywhere in this class;
        # it must be attached by the caller before forward() runs -- confirm.
        # NOTE(review): the base class is understood to reset the recurrent
        # state where ``episode_starts`` is set, which may discard the state
        # injected here -- verify against sb3_contrib.
        if episode_starts:
            lstm_states = self.env.get_final_state()
        masked_obs = {
            key: value if key in self.sensor_set else torch.randn_like(value)
            for key, value in obs.items()
        }
        return super().forward(masked_obs, lstm_states, episode_starts, deterministic)