diff --git a/mushr_rhc_ros/launch/sim/sim_model_deploy.launch b/mushr_rhc_ros/launch/sim/sim_model_deploy.launch index 405ae61..dbfadf5 100644 --- a/mushr_rhc_ros/launch/sim/sim_model_deploy.launch +++ b/mushr_rhc_ros/launch/sim/sim_model_deploy.launch @@ -8,9 +8,10 @@ + - + diff --git a/mushr_rhc_ros/launch/sim/sim_server_eval.launch b/mushr_rhc_ros/launch/sim/sim_server_eval.launch new file mode 100644 index 0000000..2619400 --- /dev/null +++ b/mushr_rhc_ros/launch/sim/sim_server_eval.launch @@ -0,0 +1,30 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/mushr_rhc_ros/src/mingpt/model_mushr_iros.py b/mushr_rhc_ros/src/mingpt/model_mushr_iros.py new file mode 100644 index 0000000..b1d554e --- /dev/null +++ b/mushr_rhc_ros/src/mingpt/model_mushr_iros.py @@ -0,0 +1,473 @@ +""" +GPT model: +- the initial stem consists of a combination of token encoding and a positional encoding +- the meat of it is a uniform sequence of Transformer blocks + - each Transformer is a sequential combination of a 1-hidden-layer MLP block and a self-attention block + - all blocks feed into a central residual pathway similar to resnets +- the final decoder is a linear projection into a vanilla Softmax classifier +""" + +from base64 import encode +import math +import logging + +import torch +import torch.nn as nn +from torch.nn import functional as F +from resnet_custom import resnet18_custom, resnet50_custom + +logger = logging.getLogger(__name__) + +import numpy as np + +import sys +sys.path.append('../') +from models.compass.select_backbone import select_resnet + +class GELU(nn.Module): + def forward(self, input): + return F.gelu(input) + +class GPTConfig: + """ base GPT config, params common to all GPT versions """ + embd_pdrop = 0.1 + resid_pdrop = 0.1 + attn_pdrop = 0.1 + + def __init__(self, vocab_size, block_size, max_timestep, **kwargs): + self.vocab_size = vocab_size + self.block_size = block_size + self.max_timestep = max_timestep + for k,v in kwargs.items(): + setattr(self, k, v) + print(k, v) + +class GPT1Config(GPTConfig): + """ GPT-1 like network roughly 125M params """ + n_layer = 12 + n_head = 12 + n_embd = 768 + +class CausalSelfAttention(nn.Module): + """ + A vanilla multi-head masked self-attention layer with a projection at the end. + It is possible to use torch.nn.MultiheadAttention here but I am including an + explicit implementation here to show that there is nothing too scary here. 
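+    For example, with the configuration used later in this diff (n_embd=128, n_head=8),
+    each head attends over a 128/8 = 16-dimensional slice of every token embedding.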
+ """ + + def __init__(self, config): + super().__init__() + assert config.n_embd % config.n_head == 0 + # key, query, value projections for all heads + self.key = nn.Linear(config.n_embd, config.n_embd) + self.query = nn.Linear(config.n_embd, config.n_embd) + self.value = nn.Linear(config.n_embd, config.n_embd) + # regularization + self.attn_drop = nn.Dropout(config.attn_pdrop) + self.resid_drop = nn.Dropout(config.resid_pdrop) + # output projection + self.proj = nn.Linear(config.n_embd, config.n_embd) + # causal mask to ensure that attention is only applied to the left in the input sequence + # self.register_buffer("mask", torch.tril(torch.ones(config.block_size, config.block_size)) + # .view(1, 1, config.block_size, config.block_size)) + self.register_buffer("mask", torch.tril(torch.ones(config.block_size + 1, config.block_size + 1)) + .view(1, 1, config.block_size + 1, config.block_size + 1)) + self.n_head = config.n_head + + def forward(self, x, layer_past=None): + B, T, C = x.size() + + # calculate query, key, values for all heads in batch and move head forward to be the batch dim + k = self.key(x).view(B, T, self.n_head, C // self.n_head).transpose(1, 2) # (B, nh, T, hs) + q = self.query(x).view(B, T, self.n_head, C // self.n_head).transpose(1, 2) # (B, nh, T, hs) + v = self.value(x).view(B, T, self.n_head, C // self.n_head).transpose(1, 2) # (B, nh, T, hs) + + # causal self-attention; Self-attend: (B, nh, T, hs) x (B, nh, hs, T) -> (B, nh, T, T) + att = (q @ k.transpose(-2, -1)) * (1.0 / math.sqrt(k.size(-1))) + att = att.masked_fill(self.mask[:,:,:T,:T] == 0, float('-inf')) + att = F.softmax(att, dim=-1) + att = self.attn_drop(att) + y = att @ v # (B, nh, T, T) x (B, nh, T, hs) -> (B, nh, T, hs) + y = y.transpose(1, 2).contiguous().view(B, T, C) # re-assemble all head outputs side by side + + # output projection + y = self.resid_drop(self.proj(y)) + return y + +class Block(nn.Module): + """ an unassuming Transformer block """ + + def __init__(self, config): + super().__init__() + self.ln1 = nn.LayerNorm(config.n_embd) + self.ln2 = nn.LayerNorm(config.n_embd) + self.attn = CausalSelfAttention(config) + self.mlp = nn.Sequential( + nn.Linear(config.n_embd, 4 * config.n_embd), + GELU(), + nn.Linear(4 * config.n_embd, config.n_embd), + nn.Dropout(config.resid_pdrop), + ) + + def forward(self, x): + x = x + self.attn(self.ln1(x)) + x = x + self.mlp(self.ln2(x)) + return x + +class GPT(nn.Module): + """ the full GPT language model, with a context size of block_size """ + + def __init__(self, config, device): + super().__init__() + + self.config = config + self.device = device + + self.model_type = config.model_type + self.use_pred_state = config.use_pred_state + + self.map_recon_dim = config.map_recon_dim + + # input embedding stem + self.tok_emb = nn.Embedding(config.vocab_size, config.n_embd) + # self.pos_emb = nn.Parameter(torch.zeros(1, config.block_size, config.n_embd)) + self.pos_emb = nn.Parameter(torch.zeros(1, config.block_size + 1, config.n_embd)) + self.global_pos_emb = nn.Parameter(torch.zeros(1, config.max_timestep+1, config.n_embd)) + self.drop = nn.Dropout(config.embd_pdrop) + + # transformer + self.blocks = nn.Sequential(*[Block(config) for _ in range(config.n_layer)]) + # decoder head + self.ln_f = nn.LayerNorm(config.n_embd) + self.head = nn.Linear(config.n_embd, config.vocab_size, bias=False) + + self.block_size = config.block_size + self.apply(self._init_weights) + action_tanh=True + + logger.info("number of parameters: %e", sum(p.numel() for p in self.parameters())) 
+ + + if config.state_tokenizer == 'conv2D': + self.state_encoder = nn.Sequential(nn.Conv2d(1, 32, 8, stride=4, padding=0), nn.ReLU(), + nn.Conv2d(32, 64, 4, stride=2, padding=0), nn.ReLU(), + nn.Conv2d(64, 64, 3, stride=1, padding=0), nn.ReLU(), + nn.Flatten(), nn.Linear(36864, config.n_embd), nn.Tanh()) + elif config.state_tokenizer == 'resnet18': + self.state_encoder = nn.Sequential(resnet18_custom(pretrained=False, clip_len=1), nn.ReLU(), + nn.Linear(1000, config.n_embd), nn.Tanh()) + + self.ret_emb = nn.Sequential(nn.Linear(1, config.n_embd), nn.Tanh()) + + # add map decoder + encoded_feat_dim = (config.n_embd * config.block_size) // 2 + if config.map_decoder == 'mlp': + #MLP map decoder + self.map_decoder = nn.Sequential(nn.Linear(encoded_feat_dim, 1024), nn.Tanh(), + nn.Linear(1024, 2048), nn.Tanh(), + nn.Linear(2048, 64*64), nn.Tanh()) + elif config.map_decoder == 'deconv': + if self.map_recon_dim == 64: + # conv2d map decoder - original + self.map_decoder = nn.Sequential(nn.Linear(encoded_feat_dim, 4096), nn.Tanh(), + Reshape(16, 16, 16), + MapDecoder_2x_Deconv(16)) + elif self.map_recon_dim == 128: + # conv2d map decoder - new trial + self.map_decoder = nn.Sequential(nn.Linear(encoded_feat_dim, 4096), nn.Tanh(), + Reshape(16, 16, 16), + MapDecoder_4x_Deconv128px(16)) + else: + print('Not support!') + else: + print('Not support!') + + + # ======== linear action space =========================== + self.action_embeddings = nn.Sequential( + nn.Linear(1, 32), + nn.Tanh(), + nn.Linear(32, 64), + nn.Tanh(), + nn.Linear(64, config.n_embd) + ) + + self.predict_action = nn.Sequential( + nn.Linear(config.n_embd, 64), + nn.Tanh(), + nn.Linear(64, 32), + nn.Tanh(), + nn.Linear(32, 1) + ) + + self.predict_state = nn.Sequential( + *([nn.Linear(config.n_embd, config.n_embd)] + ([nn.Tanh()] if action_tanh else [])) + ) + + criterion = torch.nn.MSELoss(reduction='mean') + self.criterion = criterion.cuda(device) + + self.load_pretrained_model_weights(config.pretrained_model_path) + + + def load_pretrained_model_weights(self, model_path): + if model_path: + ckpt = torch.load(model_path)['state_dict'] # COMPASS checkpoint format. 
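+            # The loop below re-keys the COMPASS checkpoint so each sub-module can be
+            # loaded individually: the 'blocks.', 'state_encoder.' and
+            # 'action_embeddings.' prefixes are stripped before load_state_dict().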
+ ckpt2 = {} + ckpt3 = {} + ckpt4 = {} + for key in ckpt: + print(key) + if key.startswith('blocks'): + ckpt2[key.replace('blocks.', '')] = ckpt[key] + if key.startswith('state_encoder'): + ckpt3[key.replace('state_encoder.', '')] = ckpt[key] + if key.startswith('action_embeddings'): + ckpt4[key.replace('action_embeddings.', '')] = ckpt[key] + + self.blocks.load_state_dict(ckpt2) + self.state_encoder.load_state_dict(ckpt3) + self.action_embeddings.load_state_dict(ckpt4) + print('Successfully loaded pretrained checkpoint: {}.'.format(model_path)) + else: + print('Train from scratch.') + + + def reconstruction_loss(self, pred, target): + loss = F.l1_loss(pred, target) + return loss + + def _initialize_weights(self, module): + for name, param in module.named_parameters(): + if 'bias' in name: + nn.init.constant_(param, 0.0) + elif 'weight' in name: + nn.init.orthogonal_(param, 0.1) + + + def get_block_size(self): + return self.block_size + + def _init_weights(self, module): + if isinstance(module, (nn.Linear, nn.Embedding)): + module.weight.data.normal_(mean=0.0, std=0.02) + if isinstance(module, nn.Linear) and module.bias is not None: + module.bias.data.zero_() + elif isinstance(module, nn.LayerNorm): + module.bias.data.zero_() + module.weight.data.fill_(1.0) + + def configure_optimizers(self, train_config): + """ + This long function is unfortunately doing something very simple and is being very defensive: + We are separating out all parameters of the model into two buckets: those that will experience + weight decay for regularization and those that won't (biases, and layernorm/embedding weights). + We are then returning the PyTorch optimizer object. + """ + + # separate out all parameters to those that will and won't experience regularizing weight decay + decay = set() + no_decay = set() + # whitelist_weight_modules = (torch.nn.Linear, ) + whitelist_weight_modules = (torch.nn.Linear, torch.nn.Conv2d, torch.nn.Conv3d, torch.nn.ConvTranspose2d) + blacklist_weight_modules = (torch.nn.LayerNorm, torch.nn.Embedding, torch.nn.BatchNorm3d, torch.nn.BatchNorm2d) + for mn, m in self.named_modules(): + for pn, p in m.named_parameters(): + fpn = '%s.%s' % (mn, pn) if mn else pn # full param name + + if pn.endswith('bias'): + # all biases will not be decayed + no_decay.add(fpn) + elif pn.endswith('weight') and isinstance(m, whitelist_weight_modules): + # weights of whitelist modules will be weight decayed + decay.add(fpn) + elif pn.endswith('weight') and isinstance(m, blacklist_weight_modules): + # weights of blacklist modules will NOT be weight decayed + no_decay.add(fpn) + + # special case the position embedding parameter in the root GPT module as not decayed + no_decay.add('pos_emb') + no_decay.add('global_pos_emb') + + # validate that we considered every parameter + param_dict = {pn: p for pn, p in self.named_parameters()} + inter_params = decay & no_decay + union_params = decay | no_decay + assert len(inter_params) == 0, "parameters %s made it into both decay/no_decay sets!" % (str(inter_params), ) + assert len(param_dict.keys() - union_params) == 0, "parameters %s were not separated into either decay/no_decay set!" 
\ + % (str(param_dict.keys() - union_params), ) + + # create the pytorch optimizer object + optim_groups = [ + {"params": [param_dict[pn] for pn in sorted(list(decay))], "weight_decay": train_config.weight_decay}, + {"params": [param_dict[pn] for pn in sorted(list(no_decay))], "weight_decay": 0.0}, + ] + optimizer = torch.optim.AdamW(optim_groups, lr=train_config.learning_rate, betas=train_config.betas) + return optimizer + + # state, and action + def forward(self, states, actions, targets=None, gt_map=None, timesteps=None): + # states: (batch, block_size, 4*84*84) + # actions: (batch, block_size, 1) + # targets: (batch, block_size, 1) + # timesteps: (batch, 1, 1) + if self.config.state_tokenizer == 'resnet18': + state_embeddings = self.state_encoder(states.reshape(-1, 1, 200 , 200).type(torch.float32).contiguous()) # (batch * block_size, n_embd) + state_embeddings = state_embeddings.reshape(states.shape[0], states.shape[1], self.config.n_embd) # (batch, block_size, n_embd) + elif self.config.state_tokenizer == 'conv2D': + state_embeddings = self.state_encoder(states.reshape(-1, 1, 244 , 244).type(torch.float32).contiguous()) # (batch * block_size, n_embd) + state_embeddings = state_embeddings.reshape(states.shape[0], states.shape[1], self.config.n_embd) # (batch, block_size, n_embd) + else: + print('Not supported!') + + if actions is not None and self.model_type == 'GPT': + if self.config.loss == 'MSE': + B, N, C = actions.shape + tmp = actions.view(B*N, C) + action_embeddings = self.action_embeddings(tmp).view(B, N, -1) # (batch, block_size, n_embd) + + + #token_embeddings = torch.zeros((states.shape[0], states.shape[1]*2 - int(targets is None), self.config.n_embd), dtype=torch.float32, device=state_embeddings.device) + token_embeddings = torch.zeros((states.shape[0], states.shape[1]*2 - int(targets is None), self.config.n_embd), dtype=torch.float32, device=self.device) + + token_embeddings[:,::2,:] = state_embeddings + token_embeddings[:,1::2,:] = action_embeddings[:,-states.shape[1] + int(targets is None):,:] + elif actions is None and self.model_type == 'GPT': # only happens at very first timestep of evaluation + token_embeddings = state_embeddings + else: + raise NotImplementedError() + batch_size = states.shape[0] + all_global_pos_emb = torch.repeat_interleave(self.global_pos_emb, batch_size, dim=0) # batch_size, traj_length, n_embd + + position_embeddings = torch.gather(all_global_pos_emb, 1, torch.repeat_interleave(timesteps, self.config.n_embd, dim=-1)) + self.pos_emb[:, :token_embeddings.shape[1], :] + #position_embeddings = torch.gather(all_global_pos_emb, 1, self.pos_emb[:, :token_embeddings.shape[1], :].type(torch.long)) + + x = self.drop(token_embeddings + position_embeddings) + x = self.blocks(x) + x = self.ln_f(x) + + if self.config.train_mode == 'e2e': + action_preds = self.predict_action(x[:, ::2, :]) + elif self.config.train_mode == 'map': + percep_feat = x[:, ::2, :] + B, N, D = percep_feat.shape + feat = percep_feat.reshape(B, -1) # reshape to a vector + map_recon = self.map_decoder(feat) + else: + print('Not support!') + + + loss = None + if targets is not None: + if self.config.train_mode == 'map': + loss = self.criterion(map_recon.reshape(-1, self.map_recon_dim, self.map_recon_dim), gt_map) + return map_recon, loss + elif self.config.train_mode == 'e2e': + # loss over N timesteps + loss = self.criterion(actions, torch.tanh(action_preds) ) + return action_preds, loss + +class MapDecoder_4x_Deconv(nn.Module): + def __init__(self, in_channels=384): + 
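+        # Four stride-2 deconv stages: with the fixed output sizes below this upsamples
+        # a (B, in_channels, 25, 25) feature map to a (B, 1, 400, 400) reconstruction
+        # (input spatial size inferred from the 50x50 output of the first stage).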
+        super().__init__()
+
+        self.decoder = nn.Sequential(
+            ConvTranspose2d_FixOutputSize(nn.ConvTranspose2d(in_channels, 256, kernel_size=3, stride=2, padding=1), output_size=(50, 50)),
+            nn.BatchNorm2d(256),
+            nn.ReLU(),
+            ConvTranspose2d_FixOutputSize(nn.ConvTranspose2d(256, 128, kernel_size=3, stride=2, padding=1), output_size=(100, 100)),
+            nn.BatchNorm2d(128),
+            nn.ReLU(),
+            ConvTranspose2d_FixOutputSize(nn.ConvTranspose2d(128, 64, kernel_size=3, stride=2, padding=1), output_size=(200, 200)),
+            nn.BatchNorm2d(64),
+            nn.ReLU(),
+            ConvTranspose2d_FixOutputSize(nn.ConvTranspose2d(64, 1, kernel_size=3, stride=2, padding=1), output_size=(400, 400)),
+        )
+
+    def forward(self, x):
+        return self.decoder(x)
+
+class MapDecoder_2x_Deconv(nn.Module):
+    def __init__(self, in_channels=768):
+        super().__init__()
+
+        # The parameters for ConvTranspose2D are from the PyTorch repo.
+        # Ref: https://pytorch.org/docs/stable/generated/torch.nn.ConvTranspose2d.html
+        # Ref: https://github.com/vdumoulin/conv_arithmetic/blob/master/README.md
+        # Ref: https://discuss.pytorch.org/t/the-output-size-of-convtranspose2d-differs-from-the-expected-output-size/1876/13
+        # Ref: (padding) https://towardsdatascience.com/what-is-transposed-convolutional-layer-40e5e6e31c11
+        self.decoder = nn.Sequential(
+            ConvTranspose2d_FixOutputSize(nn.ConvTranspose2d(in_channels, 8, kernel_size=3, stride=2, padding=1), output_size=(32, 32)),
+            nn.BatchNorm2d(8),
+            nn.ReLU(),
+            ConvTranspose2d_FixOutputSize(nn.ConvTranspose2d(8, 1, kernel_size=3, stride=2, padding=1), output_size=(64, 64)),
+            nn.Tanh()
+        )
+
+    def forward(self, x):
+        return self.decoder(x)
+
+class MapDecoder_4x_Deconv128px(nn.Module):
+    def __init__(self, in_channels=384):
+        super().__init__()
+
+        self.decoder = nn.Sequential(
+            ConvTranspose2d_FixOutputSize(nn.ConvTranspose2d(in_channels, 256, kernel_size=3, stride=2, padding=1), output_size=(32, 32)),
+            nn.BatchNorm2d(256),
+            nn.ReLU(),
+            ConvTranspose2d_FixOutputSize(nn.ConvTranspose2d(256, 128, kernel_size=3, stride=2, padding=1), output_size=(64, 64)),
+            nn.BatchNorm2d(128),
+            nn.ReLU(),
+            ConvTranspose2d_FixOutputSize(nn.ConvTranspose2d(128, 1, kernel_size=3, stride=2, padding=1), output_size=(128, 128)),
+            nn.Tanh()
+        )
+
+    def forward(self, x):
+        return self.decoder(x)
+
+
+class MapDecoder_5x_Deconv_64output(nn.Module):
+    def __init__(self, in_channels=768):
+        super().__init__()
+        self.decoder = nn.Sequential(
+            ConvTranspose2d_FixOutputSize(nn.ConvTranspose2d(in_channels, 64, kernel_size=4, stride=1, padding=2), output_size=(8, 8)),
+            nn.BatchNorm2d(64),
+            nn.ReLU(),
+            ConvTranspose2d_FixOutputSize(nn.ConvTranspose2d(64, 32, kernel_size=5, stride=2, padding=2), output_size=(16, 16)),
+            nn.BatchNorm2d(32),
+            nn.ReLU(),
+            ConvTranspose2d_FixOutputSize(nn.ConvTranspose2d(32, 16, kernel_size=6, stride=2, padding=2), output_size=(32, 32)),
+            nn.BatchNorm2d(16),
+            nn.ReLU(),
+            ConvTranspose2d_FixOutputSize(nn.ConvTranspose2d(16, 8, kernel_size=3, stride=2, padding=1), output_size=(32, 32)),
+            nn.BatchNorm2d(8),
+            nn.ReLU(),
+            ConvTranspose2d_FixOutputSize(nn.ConvTranspose2d(8, 1, kernel_size=3, stride=2, padding=1), output_size=(64, 64)),
+            nn.Tanh()
+        )
+
+    def forward(self, x):
+        return self.decoder(x)
+
+
+class ConvTranspose2d_FixOutputSize(nn.Module):
+    """
+    A wrapper to fix the output size of ConvTranspose2D.
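+    Passing output_size to the wrapped module's forward() pins the exact output
+    resolution (e.g. (64, 64) in the decoders above), avoiding the rounding
+    ambiguity of stride-2 transposed convolutions.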
+ Ref: https://discuss.pytorch.org/t/the-output-size-of-convtranspose2d-differs-from-the-expected-output-size/1876/13 + Ref: (other alternatives) https://github.com/vdumoulin/conv_arithmetic/blob/master/README.md + """ + def __init__(self, conv, output_size): + super(ConvTranspose2d_FixOutputSize, self).__init__() + self.output_size = output_size + self.conv = conv + + def forward(self, x): + x = self.conv(x, output_size=self.output_size) + return x + +class Reshape(nn.Module): + def __init__(self, *args): + super(Reshape, self).__init__() + self.shape = args + def forward(self, x): + return x.view((x.size(0),)+self.shape) \ No newline at end of file diff --git a/mushr_rhc_ros/src/rhcnode_network_iros.py b/mushr_rhc_ros/src/rhcnode_network_iros.py new file mode 100755 index 0000000..7c2e797 --- /dev/null +++ b/mushr_rhc_ros/src/rhcnode_network_iros.py @@ -0,0 +1,516 @@ +#!/usr/bin/env python3 + +# Copyright (c) 2019, The Personal Robotics Lab, The MuSHR Team, The Contributors of MuSHR +# License: BSD 3-Clause. See LICENSE.md file in root directory. + +# from torchsummary import summary +import sys +import os +import signal +import threading +import random +import numpy as np +from queue import Queue +import time + +import rospy +from ackermann_msgs.msg import AckermannDriveStamped +from geometry_msgs.msg import Point, PoseStamped, PoseWithCovarianceStamped +from std_msgs.msg import ColorRGBA, Empty, String +from std_srvs.srv import Empty as SrvEmpty +from visualization_msgs.msg import Marker +from sensor_msgs.msg import LaserScan + +import logger +import parameters +import rhcbase +import rhctensor +import utilss +import librhc.utils as utils_other + +import torch +from mingpt.model_resnetdirect import ResnetDirect, ResnetDirectWithActions +# from mingpt.model_musher import GPT, GPTConfig +# from mingpt.model_mushr_rogerio import GPT, GPTConfig +from mingpt.model_mushr_iros import GPT, GPTConfig +import preprocessing_utils as pre +from visualization_msgs.msg import Marker + +# import torch_tensorrt + + +class RHCNode(rhcbase.RHCBase): + def __init__(self, dtype, params, logger, name): + rospy.init_node(name, anonymous=True, disable_signals=True) + + super(RHCNode, self).__init__(dtype, params, logger) + + self.scan_lock = threading.Lock() + + self.reset_lock = threading.Lock() + self.inferred_pose_lock = threading.Lock() + self.inferred_pose_lock_prev = threading.Lock() + self._inferred_pose = None + self._inferred_pose_prev = None + + self.hp_zerocost_ids = None + self.hp_map = None + self.hp_world = None + self.time_started_goal = None + self.num_trials = 0 + + self.cur_rollout = self.cur_rollout_ip = None + self.traj_pub_lock = threading.Lock() + + self.goal_event = threading.Event() + self.map_metadata_event = threading.Event() + self.ready_event = threading.Event() + self.events = [self.goal_event, self.map_metadata_event, self.ready_event] + self.run = True + + self.default_speed = 2.5 + # self.default_speed = 1.5 + self.default_angle = 0.0 + self.nx = None + self.ny = None + + # network loading + print("Starting to load model") + os.environ["CUDA_VISIBLE_DEVICES"]=str(0) + device = torch.device('cuda') + + self.clip_len = 16 + + # tests for IROS + saved_model_path = rospy.get_param("~model_path", 'default_value') + self.out_path = rospy.get_param("~out_path", 'default_value') + # saved_model_path = '/home/rb/hackathon_data/aml_outputs/log_output/normal-kingfish/GPTiros_e2e_8gpu_2022-02-17_1645120431.7528405_2022-02-17_1645120431.7528613/model/epoch10.pth.tar' + + # saved_model_path 
= '/home/rb/downloaded_models/epoch30.pth.tar' + # saved_model_path = '/home/robot/weight_files/epoch15.pth.tar' + # saved_model_path = '/home/rb/hackathon_data/aml_outputs/log_output/gpt_resnet18_0/GPTgpt_resnet18_4gpu_2022-01-24_1642987604.6403077_2022-01-24_1642987604.640322/model/epoch15.pth.tar' + # saved_model_path = '/home/rb/hackathon_data/aml_outputs/log_output/gpt_resnet18_8_exp2/GPTgpt_resnet18_8gpu_exp2_2022-01-25_1643076745.003202_2022-01-25_1643076745.0032148/model/epoch12.pth.tar' + vocab_size = 100 + block_size = self.clip_len * 2 + max_timestep = 7 + # mconf = GPTConfig(vocab_size, block_size, max_timestep, + # n_layer=6, n_head=8, n_embd=128, model_type='GPT', use_pred_state=True, + # state_tokenizer='conv2D', train_mode='e2e', pretrained_model_path='') + mconf = GPTConfig(vocab_size, block_size, max_timestep, + n_layer=6, n_head=8, n_embd=128, model_type='GPT', use_pred_state=True, + state_tokenizer='resnet18', train_mode='e2e', pretrained_model_path='', pretrained_encoder_path='', loss='MSE', + map_decoder='deconv', map_recon_dim=64) + model = GPT(mconf, device) + model=torch.nn.DataParallel(model) + + # ckpt = torch.load('/home/rb/downloaded_models/epoch30.pth.tar')['state_dict'] + # for key in ckpt: + # print('********',key) + # model.load_state_dict(torch.load('/home/rb/downloaded_models/epoch30.pth.tar')['state_dict'], strict=True) + + checkpoint = torch.load(saved_model_path) + model.load_state_dict(checkpoint['state_dict']) + model.eval() + + # inputs = [torch_tensorrt.Input( + # states_shape=[1, self.clip_len, 200*200], + # actions_shape=[1, self.clip_len , 1], + # targets_shape=[1, self.clip_len , 1], + # timesteps_shape=[1, 1, 1], + # dtype=torch.half, + # )] + # enabled_precisions = {torch.float, torch.half} + # trt_ts_module = torch_tensorrt.compile(model, inputs=inputs, enabled_precisions=enabled_precisions) + + # model.half() + model.to(device) + + self.model = model + self.device = device + print("Finished loading model") + + self.q_scans = Queue(maxsize = self.clip_len+1) + self.q_actions = Queue(maxsize = self.clip_len) + for i in range(self.clip_len): + self.q_actions.put(self.default_angle) + self.last_action = self.default_angle + self.compute_network = False + + # parameters for model evaluation + self.reset_counter = 0 + self.last_reset_time = time.time() + self.distance_so_far = 0.0 + self.file_name = os.path.join(self.out_path,'info.csv') + + def start(self): + self.logger.info("Starting RHController") + self.setup_pub_sub() + self.rhctrl = self.load_controller() + self.find_allowable_pts() # gets the allowed halton points from the map + + self.ready_event.set() + + rate_hz = 50 + rate = rospy.Rate(rate_hz) + self.logger.info("Initialized") + + # set initial pose for the car in the very first time in an allowable region + self.send_initial_pose() + self.time_started = rospy.Time.now() + + # wait until we actually have a car pose + rospy.loginfo("Waiting to receive pose") + while not rospy.is_shutdown() and self.inferred_pose is None: + pass + rospy.loginfo("Vehicle pose received") + + while not rospy.is_shutdown() and self.run: + + # check if we should reset the vehicle if crashed + if self.check_reset(rate_hz): + rospy.loginfo("Resetting the car's position") + + # publish next action + if self.compute_network: + # don't have to run the network at all times, only when scans change and scans are full + self.last_action = self.apply_network() + self.q_actions.get() # remove the oldest action from the queue + self.q_actions.put(self.last_action) + # 
rospy.loginfo("Applied network: "+str(self.last_action)) + self.compute_network = False + + self.publish_vel_marker() + self.publish_traj(self.default_speed, self.last_action) + + rate.sleep() + + def publish_vel_marker(self): + marker = Marker() + marker.header.frame_id = "/car/base_link" + marker.header.stamp = rospy.Time.now() + marker.type = 0 # arrow + marker.id = 0 + + # Set the scale of the marker + marker.scale.x = 1 + marker.scale.y = 0.1 + marker.scale.z = 0.1 + + # Set the color + marker.color.r = 1.0 + marker.color.g = 0.0 + marker.color.b = 0.0 + marker.color.a = 1.0 + + # set the first point + # point_start = Point() + # point_start.x = point_start.y = point_start.z = 0.0 + # marker.points.append(point_start) + + # l = 5.0 + # point_end = Point() + # point_end.x = l*np.cos(self.last_action) + # point_end.y = l*np.sin(self.last_action) + # point_end.z = 0.0 + # marker.points.append(point_end) + + # Set the pose of the marker + marker.pose.position.x = 0.32 + marker.pose.position.y = 0 + marker.pose.position.z = 0 + marker.pose.orientation = utilss.angle_to_rosquaternion(self.last_action) + # marker.pose.orientation.x = 0.0 + # marker.pose.orientation.y = 0.0 + # marker.pose.orientation.z = 0.0 + # marker.pose.orientation.w = 1.0 + + self.vel_marker_pub.publish(marker) + + + + def apply_network(self): + start = time.time() + # organize the scan input + x_imgs = torch.zeros(1,self.clip_len,self.nx,self.ny) + y_imgs = torch.zeros(1,1,self.nx,self.ny) + x_act = torch.zeros(1,self.clip_len) + y_act = None + + + self.scan_lock.acquire() + queue_list = list(self.q_scans.queue) + queue_size = self.q_scans.qsize() + self.scan_lock.release() + + # while True: + # try: + # queue_list = self.q_scans.queue + # if len(queue_list)==self.clip_len+1: + # break + # except ValueError: + # print("EXCEPTION: diff number of images, or read at the wrong time") + + idx = 0 + for img in queue_list: + if idx==queue_size-1: + y_imgs[0,0,:] = torch.tensor(img) + else: + x_imgs[0,idx,:] = torch.tensor(img) + idx+=1 + idx = 0 + for act in self.q_actions.queue: + x_act[0,idx] = torch.tensor(act) + idx+=1 + + x_imgs = x_imgs.contiguous().view(1, self.clip_len, 200*200) + # x_imgs.half() + x_imgs = x_imgs.to(self.device) + # y_imgs = y_imgs.to(self.device) + + x_act = x_act.view(1, self.clip_len , 1) + # x_act.half() + x_act = x_act.to(self.device) + # y_act = y_act.to(self.device) + + t = np.ones((1, 1, 1), dtype=int) * 7 + t = torch.tensor(t) + # t.half() + t = t.to(self.device) + + finish_processing = time.time() + # rospy.loginfo("processing delay: "+str(finish_processing-start)) + + # organize the action input + with torch.set_grad_enabled(False): + action_pred, loss = self.model(states=x_imgs, actions=x_act, targets=x_act, gt_map=None, timesteps=t) + action_pred = action_pred[0,self.clip_len-1,0].cpu().flatten().item() + finished_network = time.time() + # rospy.loginfo("network delay: "+str(finished_network-finish_processing)) + + # de-normalize + action_pred = pre.denorm_angle(action_pred) + return action_pred + + def check_reset(self, rate_hz): + # condition if the car gets stuck + if self.inferred_pose_prev() is not None and self.time_started is not None: + # calculate distance traveled + delta_dist = np.linalg.norm(np.asarray(self.inferred_pose())-np.asarray(self.inferred_pose_prev())) + self.distance_so_far += delta_dist + # look at speed and termination condition + v = delta_dist * rate_hz + if v < 0.05 and rospy.Time.now().to_sec() - self.time_started.to_sec() > 1.0: + # this means that the car 
was supposed to follow a traj, but velocity is too low bc it's stuck + # first we reset the car pose + self.reset_counter +=1 + if self.reset_counter > 5: + self.send_initial_pose() + rospy.loginfo("Got stuck, resetting pose of the car to default value") + msg = String() + msg.data = "got stuck" + self.expr_at_goal.publish(msg) + self.reset_counter = 0 + # save distance data to file and reset distance + delta_time = time.time() - self.last_reset_time + new_line = np.array([self.distance_so_far, delta_time]) + print("Distance: {} | Time: {}".format(self.distance_so_far, delta_time)) + with open(self.file_name,'a') as fd: + fd.write(str(self.distance_so_far)+','+str(delta_time)+'\n') + # self.out_file = open(self.file_name,'ab') + # np.savetxt(self.out_file, new_line, delimiter=',') + # self.out_file.close() + self.distance_so_far = 0.0 + self.last_reset_time = time.time() + return True + else: + return False + + def send_initial_pose(self): + # sample a initial pose for the car based on the valid samples + hp_world_valid = self.hp_world[self.hp_zerocost_ids] + new_pos_idx = np.random.randint(0, hp_world_valid.shape[0]) + msg = PoseWithCovarianceStamped() + msg.header.stamp = rospy.Time.now() + msg.header.frame_id = "map" + # msg.pose.pose.position.x = hp_world_valid[new_pos_idx][0] + # msg.pose.pose.position.y = hp_world_valid[new_pos_idx][1] + # msg.pose.pose.position.z = 0.0 + # quat = utilss.angle_to_rosquaternion(hp_world_valid[new_pos_idx][1]) + msg.pose.pose.position.x = 4.12211 + (np.random.rand()-0.5)*2.0*0.5 + msg.pose.pose.position.y = -7.49623 + (np.random.rand()-0.5)*2.0*0.5 + msg.pose.pose.position.z = 0.0 + quat = utilss.angle_to_rosquaternion(np.radians(62.373 + (np.random.rand()-0.5)*2.0*360)) + msg.pose.pose.orientation = quat + self.pose_reset.publish(msg) + + def shutdown(self, signum, frame): + rospy.signal_shutdown("SIGINT recieved") + self.run = False + for ev in self.events: + ev.set() + + def process_scan(self, msg): + scan = np.zeros((721), dtype=np.float) + scan[0] = msg.header.stamp.to_sec() + scan[1:] = msg.ranges + original_points, sensor_origins, time_stamps, pc_range, voxel_size, lo_occupied, lo_free = pre.load_params(scan) + vis_mat, nx, ny = pre.compute_bev_image(original_points, sensor_origins, time_stamps, pc_range, voxel_size) + if self.nx is None: + self.nx = nx + self.ny = ny + return vis_mat + + def cb_scan(self, msg): + # remove oldest element if the queue is already full + self.scan_lock.acquire() + if self.q_scans.full(): + self.compute_network = True # start running the network in the main loop from now on + self.q_scans.get() # remove the oldest element, will be replaced next + self.scan_lock.release() + # add new processed scan + tmp = self.process_scan(msg) + self.scan_lock.acquire() + self.q_scans.put(tmp) # store matrices from 0-1 with the scans + self.scan_lock.release() + + + def setup_pub_sub(self): + rospy.Service("~reset/soft", SrvEmpty, self.srv_reset_soft) + rospy.Service("~reset/hard", SrvEmpty, self.srv_reset_hard) + + car_name = self.params.get_str("car_name", default="car") + + rospy.Subscriber( + "/" + car_name + "/" + 'scan', + LaserScan, + self.cb_scan, + queue_size=10, + ) + + rospy.Subscriber( + "/" + car_name + "/" + rospy.get_param("~inferred_pose_t"), + PoseStamped, + self.cb_pose, + queue_size=10, + ) + + self.rp_ctrls = rospy.Publisher( + "/" + + car_name + + "/" + + self.params.get_str( + "ctrl_topic", default="mux/ackermann_cmd_mux/input/navigation" + ), + AckermannDriveStamped, + queue_size=2, + ) + + 
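+        # Publisher for the RViz arrow marker built in publish_vel_marker(), which
+        # visualizes the most recent steering prediction.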
self.vel_marker_pub = rospy.Publisher("/model_action_marker", Marker, queue_size = 1) + + self.pose_reset = rospy.Publisher("/initialpose", PoseWithCovarianceStamped, queue_size=1) + + traj_chosen_t = self.params.get_str("traj_chosen_topic", default="~traj_chosen") + self.traj_chosen_pub = rospy.Publisher(traj_chosen_t, Marker, queue_size=10) + + # For the experiment framework, need indicators to listen on + self.expr_at_goal = rospy.Publisher("experiments/finished", String, queue_size=1) + + # to publish the new goal, for visualization + self.goal_pub = rospy.Publisher("~goal", Marker, queue_size=10) + + def srv_reset_hard(self, msg): + """ + Hard reset does a complete reload of the controller + """ + rospy.loginfo("Start hard reset") + self.reset_lock.acquire() + self.load_controller() + self.goal_event.clear() + self.reset_lock.release() + rospy.loginfo("End hard reset") + return [] + + def srv_reset_soft(self, msg): + """ + Soft reset only resets soft state (like tensors). No dependencies or maps + are reloaded + """ + rospy.loginfo("Start soft reset") + self.reset_lock.acquire() + self.rhctrl.reset() + self.goal_event.clear() + self.reset_lock.release() + rospy.loginfo("End soft reset") + return [] + + def find_allowable_pts(self): + self.hp_map, self.hp_world = self.rhctrl.cost.value_fn._get_halton_pts() + self.hp_zerocost_ids = np.zeros(self.hp_map.shape[0], dtype=bool) + for i, pts in enumerate(self.hp_map): + pts = pts.astype(np.int) + if int(pts[0])