Xilinx · Giuseppe5 · Oct 31, 2023 · Oct 27, 2023 · Oct 3, 2023
diff --git a/src/brevitas/nn/mixin/base.py b/src/brevitas/nn/mixin/base.py
@@ -172,8 +172,9 @@ def unpack_input(self, inp: Union[Tensor, QuantTensor]):
                 cached_inp = _CachedIO(inp.detach(), self.cache_quant_io_metadata_only)
                 self._cached_inp = cached_inp
         # Remove any naming metadata to avoid dowmstream errors
+        # Avoid inplace operations on the input in case of forward hooks
         if not torch._C._get_tracing_state():
-            inp.value.rename_(None)
+            inp = inp.set(value=inp.value.rename(None))
         return inp
 
     def pack_output(self, quant_output: QuantTensor):

diff --git a/src/brevitas_examples/imagenet_classification/ptq/learned_round_utils.py b/src/brevitas_examples/imagenet_classification/ptq/learned_round_utils.py
@@ -36,6 +36,7 @@
 from brevitas.inject.enum import FloatToIntImplType
 from brevitas.inject.enum import LearnedRoundImplType
 from brevitas.nn.quant_layer import QuantWeightBiasInputOutputLayer as QuantWBIOL
+from brevitas.quant_tensor import QuantTensor
 
 config.IGNORE_MISSING_KEYS = True
 
@@ -53,6 +54,19 @@ def __init__(self, store_output: False):
         self.output_store = None
 
     def __call__(self, module, input_batch, output_batch):
+        input_batch = input_batch[0]
+        if isinstance(input_batch, QuantTensor):
+            input_batch = input_batch.value
+
+        if hasattr(input_batch, 'names') and 'N' in input_batch.names:
+            batch_dim = input_batch.names.index('N')
+
+            input_batch.rename_(None)
+            input_batch = input_batch.transpose(0, batch_dim)
+            if self.store_output:
+                output_batch.rename_(None)
+                output_batch = output_batch.transpose(0, batch_dim)
+
         if self.store_output:
             self.output_store = output_batch
         self.input_store = input_batch
@@ -183,9 +197,9 @@ def save_inp_out_data(
                 pass
             if store_inp:
                 if keep_gpu:
-                    cached[0].append(data_saver.input_store[0].detach())
+                    cached[0].append(data_saver.input_store.detach())
                 else:
-                    cached[0].append(data_saver.input_store[0].detach().cpu())
+                    cached[0].append(data_saver.input_store.detach().cpu())
             if store_out:
                 if keep_gpu:
                     cached[1].append(data_saver.output_store.detach())

diff --git a/src/brevitas_examples/imagenet_classification/ptq/ptq_common.py b/src/brevitas_examples/imagenet_classification/ptq/ptq_common.py
@@ -361,6 +361,14 @@ def kwargs_prefix(prefix, weight_kwargs):
         'return_quant_tensor': False}
     # yapf: enable
 
+    quant_act_kwargs = {'act_quant': act_quant, 'return_quant_tensor': True}
+    # For potentially unsigned activations, we create a separate dict
+    unsigned_quant_act_kwargs = quant_act_kwargs.copy()
+    if uint_sym_act_for_unsigned_values:
+        # In case we support unsigned activation, the output of softmax can be unsigned
+        quant_mha_kwargs['attn_output_weights_signed'] = False
+        unsigned_quant_act_kwargs['signed'] = False
+
     # Layerwise is  basic quant kwargs + input_quant
     layerwise_quant_wbiol_kwargs = {**quant_wbiol_kwargs, 'input_quant': per_tensor_act_quant}
 
@@ -374,16 +382,6 @@ def kwargs_prefix(prefix, weight_kwargs):
         torch.nn.ConvTranspose1d: (qnn.QuantConvTranspose1d, quant_wbiol_kwargs),
         torch.nn.ConvTranspose2d: (qnn.QuantConvTranspose2d, quant_wbiol_kwargs),}
 
-    act_quant_and_bit_width = {'act_quant': act_quant, 'bit_width': act_bit_width}
-    quant_act_kwargs = {**act_quant_and_bit_width, 'return_quant_tensor': True}
-
-    # For potentially unsigned activations, we create a separate dict
-    unsigned_quant_act_kwargs = quant_act_kwargs.copy()
-    if uint_sym_act_for_unsigned_values:
-        # In case we support unsigned activation, the output of softmax can be unsigned
-        quant_mha_kwargs['attn_output_weights_signed'] = False
-        unsigned_quant_act_kwargs['signed'] = False
-
     quant_act_map = {
         torch.nn.ReLU: (qnn.QuantReLU, {
             **unsigned_quant_act_kwargs}),