Commit

Update test
rahul-tuli committed Jan 14, 2025
1 parent 141c607 commit d1dd1d6
Showing 2 changed files with 7 additions and 6 deletions.
@@ -14,13 +14,13 @@ quant_stage:
   weights:
     num_bits: 8
     type: float
-    strategy: tensor
+    strategy: channel
     dynamic: false
     symmetric: true
   input_activations:
     num_bits: 8
     type: float
-    strategy: tensor
+    strategy: token
     dynamic: true
     symmetric: true
   targets: ["Linear"]
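For context, the switch from strategy: tensor to strategy: channel for weights (and strategy: token for activations) changes the granularity at which quantization scales are computed. A minimal sketch in plain PyTorch, not taken from this repository, of what the weight-side difference looks like for FP8 (E4M3, max representable value 448):

    import torch

    W = torch.randn(16, 32)  # [out_features, in_features] weight of a Linear layer
    FP8_E4M3_MAX = 448.0

    # per-tensor strategy: one scale shared by every element of the weight
    tensor_scale = W.abs().max() / FP8_E4M3_MAX                       # scalar

    # per-channel strategy: one scale per output channel (weight row)
    channel_scale = W.abs().amax(dim=1, keepdim=True) / FP8_E4M3_MAX  # shape [16, 1]

The per-token activation strategy is analogous but, because it depends on the input, the scales are computed at runtime for each token (hence dynamic: true).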
@@ -363,9 +363,9 @@ def test_model_shared_tensors_gpu(
     [
         (
             "Xenova/llama2.c-stories15M",
-            "tests/llmcompressor/transformers/compression/recipes/sparse_int8.yaml",
-            CompressionFormat.sparse_bitmask.value,
-            CompressionFormat.int_quantized.value,
+            "tests/llmcompressor/transformers/compression/recipes/sparse_24_fp8.yaml",
+            CompressionFormat.sparse_24_bitmask.value,
+            CompressionFormat.float_quantized.value,
         ),
     ],
 )
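The updated tuple exercises 2:4 sparsity stacked with FP8 quantization rather than bitmask sparsity with INT8. For readability, here is a hedged reconstruction of how the full parametrize decorator pairs those values with the test arguments; only the tuple contents above come from the diff, and the argnames string and CompressionFormat import path are assumptions:

    import pytest
    from compressed_tensors.config import CompressionFormat

    @pytest.mark.parametrize(
        "model_stub,recipe,sparse_format,quant_format",
        [
            (
                "Xenova/llama2.c-stories15M",
                "tests/llmcompressor/transformers/compression/recipes/sparse_24_fp8.yaml",
                CompressionFormat.sparse_24_bitmask.value,
                CompressionFormat.float_quantized.value,
            ),
        ],
    )
    def test_compressor_stacking(model_stub, recipe, sparse_format, quant_format, tmp_path):
        ...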
@@ -437,7 +437,8 @@ def test_compressor_stacking(model_stub, recipe, sparse_format, quant_format, tmp_path):
         if key.endswith("weight") and quant_format != "dense":
             # we don't expect an exact match for compressed
             diff = torch.abs(dense_tensor - reconstructed_tensor)
-            assert not torch.any(diff > 0.01), f"Max diff: {torch.max(diff)}"
+            # max diff value found empirically
+            assert not torch.any(diff > 0.022), f"Max diff: {torch.max(diff)}"
         else:
             assert torch.equal(dense_tensor, reconstructed_tensor)
     shutil.rmtree(tmp_path)
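The relaxed bound (0.01 → 0.022) presumably absorbs the extra reconstruction error from the FP8 channel/token scheme stacked on 2:4 sparsity; note it is an element-wise bound, so a single outlier element fails the test. A self-contained sketch of the same check pattern, with an artificial error safely below the bound:

    import torch

    dense_tensor = torch.randn(8, 16)
    reconstructed_tensor = dense_tensor + 0.02  # simulated uniform reconstruction error

    diff = torch.abs(dense_tensor - reconstructed_tensor)
    assert not torch.any(diff > 0.022), f"Max diff: {torch.max(diff)}"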
