#0: Clean the code
djordje-tt committed Sep 25, 2024
1 parent e703983 commit a47d70f
Showing 4 changed files with 10 additions and 5 deletions.
1 change: 0 additions & 1 deletion models/demos/t3000/llama2_70b/tt/model_config.py
@@ -72,7 +72,6 @@ def get_model_config(llama_version="llama3", max_batch_size=32, max_context_len=
         "NUM_DEVICES": num_devices,
         "llama3-tg": MAX_SEQ_LEN_LLAMA3,
         "llama3.1-tg": MAX_SEQ_LEN_LLAMA3_1,
-        "PADDING_LENGTH": 32,
         "COMPUTE_KERNEL_CONFIG": ttnn.WormholeComputeKernelConfig(
             math_fidelity=ttnn.MathFidelity.HiFi2,
             math_approx_mode=True,
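For context, a minimal sketch of the config-dict pattern this hunk touches. The constant values, the num_devices and max_context_len defaults, and everything below the lines shown in the diff are assumptions for illustration, not the repository's actual code:

    import ttnn

    # Assumed placeholder values; the real constants are defined elsewhere in model_config.py.
    MAX_SEQ_LEN_LLAMA3 = 8 * 1024
    MAX_SEQ_LEN_LLAMA3_1 = 128 * 1024

    def get_model_config(llama_version="llama3", max_batch_size=32, max_context_len=4096, num_devices=8):
        # Sketch of the dictionary shown in the diff; the "PADDING_LENGTH" entry
        # is the one this commit removes.
        return {
            "NUM_DEVICES": num_devices,
            "llama3-tg": MAX_SEQ_LEN_LLAMA3,
            "llama3.1-tg": MAX_SEQ_LEN_LLAMA3_1,
            "COMPUTE_KERNEL_CONFIG": ttnn.WormholeComputeKernelConfig(
                math_fidelity=ttnn.MathFidelity.HiFi2,
                math_approx_mode=True,
            ),
        }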
2 changes: 1 addition & 1 deletion models/demos/tg/llama3_70b/demo/demo.py
@@ -7,7 +7,7 @@
import json
import torch
import torch.nn.functional as F
import ttnn

from time import time
import pytest
from loguru import logger
2 changes: 1 addition & 1 deletion (file path not shown)
@@ -458,7 +458,7 @@ def test_LlamaAttention_inference(
max_batch_size=max_batch_size,
max_context_len=max_context_len,
)

check_mesh_device(mesh_device, model_config)
run_test_LlamaAttention_inference(
mesh_device,
cluster_shape,
10 changes: 8 additions & 2 deletions models/demos/tg/llama3_70b/tests/test_llama_model_galaxy.py
@@ -240,8 +240,14 @@ def run_test_LlamaModel_inference(
 )
 @pytest.mark.parametrize(
     "batch, seq_len",
-    [(32, 1), (1, 32), (1, 256), (1, 8192), (1, 32768), (1, 128 * 1024)],
-    ids=["decode", "prefill_32", "prefill_256", "prefill_8k", "prefill_32k", "prefill_128k"],
+    [
+        (32, 1),
+        # (1, 32), (1, 256), (1, 8192), (1, 32768), (1, 128 * 1024)
+    ],
+    ids=[
+        "decode",
+        # "prefill_32", "prefill_256", "prefill_8k", "prefill_32k", "prefill_128k"
+    ],
 )
 @pytest.mark.parametrize(
     "max_batch_size, max_context_len",
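The parametrize change above leaves only the decode case active; each (batch, seq_len) tuple becomes its own test invocation, named by the matching entry in ids. A minimal, self-contained sketch of that pattern (the test name and body here are stand-ins, not the repository's test):

    import pytest

    @pytest.mark.parametrize(
        "batch, seq_len",
        [(32, 1)],  # decode-only, mirroring the commit; prefill shapes stay commented out
        ids=["decode"],
    )
    def test_decode_shape(batch, seq_len):
        # pytest generates one test case per (batch, seq_len) tuple; `ids` controls its display name.
        assert batch == 32 and seq_len == 1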
