Commit 2c5f154

[Llama3.2-11b-vision] Add max_cross_attn_tokens property to vLLM generator class (tenstorrent#17401)

skhorasganiTT authored and nikileshx committed Feb 3, 2025
1 parent 7750a3b commit 2c5f154
Showing 1 changed file with 4 additions and 0 deletions.
models/demos/llama3/tt/generator_vllm.py (4 additions, 0 deletions)

@@ -130,6 +130,10 @@ def initialize_vllm_model(cls, hf_config, mesh_device, max_batch_size):
     def cache_path(self):
         return self.model_args.model_cache_path
 
+    @property
+    def max_cross_attn_tokens(self):
+        return self.model_args.vision_max_num_chunks * nearest_32(self.model_args.vision_chunk_ntok)
+
     def prefill_forward(
         self,
         tokens: torch.Tensor,
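
For context, the new property caps the cross-attention sequence length at the maximum number of vision chunks times the per-chunk vision token count, rounded up to a multiple of 32 (the tile width on Tenstorrent hardware). Below is a minimal sketch of the same computation; the nearest_32 helper and the example values for vision_max_num_chunks and vision_chunk_ntok are illustrative assumptions, not values taken from this diff.

    def nearest_32(x: int) -> int:
        # Round x up to the nearest multiple of 32 (Tenstorrent tile width).
        return ((x + 31) // 32) * 32

    # Assumed example values for Llama 3.2 Vision, not read from the diff:
    vision_max_num_chunks = 4   # max image tiles per request
    vision_chunk_ntok = 1601    # vision tokens per tile (e.g. 1600 patches + 1 class token)

    max_cross_attn_tokens = vision_max_num_chunks * nearest_32(vision_chunk_ntok)
    print(max_cross_attn_tokens)  # 4 * nearest_32(1601) = 4 * 1632 = 6528

Rounding each chunk up to a multiple of 32 presumably aligns the bound with the tile-padded layout used on device, rather than the raw token count.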
