Fix requirements for virtual envs: do not use common requirements as restrictions for vllm/lmi-dist
deepjavalibrary · Feb 3, 2025 · da91e9a · da91e9a
1 parent 54185ba
commit da91e9a
Show file tree

Hide file tree

Showing 5 changed files with 47 additions and 25 deletions.
diff --git a/serving/docker/lmi-container-requirements-common.txt b/serving/docker/lmi-container-requirements-common.txt
@@ -1,6 +1,6 @@
-peft
+peft==0.13.2
 protobuf==3.20.3
-transformers>=4.45.2
+transformers==4.45.2
 hf-transfer
 zstandard
 datasets==3.0.1
@@ -23,8 +23,8 @@ onnx
 sentence_transformers
 onnxruntime-gpu==1.20.0
 autoawq==0.2.5
-tokenizers>=0.20.3
-pydantic>=2.9.2
+tokenizers==0.20.3
+pydantic==2.9.2
 optimum==1.23.2
 torch==2.5.1
 torchvision==0.20.1

diff --git a/serving/docker/requirements-lmi.txt b/serving/docker/requirements-lmi.txt
@@ -1,4 +1,4 @@
--r requirements-common.txt
+peft==0.13.2
 llmcompressor
 # flash infer kernels for vllm/lmi-dist
 https://github.com/flashinfer-ai/flashinfer/releases/download/v0.1.6/flashinfer-0.1.6+cu124torch2.4-cp311-cp311-linux_x86_64.whl

diff --git a/serving/docker/requirements-vllm.txt b/serving/docker/requirements-vllm.txt
@@ -1,3 +1,3 @@
--r requirements-common.txt
+peft==0.14.0
 llmcompressor
 vllm==0.7.1
diff --git a/tests/integration/llm/client.py b/tests/integration/llm/client.py
@@ -1198,15 +1198,21 @@ def batch_generation(batch_size):
 def batch_generation_chat(batch_size):
     messages = [
         [{
-            "role": "system",
-            "content": "You are a helpful assistant."
+            "role": "user",
+            "content": "hello, can you help me?"
+        }, {
+            "role": "assistant",
+            "content": "Hi, what can i help you with today?"
         }, {
             "role": "user",
             "content": "What is deep learning?"
         }],
         [{
-            "role": "system",
-            "content": "You are a helpful assistant."
+            "role": "user",
+            "content": "hello, can you help me?"
+        }, {
+            "role": "assistant",
+            "content": "Hi, what can i help you with today?"
         }, {
             "role": "user",
             "content": "Who won the world series in 2020?"
@@ -1218,45 +1224,61 @@ def batch_generation_chat(batch_size):
             "content": "Where was it played?"
         }],
         [{
-            "role": "system",
-            "content": "You are a helpful assistant."
+            "role": "user",
+            "content": "hello, can you help me?"
+        }, {
+            "role": "assistant",
+            "content": "Hi, what can i help you with today?"
         }, {
             "role": "user",
             "content": "How do I build a car from cardboard and paper clips?"
         }],
         [{
-            "role": "system",
-            "content": "You are a helpful assistant."
+            "role": "user",
+            "content": "hello, can you help me?"
+        }, {
+            "role": "assistant",
+            "content": "Hi, what can i help you with today?"
         }, {
             "role": "user",
             "content": "Hello!"
         }],
         [{
-            "role": "system",
-            "content": "You are a helpful assistant."
+            "role": "user",
+            "content": "hello, can you help me?"
+        }, {
+            "role": "assistant",
+            "content": "Hi, what can i help you with today?"
         }, {
             "role": "user",
             "content": "Who are you?"
         }],
         [{
-            "role": "system",
-            "content": "You are a helpful assistant."
+            "role": "user",
+            "content": "hello, can you help me?"
+        }, {
+            "role": "assistant",
+            "content": "Hi, what can i help you with today?"
         }, {
             "role": "user",
             "content": "Hello world!"
         }],
         [{
-            "role":
-            "system",
-            "content":
-            "You're a helpful assistant! Answer the users question best you can."
+            "role": "user",
+            "content": "hello, can you help me?"
+        }, {
+            "role": "assistant",
+            "content": "Hi, what can i help you with today?"
         }, {
             "role": "user",
             "content": "What is the weather like in Brooklyn, New York?"
         }],
         [{
-            "role": "system",
-            "content": "You are a helpful assistant."
+            "role": "user",
+            "content": "hello, can you help me?"
+        }, {
+            "role": "assistant",
+            "content": "Hi, what can i help you with today?"
         }, {
             "role":
             "user",

diff --git a/tests/integration/llm/prepare.py b/tests/integration/llm/prepare.py
@@ -763,7 +763,7 @@
         "option.tensor_parallel_degree": 4
     },
     "mistral-7b": {
-        "option.model_id": "s3://djl-llm/mistral-7b",
+        "option.model_id": "s3://djl-llm/mistral-7b-instruct-v03",
         "option.task": "text-generation",
         "option.tensor_parallel_degree": 4,
         "option.max_rolling_batch_size": 4