
Commit

update llm endpoint validation commands (opea-project#869)
Signed-off-by: letonghan <[email protected]>
letonghan authored Nov 8, 2024
1 parent ca6a4e3 commit 75eb864
Showing 5 changed files with 24 additions and 36 deletions.
8 changes: 4 additions & 4 deletions comps/llms/summarization/tgi/langchain/README.md
@@ -23,10 +23,10 @@ docker run -p 8008:80 -v ./data:/data --name llm-docsum-tgi --shm-size 1g ghcr.i
### 1.3 Verify the TGI Service

```bash
-curl http://${your_ip}:8008/generate \
-  -X POST \
-  -d '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":17, "do_sample": true}}' \
-  -H 'Content-Type: application/json'
+curl http://${your_ip}:8008/v1/chat/completions \
+  -X POST \
+  -d '{"model": ${your_hf_llm_model}, "messages": [{"role": "user", "content": "What is Deep Learning?"}], "max_tokens":17}' \
+  -H 'Content-Type: application/json'
```
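Note on the new payload: the shell does not expand `${your_hf_llm_model}` inside single quotes, and the model value would also need its own double quotes to be valid JSON, so the command fails as written unless the placeholder is replaced by hand. A minimal runnable variant — assuming the model name is exported first (the `Intel/neural-chat-7b-v3-3` value below is a hypothetical placeholder, not something this commit specifies) — could look like:

```bash
# Hypothetical model name; use whatever model the TGI container actually serves.
export your_hf_llm_model="Intel/neural-chat-7b-v3-3"

# Drop out of the single quotes around the variable so the shell expands it,
# and wrap the expansion in double quotes to keep the payload valid JSON.
curl http://${your_ip}:8008/v1/chat/completions \
  -X POST \
  -d '{"model": "'"${your_hf_llm_model}"'", "messages": [{"role": "user", "content": "What is Deep Learning?"}], "max_tokens": 17}' \
  -H 'Content-Type: application/json'
```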

### 1.4 Start LLM Service with Python Script
20 changes: 8 additions & 12 deletions comps/llms/text-generation/README.md
@@ -270,23 +270,19 @@ curl http://${your_ip}:9000/v1/health_check\
#### 3.2.1 Verify the TGI Service

```bash
-curl http://${your_ip}:8008/generate \
-  -X POST \
-  -d '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":17, "do_sample": true}}' \
-  -H 'Content-Type: application/json'
+curl http://${your_ip}:8008/v1/chat/completions \
+  -X POST \
+  -d '{"model": ${your_hf_llm_model}, "messages": [{"role": "user", "content": "What is Deep Learning?"}], "max_tokens":17}' \
+  -H 'Content-Type: application/json'
```

#### 3.2.2 Verify the vLLM Service

```bash
-curl http://${your_ip}:8008/v1/completions \
-  -H "Content-Type: application/json" \
-  -d '{
-  "model": ${your_hf_llm_model},
-  "prompt": "What is Deep Learning?",
-  "max_tokens": 32,
-  "temperature": 0
-  }'
+curl http://${host_ip}:8008/v1/chat/completions \
+  -X POST \
+  -H "Content-Type: application/json" \
+  -d '{"model": ${your_hf_llm_model}, "messages": [{"role": "user", "content": "What is Deep Learning?"}]}'
```
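Two details worth checking when running the new vLLM command: it reads `${host_ip}` where the TGI check above reads `${your_ip}`, so both variables must point at the same host, and `${your_hf_llm_model}` has the same single-quote expansion issue noted earlier. Assuming `jq` is installed (an assumption, not a documented prerequisite), the assistant's reply can be pulled straight out of the OpenAI-style response:

```bash
# -s silences curl's progress output; jq extracts the first choice's message
# text from the standard chat-completions response shape.
curl -s http://${host_ip}:8008/v1/chat/completions \
  -X POST \
  -H "Content-Type: application/json" \
  -d '{"model": "'"${your_hf_llm_model}"'", "messages": [{"role": "user", "content": "What is Deep Learning?"}]}' \
  | jq -r '.choices[0].message.content'
```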

### 3.3 Consume LLM Service
8 changes: 4 additions & 4 deletions comps/llms/text-generation/tgi/README.md
@@ -22,10 +22,10 @@ docker run -p 8008:80 -v ./data:/data --name tgi_service --shm-size 1g ghcr.io/h
### 1.3 Verify the TGI Service

```bash
-curl http://${your_ip}:8008/generate \
-  -X POST \
-  -d '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":17, "do_sample": true}}' \
-  -H 'Content-Type: application/json'
+curl http://${your_ip}:8008/v1/chat/completions \
+  -X POST \
+  -d '{"model": ${your_hf_llm_model}, "messages": [{"role": "user", "content": "What is Deep Learning?"}], "max_tokens":17}' \
+  -H 'Content-Type: application/json'
```
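If this request hangs or errors, a liveness probe helps separate "container still loading the model" from "wrong endpoint". Recent TGI releases expose `GET /health` (200 once the model is ready) and `GET /info` (model metadata); verify both routes against the TGI version you actually run:

```bash
# -f makes curl exit nonzero on HTTP errors, so the echo only fires on a 200.
curl -fsS http://${your_ip}:8008/health && echo "TGI is up"
# Model id, dtype, and token limits, as reported by the server itself.
curl -s http://${your_ip}:8008/info
```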

### 1.4 Start LLM Service with Python Script
12 changes: 4 additions & 8 deletions comps/llms/text-generation/vllm/langchain/README.md
@@ -186,14 +186,10 @@ OpenVINO best known configuration for GPU is:
And then you can make requests like below to check the service status:

```bash
-curl http://${your_ip}:8008/v1/completions \
-  -H "Content-Type: application/json" \
-  -d '{
-  "model": "meta-llama/Meta-Llama-3-8B-Instruct",
-  "prompt": "What is Deep Learning?",
-  "max_tokens": 32,
-  "temperature": 0
-  }'
+curl http://${host_ip}:9009/v1/chat/completions \
+  -X POST \
+  -H "Content-Type: application/json" \
+  -d '{"model": "meta-llama/Meta-Llama-3-8B-Instruct", "messages": [{"role": "user", "content": "What is Deep Learning?"}]}'
```
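Note that this command targets port 9009, while the other files in this commit use 8008; the port has to match whatever the vLLM container publishes. vLLM's OpenAI-compatible server also implements `GET /v1/models`, a quick way to confirm the exact model id the `"model"` field must carry (the filter below assumes `jq` is available):

```bash
# Lists the model ids the server is serving; the chat request's "model"
# field must match one of them exactly.
curl -s http://${host_ip}:9009/v1/models | jq -r '.data[].id'
```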

## 🚀3. Set up LLM microservice
12 changes: 4 additions & 8 deletions comps/llms/text-generation/vllm/llama_index/README.md
@@ -153,14 +153,10 @@ OpenVINO best known configuration is:
And then you can make requests like below to check the service status:

```bash
-curl http://${your_ip}:8008/v1/completions \
-  -H "Content-Type: application/json" \
-  -d '{
-  "model": "meta-llama/Meta-Llama-3-8B-Instruct",
-  "prompt": "What is Deep Learning?",
-  "max_tokens": 32,
-  "temperature": 0
-  }'
+curl http://${host_ip}:8008/v1/chat/completions \
+  -X POST \
+  -H "Content-Type: application/json" \
+  -d '{"model": "meta-llama/Meta-Llama-3-8B-Instruct", "messages": [{"role": "user", "content": "What is Deep Learning?"}]}'
```
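For long generations it can be useful to watch tokens arrive rather than wait for one JSON body. The OpenAI-compatible endpoint that vLLM exposes should accept `"stream": true` and respond with server-sent events — a sketch, not a form this commit documents:

```bash
# -N disables curl's output buffering so each "data: {...}" SSE chunk is
# printed as soon as the server emits it.
curl -N http://${host_ip}:8008/v1/chat/completions \
  -X POST \
  -H "Content-Type: application/json" \
  -d '{"model": "meta-llama/Meta-Llama-3-8B-Instruct", "messages": [{"role": "user", "content": "What is Deep Learning?"}], "stream": true}'
```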

## 🚀3. Set up LLM microservice
