-
Notifications
You must be signed in to change notification settings - Fork 169
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Enable Intel ARC gpu test for vllm openvino.
Signed-off-by: senhui2intel <[email protected]>
- Loading branch information
1 parent
b8948f2
commit 3d877c6
Showing
1 changed file
with
150 additions
and
0 deletions.
There are no files selected for viewing
150 changes: 150 additions & 0 deletions
150
tests/llms/test_llms_text-generation_vllm_langchain_openvino_on_intel_arc.sh
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,150 @@ | ||
#!/bin/bash
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

# Smoke test for vLLM with the OpenVINO backend on an Intel ARC GPU:
# builds the vllm-openvino image, starts the container, probes the
# OpenAI-compatible API endpoints, then cleans up.

set -x

WORKPATH="$( cd "$( dirname "$0" )" && pwd )"

# Define variables
port=5033                     # host/container port for the OpenAI-compatible API server
RENDER_GROUP_ID=110           # 'render' group id required for /dev/dri GPU access inside the container
HF_MODEL_FIR=$HOME/hf_model   # NOTE(review): likely meant HF_MODEL_DIR — confirm before renaming (read in start_container)
DOCKER_IMAGE="vllm-openvino:comps"
CONTAINER_NAME="test-comps-vllm-openvino-container"
#######################################
# Build the vllm-openvino Docker image with Intel ARC GPU driver support.
# Globals:   WORKPATH, DOCKER_IMAGE, https_proxy, http_proxy (all read)
# Outputs:   build status messages to stdout
# Exits:     1 if the docker build fails
#######################################
function build_container() {
  cd "$WORKPATH" || exit 1
  git clone https://github.com/vllm-project/vllm.git vllm-openvino
  cd ./vllm-openvino || exit 1

  git reset --hard 067e77f9a87c3466fce41c8fe8710fddc69ec26c # resolve circular import issue

  # Insert the Intel ARC GPU driver packages after line 9 of the upstream
  # Dockerfile ('9r /dev/stdin' reads the here-doc as the text to insert).
  # The doubled backslashes become single continuation backslashes in the
  # generated Dockerfile.
  sed -i '9r /dev/stdin' Dockerfile.openvino << EndOfMessage
RUN apt-get install -y gpg-agent wget
RUN wget -qO - https://repositories.intel.com/gpu/intel-graphics.key | gpg --yes --dearmor --output /usr/share/keyrings/intel-graphics.gpg && \\
    echo "deb [arch=amd64 signed-by=/usr/share/keyrings/intel-graphics.gpg] https://repositories.intel.com/gpu/ubuntu jammy/lts/2350 unified" | \\
    tee /etc/apt/sources.list.d/intel-gpu-jammy.list && \\
    apt-get update -y && \\
    apt-get install -y \\
    intel-opencl-icd intel-level-zero-gpu level-zero \\
    intel-media-va-driver-non-free libmfx1 libmfxgen1 libvpl2 \\
    libegl-mesa0 libegl1-mesa libegl1-mesa-dev libgbm1 libgl1-mesa-dev libgl1-mesa-dri \\
    libglapi-mesa libgles2-mesa-dev libglx-mesa0 libigdgmm12 libxatracker2 mesa-va-drivers \\
    mesa-vdpau-drivers mesa-vulkan-drivers va-driver-all vainfo hwinfo clinfo
EndOfMessage

  # Test docker build's exit status directly instead of the '$?' anti-pattern.
  if docker build --no-cache -t "$DOCKER_IMAGE" \
       -f Dockerfile.openvino \
       . \
       --build-arg https_proxy="$https_proxy" \
       --build-arg http_proxy="$http_proxy"; then
    echo "vllm-openvino built successful"
  else
    echo "vllm-openvino built fail"
    exit 1
  fi
  cd "$WORKPATH" || exit 1
  rm -rf vllm-openvino
}
|
||
#######################################
# Start the vLLM OpenVINO API server container on the Intel ARC GPU, then
# poll its logs (up to 300 x 3s = 15 min) until Uvicorn reports it is up.
# Globals:   CONTAINER_NAME, DOCKER_IMAGE, port, RENDER_GROUP_ID,
#            HF_MODEL_FIR, https_proxy, http_proxy (all read)
# Outputs:   container log snapshot to /tmp/$CONTAINER_NAME.log
#######################################
start_container() {

  # BUGFIX: the original was missing the line-continuation '\' after
  # --group-add, which truncated the docker run command, and it set
  # HTTP_PROXY from $https_proxy (copy-paste error).
  # $port inside the double-quoted -c string is expanded by the host shell
  # on purpose, so the server listens on the published port.
  docker run -d --rm --name="$CONTAINER_NAME" \
    -p "$port:$port" \
    --ipc=host \
    -e HTTPS_PROXY="$https_proxy" \
    -e HTTP_PROXY="$http_proxy" \
    -v "$HF_MODEL_FIR:/hf_model" \
    --device=/dev/dri:/dev/dri \
    --group-add "$RENDER_GROUP_ID" \
    "$DOCKER_IMAGE" /bin/bash -c "\
      export VLLM_CPU_KVCACHE_SPACE=8 && \
      export VLLM_OPENVINO_DEVICE=GPU && \
      export VLLM_OPENVINO_ENABLE_QUANTIZED_WEIGHTS=ON && \
      python3 -m vllm.entrypoints.openai.api_server \
        --model /hf_model/neural-chat-7b-v3-3 \
        --host 0.0.0.0 \
        --port $port"

  # check whether service is fully ready
  n=0
  until [[ "$n" -ge 300 ]]; do
    docker logs "$CONTAINER_NAME" > "/tmp/$CONTAINER_NAME.log" 2>&1
    n=$((n+1))
    if grep -q "Uvicorn running on" "/tmp/$CONTAINER_NAME.log"; then
      break
    fi
    sleep 3s
  done

}
|
||
#######################################
# Cleanup: stop/remove the test container, delete the image and log file.
# Globals:   CONTAINER_NAME, DOCKER_IMAGE (read)
#######################################
cleanup() {
  # Stop and remove Docker container and images
  local cid
  cid=$(docker ps -aq --filter "name=$CONTAINER_NAME")
  if [[ -n "$cid" ]]; then
    # The container was started with --rm, so a successful stop also removes
    # it; fall back to an explicit rm only if stop fails. (Replaces the
    # ambiguous 'stop || rm && sleep' chain.)
    docker stop "$cid" || docker rm "$cid"
    sleep 1s
  fi
  docker rmi -f "$DOCKER_IMAGE"
  rm -f "/tmp/$CONTAINER_NAME.log"   # -f: don't fail if the log was never written
}
|
||
#######################################
# Issue an HTTP request against the running server and assert its status.
# Globals:   port, CONTAINER_NAME (read)
# Arguments: $1 - endpoint path (e.g. "v1/models" or "v1/completions")
#            $2 - expected HTTP status code
# Outputs:   PASS/FAIL line to stdout; container logs on failure
# Exits:     1 on status mismatch
#######################################
function test_api_endpoint {
  local endpoint="$1"
  local expected_status="$2"
  # Declared separately from the assignments below so curl's exit status
  # isn't masked by 'local'.
  local response

  # Make the HTTP request: completions needs a POST body, others a plain GET.
  # (Consistently use $endpoint instead of mixing it with a bare $1.)
  if [[ "$endpoint" == "v1/completions" ]]; then
    response=$(curl "http://localhost:$port/$endpoint" \
      -H "Content-Type: application/json" \
      -d '{
        "model": "/hf_model/neural-chat-7b-v3-3",
        "prompt": "What is the key advantage of Openvino framework",
        "max_tokens": 300,
        "temperature": 0.7
      }' \
      --write-out '%{http_code}' \
      --silent \
      --output /dev/null)
  else
    response=$(curl "http://localhost:$port/$endpoint" \
      --write-out '%{http_code}' \
      --silent \
      --output /dev/null)
  fi

  # Assert the response status code
  if [[ "$response" -eq "$expected_status" ]]; then
    echo "PASS: $endpoint returned expected status code: $expected_status"
  else
    echo "FAIL: $endpoint returned unexpected status code: $response (expected: $expected_status)"
    docker logs "$CONTAINER_NAME"
    exit 1
  fi
}
|
||
#######################################
# Entry point: bring the service up, exercise both API endpoints, tear down.
#######################################
main() {

  # use local image to skip slow network connection
  # build_container
  start_container

  # Give the freshly started container a moment to settle.
  sleep 10

  # Probe the model-listing endpoint first, then a real completion request.
  test_api_endpoint "v1/models" 200

  test_api_endpoint "v1/completions" 200

  cleanup
}

# Call main function
main