Skip to content

Commit

Permalink
Enable Intel ARC GPU test for vLLM OpenVINO.
Browse files Browse the repository at this point in the history
Signed-off-by: senhui2intel <[email protected]>
  • Loading branch information
senhui2intel committed Nov 6, 2024
1 parent b8948f2 commit 96a1207
Showing 1 changed file with 150 additions and 0 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,150 @@
#!/bin/bash
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

# Smoke test for vLLM with the OpenVINO backend on an Intel ARC GPU:
# builds the image, starts the serving container, probes the OpenAI-compatible
# API endpoints, and cleans up.

set -x

# Absolute directory containing this script (resolved regardless of cwd).
WORKPATH="$( cd "$( dirname "$0" )" && pwd )"

# Define variables
port=5033
# Host GID of the "render" group, needed for /dev/dri GPU access in-container.
RENDER_GROUP_ID=110
# Host directory holding the HuggingFace model checkout.
# NOTE(review): name looks like a typo for HF_MODEL_DIR — kept as-is because
# start_container reads it; rename both together if cleaned up.
HF_MODEL_FIR=$HOME/hf_model
DOCKER_IMAGE="vllm-openvino:comps"
CONTAINER_NAME="test-comps-vllm-openvino-container"

#######################################
# Build the vllm-openvino Docker image with the Intel ARC GPU driver stack
# injected into the upstream Dockerfile.
# Globals:   WORKPATH, DOCKER_IMAGE (read); https_proxy, http_proxy (read)
# Arguments: none
# Outputs:   build progress to stdout/stderr
# Returns:   exits 1 if the clone/cd/build fails
#######################################
function build_container() {
  cd "$WORKPATH" || exit 1
  git clone https://github.com/vllm-project/vllm.git vllm-openvino
  cd ./vllm-openvino || exit 1

  # Pin a known-good revision to resolve a circular import issue.
  git reset --hard 067e77f9a87c3466fce41c8fe8710fddc69ec26c

  # Insert the Intel ARC GPU driver installation after line 9 of the
  # Dockerfile; the heredoc is fed to sed via /dev/stdin.
  sed -i '9r /dev/stdin' Dockerfile.openvino << EndOfMessage
RUN apt-get install -y gpg-agent wget
RUN wget -qO - https://repositories.intel.com/gpu/intel-graphics.key | gpg --yes --dearmor --output /usr/share/keyrings/intel-graphics.gpg && \\
echo "deb [arch=amd64 signed-by=/usr/share/keyrings/intel-graphics.gpg] https://repositories.intel.com/gpu/ubuntu jammy/lts/2350 unified" | \\
tee /etc/apt/sources.list.d/intel-gpu-jammy.list && \\
apt-get update -y && \\
apt-get install -y \\
intel-opencl-icd intel-level-zero-gpu level-zero \\
intel-media-va-driver-non-free libmfx1 libmfxgen1 libvpl2 \\
libegl-mesa0 libegl1-mesa libegl1-mesa-dev libgbm1 libgl1-mesa-dev libgl1-mesa-dri \\
libglapi-mesa libgles2-mesa-dev libglx-mesa0 libigdgmm12 libxatracker2 mesa-va-drivers \\
mesa-vdpau-drivers mesa-vulkan-drivers va-driver-all vainfo hwinfo clinfo
EndOfMessage

  # Test the exit status of docker build directly instead of via $?.
  if ! docker build --no-cache -t "$DOCKER_IMAGE" \
    -f Dockerfile.openvino \
    . \
    --build-arg https_proxy="$https_proxy" \
    --build-arg http_proxy="$http_proxy"; then
    echo "vllm-openvino build failed"
    exit 1
  fi
  echo "vllm-openvino build succeeded"

  cd "$WORKPATH" || exit 1
  rm -rf vllm-openvino
}

# Function to start Docker container
#######################################
# Start the vLLM OpenVINO serving container on the Intel ARC GPU and poll
# its log until the OpenAI-compatible API server reports ready.
# Globals:   CONTAINER_NAME, DOCKER_IMAGE, port, HF_MODEL_FIR,
#            RENDER_GROUP_ID (read); https_proxy, http_proxy (read)
# Arguments: none
# Outputs:   writes container log to /tmp/$CONTAINER_NAME.log
#######################################
start_container() {

  # BUG FIX: the original command dropped the line continuation after
  # --group-add, so the image name and serving command were executed as a
  # separate host command instead of being part of `docker run`.
  # Also: HTTP_PROXY was mistakenly set from $https_proxy.
  docker run -d --rm --name="$CONTAINER_NAME" \
    -p "$port:$port" \
    --ipc=host \
    -e HTTPS_PROXY="$https_proxy" \
    -e HTTP_PROXY="$http_proxy" \
    -v "$HF_MODEL_FIR:/hf_model" \
    --device=/dev/dri:/dev/dri \
    --group-add "$RENDER_GROUP_ID" \
    "$DOCKER_IMAGE" /bin/bash -c "\
    export VLLM_OPENVINO_DEVICE=GPU && \
    export VLLM_OPENVINO_ENABLE_QUANTIZED_WEIGHTS=ON && \
    python3 -m vllm.entrypoints.openai.api_server \
    --model /hf_model/neural-chat-7b-v3-3 \
    --host 0.0.0.0 \
    --port $port \
    --max_model_len 8192"

  # Poll the container log (300 * 3s = up to ~15 minutes) until Uvicorn
  # announces it is serving.
  local n=0
  until [[ "$n" -ge 300 ]]; do
    docker logs "$CONTAINER_NAME" > "/tmp/$CONTAINER_NAME.log" 2>&1
    n=$((n+1))
    if grep -q "Uvicorn running on" "/tmp/$CONTAINER_NAME.log"; then
      break
    fi
    sleep 3s
  done

}

# Cleanup Function
#######################################
# Stop and remove the test container, its image, and the captured log.
# Globals:   CONTAINER_NAME, DOCKER_IMAGE (read)
#######################################
cleanup() {
  local cid
  cid=$(docker ps -aq --filter "name=$CONTAINER_NAME")
  # The original `stop || rm && sleep` parsed as `(stop || rm) && sleep`,
  # which skipped `rm` whenever `stop` succeeded and conflated the two
  # outcomes; make the intent explicit. `docker rm` is only a fallback —
  # the container runs with --rm, so stopping normally removes it.
  if [[ -n "$cid" ]]; then
    docker stop "$cid" || docker rm "$cid"
    sleep 1s
  fi
  docker rmi -f "$DOCKER_IMAGE"
  # -f: the log may not exist if start_container never ran.
  rm -f "/tmp/$CONTAINER_NAME.log"
}

# Function to test API endpoint
#######################################
# Hit one API endpoint of the running server and assert its HTTP status.
# Globals:   port, CONTAINER_NAME (read)
# Arguments: $1 - endpoint path (e.g. "v1/models" or "v1/completions")
#            $2 - expected numeric HTTP status code
# Outputs:   PASS/FAIL line to stdout; container log on failure
# Returns:   exits 1 on status mismatch
#######################################
function test_api_endpoint {
  local endpoint="$1"
  local expected_status="$2"
  local response

  # Declaration is separated from assignment so curl's exit status is not
  # masked by `local`. The completions endpoint needs a POST body; every
  # other endpoint is probed with a plain GET.
  if [[ "$endpoint" == "v1/completions" ]]; then
    response=$(curl "http://localhost:$port/$endpoint" \
      -H "Content-Type: application/json" \
      -d '{
      "model": "/hf_model/neural-chat-7b-v3-3",
      "prompt": "What is the key advantage of Openvino framework",
      "max_tokens": 300,
      "temperature": 0.7
      }' \
      --write-out '%{http_code}' \
      --silent \
      --output /dev/null)
  else
    response=$(curl "http://localhost:$port/$endpoint" \
      --write-out '%{http_code}' \
      --silent \
      --output /dev/null)
  fi

  # Assert the response status code
  if [[ "$response" -eq "$expected_status" ]]; then
    echo "PASS: $endpoint returned expected status code: $expected_status"
  else
    echo "FAIL: $endpoint returned unexpected status code: $response (expected: $expected_status)"
    docker logs "$CONTAINER_NAME"
    exit 1
  fi
}

# Main function
#######################################
# Orchestrate the test: start the container, probe both API endpoints,
# then tear everything down.
#######################################
main() {
  # Image is assumed to exist locally already; uncomment to rebuild
  # (skipped here to avoid the slow network clone/build).
  # build_container

  start_container

  # Grace period for the container to settle after the log says ready.
  sleep 10

  # Both endpoints must answer 200: model listing, then a completion.
  test_api_endpoint "v1/models" 200
  test_api_endpoint "v1/completions" 200

  cleanup
}

# Script entry point.
main "$@"

0 comments on commit 96a1207

Please sign in to comment.