Skip to content

Commit

Permalink
Enable Intel ARC gpu test for vllm openvino.
Browse files Browse the repository at this point in the history
Signed-off-by: senhui2intel <[email protected]>
  • Loading branch information
senhui2intel committed Nov 7, 2024
1 parent 96a1207 commit c8f879f
Show file tree
Hide file tree
Showing 2 changed files with 51 additions and 30 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
# syntax=docker/dockerfile:1
# The vLLM Dockerfile is used to construct vLLM image that can be directly used
# to run the OpenAI compatible server.
# Based on https://github.com/vllm-project/vllm/blob/main/Dockerfile.openvino
# add Intel ARC support package

FROM ubuntu:22.04 AS dev

# Base tooling: git + pip to fetch and build vLLM, wget/gpg-agent to add the
# Intel GPU apt repository, ffmpeg/libsm6/libxext6/libgl1 for multimodal deps.
# --no-install-recommends keeps the image lean (DL3015); the apt list cache is
# removed in the same layer so it never persists in the image (DL3009).
RUN apt-get update -y && \
    apt-get install -y --no-install-recommends \
        ffmpeg \
        git \
        gpg-agent \
        libgl1 \
        libsm6 \
        libxext6 \
        python3-pip \
        wget && \
    rm -rf /var/lib/apt/lists/*

# Intel ARC (dGPU) user-space driver stack from the official Intel graphics
# repository (jammy lts/2350 channel). apt-get is used instead of apt because
# apt's CLI is not stable for scripting (DL3027). clinfo/hwinfo/vainfo are
# kept for in-container GPU diagnostics.
RUN wget -qO - https://repositories.intel.com/gpu/intel-graphics.key | gpg --yes --dearmor --output /usr/share/keyrings/intel-graphics.gpg && \
    echo "deb [arch=amd64 signed-by=/usr/share/keyrings/intel-graphics.gpg] https://repositories.intel.com/gpu/ubuntu jammy/lts/2350 unified" | \
    tee /etc/apt/sources.list.d/intel-gpu-jammy.list && \
    apt-get update -y && \
    apt-get install -y --no-install-recommends \
        clinfo \
        hwinfo \
        intel-level-zero-gpu \
        intel-media-va-driver-non-free \
        intel-opencl-icd \
        level-zero \
        libegl-mesa0 \
        libegl1-mesa \
        libegl1-mesa-dev \
        libgbm1 \
        libgl1-mesa-dev \
        libgl1-mesa-dri \
        libglapi-mesa \
        libgles2-mesa-dev \
        libglx-mesa0 \
        libigdgmm12 \
        libmfx1 \
        libmfxgen1 \
        libvpl2 \
        libxatracker2 \
        mesa-va-drivers \
        mesa-vdpau-drivers \
        mesa-vulkan-drivers \
        va-driver-all \
        vainfo && \
    rm -rf /var/lib/apt/lists/*

WORKDIR /workspace

# Pin vLLM to a released tag for a reproducible build.
RUN git clone -b v0.6.3.post1 https://github.com/vllm-project/vllm.git

#ARG GIT_REPO_CHECK=0
#RUN --mount=type=bind,source=.git,target=.git \
#    if [ "$GIT_REPO_CHECK" != 0 ]; then bash tools/check_repo.sh ; fi

# Install build requirements from the CPU torch wheel index.
# --no-cache-dir avoids baking the pip download cache into the layer (DL3042).
RUN PIP_EXTRA_INDEX_URL="https://download.pytorch.org/whl/cpu" \
    python3 -m pip install --no-cache-dir -r /workspace/vllm/requirements-build.txt
# Build vLLM with the OpenVINO backend.
RUN PIP_EXTRA_INDEX_URL="https://download.pytorch.org/whl/cpu" VLLM_TARGET_DEVICE="openvino" \
    python3 -m pip install --no-cache-dir /workspace/vllm/

#COPY examples/ /workspace/vllm/examples
#COPY benchmarks/ /workspace/vllm/benchmarks

# NOTE(review): the image runs as root; the companion test script grants GPU
# access via --device=/dev/dri and --group-add, so USER is intentionally unset.
CMD ["/bin/bash"]

Original file line number Diff line number Diff line change
Expand Up @@ -5,49 +5,28 @@
# Trace every command for CI debugging.
set -x

# Directory this script lives in, so relative paths work from any CWD.
WORKPATH="$( cd "$( dirname "$0" )" && pwd )"
# Standalone Dockerfile that bakes the Intel ARC driver stack into the
# vLLM OpenVINO image.
DOCKER_FILE="$WORKPATH"/../../comps/llms/text-generation/vllm/langchain/dependency/Dockerfile.openvino_arc

# Define variables
port=5033
# NOTE(review): 110 is assumed to be the host "render" group GID — verify
# with `getent group render`; it varies between distributions/hosts.
RENDER_GROUP_ID=110
# NOTE(review): "FIR" looks like a typo for "DIR"; the same name is used at
# the reference site, so renaming must change both places together.
HF_MODEL_FIR=$HOME/hf_model
DOCKER_IMAGE="vllm-openvino:comps"
CONTAINER_NAME="test-comps-vllm-openvino-container"
HF_CACHE_DIR=$HOME/.cache/huggingface

# Build the vllm-openvino:comps image from a pinned vLLM checkout.
# NOTE(review): this function appears to contain BOTH sides of a rendered
# diff (the old heredoc/sed patching path and the new $DOCKER_FILE path);
# confirm against the actual repository before relying on it.
function build_container() {
cd $WORKPATH
git clone https://github.com/vllm-project/vllm.git vllm-openvino
cd ./vllm-openvino

git reset --hard 067e77f9a87c3466fce41c8fe8710fddc69ec26c # resolve circular import issue

# Add ARC driver packages to the upstream Dockerfile: the heredoc content is
# inserted after line 9 of Dockerfile.openvino via sed's `r` command reading
# from stdin. The heredoc body is data — do not edit it as shell.
sed -i '9r /dev/stdin' Dockerfile.openvino << EndOfMessage
RUN apt-get install -y gpg-agent wget
RUN wget -qO - https://repositories.intel.com/gpu/intel-graphics.key | gpg --yes --dearmor --output /usr/share/keyrings/intel-graphics.gpg && \\
    echo "deb [arch=amd64 signed-by=/usr/share/keyrings/intel-graphics.gpg] https://repositories.intel.com/gpu/ubuntu jammy/lts/2350 unified" | \\
    tee /etc/apt/sources.list.d/intel-gpu-jammy.list && \\
    apt-get update -y && \\
    apt-get install -y \\
        intel-opencl-icd intel-level-zero-gpu level-zero \\
        intel-media-va-driver-non-free libmfx1 libmfxgen1 libvpl2 \\
        libegl-mesa0 libegl1-mesa libegl1-mesa-dev libgbm1 libgl1-mesa-dev libgl1-mesa-dri \\
        libglapi-mesa libgles2-mesa-dev libglx-mesa0 libigdgmm12 libxatracker2 mesa-va-drivers \\
        mesa-vdpau-drivers mesa-vulkan-drivers va-driver-all vainfo hwinfo clinfo
EndOfMessage

# NOTE(review): two -f flags below look like a diff artifact; docker build
# honors only the last one, so $DOCKER_FILE wins — confirm which was intended.
docker build --no-cache -t $DOCKER_IMAGE \
    -f Dockerfile.openvino \
    -f $DOCKER_FILE \
    . \
    --build-arg https_proxy=$https_proxy \
    --build-arg http_proxy=$http_proxy

# Fail the CI job explicitly if the image build did not succeed.
if [ $? -ne 0 ]; then
    echo "vllm-openvino built fail"
    exit 1
else
    echo "vllm-openvino built successful"
fi
# Remove the throwaway clone so reruns start clean.
cd $WORKPATH
rm -rf vllm-openvino
}

# Function to start Docker container
Expand All @@ -58,14 +37,14 @@ start_container() {
--ipc=host \
-e HTTPS_PROXY=$https_proxy \
-e HTTP_PROXY=$https_proxy \
-v $HF_MODEL_FIR:/hf_model \
-v $HF_CACHE_DIR:/root/.cache/huggingface \
--device=/dev/dri:/dev/dri \
--group-add $RENDER_GROUP_ID
--group-add $RENDER_GROUP_ID \
vllm-openvino:comps /bin/bash -c "\
export VLLM_OPENVINO_DEVICE=GPU && \
export VLLM_OPENVINO_ENABLE_QUANTIZED_WEIGHTS=ON && \
python3 -m vllm.entrypoints.openai.api_server \
--model /hf_model/neural-chat-7b-v3-3 \
--model Intel/neural-chat-7b-v3-3 \
--host 0.0.0.0 \
--port $port \
--max_model_len 8192"
Expand Down Expand Up @@ -131,8 +110,7 @@ function test_api_endpoint {
# Main function
main() {

# use local image to skip slow network connection
# build_container
build_container
start_container

# Sleep to allow the container to start up fully
Expand Down

0 comments on commit c8f879f

Please sign in to comment.