[Bugfix] Disable w16a16 2of4 sparse CompressedTensors24 #4143
Workflow file for this run
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Lints the example Helm chart and then smoke-tests it end to end on every
# pull request: the chart is installed into a throwaway kind cluster, backed by
# a local MinIO bucket holding the opt-125m model and a locally built CPU
# image, and one completion request is sent to the deployed service.
name: Lint and Deploy Charts

on: pull_request

# The steps below only read the checked-out repository, so the token is
# restricted to read-only access.
permissions:
  contents: read

jobs:
  lint-and-deploy:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683  # v4.2.2
        with:
          # Full history, so `ct lint --target-branch` can compare against the
          # default branch.
          fetch-depth: 0

      - name: Set up Helm
        uses: azure/setup-helm@fe7b79cd5ee1e45176fcad797de68ecaf3ca4814  # v4.2.0
        with:
          version: v3.14.4

      # Python is required because ct lint runs Yamale and yamllint, which
      # require Python.
      - uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b  # v5.3.0
        with:
          python-version: '3.13'

      - name: Set up chart-testing
        uses: helm/chart-testing-action@e6669bcd63d7cb57cb4380c33043eebe5d111992  # v2.6.1
        with:
          version: v3.10.1

      - name: Run chart-testing (lint)
        run: |
          ct lint \
            --target-branch ${{ github.event.repository.default_branch }} \
            --chart-dirs examples/online_serving/chart-helm \
            --charts examples/online_serving/chart-helm

      # Starts a MinIO server on a dedicated Docker network and uploads the
      # opt-125m model files into s3://testbucket/opt-125m.
      - name: Setup minio
        run: |
          docker network create vllm-net
          docker run -d -p 9000:9000 --name minio --net vllm-net \
            -e "MINIO_ACCESS_KEY=minioadmin" \
            -e "MINIO_SECRET_KEY=minioadmin" \
            -v /tmp/data:/data \
            -v /tmp/config:/root/.minio \
            minio/minio server /data
          # These exports only live for the duration of this step.
          export AWS_ACCESS_KEY_ID=minioadmin
          export AWS_SECRET_ACCESS_KEY=minioadmin
          export AWS_EC2_METADATA_DISABLED=true
          mkdir opt-125m
          # -f: fail on an HTTP error instead of saving the error page as a
          # model file that would then be uploaded to the bucket.
          cd opt-125m && curl -f -O -Ls "https://huggingface.co/facebook/opt-125m/resolve/main/{pytorch_model.bin,config.json,generation_config.json,merges.txt,special_tokens_map.json,tokenizer_config.json,vocab.json}" && cd ..
          aws --endpoint-url http://127.0.0.1:9000/ s3 mb s3://testbucket
          aws --endpoint-url http://127.0.0.1:9000/ s3 cp opt-125m/ s3://testbucket/opt-125m --recursive

      - name: Create kind cluster
        uses: helm/kind-action@0025e74a8c7512023d06dc019c617aa3cf561fde  # v1.10.0

      - name: Build the Docker image vllm cpu
        run: docker buildx build -f Dockerfile.cpu -t vllm-cpu-env .

      - name: Configuration of docker images, network and namespace for the kind cluster
        run: |
          docker pull amazon/aws-cli:2.6.4
          kind load docker-image amazon/aws-cli:2.6.4 --name chart-testing
          kind load docker-image vllm-cpu-env:latest --name chart-testing
          # Attach the kind control-plane container to the MinIO network; the
          # chart is later pointed at http://minio:9000.
          docker network connect vllm-net "$(docker ps -aqf "name=chart-testing-control-plane")"
          kubectl create ns ns-vllm

      - name: Run chart-testing (install)
        run: |
          # Re-exported here because the values set in "Setup minio" do not
          # carry over between steps.
          export AWS_ACCESS_KEY_ID=minioadmin
          export AWS_SECRET_ACCESS_KEY=minioadmin
          # After 30 seconds, stream the logs of the first pod whose name
          # matches "deployment", in the background, while helm waits.
          sleep 30 && kubectl -n ns-vllm logs -f "$(kubectl -n ns-vllm get pods | awk '/deployment/ {print $1;exit}')" &
          # Arguments containing [ ] or variable expansions are quoted so the
          # shell passes them to helm verbatim.
          helm install --wait --wait-for-jobs --timeout 5m0s --debug \
            --create-namespace --namespace=ns-vllm \
            test-vllm examples/online_serving/chart-helm \
            -f examples/online_serving/chart-helm/values.yaml \
            --set secrets.s3endpoint=http://minio:9000 \
            --set secrets.s3bucketname=testbucket \
            --set "secrets.s3accesskeyid=${AWS_ACCESS_KEY_ID}" \
            --set "secrets.s3accesskey=${AWS_SECRET_ACCESS_KEY}" \
            --set resources.requests.cpu=1 \
            --set resources.requests.memory=4Gi \
            --set resources.limits.cpu=2 \
            --set resources.limits.memory=5Gi \
            --set 'image.env[0].name=VLLM_CPU_KVCACHE_SPACE' \
            --set 'image.env[1].name=VLLM_LOGGING_LEVEL' \
            --set-string 'image.env[0].value=1' \
            --set-string 'image.env[1].value=DEBUG' \
            --set-string 'extraInit.s3modelpath=opt-125m/' \
            --set-string 'resources.limits.nvidia\.com/gpu=0' \
            --set-string 'resources.requests.nvidia\.com/gpu=0' \
            --set-string 'image.repository=vllm-cpu-env'

      - name: curl test
        run: |
          kubectl -n ns-vllm port-forward service/test-vllm-service 8001:80 &
          # Give the background port-forward time to start listening.
          sleep 10
          # CODE holds the response body; curl -f turns an HTTP error into a
          # non-zero exit status for this assignment.
          CODE="$(curl -v -f --location http://localhost:8001/v1/completions \
            --header "Content-Type: application/json" \
            --data '{
              "model": "opt-125m",
              "prompt": "San Francisco is a",
              "max_tokens": 7,
              "temperature": 0
            }')"
          echo "$CODE"