diff --git a/.github/workflows/publish-docker.yml b/.github/workflows/publish-docker.yml
new file mode 100644
index 0000000000000..b9e5bfdaf0423
--- /dev/null
+++ b/.github/workflows/publish-docker.yml
@@ -0,0 +1,61 @@
+name: Docker Build + Publish
+
+on:
+  # For now, just manually trigger
+  # push:
+  #   branches:
+  #     - main
+  # pull_request:
+  #   branches:
+  #     - main
+  workflow_dispatch:
+
+jobs:
+  build-docker-image:
+
+    runs-on: aws-avx2-192G-4-a10g-96G
+    timeout-minutes: 240
+
+    # Grant the job token explicit rights: read the repository for checkout
+    # and write to GitHub Packages so the ghcr.io push is authorized.
+    permissions:
+      contents: read
+      packages: write
+
+    steps:
+
+      - name: Set up Docker Buildx
+        id: buildx
+        uses: docker/setup-buildx-action@v3
+
+      # Authenticate against ghcr.io with the workflow's own token.
+      - name: Login to Github Packages
+        uses: docker/login-action@v3
+        with:
+          registry: ghcr.io
+          username: ${{ github.actor }}
+          password: ${{ secrets.GITHUB_TOKEN }}
+
+      - name: Checkout code
+        uses: actions/checkout@v3
+        with:
+          fetch-depth: 1
+          submodules: recursive
+
+      # Derive a date-based tag (YYYYMMDD) and expose it as a step output.
+      - name: Get version tag
+        id: extract_tag
+        run: echo "tag=$(date +%Y%m%d)" >> "$GITHUB_OUTPUT"
+
+      - name: Current Version Name
+        run: echo "${{ steps.extract_tag.outputs.tag }}"
+
+      # Build the vllm-openai Dockerfile target and push it under both the
+      # dated tag and latest.
+      - name: nm-vllm latest
+        uses: docker/build-push-action@v5
+        with:
+          context: .
+          target: vllm-openai
+          push: true
+          tags: ghcr.io/neuralmagic/nm-vllm-openai:${{ steps.extract_tag.outputs.tag }},ghcr.io/neuralmagic/nm-vllm-openai:latest
diff --git a/Dockerfile b/Dockerfile
index 8be03b3567f0e..a595c37fbb556 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -108,6 +108,10 @@ COPY requirements.txt requirements.txt
 RUN --mount=type=cache,target=/root/.cache/pip \
     pip install -r requirements.txt
 
+# UPSTREAM SYNC: Install sparsity extras
+RUN --mount=type=cache,target=/root/.cache/pip \
+    pip install nm-magic-wand
+
 # Install flash attention (from pre-built wheel)
 RUN --mount=type=bind,from=flash-attn-builder,src=/usr/src/flash-attention-v2,target=/usr/src/flash-attention-v2 \
     pip install /usr/src/flash-attention-v2/*.whl --no-cache-dir