# Saturn #62
# Viewer notice captured with this copy of the file: "This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters."
# Learn more about bidirectional Unicode characters
# Workflow header: display name, triggers, and concurrency policy.
name: "Docker Build & Deploy"

# Triggers: every pull request, pushes to the `inference` branch, and manual
# dispatch from the Actions UI.
on:
  pull_request:
  push:
    branches:
      - "inference"
  workflow_dispatch:

# Cancel outdated workflows if they are still running.
# Runs are grouped by `github.head_ref` when it is set; otherwise the group
# falls back to the run id, so such runs never cancel one another.
concurrency:
  group: docker-build-${{ github.head_ref || github.run_id }}
  cancel-in-progress: true
jobs:
  # Smoke build of the ROCm image on a GitHub-hosted runner.
  # Runs whenever the event is NOT a push/schedule/workflow_dispatch, or the ref
  # is not `inference`; i.e. it is skipped exactly when the publish job below runs.
  docker-build-rocm:
    name: Build and Install FlexFlow in a Docker Container (ROCm backend)
    runs-on: ubuntu-22.04
    if: ${{ ( github.event_name != 'push' && github.event_name != 'schedule' && github.event_name != 'workflow_dispatch' ) || github.ref_name != 'inference' }}
    env:
      FF_GPU_BACKEND: "hip_rocm"
      # Quoted so the version stays a string, matching the matrix entry used
      # by the publish job.
      hip_version: "5.6"
    steps:
      - name: Checkout Git Repository
        uses: actions/checkout@v3
        with:
          submodules: recursive

      - name: Free additional space on runner
        run: .github/workflows/helpers/free_space_on_runner.sh

      - name: Build Docker container
        # Only two HIP architectures are built here (instead of `all`).
        run: FF_HIP_ARCH="gfx1100,gfx1036" ./docker/build.sh flexflow

      - name: Check availability of flexflow modules in Python
        # The image tag is derived from the job-level env values above.
        run: |
          docker run --entrypoint /bin/bash flexflow-${FF_GPU_BACKEND}-${hip_version}:latest -c "python -c 'import flexflow.core; import flexflow.serve as ff; exit()'"

  # Full ROCm build + publish. Runs only for push / manual dispatch on `inference`.
  docker-build-and-publish-rocm:
    name: Build and Deploy FlexFlow Docker Containers (ROCm backend)
    runs-on: "runs-on=${{ github.run_id }}/runner=rocm-builder"
    if: ${{ ( github.event_name == 'push' || github.event_name == 'workflow_dispatch' ) && github.ref_name == 'inference' }}
    strategy:
      matrix:
        hip_version: ["5.6"]
      fail-fast: false
    env:
      FF_GPU_BACKEND: "hip_rocm"
      hip_version: ${{ matrix.hip_version }}
    steps:
      - name: Checkout Git Repository
        uses: actions/checkout@v3
        with:
          submodules: recursive

      - name: Build Docker container
        # This job only runs for push / manual dispatch on `inference`, so always
        # build for all compatible architectures: the result is published as a
        # pre-built general-purpose image.
        run: FF_HIP_ARCH=all ./docker/build.sh flexflow

      - name: Check availability of flexflow modules in Python
        run: |
          docker run --entrypoint /bin/bash flexflow-${FF_GPU_BACKEND}-${hip_version}:latest -c "python -c 'import flexflow.core; import flexflow.serve as ff; exit()'"

      - name: Publish Docker environment image (on push to inference)
        # Same repository-owner guard as the CUDA publish step, so the publish
        # scripts are not run when the owner is not `flexflow`.
        if: ${{ github.repository_owner == 'flexflow' }}
        env:
          FLEXFLOW_CONTAINER_TOKEN: ${{ secrets.FLEXFLOW_CONTAINER_TOKEN }}
        run: |
          ./docker/publish.sh flexflow-environment
          ./docker/publish.sh flexflow

  # CUDA build (and publish on `inference`).
  # The job always starts for every matrix entry, but each step is guarded:
  #   - push / manual dispatch on `inference`: every CUDA version is built;
  #   - any other case: only the 12.0 entry does real work.
  # The guard is repeated per step because it reads `matrix.cuda_version`.
  docker-build-cuda:
    name: Build and Install FlexFlow in a Docker Container (CUDA backend)
    runs-on: ubuntu-22.04
    strategy:
      matrix:
        cuda_version: ["11.8", "12.0", "12.1", "12.2"]
      fail-fast: false
    env:
      FF_GPU_BACKEND: "cuda"
      cuda_version: ${{ matrix.cuda_version }}
    steps:
      - name: Checkout Git Repository
        if: ${{ ( ( github.event_name == 'push' || github.event_name == 'workflow_dispatch' ) && github.ref_name == 'inference' ) || matrix.cuda_version == '12.0' }}
        uses: actions/checkout@v3
        with:
          submodules: recursive

      - name: Free additional space on runner
        if: ${{ ( ( github.event_name == 'push' || github.event_name == 'workflow_dispatch' ) && github.ref_name == 'inference' ) || matrix.cuda_version == '12.0' }}
        run: .github/workflows/helpers/free_space_on_runner.sh

      - name: Build Docker container
        if: ${{ ( ( github.event_name == 'push' || github.event_name == 'workflow_dispatch' ) && github.ref_name == 'inference' ) || matrix.cuda_version == '12.0' }}
        env:
          # Both expressions render as the strings "true" / "false" for the script.
          deploy_needed: ${{ ( github.event_name == 'push' || github.event_name == 'workflow_dispatch' ) && github.ref_name == 'inference' }}
          build_needed: ${{ matrix.cuda_version == '12.0' }}
        run: |
          # On push to inference, build for all compatible architectures, so that we can publish
          # a pre-built general-purpose image. On all other cases, only build for one architecture
          # to save time.
          if [[ $deploy_needed == "true" ]]; then
            export FF_CUDA_ARCH=all
            ./docker/build.sh flexflow
          elif [[ $build_needed == "true" ]]; then
            export FF_CUDA_ARCH=86
            ./docker/build.sh flexflow
          fi

      - name: Check availability of flexflow modules in Python
        if: ${{ ( ( github.event_name == 'push' || github.event_name == 'workflow_dispatch' ) && github.ref_name == 'inference' ) || matrix.cuda_version == '12.0' }}
        # The CUDA stubs directory is put on LD_LIBRARY_PATH and libcuda.so is
        # symlinked to libcuda.so.1 before the import check.
        # `\$LD_LIBRARY_PATH` is escaped on purpose: the string is double-quoted,
        # so an unescaped variable would be expanded by the runner's shell and
        # the container's own LD_LIBRARY_PATH would be dropped.
        run: |
          docker run --entrypoint /bin/bash flexflow-${FF_GPU_BACKEND}-${cuda_version}:latest -c "export LD_LIBRARY_PATH=/usr/local/cuda/lib64/stubs:\$LD_LIBRARY_PATH; sudo ln -s /usr/local/cuda/lib64/stubs/libcuda.so /usr/local/cuda/lib64/stubs/libcuda.so.1; python -c 'import flexflow.core; import flexflow.serve as ff; exit()'"

      - name: Publish Docker environment image (on push to inference)
        if: ${{ github.repository_owner == 'flexflow' && ( github.event_name == 'push' || github.event_name == 'workflow_dispatch' ) && github.ref_name == 'inference' }}
        env:
          FLEXFLOW_CONTAINER_TOKEN: ${{ secrets.FLEXFLOW_CONTAINER_TOKEN }}
        run: |
          ./docker/publish.sh flexflow-environment
          ./docker/publish.sh flexflow

  # Posts a failure notice to Slack when a job listed in `needs` failed, for
  # push / manual dispatch on `inference` in a repository owned by `flexflow`.
  notify-slack:
    name: Notify Slack in case of failure
    runs-on: ubuntu-22.04
    needs: [docker-build-cuda, docker-build-and-publish-rocm]
    if: ${{ failure() && github.repository_owner == 'flexflow' && ( github.event_name == 'push' || github.event_name == 'workflow_dispatch' ) && github.ref_name == 'inference' }}
    steps:
      - name: Send Slack message
        env:
          SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
        # The webhook URL is quoted so the shell passes it to curl as a single
        # argument regardless of the characters it contains.
        run: |
          curl -X POST -H 'Content-type: application/json' --data "{\"text\":\"flexflow-serve Docker images build failed! <https://github.com/flexflow/flexflow-serve/actions/runs/$GITHUB_RUN_ID|(See here).> :x: \"}" "$SLACK_WEBHOOK"