-
Notifications
You must be signed in to change notification settings - Fork 4
167 lines (144 loc) · 4.94 KB
/
gpu-ci.yml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
name: "GPU-based Tests"
on:
push:
branches:
- "inference"
workflow_dispatch:
concurrency:
group: gpu-ci-${{ github.head_ref || github.run_id }}
cancel-in-progress: true
jobs:
python-interface-check:
name: Check Python Interface
runs-on: "runs-on=${{ github.run_id }}/runner=gpu-nvidia"
defaults:
run:
shell: bash -l {0} # required to use an activated conda environment
env:
CONDA: "3"
# container:
# image: ghcr.io/flexflow/flexflow-environment-cuda-12.2:latest
# options: --gpus all --shm-size=8192m
steps:
- name: Display NVIDIA SMI and NVCC details
run: |
nvidia-smi
nvidia-smi -L
nvidia-smi -q -d Memory
nvcc --version
- name: Display disk space
run: df -h
- name: Install updated git version
run: sudo add-apt-repository ppa:git-core/ppa -y && sudo apt update -y && sudo apt install -y --no-install-recommends git
- name: Checkout Git Repository
uses: actions/checkout@v4
with:
submodules: recursive
- name: Install conda and FlexFlow dependencies
uses: conda-incubator/setup-miniconda@v3
with:
miniconda-version: "latest"
activate-environment: flexflow
environment-file: conda/flexflow.yml
auto-activate-base: false
auto-update-conda: false
- name: Build FlexFlow
run: |
export PATH=$CONDA_PREFIX/bin:$PATH
mkdir build
cd build
../config/config.linux
make -j
- name: Check FlexFlow Python interface (before installation)
run: |
export PATH=$CONDA_PREFIX/bin:$PATH
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$CONDA_PREFIX/lib
./tests/python_interface_test.sh before-installation
- name: Install FlexFlow
run: |
export PATH=$CONDA_PREFIX/bin:$PATH
cd build
../config/config.linux
make install
ldconfig
- name: Check FlexFlow Python interface (after installation)
run: |
export PATH=$CONDA_PREFIX/bin:$PATH
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$CONDA_PREFIX/lib
./tests/python_interface_test.sh after-installation
inference-tests:
name: Inference Tests
runs-on: "runs-on=${{ github.run_id }}/runner=gpu-nvidia"
defaults:
run:
shell: bash -l {0} # required to use an activated conda environment
env:
CONDA: "3"
HUGGINGFACE_TOKEN: ${{ secrets.HUGGINGFACE_TOKEN }}
# container:
# image: ghcr.io/flexflow/flexflow-environment-cuda-12.2:latest
# options: --gpus all --shm-size=8192m
steps:
- name: Display NVIDIA SMI and NVCC details
run: |
nvidia-smi
nvidia-smi -L
nvidia-smi -q -d Memory
nvcc --version
- name: Display disk space
run: df -h
- name: Install updated git version
run: sudo add-apt-repository ppa:git-core/ppa -y && sudo apt update -y && sudo apt install -y --no-install-recommends git
- name: Checkout Git Repository
uses: actions/checkout@v4
with:
submodules: recursive
- name: Install conda and FlexFlow dependencies
uses: conda-incubator/setup-miniconda@v3
with:
miniconda-version: "latest"
activate-environment: flexflow
environment-file: conda/flexflow.yml
auto-activate-base: false
auto-update-conda: false
- name: Build FlexFlow
run: |
export PATH=$CONDA_PREFIX/bin:$PATH
mkdir build
cd build
../config/config.linux
make -j
- name: Run inference tests
env:
CPP_INFERENCE_TESTS: ${{ vars.CPP_INFERENCE_TESTS }}
run: |
export PATH=$CONDA_PREFIX/bin:$PATH
export CUDNN_DIR=/usr/local/cuda
export CUDA_DIR=/usr/local/cuda
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$CONDA_PREFIX/lib
# Inference tests
source ./build/set_python_envs.sh
./tests/inference_tests.sh
- name: Run PEFT tests
run: |
export PATH=$CONDA_PREFIX/bin:$PATH
export CUDNN_DIR=/usr/local/cuda
export CUDA_DIR=/usr/local/cuda
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$CONDA_PREFIX/lib
source ./build/set_python_envs.sh
./tests/peft_test.sh
- name: Save inference output as an artifact
if: always()
run: |
cd inference
tar -zcvf output.tar.gz ./output
- name: Upload artifact
uses: actions/upload-artifact@v4
if: always()
with:
name: output
path: inference/output.tar.gz
# Github persists the .cache folder across different runs/containers
- name: Clear cache
if: always()
run: sudo rm -rf ~/.cache