# gpt2_small_fsdp_attention.yaml
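# MosaicML platform run config: clones the stanford-crfm mosaicml-benchmarks repo,
# installs Composer at the `fsdp-alpha` tag and FlashAttention from source, sanity-checks
# the FlashAttention benchmark and the W&B login, then launches GPT-2 125M training
# (yamls/gpt-125m-demo.yaml) with FSDP on 8x A100-40GB.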
image: mosaicml/pytorch:1.12.1_cu116-python3.9-ubuntu20.04  # Docker image to use
run_name: gpt2_small_fsdp_attention
platform: r7z1
gpu_type: a100_40gb
gpu_num: 8
integrations:
  - integration_type: wandb
    project: mosaic-gpt2
    entity: stanford-mercury
command: |
  echo 'get benchmarks'
  git clone https://github.com/stanford-crfm/mosaicml-benchmarks.git benchmarks

  echo 'install composer tag=fsdp-alpha'
  git clone https://github.com/mosaicml/composer.git
  cd composer
  git fetch
  git checkout tags/fsdp-alpha
  pip install -e .
  pip install mosaicml[streaming]
  cd ..

  echo 'install flash attention'
  git clone https://github.com/HazyResearch/flash-attention.git
  cd flash-attention
  ls -lhd *
  echo 'run flash attention benchmark to verify'
  # the benchmark runs straight from the source checkout via PYTHONPATH; there is no build/install step here
  PYTHONPATH=$PWD python benchmarks/benchmark_flash_attention.py
  cd ..

  echo 'install python dependencies'
  pip install transformers==4.21.3 datasets==2.4.0 omegaconf wandb
  echo 'check wandb'
  wandb status

  echo 'launch gpt-2 training'
  cd benchmarks/llm
  composer main.py yamls/gpt-125m-demo.yaml
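# Usage note (assumption, not part of the original file): a run config in this shape is
# typically submitted with the MosaicML CLI, e.g. `mcli run -f gpt2_small_fsdp_attention.yaml`.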