# gpt2_small_fsdp_attention.yaml
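# MosaicML platform run config: clones the stanford-crfm mosaicml-benchmarks repo,
# installs Composer at the `fsdp-alpha` tag and FlashAttention from source, sanity-checks
# the FlashAttention benchmark and the W&B login, then launches GPT-2 125M training
# (yamls/gpt-125m-demo.yaml) with FSDP on 8x A100-40GB.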
image: mosaicml/pytorch:1.12.1_cu116-python3.9-ubuntu20.04  # Docker image to use
run_name: gpt2_small_fsdp_attention
platform: r7z1
gpu_type: a100_40gb
gpu_num: 8
integrations:
  - integration_type: wandb
    project: mosaic-gpt2
    entity: stanford-mercury
command: |
  echo 'get benchmarks'
  git clone https://github.com/stanford-crfm/mosaicml-benchmarks.git benchmarks

  echo 'install composer tag=fsdp-alpha'
  git clone https://github.com/mosaicml/composer.git
  cd composer
  git fetch
  git checkout tags/fsdp-alpha
  pip install -e .
  pip install mosaicml[streaming]
  cd ..

  echo 'install flash attention'
  git clone https://github.com/HazyResearch/flash-attention.git
  cd flash-attention
  ls -lhd *
  echo 'run flash attention benchmark to verify'
  # the benchmark runs straight from the source checkout via PYTHONPATH; there is no build/install step here
  PYTHONPATH=$PWD python benchmarks/benchmark_flash_attention.py
  cd ..

  echo 'install python dependencies'
  pip install transformers==4.21.3 datasets==2.4.0 omegaconf wandb
  echo 'check wandb'
  wandb status

  echo 'launch gpt-2 training'
  cd benchmarks/llm
  composer main.py yamls/gpt-125m-demo.yaml
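# Usage note (assumption, not part of the original file): a run config in this shape is
# typically submitted with the MosaicML CLI, e.g. `mcli run -f gpt2_small_fsdp_attention.yaml`.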