Skip to content

Commit

Permalink
Revert "Revert "Docker automation fix""
Browse files Browse the repository at this point in the history
  • Loading branch information
priyakasimbeg authored Jul 27, 2023
1 parent db37df4 commit 9ef40d2
Show file tree
Hide file tree
Showing 56 changed files with 6,669 additions and 1,702 deletions.
90 changes: 66 additions & 24 deletions .github/workflows/CI.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,13 @@ jobs:
fastmri:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
- uses: actions/checkout@v3
- name: Set up Python 3.9
uses: actions/setup-python@v2
uses: actions/setup-python@v4
with:
python-version: 3.9
cache: 'pip' # Cache pip dependencies.
cache-dependency-path: '**/setup.py'
- name: Install Modules and Run
run: |
pip install .[jax_cpu]
Expand All @@ -22,11 +24,13 @@ jobs:
wmt_jax:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
- uses: actions/checkout@v3
- name: Set up Python 3.9
uses: actions/setup-python@v2
uses: actions/setup-python@v4
with:
python-version: 3.9
cache: 'pip' # Cache pip dependencies.
cache-dependency-path: '**/setup.py'
- name: Install Modules and Run
run: |
pip install .[jax_cpu]
Expand All @@ -37,11 +41,13 @@ jobs:
wmt_pytorch:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
- uses: actions/checkout@v3
- name: Set up Python 3.9
uses: actions/setup-python@v2
uses: actions/setup-python@v4
with:
python-version: 3.9
cache: 'pip' # Cache pip dependencies.
cache-dependency-path: '**/setup.py'
- name: Install Modules and Run
run: |
pip install .[jax_cpu]
Expand All @@ -52,11 +58,13 @@ jobs:
imagenet_jax:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
- uses: actions/checkout@v3
- name: Set up Python 3.9
uses: actions/setup-python@v2
uses: actions/setup-python@v4
with:
python-version: 3.9
cache: 'pip' # Cache pip dependencies.
cache-dependency-path: '**/setup.py'
- name: Install Modules and Run
run: |
pip install .[jax_cpu]
Expand All @@ -68,11 +76,13 @@ jobs:
imagenet_pytorch:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
- uses: actions/checkout@v3
- name: Set up Python 3.9
uses: actions/setup-python@v2
uses: actions/setup-python@v4
with:
python-version: 3.9
cache: 'pip' # Cache pip dependencies.
cache-dependency-path: '**/setup.py'
- name: Install Modules and Run
run: |
pip install .[jax_cpu]
Expand All @@ -85,11 +95,13 @@ jobs:
criteo_jax:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
- uses: actions/checkout@v3
- name: Set up Python 3.9
uses: actions/setup-python@v2
uses: actions/setup-python@v4
with:
python-version: 3.9
cache: 'pip' # Cache pip dependencies.
cache-dependency-path: '**/setup.py'
- name: Install Modules and Run
run: |
pip install .[jax_cpu]
Expand All @@ -100,11 +112,13 @@ jobs:
criteo_pytorch:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
- uses: actions/checkout@v3
- name: Set up Python 3.9
uses: actions/setup-python@v2
uses: actions/setup-python@v4
with:
python-version: 3.9
cache: 'pip' # Cache pip dependencies.
cache-dependency-path: '**/setup.py'
- name: Install Modules and Run
run: |
pip install .[jax_cpu]
Expand All @@ -115,11 +129,13 @@ jobs:
speech_jax:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
- uses: actions/checkout@v3
- name: Set up Python 3.9
uses: actions/setup-python@v2
uses: actions/setup-python@v4
with:
python-version: 3.9
cache: 'pip' # Cache pip dependencies.
cache-dependency-path: '**/setup.py'
- name: Install Modules and Run
run: |
pip install .[jax_cpu]
Expand All @@ -131,11 +147,13 @@ jobs:
speech_pytorch:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
- uses: actions/checkout@v3
- name: Set up Python 3.9
uses: actions/setup-python@v2
uses: actions/setup-python@v4
with:
python-version: 3.9
cache: 'pip' # Cache pip dependencies.
cache-dependency-path: '**/setup.py'
- name: Install Modules and Run
run: |
pip install .[jax_cpu]
Expand All @@ -147,11 +165,13 @@ jobs:
ogbg:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
- uses: actions/checkout@v3
- name: Set up Python 3.9
uses: actions/setup-python@v2
uses: actions/setup-python@v4
with:
python-version: 3.9
cache: 'pip' # Cache pip dependencies.
cache-dependency-path: '**/setup.py'
- name: Install Modules and Run
run: |
pip install .[jax_cpu]
Expand All @@ -163,22 +183,44 @@ jobs:
pytest:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
- uses: actions/checkout@v3
- name: Set up Python 3.9
uses: actions/setup-python@v2
uses: actions/setup-python@v4
with:
python-version: 3.9
cache: 'pip' # Cache pip dependencies.
cache-dependency-path: '**/setup.py'
- name: Install pytest
run: |
python -m pip install --upgrade pip
pip install pytest
pip install .[full]
pip install .[jax_cpu]
pip install .[pytorch_cpu]
- name: Run pytest
- name: Run pytest tests
run: |
pytest -vx tests/version_test.py
pytest -vx tests/test_num_params.py
pytest -vx tests/test_param_shapes.py
pytest -vx tests/test_param_types.py
pytest -vx tests/test_ssim.py
pytest -vx tests/test_ssim.py
pytest-baselines:
  # Installs the package with its `full`, `jax_cpu` and `pytorch_cpu` extras
  # on a GitHub-hosted Ubuntu runner and runs tests/test_baselines.py.
  runs-on: ubuntu-latest
  steps:
    - uses: actions/checkout@v3
    - name: Set up Python 3.9
      uses: actions/setup-python@v4
      with:
        # Quoted so YAML keeps the version as a string; an unquoted value is
        # parsed as a float (e.g. 3.10 would silently become 3.1).
        python-version: '3.9'
        cache: 'pip'  # Cache pip dependencies.
        # The pip cache key is derived from the setup.py file(s) matched here.
        cache-dependency-path: '**/setup.py'
    - name: Install pytest
      run: |
        python -m pip install --upgrade pip
        pip install pytest
        # Extras are quoted so the shell never treats `[...]` as a glob.
        pip install '.[full]'
        pip install '.[jax_cpu]'
        pip install '.[pytorch_cpu]'
    - name: Run baseline tests
      run: |
        pytest --verbosity=1 tests/test_baselines.py
136 changes: 136 additions & 0 deletions .github/workflows/regression_tests.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,136 @@
# Containerized regression tests.
#
# For every pull request targeting `main`, each job below checks out the
# repository on a self-hosted runner, pulls the framework-specific dev image
# and runs it with GPU access against one workload.
#
# NOTE(review): `pull_request` workflows on self-hosted runners can execute
# code coming from forks; GitHub advises against this for public repositories.
# Confirm the runner access policy.
#
# NOTE(review): the flags after the image name are consumed by the image's
# entrypoint, which is not visible here. Presumably -d = dataset,
# -f = framework, -s = submission path, -w = workload, -t = tuning search
# space, -e = experiment name, -m = max steps; -c/-o/-r are boolean switches.
# Their values (including the mixed `False`/`false` casing) are kept exactly
# as they were — verify against the entrypoint before normalizing.
name: Containerized Regression Tests

on:
  pull_request:
    branches:
      - 'main'

# Image references shared by all jobs; defined once to avoid repeating the
# full registry path in every step.
env:
  JAX_IMAGE: us-central1-docker.pkg.dev/training-algorithms-external/mlcommons-docker-repo/algoperf_jax_dev
  PYTORCH_IMAGE: us-central1-docker.pkg.dev/training-algorithms-external/mlcommons-docker-repo/algoperf_pytorch_dev

jobs:
  # ----------------------------- JAX workloads -----------------------------
  fastmri_jax:
    runs-on: self-hosted
    steps:
      # checkout@v3 matches the version used by the CI.yml workflow.
      - uses: actions/checkout@v3
      - name: Run containerized workload
        run: |
          docker pull "$JAX_IMAGE"
          # Host data, experiment and log directories are mounted into the
          # container; paths are quoted so a $HOME with spaces cannot split.
          docker run \
            -v "$HOME/data/:/data/" \
            -v "$HOME/experiment_runs/:/experiment_runs" \
            -v "$HOME/experiment_runs/logs:/logs" \
            --gpus all --ipc=host "$JAX_IMAGE" \
            -d fastmri -f jax -s baselines/adamw/jax/submission.py -w fastmri \
            -t baselines/adamw/tuning_search_space.json \
            -e tests/regression_tests/adamw -m 10 -c False -o True -r false
  imagenet_resnet_jax:
    runs-on: self-hosted
    steps:
      - uses: actions/checkout@v3
      - name: Run containerized workload
        run: |
          docker pull "$JAX_IMAGE"
          docker run \
            -v "$HOME/data/:/data/" \
            -v "$HOME/experiment_runs/:/experiment_runs" \
            -v "$HOME/experiment_runs/logs:/logs" \
            --gpus all --ipc=host "$JAX_IMAGE" \
            -d imagenet -f jax -s baselines/adamw/jax/submission.py -w imagenet_resnet \
            -t baselines/adamw/tuning_search_space.json \
            -e tests/regression_tests/adamw -m 10 -c False -o True -r false
  imagenet_vit_jax:
    runs-on: self-hosted
    steps:
      - uses: actions/checkout@v3
      - name: Run containerized workload
        run: |
          docker pull "$JAX_IMAGE"
          docker run \
            -v "$HOME/data/:/data/" \
            -v "$HOME/experiment_runs/:/experiment_runs" \
            -v "$HOME/experiment_runs/logs:/logs" \
            --gpus all --ipc=host "$JAX_IMAGE" \
            -d imagenet -f jax -s baselines/adamw/jax/submission.py -w imagenet_vit \
            -t baselines/adamw/tuning_search_space.json \
            -e tests/regression_tests/adamw -m 10 -c False -o True -r false
  ogbg_jax:
    runs-on: self-hosted
    steps:
      - uses: actions/checkout@v3
      - name: Run containerized workload
        run: |
          docker pull "$JAX_IMAGE"
          docker run \
            -v "$HOME/data/:/data/" \
            -v "$HOME/experiment_runs/:/experiment_runs" \
            -v "$HOME/experiment_runs/logs:/logs" \
            --gpus all --ipc=host "$JAX_IMAGE" \
            -d ogbg -f jax -s baselines/adamw/jax/submission.py -w ogbg \
            -t baselines/adamw/tuning_search_space.json \
            -e tests/regression_tests/adamw -m 10 -c False -o True -r false
  criteo_jax:
    runs-on: self-hosted
    steps:
      - uses: actions/checkout@v3
      - name: Run containerized workload
        run: |
          docker pull "$JAX_IMAGE"
          docker run \
            -v "$HOME/data/:/data/" \
            -v "$HOME/experiment_runs/:/experiment_runs" \
            -v "$HOME/experiment_runs/logs:/logs" \
            --gpus all --ipc=host "$JAX_IMAGE" \
            -d criteo1tb -f jax -s baselines/adamw/jax/submission.py -w criteo1tb \
            -t baselines/adamw/tuning_search_space.json \
            -e tests/regression_tests/adamw -m 10 -c False -o True -r false
  librispeech_conformer_jax:
    runs-on: self-hosted
    steps:
      - uses: actions/checkout@v3
      - name: Run containerized workload
        run: |
          docker pull "$JAX_IMAGE"
          docker run \
            -v "$HOME/data/:/data/" \
            -v "$HOME/experiment_runs/:/experiment_runs" \
            -v "$HOME/experiment_runs/logs:/logs" \
            --gpus all --ipc=host "$JAX_IMAGE" \
            -d librispeech -f jax -s baselines/adamw/jax/submission.py -w librispeech_conformer \
            -t baselines/adamw/tuning_search_space.json \
            -e tests/regression_tests/adamw -m 10 -c False -o True -r false
  librispeech_deepspeech_jax:
    runs-on: self-hosted
    steps:
      - uses: actions/checkout@v3
      - name: Run containerized workload
        run: |
          docker pull "$JAX_IMAGE"
          docker run \
            -v "$HOME/data/:/data/" \
            -v "$HOME/experiment_runs/:/experiment_runs" \
            -v "$HOME/experiment_runs/logs:/logs" \
            --gpus all --ipc=host "$JAX_IMAGE" \
            -d librispeech -f jax -s baselines/adamw/jax/submission.py -w librispeech_deepspeech \
            -t baselines/adamw/tuning_search_space.json \
            -e tests/regression_tests/adamw -m 10 -c False -o True -r false
  wmt_jax:
    runs-on: self-hosted
    steps:
      - uses: actions/checkout@v3
      - name: Run containerized workload
        run: |
          docker pull "$JAX_IMAGE"
          docker run \
            -v "$HOME/data/:/data/" \
            -v "$HOME/experiment_runs/:/experiment_runs" \
            -v "$HOME/experiment_runs/logs:/logs" \
            --gpus all --ipc=host "$JAX_IMAGE" \
            -d wmt -f jax -s baselines/adamw/jax/submission.py -w wmt \
            -t baselines/adamw/tuning_search_space.json \
            -e tests/regression_tests/adamw -m 10 -c False -o True -r false
  # --------------------------- PyTorch workloads ---------------------------
  fastmri_pytorch:
    runs-on: self-hosted
    steps:
      - uses: actions/checkout@v3
      - name: Run containerized workload
        run: |
          docker pull "$PYTORCH_IMAGE"
          docker run \
            -v "$HOME/data/:/data/" \
            -v "$HOME/experiment_runs/:/experiment_runs" \
            -v "$HOME/experiment_runs/logs:/logs" \
            --gpus all --ipc=host "$PYTORCH_IMAGE" \
            -d fastmri -f pytorch -s baselines/adamw/pytorch/submission.py -w fastmri \
            -t baselines/adamw/tuning_search_space.json \
            -e tests/regression_tests/adamw -m 10 -c False -o True -r false
  imagenet_resnet_pytorch:
    runs-on: self-hosted
    steps:
      - uses: actions/checkout@v3
      - name: Run containerized workload
        run: |
          docker pull "$PYTORCH_IMAGE"
          docker run \
            -v "$HOME/data/:/data/" \
            -v "$HOME/experiment_runs/:/experiment_runs" \
            -v "$HOME/experiment_runs/logs:/logs" \
            --gpus all --ipc=host "$PYTORCH_IMAGE" \
            -d imagenet -f pytorch -s baselines/adamw/pytorch/submission.py -w imagenet_resnet \
            -t baselines/adamw/tuning_search_space.json \
            -e tests/regression_tests/adamw -m 10 -c False -o True -r false
  imagenet_vit_pytorch:
    runs-on: self-hosted
    steps:
      - uses: actions/checkout@v3
      - name: Run containerized workload
        run: |
          docker pull "$PYTORCH_IMAGE"
          docker run \
            -v "$HOME/data/:/data/" \
            -v "$HOME/experiment_runs/:/experiment_runs" \
            -v "$HOME/experiment_runs/logs:/logs" \
            --gpus all --ipc=host "$PYTORCH_IMAGE" \
            -d imagenet -f pytorch -s baselines/adamw/pytorch/submission.py -w imagenet_vit \
            -t baselines/adamw/tuning_search_space.json \
            -e tests/regression_tests/adamw -m 10 -c False -o True -r false
  ogbg_pytorch:
    runs-on: self-hosted
    steps:
      - uses: actions/checkout@v3
      - name: Run containerized workload
        run: |
          docker pull "$PYTORCH_IMAGE"
          docker run \
            -v "$HOME/data/:/data/" \
            -v "$HOME/experiment_runs/:/experiment_runs" \
            -v "$HOME/experiment_runs/logs:/logs" \
            --gpus all --ipc=host "$PYTORCH_IMAGE" \
            -d ogbg -f pytorch -s baselines/adamw/pytorch/submission.py -w ogbg \
            -t baselines/adamw/tuning_search_space.json \
            -e tests/regression_tests/adamw -m 10 -c False -o True -r false
  criteo_pytorch:
    runs-on: self-hosted
    steps:
      - uses: actions/checkout@v3
      - name: Run containerized workload
        run: |
          docker pull "$PYTORCH_IMAGE"
          docker run \
            -v "$HOME/data/:/data/" \
            -v "$HOME/experiment_runs/:/experiment_runs" \
            -v "$HOME/experiment_runs/logs:/logs" \
            --gpus all --ipc=host "$PYTORCH_IMAGE" \
            -d criteo1tb -f pytorch -s baselines/adamw/pytorch/submission.py -w criteo1tb \
            -t baselines/adamw/tuning_search_space.json \
            -e tests/regression_tests/adamw -m 10 -c False -o True -r false
  librispeech_conformer_pytorch:
    runs-on: self-hosted
    steps:
      - uses: actions/checkout@v3
      - name: Run containerized workload
        run: |
          docker pull "$PYTORCH_IMAGE"
          docker run \
            -v "$HOME/data/:/data/" \
            -v "$HOME/experiment_runs/:/experiment_runs" \
            -v "$HOME/experiment_runs/logs:/logs" \
            --gpus all --ipc=host "$PYTORCH_IMAGE" \
            -d librispeech -f pytorch -s baselines/adamw/pytorch/submission.py -w librispeech_conformer \
            -t baselines/adamw/tuning_search_space.json \
            -e tests/regression_tests/adamw -m 10 -c False -o True -r false
  librispeech_deepspeech_pytorch:
    runs-on: self-hosted
    steps:
      - uses: actions/checkout@v3
      - name: Run containerized workload
        run: |
          docker pull "$PYTORCH_IMAGE"
          docker run \
            -v "$HOME/data/:/data/" \
            -v "$HOME/experiment_runs/:/experiment_runs" \
            -v "$HOME/experiment_runs/logs:/logs" \
            --gpus all --ipc=host "$PYTORCH_IMAGE" \
            -d librispeech -f pytorch -s baselines/adamw/pytorch/submission.py -w librispeech_deepspeech \
            -t baselines/adamw/tuning_search_space.json \
            -e tests/regression_tests/adamw -m 10 -c False -o True -r false
  wmt_pytorch:
    runs-on: self-hosted
    steps:
      - uses: actions/checkout@v3
      - name: Run containerized workload
        run: |
          docker pull "$PYTORCH_IMAGE"
          docker run \
            -v "$HOME/data/:/data/" \
            -v "$HOME/experiment_runs/:/experiment_runs" \
            -v "$HOME/experiment_runs/logs:/logs" \
            --gpus all --ipc=host "$PYTORCH_IMAGE" \
            -d wmt -f pytorch -s baselines/adamw/pytorch/submission.py -w wmt \
            -t baselines/adamw/tuning_search_space.json \
            -e tests/regression_tests/adamw -m 10 -c False -o True -r false
Loading

0 comments on commit 9ef40d2

Please sign in to comment.