diff --git a/.github/workflows/linting.yml b/.github/workflows/linting.yml index d89ae8887..89b5ef288 100644 --- a/.github/workflows/linting.yml +++ b/.github/workflows/linting.yml @@ -18,8 +18,8 @@ jobs: - name: Run pylint run: | pylint algorithmic_efficiency - pylint baselines pylint reference_algorithms + pylint prize_qualification_baselines pylint submission_runner.py pylint tests diff --git a/.github/workflows/regression_tests.yml b/.github/workflows/regression_tests.yml index 3a0736fa2..cb8595f58 100644 --- a/.github/workflows/regression_tests.yml +++ b/.github/workflows/regression_tests.yml @@ -44,7 +44,7 @@ jobs: - name: Run containerized workload run: | docker pull us-central1-docker.pkg.dev/training-algorithms-external/mlcommons-docker-repo/algoperf_jax_${{ github.head_ref || github.ref_name }} - docker run -v $HOME/data/:/data/ -v $HOME/experiment_runs/:/experiment_runs -v $HOME/experiment_runs/logs:/logs --gpus all --ipc=host us-central1-docker.pkg.dev/training-algorithms-external/mlcommons-docker-repo/algoperf_jax_${{ github.head_ref || github.ref_name }} -d fastmri -f jax -s baselines/adamw/jax/submission.py -w fastmri -t baselines/adamw/tuning_search_space.json -e tests/regression_tests/adamw -m 10 -c False -o True -r false + docker run -v $HOME/data/:/data/ -v $HOME/experiment_runs/:/experiment_runs -v $HOME/experiment_runs/logs:/logs --gpus all --ipc=host us-central1-docker.pkg.dev/training-algorithms-external/mlcommons-docker-repo/algoperf_jax_${{ github.head_ref || github.ref_name }} -d fastmri -f jax -s reference_algorithms/paper_baselines/adamw/jax/submission.py -w fastmri -t reference_algorithms/paper_baselines/adamw/tuning_search_space.json -e tests/regression_tests/adamw -m 10 -c False -o True -r false imagenet_resnet_jax: runs-on: self-hosted needs: build_and_push_jax_docker_image @@ -53,7 +53,7 @@ jobs: - name: Run containerized workload run: | docker pull us-central1-docker.pkg.dev/training-algorithms-external/mlcommons-docker-repo/algoperf_jax_${{ github.head_ref || github.ref_name }} - docker run -v $HOME/data/:/data/ -v $HOME/experiment_runs/:/experiment_runs -v $HOME/experiment_runs/logs:/logs --gpus all --ipc=host us-central1-docker.pkg.dev/training-algorithms-external/mlcommons-docker-repo/algoperf_jax_${{ github.head_ref || github.ref_name }} -d imagenet -f jax -s baselines/adamw/jax/submission.py -w imagenet_resnet -t baselines/adamw/tuning_search_space.json -e tests/regression_tests/adamw -m 10 -c False -o True -r false + docker run -v $HOME/data/:/data/ -v $HOME/experiment_runs/:/experiment_runs -v $HOME/experiment_runs/logs:/logs --gpus all --ipc=host us-central1-docker.pkg.dev/training-algorithms-external/mlcommons-docker-repo/algoperf_jax_${{ github.head_ref || github.ref_name }} -d imagenet -f jax -s reference_algorithms/paper_baselines/adamw/jax/submission.py -w imagenet_resnet -t reference_algorithms/paper_baselines/adamw/tuning_search_space.json -e tests/regression_tests/adamw -m 10 -c False -o True -r false imagenet_vit_jax: runs-on: self-hosted needs: build_and_push_jax_docker_image @@ -62,7 +62,7 @@ jobs: - name: Run containerized workload run: | docker pull us-central1-docker.pkg.dev/training-algorithms-external/mlcommons-docker-repo/algoperf_jax_${{ github.head_ref || github.ref_name }} - docker run -v $HOME/data/:/data/ -v $HOME/experiment_runs/:/experiment_runs -v $HOME/experiment_runs/logs:/logs --gpus all --ipc=host us-central1-docker.pkg.dev/training-algorithms-external/mlcommons-docker-repo/algoperf_jax_${{ github.head_ref || 
github.ref_name }} -d imagenet -f jax -s baselines/adamw/jax/submission.py -w imagenet_vit -t baselines/adamw/tuning_search_space.json -e tests/regression_tests/adamw -m 10 -c False -o True -r false + docker run -v $HOME/data/:/data/ -v $HOME/experiment_runs/:/experiment_runs -v $HOME/experiment_runs/logs:/logs --gpus all --ipc=host us-central1-docker.pkg.dev/training-algorithms-external/mlcommons-docker-repo/algoperf_jax_${{ github.head_ref || github.ref_name }} -d imagenet -f jax -s reference_algorithms/paper_baselines/adamw/jax/submission.py -w imagenet_vit -t reference_algorithms/paper_baselines/adamw/tuning_search_space.json -e tests/regression_tests/adamw -m 10 -c False -o True -r false ogbg_jax: runs-on: self-hosted needs: build_and_push_jax_docker_image @@ -71,7 +71,7 @@ jobs: - name: Run containerized workload run: | docker pull us-central1-docker.pkg.dev/training-algorithms-external/mlcommons-docker-repo/algoperf_jax_${{ github.head_ref || github.ref_name }} - docker run -v $HOME/data/:/data/ -v $HOME/experiment_runs/:/experiment_runs -v $HOME/experiment_runs/logs:/logs --gpus all --ipc=host us-central1-docker.pkg.dev/training-algorithms-external/mlcommons-docker-repo/algoperf_jax_${{ github.head_ref || github.ref_name }} -d ogbg -f jax -s baselines/adamw/jax/submission.py -w ogbg -t baselines/adamw/tuning_search_space.json -e tests/regression_tests/adamw -m 10 -c False -o True -r false + docker run -v $HOME/data/:/data/ -v $HOME/experiment_runs/:/experiment_runs -v $HOME/experiment_runs/logs:/logs --gpus all --ipc=host us-central1-docker.pkg.dev/training-algorithms-external/mlcommons-docker-repo/algoperf_jax_${{ github.head_ref || github.ref_name }} -d ogbg -f jax -s reference_algorithms/paper_baselines/adamw/jax/submission.py -w ogbg -t reference_algorithms/paper_baselines/adamw/tuning_search_space.json -e tests/regression_tests/adamw -m 10 -c False -o True -r false criteo_jax: runs-on: self-hosted needs: build_and_push_jax_docker_image @@ -80,7 +80,7 @@ jobs: - name: Run containerized workload run: | docker pull us-central1-docker.pkg.dev/training-algorithms-external/mlcommons-docker-repo/algoperf_jax_${{ github.head_ref || github.ref_name }} - docker run -v $HOME/data/:/data/ -v $HOME/experiment_runs/:/experiment_runs -v $HOME/experiment_runs/logs:/logs --gpus all --ipc=host us-central1-docker.pkg.dev/training-algorithms-external/mlcommons-docker-repo/algoperf_jax_${{ github.head_ref || github.ref_name }} -d criteo1tb -f jax -s baselines/adamw/jax/submission.py -w criteo1tb -t baselines/adamw/tuning_search_space.json -e tests/regression_tests/adamw -m 10 -c False -o True -r false + docker run -v $HOME/data/:/data/ -v $HOME/experiment_runs/:/experiment_runs -v $HOME/experiment_runs/logs:/logs --gpus all --ipc=host us-central1-docker.pkg.dev/training-algorithms-external/mlcommons-docker-repo/algoperf_jax_${{ github.head_ref || github.ref_name }} -d criteo1tb -f jax -s reference_algorithms/paper_baselines/adamw/jax/submission.py -w criteo1tb -t reference_algorithms/paper_baselines/adamw/tuning_search_space.json -e tests/regression_tests/adamw -m 10 -c False -o True -r false librispeech_conformer_jax: runs-on: self-hosted needs: build_and_push_jax_docker_image @@ -89,7 +89,7 @@ jobs: - name: Run containerized workload run: | docker pull us-central1-docker.pkg.dev/training-algorithms-external/mlcommons-docker-repo/algoperf_jax_${{ github.head_ref || github.ref_name }} - docker run -v $HOME/data/:/data/ -v $HOME/experiment_runs/:/experiment_runs -v $HOME/experiment_runs/logs:/logs 
--gpus all --ipc=host us-central1-docker.pkg.dev/training-algorithms-external/mlcommons-docker-repo/algoperf_jax_${{ github.head_ref || github.ref_name }} -d librispeech -f jax -s baselines/adamw/jax/submission.py -w librispeech_conformer -t baselines/adamw/tuning_search_space.json -e tests/regression_tests/adamw -m 10 -c False -o True -r false + docker run -v $HOME/data/:/data/ -v $HOME/experiment_runs/:/experiment_runs -v $HOME/experiment_runs/logs:/logs --gpus all --ipc=host us-central1-docker.pkg.dev/training-algorithms-external/mlcommons-docker-repo/algoperf_jax_${{ github.head_ref || github.ref_name }} -d librispeech -f jax -s reference_algorithms/paper_baselines/adamw/jax/submission.py -w librispeech_conformer -t reference_algorithms/paper_baselines/adamw/tuning_search_space.json -e tests/regression_tests/adamw -m 10 -c False -o True -r false librispeech_deepspeech_jax: runs-on: self-hosted needs: build_and_push_jax_docker_image @@ -98,7 +98,7 @@ jobs: - name: Run containerized workload run: | docker pull us-central1-docker.pkg.dev/training-algorithms-external/mlcommons-docker-repo/algoperf_jax_${{ github.head_ref || github.ref_name }} - docker run -v $HOME/data/:/data/ -v $HOME/experiment_runs/:/experiment_runs -v $HOME/experiment_runs/logs:/logs --gpus all --ipc=host us-central1-docker.pkg.dev/training-algorithms-external/mlcommons-docker-repo/algoperf_jax_${{ github.head_ref || github.ref_name }} -d librispeech -f jax -s baselines/adamw/jax/submission.py -w librispeech_deepspeech -t baselines/adamw/tuning_search_space.json -e tests/regression_tests/adamw -m 10 -c False -o True -r false + docker run -v $HOME/data/:/data/ -v $HOME/experiment_runs/:/experiment_runs -v $HOME/experiment_runs/logs:/logs --gpus all --ipc=host us-central1-docker.pkg.dev/training-algorithms-external/mlcommons-docker-repo/algoperf_jax_${{ github.head_ref || github.ref_name }} -d librispeech -f jax -s reference_algorithms/paper_baselines/adamw/jax/submission.py -w librispeech_deepspeech -t reference_algorithms/paper_baselines/adamw/tuning_search_space.json -e tests/regression_tests/adamw -m 10 -c False -o True -r false wmt_jax: runs-on: self-hosted needs: build_and_push_jax_docker_image @@ -107,7 +107,7 @@ jobs: - name: Run containerized workload run: | docker pull us-central1-docker.pkg.dev/training-algorithms-external/mlcommons-docker-repo/algoperf_jax_${{ github.head_ref || github.ref_name }} - docker run -v $HOME/data/:/data/ -v $HOME/experiment_runs/:/experiment_runs -v $HOME/experiment_runs/logs:/logs --gpus all --ipc=host us-central1-docker.pkg.dev/training-algorithms-external/mlcommons-docker-repo/algoperf_jax_${{ github.head_ref || github.ref_name }} -d wmt -f jax -s baselines/adamw/jax/submission.py -w wmt -t baselines/adamw/tuning_search_space.json -e tests/regression_tests/adamw -m 10 -c False -o True -r false + docker run -v $HOME/data/:/data/ -v $HOME/experiment_runs/:/experiment_runs -v $HOME/experiment_runs/logs:/logs --gpus all --ipc=host us-central1-docker.pkg.dev/training-algorithms-external/mlcommons-docker-repo/algoperf_jax_${{ github.head_ref || github.ref_name }} -d wmt -f jax -s reference_algorithms/paper_baselines/adamw/jax/submission.py -w wmt -t reference_algorithms/paper_baselines/adamw/tuning_search_space.json -e tests/regression_tests/adamw -m 10 -c False -o True -r false fastmri_pytorch: runs-on: self-hosted needs: build_and_push_pytorch_docker_image @@ -116,7 +116,7 @@ jobs: - name: Run containerized workload run: | docker pull 
us-central1-docker.pkg.dev/training-algorithms-external/mlcommons-docker-repo/algoperf_pytorch_${{ github.head_ref || github.ref_name }} - docker run -v $HOME/data/:/data/ -v $HOME/experiment_runs/:/experiment_runs -v $HOME/experiment_runs/logs:/logs --gpus all --ipc=host us-central1-docker.pkg.dev/training-algorithms-external/mlcommons-docker-repo/algoperf_pytorch_${{ github.head_ref || github.ref_name }} -d fastmri -f pytorch -s baselines/adamw/pytorch/submission.py -w fastmri -t baselines/adamw/tuning_search_space.json -e tests/regression_tests/adamw -m 10 -c False -o True -r false + docker run -v $HOME/data/:/data/ -v $HOME/experiment_runs/:/experiment_runs -v $HOME/experiment_runs/logs:/logs --gpus all --ipc=host us-central1-docker.pkg.dev/training-algorithms-external/mlcommons-docker-repo/algoperf_pytorch_${{ github.head_ref || github.ref_name }} -d fastmri -f pytorch -s reference_algorithms/paper_baselines/adamw/pytorch/submission.py -w fastmri -t reference_algorithms/paper_baselines/adamw/tuning_search_space.json -e tests/regression_tests/adamw -m 10 -c False -o True -r false imagenet_resnet_pytorch: runs-on: self-hosted needs: build_and_push_pytorch_docker_image @@ -125,7 +125,7 @@ jobs: - name: Run containerized workload run: | docker pull us-central1-docker.pkg.dev/training-algorithms-external/mlcommons-docker-repo/algoperf_pytorch_${{ github.head_ref || github.ref_name }} - docker run -v $HOME/data/:/data/ -v $HOME/experiment_runs/:/experiment_runs -v $HOME/experiment_runs/logs:/logs --gpus all --ipc=host us-central1-docker.pkg.dev/training-algorithms-external/mlcommons-docker-repo/algoperf_pytorch_${{ github.head_ref || github.ref_name }} -d imagenet -f pytorch -s baselines/adamw/pytorch/submission.py -w imagenet_resnet -t baselines/adamw/tuning_search_space.json -e tests/regression_tests/adamw -m 10 -c False -o True -r false + docker run -v $HOME/data/:/data/ -v $HOME/experiment_runs/:/experiment_runs -v $HOME/experiment_runs/logs:/logs --gpus all --ipc=host us-central1-docker.pkg.dev/training-algorithms-external/mlcommons-docker-repo/algoperf_pytorch_${{ github.head_ref || github.ref_name }} -d imagenet -f pytorch -s reference_algorithms/paper_baselines/adamw/pytorch/submission.py -w imagenet_resnet -t reference_algorithms/paper_baselines/adamw/tuning_search_space.json -e tests/regression_tests/adamw -m 10 -c False -o True -r false imagenet_vit_pytorch: runs-on: self-hosted needs: build_and_push_pytorch_docker_image @@ -134,7 +134,7 @@ jobs: - name: Run containerized workload run: | docker pull us-central1-docker.pkg.dev/training-algorithms-external/mlcommons-docker-repo/algoperf_pytorch_${{ github.head_ref || github.ref_name }} - docker run -v $HOME/data/:/data/ -v $HOME/experiment_runs/:/experiment_runs -v $HOME/experiment_runs/logs:/logs --gpus all --ipc=host us-central1-docker.pkg.dev/training-algorithms-external/mlcommons-docker-repo/algoperf_pytorch_${{ github.head_ref || github.ref_name }} -d imagenet -f pytorch -s baselines/adamw/pytorch/submission.py -w imagenet_vit -t baselines/adamw/tuning_search_space.json -e tests/regression_tests/adamw -m 10 -c False -o True -r false + docker run -v $HOME/data/:/data/ -v $HOME/experiment_runs/:/experiment_runs -v $HOME/experiment_runs/logs:/logs --gpus all --ipc=host us-central1-docker.pkg.dev/training-algorithms-external/mlcommons-docker-repo/algoperf_pytorch_${{ github.head_ref || github.ref_name }} -d imagenet -f pytorch -s reference_algorithms/paper_baselines/adamw/pytorch/submission.py -w imagenet_vit -t 
reference_algorithms/paper_baselines/adamw/tuning_search_space.json -e tests/regression_tests/adamw -m 10 -c False -o True -r false ogbg_pytorch: runs-on: self-hosted needs: build_and_push_pytorch_docker_image @@ -143,7 +143,7 @@ jobs: - name: Run containerized workload run: | docker pull us-central1-docker.pkg.dev/training-algorithms-external/mlcommons-docker-repo/algoperf_pytorch_${{ github.head_ref || github.ref_name }} - docker run -v $HOME/data/:/data/ -v $HOME/experiment_runs/:/experiment_runs -v $HOME/experiment_runs/logs:/logs --gpus all --ipc=host us-central1-docker.pkg.dev/training-algorithms-external/mlcommons-docker-repo/algoperf_pytorch_${{ github.head_ref || github.ref_name }} -d ogbg -f pytorch -s baselines/adamw/pytorch/submission.py -w ogbg -t baselines/adamw/tuning_search_space.json -e tests/regression_tests/adamw -m 10 -c False -o True -r false + docker run -v $HOME/data/:/data/ -v $HOME/experiment_runs/:/experiment_runs -v $HOME/experiment_runs/logs:/logs --gpus all --ipc=host us-central1-docker.pkg.dev/training-algorithms-external/mlcommons-docker-repo/algoperf_pytorch_${{ github.head_ref || github.ref_name }} -d ogbg -f pytorch -s reference_algorithms/paper_baselines/adamw/pytorch/submission.py -w ogbg -t reference_algorithms/paper_baselines/adamw/tuning_search_space.json -e tests/regression_tests/adamw -m 10 -c False -o True -r false criteo_pytorch: runs-on: self-hosted needs: build_and_push_pytorch_docker_image @@ -152,7 +152,7 @@ jobs: - name: Run containerized workload run: | docker pull us-central1-docker.pkg.dev/training-algorithms-external/mlcommons-docker-repo/algoperf_pytorch_${{ github.head_ref || github.ref_name }} - docker run -v $HOME/data/:/data/ -v $HOME/experiment_runs/:/experiment_runs -v $HOME/experiment_runs/logs:/logs --gpus all --ipc=host us-central1-docker.pkg.dev/training-algorithms-external/mlcommons-docker-repo/algoperf_pytorch_${{ github.head_ref || github.ref_name }} -d criteo1tb -f pytorch -s baselines/adamw/pytorch/submission.py -w criteo1tb -t baselines/adamw/tuning_search_space.json -e tests/regression_tests/adamw -m 10 -c False -o True -r false + docker run -v $HOME/data/:/data/ -v $HOME/experiment_runs/:/experiment_runs -v $HOME/experiment_runs/logs:/logs --gpus all --ipc=host us-central1-docker.pkg.dev/training-algorithms-external/mlcommons-docker-repo/algoperf_pytorch_${{ github.head_ref || github.ref_name }} -d criteo1tb -f pytorch -s reference_algorithms/paper_baselines/adamw/pytorch/submission.py -w criteo1tb -t reference_algorithms/paper_baselines/adamw/tuning_search_space.json -e tests/regression_tests/adamw -m 10 -c False -o True -r false exit $? 
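Every regression-test job in these workflows changes in the same way: only the `-s` (submission module) and `-t` (tuning search space) paths move from `baselines/` into `reference_algorithms/paper_baselines/`, while the rest of the `docker run` invocation is untouched. As a reading aid only, here is a minimal sketch of that shared command shape; `$BRANCH`, `$FRAMEWORK`, `$DATASET`, and `$WORKLOAD` are placeholders standing in for the per-job values, not variables defined in the workflows.

```bash
# Sketch of the shared job shape; the per-job values are substituted for the
# placeholder variables below. All flag values are copied from the jobs above.
IMAGE=us-central1-docker.pkg.dev/training-algorithms-external/mlcommons-docker-repo/algoperf_${FRAMEWORK}_${BRANCH}
docker run -v $HOME/data/:/data/ \
  -v $HOME/experiment_runs/:/experiment_runs \
  -v $HOME/experiment_runs/logs:/logs \
  --gpus all --ipc=host $IMAGE \
  -d $DATASET -f $FRAMEWORK \
  -s reference_algorithms/paper_baselines/adamw/$FRAMEWORK/submission.py \
  -w $WORKLOAD \
  -t reference_algorithms/paper_baselines/adamw/tuning_search_space.json \
  -e tests/regression_tests/adamw -m 10 -c False -o True -r false
```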
librispeech_conformer_pytorch: runs-on: self-hosted @@ -162,7 +162,7 @@ jobs: - name: Run containerized workload run: | docker pull us-central1-docker.pkg.dev/training-algorithms-external/mlcommons-docker-repo/algoperf_pytorch_${{ github.head_ref || github.ref_name }} - docker run -v $HOME/data/:/data/ -v $HOME/experiment_runs/:/experiment_runs -v $HOME/experiment_runs/logs:/logs --gpus all --ipc=host us-central1-docker.pkg.dev/training-algorithms-external/mlcommons-docker-repo/algoperf_pytorch_${{ github.head_ref || github.ref_name }} -d librispeech -f pytorch -s baselines/adamw/pytorch/submission.py -w librispeech_conformer -t baselines/adamw/tuning_search_space.json -e tests/regression_tests/adamw -m 10 -c False -o True -r false + docker run -v $HOME/data/:/data/ -v $HOME/experiment_runs/:/experiment_runs -v $HOME/experiment_runs/logs:/logs --gpus all --ipc=host us-central1-docker.pkg.dev/training-algorithms-external/mlcommons-docker-repo/algoperf_pytorch_${{ github.head_ref || github.ref_name }} -d librispeech -f pytorch -s reference_algorithms/paper_baselines/adamw/pytorch/submission.py -w librispeech_conformer -t reference_algorithms/paper_baselines/adamw/tuning_search_space.json -e tests/regression_tests/adamw -m 10 -c False -o True -r false librispeech_deepspeech_pytorch: runs-on: self-hosted needs: build_and_push_pytorch_docker_image @@ -171,7 +171,7 @@ jobs: - name: Run containerized workload run: | docker pull us-central1-docker.pkg.dev/training-algorithms-external/mlcommons-docker-repo/algoperf_pytorch_${{ github.head_ref || github.ref_name }} - docker run -v $HOME/data/:/data/ -v $HOME/experiment_runs/:/experiment_runs -v $HOME/experiment_runs/logs:/logs --gpus all --ipc=host us-central1-docker.pkg.dev/training-algorithms-external/mlcommons-docker-repo/algoperf_pytorch_${{ github.head_ref || github.ref_name }} -d librispeech -f pytorch -s baselines/adamw/pytorch/submission.py -w librispeech_deepspeech -t baselines/adamw/tuning_search_space.json -e tests/regression_tests/adamw -m 10 -c False -o True -r false + docker run -v $HOME/data/:/data/ -v $HOME/experiment_runs/:/experiment_runs -v $HOME/experiment_runs/logs:/logs --gpus all --ipc=host us-central1-docker.pkg.dev/training-algorithms-external/mlcommons-docker-repo/algoperf_pytorch_${{ github.head_ref || github.ref_name }} -d librispeech -f pytorch -s reference_algorithms/paper_baselines/adamw/pytorch/submission.py -w librispeech_deepspeech -t reference_algorithms/paper_baselines/adamw/tuning_search_space.json -e tests/regression_tests/adamw -m 10 -c False -o True -r false wmt_pytorch: runs-on: self-hosted needs: build_and_push_pytorch_docker_image @@ -180,4 +180,4 @@ jobs: - name: Run containerized workload run: | docker pull us-central1-docker.pkg.dev/training-algorithms-external/mlcommons-docker-repo/algoperf_pytorch_${{ github.head_ref || github.ref_name }} - docker run -v $HOME/data/:/data/ -v $HOME/experiment_runs/:/experiment_runs -v $HOME/experiment_runs/logs:/logs --gpus all --ipc=host us-central1-docker.pkg.dev/training-algorithms-external/mlcommons-docker-repo/algoperf_pytorch_${{ github.head_ref || github.ref_name }} -d wmt -f pytorch -s baselines/adamw/pytorch/submission.py -w wmt -t baselines/adamw/tuning_search_space.json -e tests/regression_tests/adamw -m 10 -c False -o True -r false + docker run -v $HOME/data/:/data/ -v $HOME/experiment_runs/:/experiment_runs -v $HOME/experiment_runs/logs:/logs --gpus all --ipc=host 
us-central1-docker.pkg.dev/training-algorithms-external/mlcommons-docker-repo/algoperf_pytorch_${{ github.head_ref || github.ref_name }} -d wmt -f pytorch -s reference_algorithms/paper_baselines/adamw/pytorch/submission.py -w wmt -t reference_algorithms/paper_baselines/adamw/tuning_search_space.json -e tests/regression_tests/adamw -m 10 -c False -o True -r false diff --git a/.github/workflows/regression_tests_variants.yml b/.github/workflows/regression_tests_variants.yml index 15eccba4c..ef1585d0d 100644 --- a/.github/workflows/regression_tests_variants.yml +++ b/.github/workflows/regression_tests_variants.yml @@ -44,7 +44,7 @@ jobs: - name: Run containerized workload run: | docker pull us-central1-docker.pkg.dev/training-algorithms-external/mlcommons-docker-repo/algoperf_jax_${{ github.head_ref || github.ref_name }} - docker run -v $HOME/data/:/data/ -v $HOME/experiment_runs/:/experiment_runs -v $HOME/experiment_runs/logs:/logs --gpus all --ipc=host us-central1-docker.pkg.dev/training-algorithms-external/mlcommons-docker-repo/algoperf_jax_${{ github.head_ref || github.ref_name }} -d criteo1tb -f jax -s baselines/adamw/jax/submission.py -w criteo1tb_layernorm -t baselines/adamw/tuning_search_space.json -e tests/regression_tests/adamw -m 10 -c False -o True -r false + docker run -v $HOME/data/:/data/ -v $HOME/experiment_runs/:/experiment_runs -v $HOME/experiment_runs/logs:/logs --gpus all --ipc=host us-central1-docker.pkg.dev/training-algorithms-external/mlcommons-docker-repo/algoperf_jax_${{ github.head_ref || github.ref_name }} -d criteo1tb -f jax -s reference_algorithms/paper_baselines/adamw/jax/submission.py -w criteo1tb_layernorm -t reference_algorithms/paper_baselines/adamw/tuning_search_space.json -e tests/regression_tests/adamw -m 10 -c False -o True -r false criteo_resnet_jax: runs-on: self-hosted needs: build_and_push_jax_docker_image @@ -53,7 +53,7 @@ jobs: - name: Run containerized workload run: | docker pull us-central1-docker.pkg.dev/training-algorithms-external/mlcommons-docker-repo/algoperf_jax_${{ github.head_ref || github.ref_name }} - docker run -v $HOME/data/:/data/ -v $HOME/experiment_runs/:/experiment_runs -v $HOME/experiment_runs/logs:/logs --gpus all --ipc=host us-central1-docker.pkg.dev/training-algorithms-external/mlcommons-docker-repo/algoperf_jax_${{ github.head_ref || github.ref_name }} -d criteo1tb -f jax -s baselines/adamw/jax/submission.py -w criteo1tb_resnet -t baselines/adamw/tuning_search_space.json -e tests/regression_tests/adamw -m 10 -c False -o True -r false + docker run -v $HOME/data/:/data/ -v $HOME/experiment_runs/:/experiment_runs -v $HOME/experiment_runs/logs:/logs --gpus all --ipc=host us-central1-docker.pkg.dev/training-algorithms-external/mlcommons-docker-repo/algoperf_jax_${{ github.head_ref || github.ref_name }} -d criteo1tb -f jax -s reference_algorithms/paper_baselines/adamw/jax/submission.py -w criteo1tb_resnet -t reference_algorithms/paper_baselines/adamw/tuning_search_space.json -e tests/regression_tests/adamw -m 10 -c False -o True -r false criteo_layernorm_pytorch: runs-on: self-hosted needs: build_and_push_pytorch_docker_image @@ -62,7 +62,7 @@ jobs: - name: Run containerized workload run: | docker pull us-central1-docker.pkg.dev/training-algorithms-external/mlcommons-docker-repo/algoperf_pytorch_${{ github.head_ref || github.ref_name }} - docker run -v $HOME/data/:/data/ -v $HOME/experiment_runs/:/experiment_runs -v $HOME/experiment_runs/logs:/logs --gpus all --ipc=host 
us-central1-docker.pkg.dev/training-algorithms-external/mlcommons-docker-repo/algoperf_pytorch_${{ github.head_ref || github.ref_name }} -d criteo1tb -f pytorch -s baselines/adamw/pytorch/submission.py -w criteo1tb_layernorm -t baselines/adamw/tuning_search_space.json -e tests/regression_tests/adamw -m 10 -c False -o True -r false + docker run -v $HOME/data/:/data/ -v $HOME/experiment_runs/:/experiment_runs -v $HOME/experiment_runs/logs:/logs --gpus all --ipc=host us-central1-docker.pkg.dev/training-algorithms-external/mlcommons-docker-repo/algoperf_pytorch_${{ github.head_ref || github.ref_name }} -d criteo1tb -f pytorch -s reference_algorithms/paper_baselines/adamw/pytorch/submission.py -w criteo1tb_layernorm -t reference_algorithms/paper_baselines/adamw/tuning_search_space.json -e tests/regression_tests/adamw -m 10 -c False -o True -r false criteo_resnet_pytorch: runs-on: self-hosted needs: build_and_push_pytorch_docker_image @@ -71,7 +71,7 @@ jobs: - name: Run containerized workload run: | docker pull us-central1-docker.pkg.dev/training-algorithms-external/mlcommons-docker-repo/algoperf_pytorch_${{ github.head_ref || github.ref_name }} - docker run -v $HOME/data/:/data/ -v $HOME/experiment_runs/:/experiment_runs -v $HOME/experiment_runs/logs:/logs --gpus all --ipc=host us-central1-docker.pkg.dev/training-algorithms-external/mlcommons-docker-repo/algoperf_pytorch_${{ github.head_ref || github.ref_name }} -d criteo1tb -f pytorch -s baselines/adamw/pytorch/submission.py -w criteo1tb_resnet -t baselines/adamw/tuning_search_space.json -e tests/regression_tests/adamw -m 10 -c False -o True -r false + docker run -v $HOME/data/:/data/ -v $HOME/experiment_runs/:/experiment_runs -v $HOME/experiment_runs/logs:/logs --gpus all --ipc=host us-central1-docker.pkg.dev/training-algorithms-external/mlcommons-docker-repo/algoperf_pytorch_${{ github.head_ref || github.ref_name }} -d criteo1tb -f pytorch -s reference_algorithms/paper_baselines/adamw/pytorch/submission.py -w criteo1tb_resnet -t reference_algorithms/paper_baselines/adamw/tuning_search_space.json -e tests/regression_tests/adamw -m 10 -c False -o True -r false criteo_resnet_pytorch: runs-on: self-hosted needs: build_and_push_pytorch_docker_image @@ -80,6 +80,6 @@ jobs: - name: Run containerized workload run: | docker pull us-central1-docker.pkg.dev/training-algorithms-external/mlcommons-docker-repo/algoperf_pytorch_${{ github.head_ref || github.ref_name }} - docker run -v $HOME/data/:/data/ -v $HOME/experiment_runs/:/experiment_runs -v $HOME/experiment_runs/logs:/logs --gpus all --ipc=host us-central1-docker.pkg.dev/training-algorithms-external/mlcommons-docker-repo/algoperf_pytorch_${{ github.head_ref || github.ref_name }} -d criteo1tb -f pytorch -s baselines/adamw/pytorch/submission.py -w criteo1tb_embed_init -t baselines/adamw/tuning_search_space.json -e tests/regression_tests/adamw -m 10 -c False -o True -r false + docker run -v $HOME/data/:/data/ -v $HOME/experiment_runs/:/experiment_runs -v $HOME/experiment_runs/logs:/logs --gpus all --ipc=host us-central1-docker.pkg.dev/training-algorithms-external/mlcommons-docker-repo/algoperf_pytorch_${{ github.head_ref || github.ref_name }} -d criteo1tb -f pytorch -s reference_algorithms/paper_baselines/adamw/pytorch/submission.py -w criteo1tb_embed_init -t reference_algorithms/paper_baselines/adamw/tuning_search_space.json -e tests/regression_tests/adamw -m 10 -c False -o True -r false diff --git a/CALL_FOR_SUBMISSIONS.md b/CALL_FOR_SUBMISSIONS.md index 30207ac7f..84697f577 100644 --- 
a/CALL_FOR_SUBMISSIONS.md +++ b/CALL_FOR_SUBMISSIONS.md @@ -13,8 +13,9 @@ Submissions can compete under two hyperparameter tuning rulesets (with separate ## Dates -- **Call for submissions: November 28th, 2023** -- Registration deadline to express non-binding intent to submit: January 28th, 2024 +- Call for submissions: November 28th, 2023 +- **Registration deadline to express non-binding intent to submit: February 28th, 2024**.\ +Please fill out the (mandatory but non-binding) [**registration form**](https://forms.gle/K7ty8MaYdi2AxJ4N8). - **Submission deadline: March 28th, 2024** - **Deadline for self-reporting preliminary results: May 28th, 2024** - [tentative] Announcement of all results: July 15th, 2024 diff --git a/COMPETITION_RULES.md b/COMPETITION_RULES.md index 85f16c4cf..beca743e0 100644 --- a/COMPETITION_RULES.md +++ b/COMPETITION_RULES.md @@ -41,7 +41,7 @@ The Competition is open to English-speaking individuals and teams (made of indiv The Competition begins at 12:01am (ET) on November 28, 2023 and ends at 11:59pm (ET) on May 28, 2024, all according to Sponsor's time clock, which decisions are final (the "Competition Period"). There are several deadlines contained within the Competition Period: -- **Intention to Submit.** You must register your Intention to Submit no later than 11:59pm ET on January 28, 2024. +- **Intention to Submit.** You must register your Intention to Submit no later than 11:59pm ET on February 28, 2024. - **Submission Period.** You must complete your Submission and enter it after the Intention to Submit deadline, but no later than 11:59pm ET on March 28, 2024. - **Deadline for self-reporting results.** 11:59pm ET on May 28, 2024. @@ -79,7 +79,7 @@ Submissions must use specific versions of PyTorch and JAX, provided by Sponsor. ## Scoring -All otherwise qualified Submissions shall be scored. Submissions will be scored based on their required training time to reach the target performance on the validation set of each workload, using measuring techniques designed to give all Submissions equal parity. In the event that no Submission in a ruleset receives a score exceeding that of both [prize qualification baselines](./reference_algorithms/prize_qualification_baselines/README.md), no prizes will be awarded for this ruleset. The Teams with the highest scores will be determined to be winners ("Selected Teams"). In the event of a tie the prize money will be split equally between the winners. +All otherwise qualified Submissions shall be scored. Submissions will be scored based on their required training time to reach the target performance on the validation set of each workload, using measuring techniques designed to give all Submissions equal parity. In the event that no Submission in a ruleset receives a score exceeding that of both [prize qualification baselines](./prize_qualification_baselines/README.md), no prizes will be awarded for this ruleset. The Teams with the highest scores will be determined to be winners ("Selected Teams"). In the event of a tie the prize money will be split equally between the winners. 
## Submissions diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index b22cb5f3a..364bbee62 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -228,7 +228,7 @@ To run the below commands, use the versions installed via `pip install -e '.[dev To automatically fix formatting errors, run the following (*WARNING:* this will edit your code, so it is suggested to make a git commit first!): ```bash -yapf -i -r -vv -p algorithmic_efficiency baselines datasets reference_algorithms tests *.py +yapf -i -r -vv -p algorithmic_efficiency datasets prize_qualification_baselines reference_algorithms tests *.py ``` To sort all import orderings, run the following: @@ -247,8 +247,8 @@ To print out all offending pylint issues, run the following: ```bash pylint algorithmic_efficiency -pylint baselines pylint datasets +pylint prize_qualification_baselines pylint reference_algorithms pylint submission_runner.py pylint tests diff --git a/DOCUMENTATION.md b/DOCUMENTATION.md index de7a3b7f8..a25f5b689 100644 --- a/DOCUMENTATION.md +++ b/DOCUMENTATION.md @@ -38,10 +38,9 @@ - [How can I know if my code can be run on benchmarking hardware?](#how-can-i-know-if-my-code-can-be-run-on-benchmarking-hardware) - [Are we allowed to use our own hardware to self-report the results?](#are-we-allowed-to-use-our-own-hardware-to-self-report-the-results) - [What can I do if running the benchmark is too expensive for me?](#what-can-i-do-if-running-the-benchmark-is-too-expensive-for-me) - - [Can I submit existing (i.e. published) training algorithms as submissions?](#can-i-submit-previously-published-training-algorithms-as-submissions) + - [Can I submit previously published training algorithms as submissions?](#can-i-submit-previously-published-training-algorithms-as-submissions) - [Disclaimers](#disclaimers) - [Shared Data Pipelines between JAX and PyTorch](#shared-data-pipelines-between-jax-and-pytorch) - - [Pytorch Conformer CUDA OOM](#pytorch-conformer-cuda-oom) ## Introduction @@ -517,7 +516,7 @@ To ensure that all submitters can develop their submissions based on the same co #### My machine only has one GPU. How can I use this repo? -You can run this repo on a machine with an arbitrary number of GPUs. However, the default batch sizes in our reference algorithms `algorithmic-efficiency/baselines` and `algorithmic-efficiency/reference_algorithms` are tuned for a machine with 8 16GB V100 GPUs. You may run into OOMs if you run these algorithms with fewer than 8 GPUs. If you run into these issues because you are using a machine with less total GPU memory, please reduce the batch sizes for the submission. Note that your final submission must 'fit' on the benchmarking hardware, so if you are using fewer +You can run this repo on a machine with an arbitrary number of GPUs. However, the default batch sizes in our reference algorithms (e.g. `algorithmic-efficiency/prize_qualification_baselines` and `algorithmic-efficiency/reference_algorithms`) are tuned for a machine with 8 16GB V100 GPUs. You may run into OOMs if you run these algorithms with fewer than 8 GPUs. If you run into these issues because you are using a machine with less total GPU memory, please reduce the batch sizes for the submission. Note that your final submission must 'fit' on the benchmarking hardware, so if you are using fewer GPUs with higher per GPU memory, please monitor your memory usage to make sure it will fit on 8xV100 GPUs with 16GB of VRAM per card. #### How do I run this on my SLURM cluster? 
@@ -576,4 +575,3 @@ The JAX and PyTorch versions of the Criteo, FastMRI, Librispeech, OGBG, and WMT Since we use PyTorch's [`DistributedDataParallel`](https://pytorch.org/docs/stable/generated/torch.nn.parallel.DistributedDataParallel.html#torch.nn.parallel.DistributedDataParallel) implementation, there is one Python process for each device. Depending on the hardware and the settings of the cluster, running a TensorFlow input pipeline in each Python process can lead to errors, since too many threads are created in each process. See [this PR thread](https://github.com/mlcommons/algorithmic-efficiency/pull/85) for more details. While this issue might not affect all setups, we currently implement a different strategy: we only run the TensorFlow input pipeline in one Python process (with `rank == 0`), and [broadcast](https://pytorch.org/docs/stable/distributed.html#torch.distributed.broadcast) the batches to all other devices. This introduces an additional communication overhead for each batch. See the [implementation for the WMT workload](https://github.com/mlcommons/algorithmic-efficiency/blob/main/algorithmic_efficiency/workloads/wmt/wmt_pytorch/workload.py#L215-L288) as an example. - diff --git a/GETTING_STARTED.md b/GETTING_STARTED.md index b13f9f00c..96a7b7d6f 100644 --- a/GETTING_STARTED.md +++ b/GETTING_STARTED.md @@ -163,6 +163,7 @@ singularity build --fakeroot .sif Singularity.def ``` Note that this can take several minutes. Then, to start a shell session with GPU support (by using the `--nv` flag), we can run + ```bash singularity shell --bind $HOME/data:/data,$HOME/experiment_runs:/experiment_runs \ --nv .sif @@ -194,7 +195,7 @@ Make a submissions subdirectory to store your submission modules e.g. `algorithm ### Coding your Submission -You can find examples of sumbission modules under `algorithmic-efficiency/baselines` and `algorithmic-efficiency/reference_algorithms`. \ +You can find examples of submission modules under `algorithmic-efficiency/prize_qualification_baselines` and `algorithmic-efficiency/reference_algorithms`. \ A submission for the external ruleset will consist of a submission module and a tuning search space definition. 1. Copy the template submission module `submissions/template/submission.py` into your submissions directory e.g. in `algorithmic-efficiency/my_submissions`. @@ -210,7 +211,7 @@ A submission for the external ruleset will consist of a submission module and a } ``` - For a complete example see [tuning_search_space.json](https://github.com/mlcommons/algorithmic-efficiency/blob/main/reference_algorithms/target_setting_algorithms/imagenet_resnet/tuning_search_space.json). + For a complete example see [tuning_search_space.json](/reference_algorithms/target_setting_algorithms/imagenet_resnet/tuning_search_space.json). 2. Define a range of values for quasirandom sampling by specifing a `min`, `max` and `scaling` keys for the hyperparameter: @@ -224,7 +225,7 @@ A submission for the external ruleset will consist of a submission module and a } ``` - For a complete example see [tuning_search_space.json](https://github.com/mlcommons/algorithmic-efficiency/blob/main/baselines/nadamw/tuning_search_space.json). + For a complete example see [tuning_search_space.json](/reference_algorithms/paper_baselines/nadamw/tuning_search_space.json). 
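Once the tuning search space JSON is written (in either of the two styles shown above), it is passed to the submission runner alongside the submission module. A minimal sketch, assuming the `my_submissions/` directory created in the earlier step; the file names under it are placeholders for your own submission module and search space:

```bash
# Placeholder paths: my_submissions/ is the example submissions directory
# from the step above, not a directory shipped with this repository.
python3 submission_runner.py \
    --framework=jax \
    --workload=mnist \
    --experiment_dir=$HOME/experiments \
    --experiment_name=my_first_tuning_run \
    --submission_path=my_submissions/submission.py \
    --tuning_search_space=my_submissions/tuning_search_space.json
```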
## Run your Submission @@ -342,6 +343,6 @@ To produce performance profile and performance table: python3 scoring/score_submission.py --experiment_path= --output_dir= ``` -We provide the scores and performance profiles for the baseline algorithms in the "Baseline Results" section in [Benchmarking Neural Network Training Algorithms](https://arxiv.org/abs/2306.07179). +We provide the scores and performance profiles for the [paper baseline algorithms](/reference_algorithms/paper_baselines/) in the "Baseline Results" section in [Benchmarking Neural Network Training Algorithms](https://arxiv.org/abs/2306.07179). **Good Luck!** diff --git a/README.md b/README.md index 941344903..65bae4d54 100644 --- a/README.md +++ b/README.md @@ -18,6 +18,7 @@ [![Lint](https://github.com/mlcommons/algorithmic-efficiency/actions/workflows/linting.yml/badge.svg)](https://github.com/mlcommons/algorithmic-efficiency/actions/workflows/linting.yml) [![License: Apache 2.0](https://img.shields.io/badge/License-Apache_2.0-blue.svg)](https://github.com/mlcommons/algorithmic-efficiency/blob/main/LICENSE.md) [![Code style: yapf](https://img.shields.io/badge/code%20style-yapf-orange)](https://github.com/google/yapf) +[![Discord](https://dcbadge.vercel.app/api/server/5FPXK7SMt6?style=flat)](https://discord.gg/5FPXK7SMt6) --- @@ -27,7 +28,8 @@ > [!IMPORTANT] > Upcoming Deadline: -> Registration deadline to express non-binding intent to submit: **January 28th, 2024** +> Registration deadline to express non-binding intent to submit: **February 28th, 2024**.\ +> **If you consider submitting, please fill out the** (mandatory but non-binding) [**registration form**](https://forms.gle/K7ty8MaYdi2AxJ4N8). ## Table of Contents @@ -42,6 +44,9 @@ ## Installation +> [!TIP] +> **If you have any questions about the benchmark competition or you run into any issues, please feel free to contact us.** Either [file an issue](https://github.com/mlcommons/algorithmic-efficiency/issues), ask a question on [our Discord](https://discord.gg/5FPXK7SMt6) or [join our weekly meetings](https://mlcommons.org/en/groups/research-algorithms/). + You can install this package and dependencies in a [Python virtual environment](/GETTING_STARTED.md#python-virtual-environment) or use a [Docker/Singularity/Apptainer container](/GETTING_STARTED.md#docker) (recommended). We recommend using a Docker container (or alternatively, a Singularity/Apptainer container) to ensure a similar environment to our scoring and testing environments. Both options are described in detail in the [**Getting Started**](/GETTING_STARTED.md) document. 
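As a quick illustration of the virtual-environment route mentioned above (a sketch only; the `dev` extra is the one quoted in CONTRIBUTING.md, and you may want a framework-specific extra instead):

```bash
# Minimal virtual-environment install; see GETTING_STARTED.md for the full,
# framework-specific instructions and the recommended Docker setup.
python3 -m venv env
source env/bin/activate
pip install -e '.[dev]'
```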
@@ -74,8 +79,8 @@ python3 submission_runner.py \ --workload=mnist \ --experiment_dir=$HOME/experiments \ --experiment_name=my_first_experiment \ - --submission_path=baselines/adamw/jax/submission.py \ - --tuning_search_space=baselines/adamw/tuning_search_space.json + --submission_path=reference_algorithms/paper_baselines/adamw/jax/submission.py \ + --tuning_search_space=reference_algorithms/paper_baselines/adamw/tuning_search_space.json ``` *TL;DR running a PyTorch workload:* @@ -86,8 +91,8 @@ python3 submission_runner.py \ --workload=mnist \ --experiment_dir=$HOME/experiments \ --experiment_name=my_first_experiment \ - --submission_path=baselines/adamw/jax/submission.py \ - --tuning_search_space=baselines/adamw/tuning_search_space.json + --submission_path=reference_algorithms/paper_baselines/adamw/jax/submission.py \ + --tuning_search_space=reference_algorithms/paper_baselines/adamw/tuning_search_space.json ``` ## Call for Submissions diff --git a/baselines/README.md b/baselines/README.md deleted file mode 100644 index 76f2b9ba0..000000000 --- a/baselines/README.md +++ /dev/null @@ -1,3 +0,0 @@ -# Baseline submissions - -Valid baseline submissions for the [external tuning ruleset](../README.md#external-tuning-ruleset). diff --git a/reference_algorithms/prize_qualification_baselines/README.md b/prize_qualification_baselines/README.md similarity index 52% rename from reference_algorithms/prize_qualification_baselines/README.md rename to prize_qualification_baselines/README.md index 100555964..f5bb007be 100644 --- a/reference_algorithms/prize_qualification_baselines/README.md +++ b/prize_qualification_baselines/README.md @@ -8,8 +8,8 @@ This directory contains the baseline(s) that submissions must beat to qualify fo The prize qualification baseline submissions for JAX are: -- `reference_algorithms/prize_qualification_baselines/external_tuning/jax_nadamw_target_setting.py` -- `feference_algorithms/prize_qualification_baselines/external_tuning/jax_nadamw_full_budget.py` +- `prize_qualification_baselines/external_tuning/jax_nadamw_target_setting.py` +- `prize_qualification_baselines/external_tuning/jax_nadamw_full_budget.py` Example command: @@ -20,16 +20,16 @@ python3 submission_runner.py \ --experiment_dir= \ --experiment_name= \ --workload= \ - --submission_path=reference_algorithms/prize_qualification_baselines/external_tuning/jax_nadamw_target_setting.py \ - --tuning_search_space=reference_algorithms/prize_qualification_baselines/external_tuning/tuning_search_space.json + --submission_path=prize_qualification_baselines/external_tuning/jax_nadamw_target_setting.py \ + --tuning_search_space=prize_qualification_baselines/external_tuning/tuning_search_space.json ``` ### PyTorch The prize qualification baseline submissionss for PyTorch are: -- `reference_algorithms/prize_qualification_baselines/external_tuning/pytorch_nadamw_target_setting.py` -- `feference_algorithms/prize_qualification_baselines/external_tuning/pytorch_nadamw_full_budget.py` +- `prize_qualification_baselines/external_tuning/pytorch_nadamw_target_setting.py` +- `prize_qualification_baselines/external_tuning/pytorch_nadamw_full_budget.py` Example command: @@ -40,8 +40,8 @@ torchrun --redirects 1:0,2:0,3:0,4:0,5:0,6:0,7:0 --standalone --nnodes=1 --nproc --experiment_dir= \ --experiment_name=t \ --workload=\ - --submission_path=reference_algorithms/prize_qualification_baselines/external_tuning/pytorch_nadamw_target_setting.py \ - 
--tuning_search_space=reference_algorithms/prize_qualification_baselines/external_tuning/tuning_search_space.json + --submission_path=prize_qualification_baselines/external_tuning/pytorch_nadamw_target_setting.py \ + --tuning_search_space=prize_qualification_baselines/external_tuning/tuning_search_space.json ``` ## Self-tuning Ruleset @@ -50,8 +50,8 @@ torchrun --redirects 1:0,2:0,3:0,4:0,5:0,6:0,7:0 --standalone --nnodes=1 --nproc The prize qualification baseline submissionss for jax are: -- `reference_algorithms/prize_qualification_baselines/self_tuning/jax_nadamw_target_setting.py` -- `feference_algorithms/prize_qualification_baselines/self_tuning/jax_nadamw_full_budget.py` +- `prize_qualification_baselines/self_tuning/jax_nadamw_target_setting.py` +- `prize_qualification_baselines/self_tuning/jax_nadamw_full_budget.py` Example command: @@ -62,7 +62,7 @@ python3 submission_runner.py \ --experiment_dir= \ --experiment_name= \ --workload= \ - --submission_path=reference_algorithms/prize_qualification_baselines/self_tuning/jax_nadamw_target_setting.py \ + --submission_path=prize_qualification_baselines/self_tuning/jax_nadamw_target_setting.py \ --tuning_ruleset=self ``` @@ -70,8 +70,8 @@ python3 submission_runner.py \ The prize qualification baseline submissionss for PyTorch are: -- `reference_algorithms/prize_qualification_baselines/self_tuning/pytorch_nadamw_target_setting.py` -- `feference_algorithms/prize_qualification_baselines/self_tuning/pytorch_nadamw_full_budget.py` +- `prize_qualification_baselines/self_tuning/pytorch_nadamw_target_setting.py` +- `prize_qualification_baselines/self_tuning/pytorch_nadamw_full_budget.py` Example command: @@ -82,6 +82,6 @@ torchrun --redirects 1:0,2:0,3:0,4:0,5:0,6:0,7:0 --standalone --nnodes=1 --nproc --experiment_dir= \ --experiment_name=t \ --workload=\ - --submission_path=reference_algorithms/prize_qualification_baselines/self_tuning/pytorch_nadamw_target_setting.py \ + --submission_path=prize_qualification_baselines/self_tuning/pytorch_nadamw_target_setting.py \ --tuning_ruleset=self ``` diff --git a/reference_algorithms/prize_qualification_baselines/external_tuning/jax_nadamw_full_budget.py b/prize_qualification_baselines/external_tuning/jax_nadamw_full_budget.py similarity index 100% rename from reference_algorithms/prize_qualification_baselines/external_tuning/jax_nadamw_full_budget.py rename to prize_qualification_baselines/external_tuning/jax_nadamw_full_budget.py diff --git a/reference_algorithms/prize_qualification_baselines/external_tuning/jax_nadamw_target_setting.py b/prize_qualification_baselines/external_tuning/jax_nadamw_target_setting.py similarity index 100% rename from reference_algorithms/prize_qualification_baselines/external_tuning/jax_nadamw_target_setting.py rename to prize_qualification_baselines/external_tuning/jax_nadamw_target_setting.py diff --git a/reference_algorithms/prize_qualification_baselines/external_tuning/pytorch_nadamw_full_budget.py b/prize_qualification_baselines/external_tuning/pytorch_nadamw_full_budget.py similarity index 100% rename from reference_algorithms/prize_qualification_baselines/external_tuning/pytorch_nadamw_full_budget.py rename to prize_qualification_baselines/external_tuning/pytorch_nadamw_full_budget.py diff --git a/reference_algorithms/prize_qualification_baselines/external_tuning/pytorch_nadamw_target_setting.py b/prize_qualification_baselines/external_tuning/pytorch_nadamw_target_setting.py similarity index 100% rename from 
reference_algorithms/prize_qualification_baselines/external_tuning/pytorch_nadamw_target_setting.py rename to prize_qualification_baselines/external_tuning/pytorch_nadamw_target_setting.py diff --git a/reference_algorithms/prize_qualification_baselines/external_tuning/tuning_search_space.json b/prize_qualification_baselines/external_tuning/tuning_search_space.json similarity index 100% rename from reference_algorithms/prize_qualification_baselines/external_tuning/tuning_search_space.json rename to prize_qualification_baselines/external_tuning/tuning_search_space.json diff --git a/reference_algorithms/prize_qualification_baselines/self_tuning/jax_nadamw_full_budget.py b/prize_qualification_baselines/self_tuning/jax_nadamw_full_budget.py similarity index 100% rename from reference_algorithms/prize_qualification_baselines/self_tuning/jax_nadamw_full_budget.py rename to prize_qualification_baselines/self_tuning/jax_nadamw_full_budget.py diff --git a/reference_algorithms/prize_qualification_baselines/self_tuning/jax_nadamw_target_setting.py b/prize_qualification_baselines/self_tuning/jax_nadamw_target_setting.py similarity index 100% rename from reference_algorithms/prize_qualification_baselines/self_tuning/jax_nadamw_target_setting.py rename to prize_qualification_baselines/self_tuning/jax_nadamw_target_setting.py diff --git a/reference_algorithms/prize_qualification_baselines/self_tuning/pytorch_nadamw_full_budget.py b/prize_qualification_baselines/self_tuning/pytorch_nadamw_full_budget.py similarity index 100% rename from reference_algorithms/prize_qualification_baselines/self_tuning/pytorch_nadamw_full_budget.py rename to prize_qualification_baselines/self_tuning/pytorch_nadamw_full_budget.py diff --git a/reference_algorithms/prize_qualification_baselines/self_tuning/pytorch_nadamw_target_setting.py b/prize_qualification_baselines/self_tuning/pytorch_nadamw_target_setting.py similarity index 100% rename from reference_algorithms/prize_qualification_baselines/self_tuning/pytorch_nadamw_target_setting.py rename to prize_qualification_baselines/self_tuning/pytorch_nadamw_target_setting.py diff --git a/reference_algorithms/paper_baselines/README.md b/reference_algorithms/paper_baselines/README.md new file mode 100644 index 000000000..aadb7eab2 --- /dev/null +++ b/reference_algorithms/paper_baselines/README.md @@ -0,0 +1,14 @@ +# Baseline Submissions from the "Benchmarking Neural Network Training Algorithms" Paper + +This directory contains the baseline submissions for the [external tuning ruleset](../README.md#external-tuning-ruleset) as presented in our paper [Benchmarking Neural Network Training Algorithms](https://arxiv.org/abs/2306.07179). They are based on eight different update rules: + +- [Adafactor](/reference_algorithms/paper_baselines/adafactor) +- [AdamW](/reference_algorithms/paper_baselines/adamw) +- [LAMB](/reference_algorithms/paper_baselines/lamb) +- [SGD with Momentum](/reference_algorithms/paper_baselines/momentum) +- [NadamW](/reference_algorithms/paper_baselines/nadamw) +- [SGD with Nesterov Momentum](/reference_algorithms/paper_baselines/nesterov) +- [SAM](/reference_algorithms/paper_baselines/sam) +- [Shampoo](/reference_algorithms/paper_baselines/shampoo/) + +Each update rule has two different tuning search spaces, one where the first momentum parameter (often denoted $\beta_1$) is tuned and one where it is set to a fixed value. 
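For example, to run one of these baselines under the external tuning ruleset, point the submission runner at the baseline's submission module and at either of its two search spaces. A minimal sketch using the AdamW baseline with its fixed-$\beta_1$ search space; the workload and experiment names are placeholders, and the command shape follows the README examples elsewhere in this repository:

```bash
# Sketch: AdamW paper baseline with the search space in which beta_1 is fixed.
# Swap in tuning_search_space.json to tune beta_1 as well.
python3 submission_runner.py \
    --framework=jax \
    --workload=mnist \
    --experiment_dir=$HOME/experiments \
    --experiment_name=adamw_no_beta1 \
    --submission_path=reference_algorithms/paper_baselines/adamw/jax/submission.py \
    --tuning_search_space=reference_algorithms/paper_baselines/adamw/tuning_search_space_no_beta1.json
```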
diff --git a/baselines/__init__.py b/reference_algorithms/paper_baselines/__init__.py
similarity index 100%
rename from baselines/__init__.py
rename to reference_algorithms/paper_baselines/__init__.py
diff --git a/baselines/adafactor/__init__.py b/reference_algorithms/paper_baselines/adafactor/__init__.py
similarity index 100%
rename from baselines/adafactor/__init__.py
rename to reference_algorithms/paper_baselines/adafactor/__init__.py
diff --git a/baselines/adafactor/jax/__init__.py b/reference_algorithms/paper_baselines/adafactor/jax/__init__.py
similarity index 100%
rename from baselines/adafactor/jax/__init__.py
rename to reference_algorithms/paper_baselines/adafactor/jax/__init__.py
diff --git a/baselines/adafactor/jax/sharded_adafactor.py b/reference_algorithms/paper_baselines/adafactor/jax/sharded_adafactor.py
similarity index 100%
rename from baselines/adafactor/jax/sharded_adafactor.py
rename to reference_algorithms/paper_baselines/adafactor/jax/sharded_adafactor.py
diff --git a/baselines/adafactor/jax/submission.py b/reference_algorithms/paper_baselines/adafactor/jax/submission.py
similarity index 98%
rename from baselines/adafactor/jax/submission.py
rename to reference_algorithms/paper_baselines/adafactor/jax/submission.py
index ec8020e7e..2dd85c29b 100644
--- a/baselines/adafactor/jax/submission.py
+++ b/reference_algorithms/paper_baselines/adafactor/jax/submission.py
@@ -10,7 +10,8 @@
 import optax
 
 from algorithmic_efficiency import spec
-from baselines.adafactor.jax.sharded_adafactor import sharded_adafactor
+from reference_algorithms.paper_baselines.adafactor.jax.sharded_adafactor import \
+    sharded_adafactor
 
 _GRAD_CLIP_EPS = 1e-6
 
diff --git a/baselines/adafactor/pytorch/__init__.py b/reference_algorithms/paper_baselines/adafactor/pytorch/__init__.py
similarity index 100%
rename from baselines/adafactor/pytorch/__init__.py
rename to reference_algorithms/paper_baselines/adafactor/pytorch/__init__.py
diff --git a/baselines/adafactor/pytorch/submission.py b/reference_algorithms/paper_baselines/adafactor/pytorch/submission.py
similarity index 100%
rename from baselines/adafactor/pytorch/submission.py
rename to reference_algorithms/paper_baselines/adafactor/pytorch/submission.py
diff --git a/baselines/adafactor/tuning_search_space.json b/reference_algorithms/paper_baselines/adafactor/tuning_search_space.json
similarity index 100%
rename from baselines/adafactor/tuning_search_space.json
rename to reference_algorithms/paper_baselines/adafactor/tuning_search_space.json
diff --git a/baselines/adafactor/tuning_search_space_no_beta1.json b/reference_algorithms/paper_baselines/adafactor/tuning_search_space_no_beta1.json
similarity index 100%
rename from baselines/adafactor/tuning_search_space_no_beta1.json
rename to reference_algorithms/paper_baselines/adafactor/tuning_search_space_no_beta1.json
diff --git a/baselines/adamw/__init__.py b/reference_algorithms/paper_baselines/adamw/__init__.py
similarity index 100%
rename from baselines/adamw/__init__.py
rename to reference_algorithms/paper_baselines/adamw/__init__.py
diff --git a/baselines/adamw/jax/__init__.py b/reference_algorithms/paper_baselines/adamw/jax/__init__.py
similarity index 100%
rename from baselines/adamw/jax/__init__.py
rename to reference_algorithms/paper_baselines/adamw/jax/__init__.py
diff --git a/baselines/adamw/jax/submission.py b/reference_algorithms/paper_baselines/adamw/jax/submission.py
similarity index 100%
rename from baselines/adamw/jax/submission.py
rename to reference_algorithms/paper_baselines/adamw/jax/submission.py
diff --git a/baselines/adamw/pytorch/__init__.py b/reference_algorithms/paper_baselines/adamw/pytorch/__init__.py
similarity index 100%
rename from baselines/adamw/pytorch/__init__.py
rename to reference_algorithms/paper_baselines/adamw/pytorch/__init__.py
diff --git a/baselines/adamw/pytorch/submission.py b/reference_algorithms/paper_baselines/adamw/pytorch/submission.py
similarity index 100%
rename from baselines/adamw/pytorch/submission.py
rename to reference_algorithms/paper_baselines/adamw/pytorch/submission.py
diff --git a/baselines/adamw/tuning_search_space.json b/reference_algorithms/paper_baselines/adamw/tuning_search_space.json
similarity index 100%
rename from baselines/adamw/tuning_search_space.json
rename to reference_algorithms/paper_baselines/adamw/tuning_search_space.json
diff --git a/baselines/adamw/tuning_search_space_no_beta1.json b/reference_algorithms/paper_baselines/adamw/tuning_search_space_no_beta1.json
similarity index 100%
rename from baselines/adamw/tuning_search_space_no_beta1.json
rename to reference_algorithms/paper_baselines/adamw/tuning_search_space_no_beta1.json
diff --git a/baselines/lamb/__init__.py b/reference_algorithms/paper_baselines/lamb/__init__.py
similarity index 100%
rename from baselines/lamb/__init__.py
rename to reference_algorithms/paper_baselines/lamb/__init__.py
diff --git a/baselines/lamb/jax/__init__.py b/reference_algorithms/paper_baselines/lamb/jax/__init__.py
similarity index 100%
rename from baselines/lamb/jax/__init__.py
rename to reference_algorithms/paper_baselines/lamb/jax/__init__.py
diff --git a/baselines/lamb/jax/submission.py b/reference_algorithms/paper_baselines/lamb/jax/submission.py
similarity index 100%
rename from baselines/lamb/jax/submission.py
rename to reference_algorithms/paper_baselines/lamb/jax/submission.py
diff --git a/baselines/lamb/pytorch/__init__.py b/reference_algorithms/paper_baselines/lamb/pytorch/__init__.py
similarity index 100%
rename from baselines/lamb/pytorch/__init__.py
rename to reference_algorithms/paper_baselines/lamb/pytorch/__init__.py
diff --git a/baselines/lamb/pytorch/submission.py b/reference_algorithms/paper_baselines/lamb/pytorch/submission.py
similarity index 100%
rename from baselines/lamb/pytorch/submission.py
rename to reference_algorithms/paper_baselines/lamb/pytorch/submission.py
diff --git a/baselines/lamb/tuning_search_space.json b/reference_algorithms/paper_baselines/lamb/tuning_search_space.json
similarity index 100%
rename from baselines/lamb/tuning_search_space.json
rename to reference_algorithms/paper_baselines/lamb/tuning_search_space.json
diff --git a/baselines/lamb/tuning_search_space_no_beta1.json b/reference_algorithms/paper_baselines/lamb/tuning_search_space_no_beta1.json
similarity index 100%
rename from baselines/lamb/tuning_search_space_no_beta1.json
rename to reference_algorithms/paper_baselines/lamb/tuning_search_space_no_beta1.json
diff --git a/baselines/momentum/__init__.py b/reference_algorithms/paper_baselines/momentum/__init__.py
similarity index 100%
rename from baselines/momentum/__init__.py
rename to reference_algorithms/paper_baselines/momentum/__init__.py
diff --git a/baselines/momentum/jax/__init__.py b/reference_algorithms/paper_baselines/momentum/jax/__init__.py
similarity index 100%
rename from baselines/momentum/jax/__init__.py
rename to reference_algorithms/paper_baselines/momentum/jax/__init__.py
diff --git a/baselines/momentum/jax/submission.py b/reference_algorithms/paper_baselines/momentum/jax/submission.py
similarity index 100%
rename from baselines/momentum/jax/submission.py
rename to reference_algorithms/paper_baselines/momentum/jax/submission.py
diff --git a/baselines/momentum/pytorch/__init__.py b/reference_algorithms/paper_baselines/momentum/pytorch/__init__.py
similarity index 100%
rename from baselines/momentum/pytorch/__init__.py
rename to reference_algorithms/paper_baselines/momentum/pytorch/__init__.py
diff --git a/baselines/momentum/pytorch/submission.py b/reference_algorithms/paper_baselines/momentum/pytorch/submission.py
similarity index 100%
rename from baselines/momentum/pytorch/submission.py
rename to reference_algorithms/paper_baselines/momentum/pytorch/submission.py
diff --git a/baselines/momentum/tuning_search_space.json b/reference_algorithms/paper_baselines/momentum/tuning_search_space.json
similarity index 100%
rename from baselines/momentum/tuning_search_space.json
rename to reference_algorithms/paper_baselines/momentum/tuning_search_space.json
diff --git a/baselines/momentum/tuning_search_space_no_beta1.json b/reference_algorithms/paper_baselines/momentum/tuning_search_space_no_beta1.json
similarity index 100%
rename from baselines/momentum/tuning_search_space_no_beta1.json
rename to reference_algorithms/paper_baselines/momentum/tuning_search_space_no_beta1.json
diff --git a/baselines/nadamw/__init__.py b/reference_algorithms/paper_baselines/nadamw/__init__.py
similarity index 100%
rename from baselines/nadamw/__init__.py
rename to reference_algorithms/paper_baselines/nadamw/__init__.py
diff --git a/baselines/nadamw/jax/__init__.py b/reference_algorithms/paper_baselines/nadamw/jax/__init__.py
similarity index 100%
rename from baselines/nadamw/jax/__init__.py
rename to reference_algorithms/paper_baselines/nadamw/jax/__init__.py
diff --git a/baselines/nadamw/jax/submission.py b/reference_algorithms/paper_baselines/nadamw/jax/submission.py
similarity index 100%
rename from baselines/nadamw/jax/submission.py
rename to reference_algorithms/paper_baselines/nadamw/jax/submission.py
diff --git a/baselines/nadamw/pytorch/__init__.py b/reference_algorithms/paper_baselines/nadamw/pytorch/__init__.py
similarity index 100%
rename from baselines/nadamw/pytorch/__init__.py
rename to reference_algorithms/paper_baselines/nadamw/pytorch/__init__.py
diff --git a/baselines/nadamw/pytorch/submission.py b/reference_algorithms/paper_baselines/nadamw/pytorch/submission.py
similarity index 100%
rename from baselines/nadamw/pytorch/submission.py
rename to reference_algorithms/paper_baselines/nadamw/pytorch/submission.py
diff --git a/baselines/nadamw/tuning_search_space.json b/reference_algorithms/paper_baselines/nadamw/tuning_search_space.json
similarity index 100%
rename from baselines/nadamw/tuning_search_space.json
rename to reference_algorithms/paper_baselines/nadamw/tuning_search_space.json
diff --git a/baselines/nadamw/tuning_search_space_no_beta1.json b/reference_algorithms/paper_baselines/nadamw/tuning_search_space_no_beta1.json
similarity index 100%
rename from baselines/nadamw/tuning_search_space_no_beta1.json
rename to reference_algorithms/paper_baselines/nadamw/tuning_search_space_no_beta1.json
diff --git a/baselines/nesterov/__init__.py b/reference_algorithms/paper_baselines/nesterov/__init__.py
similarity index 100%
rename from baselines/nesterov/__init__.py
rename to reference_algorithms/paper_baselines/nesterov/__init__.py
diff --git a/baselines/nesterov/jax/__init__.py b/reference_algorithms/paper_baselines/nesterov/jax/__init__.py
similarity index 100%
rename from baselines/nesterov/jax/__init__.py
rename to reference_algorithms/paper_baselines/nesterov/jax/__init__.py
diff --git a/baselines/nesterov/jax/submission.py b/reference_algorithms/paper_baselines/nesterov/jax/submission.py
similarity index 100%
rename from baselines/nesterov/jax/submission.py
rename to reference_algorithms/paper_baselines/nesterov/jax/submission.py
diff --git a/baselines/nesterov/pytorch/__init__.py b/reference_algorithms/paper_baselines/nesterov/pytorch/__init__.py
similarity index 100%
rename from baselines/nesterov/pytorch/__init__.py
rename to reference_algorithms/paper_baselines/nesterov/pytorch/__init__.py
diff --git a/baselines/nesterov/pytorch/submission.py b/reference_algorithms/paper_baselines/nesterov/pytorch/submission.py
similarity index 100%
rename from baselines/nesterov/pytorch/submission.py
rename to reference_algorithms/paper_baselines/nesterov/pytorch/submission.py
diff --git a/baselines/nesterov/tuning_search_space.json b/reference_algorithms/paper_baselines/nesterov/tuning_search_space.json
similarity index 100%
rename from baselines/nesterov/tuning_search_space.json
rename to reference_algorithms/paper_baselines/nesterov/tuning_search_space.json
diff --git a/baselines/nesterov/tuning_search_space_no_beta1.json b/reference_algorithms/paper_baselines/nesterov/tuning_search_space_no_beta1.json
similarity index 100%
rename from baselines/nesterov/tuning_search_space_no_beta1.json
rename to reference_algorithms/paper_baselines/nesterov/tuning_search_space_no_beta1.json
diff --git a/baselines/sam/__init__.py b/reference_algorithms/paper_baselines/sam/__init__.py
similarity index 100%
rename from baselines/sam/__init__.py
rename to reference_algorithms/paper_baselines/sam/__init__.py
diff --git a/baselines/sam/jax/__init__.py b/reference_algorithms/paper_baselines/sam/jax/__init__.py
similarity index 100%
rename from baselines/sam/jax/__init__.py
rename to reference_algorithms/paper_baselines/sam/jax/__init__.py
diff --git a/baselines/sam/jax/submission.py b/reference_algorithms/paper_baselines/sam/jax/submission.py
similarity index 100%
rename from baselines/sam/jax/submission.py
rename to reference_algorithms/paper_baselines/sam/jax/submission.py
diff --git a/baselines/sam/pytorch/__init__.py b/reference_algorithms/paper_baselines/sam/pytorch/__init__.py
similarity index 100%
rename from baselines/sam/pytorch/__init__.py
rename to reference_algorithms/paper_baselines/sam/pytorch/__init__.py
diff --git a/baselines/sam/pytorch/submission.py b/reference_algorithms/paper_baselines/sam/pytorch/submission.py
similarity index 100%
rename from baselines/sam/pytorch/submission.py
rename to reference_algorithms/paper_baselines/sam/pytorch/submission.py
diff --git a/baselines/sam/tuning_search_space.json b/reference_algorithms/paper_baselines/sam/tuning_search_space.json
similarity index 100%
rename from baselines/sam/tuning_search_space.json
rename to reference_algorithms/paper_baselines/sam/tuning_search_space.json
diff --git a/baselines/sam/tuning_search_space_no_beta1.json b/reference_algorithms/paper_baselines/sam/tuning_search_space_no_beta1.json
similarity index 100%
rename from baselines/sam/tuning_search_space_no_beta1.json
rename to reference_algorithms/paper_baselines/sam/tuning_search_space_no_beta1.json
diff --git a/baselines/shampoo/__init__.py b/reference_algorithms/paper_baselines/shampoo/__init__.py
similarity index 100%
rename from baselines/shampoo/__init__.py
rename to reference_algorithms/paper_baselines/shampoo/__init__.py
diff --git a/baselines/shampoo/jax/__init__.py b/reference_algorithms/paper_baselines/shampoo/jax/__init__.py
similarity index 100%
rename from baselines/shampoo/jax/__init__.py
rename to reference_algorithms/paper_baselines/shampoo/jax/__init__.py
diff --git a/baselines/shampoo/jax/distributed_shampoo.py b/reference_algorithms/paper_baselines/shampoo/jax/distributed_shampoo.py
similarity index 100%
rename from baselines/shampoo/jax/distributed_shampoo.py
rename to reference_algorithms/paper_baselines/shampoo/jax/distributed_shampoo.py
diff --git a/baselines/shampoo/jax/submission.py b/reference_algorithms/paper_baselines/shampoo/jax/submission.py
similarity index 98%
rename from baselines/shampoo/jax/submission.py
rename to reference_algorithms/paper_baselines/shampoo/jax/submission.py
index cb062faf3..9c6b66b7f 100644
--- a/baselines/shampoo/jax/submission.py
+++ b/reference_algorithms/paper_baselines/shampoo/jax/submission.py
@@ -10,7 +10,8 @@
 import optax
 
 from algorithmic_efficiency import spec
-from baselines.shampoo.jax.distributed_shampoo import distributed_shampoo
+from reference_algorithms.paper_baselines.shampoo.jax.distributed_shampoo import \
+    distributed_shampoo
 
 _GRAD_CLIP_EPS = 1e-6
 
diff --git a/baselines/shampoo/pytorch/__init__.py b/reference_algorithms/paper_baselines/shampoo/pytorch/__init__.py
similarity index 100%
rename from baselines/shampoo/pytorch/__init__.py
rename to reference_algorithms/paper_baselines/shampoo/pytorch/__init__.py
diff --git a/baselines/shampoo/tuning_search_space.json b/reference_algorithms/paper_baselines/shampoo/tuning_search_space.json
similarity index 100%
rename from baselines/shampoo/tuning_search_space.json
rename to reference_algorithms/paper_baselines/shampoo/tuning_search_space.json
diff --git a/baselines/shampoo/tuning_search_space_no_beta1.json b/reference_algorithms/paper_baselines/shampoo/tuning_search_space_no_beta1.json
similarity index 100%
rename from baselines/shampoo/tuning_search_space_no_beta1.json
rename to reference_algorithms/paper_baselines/shampoo/tuning_search_space_no_beta1.json
diff --git a/tests/test_baselines.py b/tests/test_baselines.py
index 0a26aa69d..f79e629e7 100644
--- a/tests/test_baselines.py
+++ b/tests/test_baselines.py
@@ -47,6 +47,8 @@
     'jax',
 ]
 
+baseline_path = "reference_algorithms/paper_baselines"
+
 named_parameters = []
 for f in frameworks:
   for b in baselines[f]:
@@ -55,8 +57,9 @@
             testcase_name=f'{b}_{f}',
             workload='mnist',
             framework=f'{f}',
-            submission_path=f'baselines/{b}/{f}/submission.py',
-            tuning_search_space=f'baselines/{b}/tuning_search_space.json'))
+            submission_path=f'{baseline_path}/{b}/{f}/submission.py',
+            tuning_search_space=f'{baseline_path}/{b}/tuning_search_space.json')
+    )
 
 
 class BaselineTest(parameterized.TestCase):
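Note: every modified hunk above follows the same pattern — module imports and test paths that previously pointed at baselines/ are now built from the new reference_algorithms/paper_baselines root. The following is a minimal, illustrative sketch of the path construction used in the tests/test_baselines.py hunk; the frameworks list and baselines mapping here are hypothetical stand-ins, not the values defined in the real test module, and only the path-building pattern is taken from the diff.

    # Sketch only: mirrors the path construction in the tests/test_baselines.py hunk.
    # `frameworks` and `baselines` below are assumed example values for illustration.
    baseline_path = "reference_algorithms/paper_baselines"

    frameworks = ["jax", "pytorch"]
    baselines = {
        "jax": ["adamw", "adafactor", "shampoo"],
        "pytorch": ["adamw", "nadamw"],
    }

    named_parameters = []
    for f in frameworks:
      for b in baselines[f]:
        named_parameters.append(
            dict(
                testcase_name=f"{b}_{f}",
                workload="mnist",
                framework=f,
                # Paths are rooted at the renamed paper_baselines directory.
                submission_path=f"{baseline_path}/{b}/{f}/submission.py",
                tuning_search_space=f"{baseline_path}/{b}/tuning_search_space.json"))

    # Print the paths each generated test case would use under the new layout.
    for params in named_parameters:
      print(params["testcase_name"], params["submission_path"])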