Autoscale (#177)

--------- Co-authored-by: pierre.delaunay <[email protected]>
mila-iqia · Nov 21, 2023 · 1f1bb16 · 1f1bb16
1 parent f98430b
commit 1f1bb16
Show file tree

Hide file tree

Showing 18 changed files with 947 additions and 141 deletions.
diff --git a/config/scaling.yaml b/config/scaling.yaml
@@ -0,0 +1,239 @@
+bert-fp16:
+  arg: --batch-size
+  model:
+    1: 4108.75 MiB
+    8: 8614.75 MiB
+    16: 14254.75 MiB
+    32: 24604.75 MiB
+    64: 47216.75 MiB
+    112: 81140.75 MiB
+  optimized: 128
+bert-fp32:
+  arg: --batch-size
+  model:
+    1: 4206.75 MiB
+    8: 10240.75 MiB
+    16: 17646.75 MiB
+    32: 31568.75 MiB
+    64: 61200.75 MiB
+    80: 76034.75 MiB
+  optimized: 128
+bert-tf32:
+  arg: --batch-size
+  model:
+    1: 4204.75 MiB
+    8: 10242.75 MiB
+    16: 17648.75 MiB
+    32: 31570.75 MiB
+    64: 61202.75 MiB
+    80: 76036.75 MiB
+  optimized: 128
+bert-tf32-fp16:
+  arg: --batch-size
+  model:
+    1: 4108.75 MiB
+    8: 8614.75 MiB
+    16: 14254.75 MiB
+    32: 24604.75 MiB
+    64: 47216.75 MiB
+    112: 81140.75 MiB
+  optimized: 128
+convnext_large-fp16:
+  arg: --batch-size
+  model:
+    1: 3228.75 MiB
+    8: 4726.75 MiB
+    16: 6254.75 MiB
+    32: 9418.75 MiB
+    40: 10940.75 MiB
+    64: 15238.75 MiB
+    128: 27466.75 MiB
+    416: 80628.75 MiB
+  optimized: 128
+convnext_large-fp32:
+  arg: --batch-size
+  model:
+    1: 3268.75 MiB
+    8: 5824.75 MiB
+    16: 8774.75 MiB
+    32: 14548.75 MiB
+    64: 26274.75 MiB
+    128: 49586.75 MiB
+    216: 80694.75 MiB
+  optimized: 128
+convnext_large-tf32:
+  arg: --batch-size
+  model:
+    1: 3268.75 MiB
+    8: 5824.75 MiB
+    16: 8774.75 MiB
+    32: 14548.75 MiB
+    64: 26274.75 MiB
+    128: 49586.75 MiB
+    216: 80694.75 MiB
+  optimized: 128
+convnext_large-tf32-fp16:
+  arg: --batch-size
+  model:
+    1: 3228.75 MiB
+    8: 4726.75 MiB
+    16: 6254.75 MiB
+    32: 9418.75 MiB
+    40: 10940.75 MiB
+    64: 15238.75 MiB
+    128: 27466.75 MiB
+    416: 80628.75 MiB
+  optimized: 128
+davit_large:
+  arg: --batch-size
+  model:
+    1: 4882.75 MiB
+    8: 6330.75 MiB
+    16: 8216.75 MiB
+    24: 10182.75 MiB
+    32: 12240.75 MiB
+    64: 19422.75 MiB
+    128: 34492.75 MiB
+    328: 81502.75 MiB
+  optimized: 128
+davit_large-multi:
+  arg: --batch-size
+  model:
+    1: 4862.75 MiB
+    8: 6330.75 MiB
+    16: 8216.75 MiB
+    24: 10730.75 MiB
+    32: 12240.75 MiB
+    64: 19422.75 MiB
+    128: 34248.75 MiB
+    328: 81742.75 MiB
+  optimized: 128
+focalnet:
+  arg: --batch-size
+  model:
+    1: 3128.75 MiB
+    8: 4368.75 MiB
+    16: 5608.75 MiB
+    32: 8566.75 MiB
+    40: 9850.75 MiB
+    64: 14750.75 MiB
+    128: 26398.75 MiB
+    424: 81368.75 MiB
+  optimized: 128
+opt-1_3b:
+  arg: --per_gpu_batch_size
+  optimized: 1
+opt-1_3b-multinode:
+  arg: --per_gpu_batch_size
+  optimized: 1
+opt-6_7b-multinode:
+  arg: --per_gpu_batch_size
+  optimized: 1
+reformer:
+  arg: --batch-size
+  model:
+    1: 1916.75 MiB
+    8: 4512.75 MiB
+    16: 7486.75 MiB
+    24: 10470.75 MiB
+    32: 13454.75 MiB
+    64: 25408.75 MiB
+    128: 49280.75 MiB
+    208: 79120.75 MiB
+  optimized: 128
+regnet_y_128gf:
+  arg: --batch-size
+  model:
+    1: 6876.75 MiB
+    8: 8524.75 MiB
+    16: 11426.75 MiB
+    32: 18324.75 MiB
+    64: 31558.75 MiB
+    128: 56484.75 MiB
+    184: 78714.75 MiB
+  optimized: 128
+resnet152:
+  arg: --batch-size
+  model:
+    1: 2710.75 MiB
+    8: 3298.75 MiB
+    16: 4164.75 MiB
+    32: 6202.75 MiB
+    64: 10120.75 MiB
+    72: 10860.75 MiB
+    128: 18076.75 MiB
+    640: 81354.75 MiB
+  optimized: 128
+resnet152-multi:
+  arg: --batch-size
+  model:
+    1: 2600.75 MiB
+    8: 3374.75 MiB
+    16: 4148.75 MiB
+    32: 6374.75 MiB
+    64: 10338.75 MiB
+    72: 10582.75 MiB
+    128: 18104.75 MiB
+    640: 81820.75 MiB
+  optimized: 128
+resnet50:
+  arg: --batch-size
+  model:
+    1: 1962.75 MiB
+    8: 2134.75 MiB
+    16: 2460.75 MiB
+    32: 3206.75 MiB
+    64: 4734.75 MiB
+    128: 8242.75 MiB
+    184: 11072.75 MiB
+    256: 14854.75 MiB
+    512: 27900.75 MiB
+    1552: 81146.75 MiB
+    1560: 81590.75 MiB
+  optimized: 64
+rwkv:
+  arg: --micro_bsz
+  model:
+    1: 3602.75 MiB
+    8: 4530.75 MiB
+    16: 5594.75 MiB
+    64: 11452.75 MiB
+    128: 19448.75 MiB
+    632: 81880.75 MiB
+  optimized: 16
+stargan:
+  arg: --batch_size
+  model:
+    1: 37896.75 MiB
+    8: 19165.75 MiB
+    16: 37430.75 MiB
+    32: 73824.75 MiB
+  optimized: 16
+super-slomo:
+  arg: --train_batch_size
+  model:
+    1: 3016.75 MiB
+    8: 10288.75 MiB
+    16: 18718.75 MiB
+    64: 66308.75 MiB
+    80: 81180.75 MiB
+  optimized: 32
+t5:
+  arg: --batch-size
+  model:
+    1: 4396.75 MiB
+    8: 18684.75 MiB
+    16: 35448.75 MiB
+    32: 68876.75 MiB
+  optimized: 128
+whisper:
+  arg: --batch-size
+  model:
+    1: 2070.75 MiB
+    8: 6108.75 MiB
+    16: 10540.75 MiB
+    32: 19282.75 MiB
+    64: 36728.75 MiB
+    128: 71638.75 MiB
+    144: 80412.75 MiB
+  optimized: 128
diff --git a/docs/conf.py b/docs/conf.py
@@ -17,9 +17,9 @@
 
 # -- Project information -----------------------------------------------------
 
-project = 'milabench'
-copyright = '2022, Mila IDT'
-author = 'Mila IDT'
+project = "milabench"
+copyright = "2022, Mila IDT"
+author = "Mila IDT"
 
 
 # -- General configuration ---------------------------------------------------
@@ -34,12 +34,12 @@
 ]
 
 # Add any paths that contain templates here, relative to this directory.
-templates_path = ['_templates']
+templates_path = ["_templates"]
 
 # List of patterns, relative to source directory, that match files and
 # directories to ignore when looking for source files.
 # This pattern also affects html_static_path and html_extra_path.
-exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store']
+exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"]
 
 
 # -- Options for HTML output -------------------------------------------------
@@ -52,4 +52,4 @@
 # Add any paths that contain custom static files (such as style sheets) here,
 # relative to this directory. They are copied after the builtin static files,
 # so a file named "default.css" will overwrite the builtin "default.css".
-html_static_path = ['_static']
+html_static_path = ["_static"]
diff --git a/docs/index.rst b/docs/index.rst
@@ -11,7 +11,7 @@ Welcome to milabench's documentation!
    dev-usage.rst
    new_benchmarks.rst
    reference.rst
-
+   sizer.rst
 
 Indices and tables
 ==================

diff --git a/docs/sizer.rst b/docs/sizer.rst
@@ -0,0 +1,78 @@
+Scaling
+=======
+
+Milabench is able to select a batch size depending on the
+underlying GPU capacity.
+
+The feature is driven by the ``config/scaling.yaml`` file,
+which records the memory usage of each benchmark
+for a given batch size.
+
+
+.. code-block:: yaml
+
+   convnext_large-fp32:
+     arg: --batch-size
+     optimized: 128
+     model:
+       8: 5824.75 MiB
+       16: 8774.75 MiB
+       32: 14548.75 MiB
+       64: 26274.75 MiB
+       128: 49586.75 MiB
+
+
+Auto Batch size
+---------------
+
+To enable batch resizing, an environment variable can be specified.
+It will use the capacity inside the ``system.yaml`` configuration file.
+
+.. code-block:: yaml
+
+   system:
+     arch: cuda
+     gpu:
+       capacity: 81920 MiB
+     nodes: []
+
+
+.. code-block:: bash
+    
+   MILABENCH_SIZER_AUTO=1 milabench run --system system.yaml
+
+
+For better performance, a multiple constraint can be added.
+This will force batch size to be a multiple of 8.
+
+.. code-block:: bash
+   
+   MILABENCH_SIZER_MULTIPLE=8 milabench run
+
+
+Batch size override
+-------------------
+
+The batch size can be globally overridden:
+
+.. code-block:: bash
+
+   MILABENCH_SIZER_BATCH_SIZE=64 milabench run
+
+
+Memory Usage Extractor
+----------------------
+
+To automate the gathering of batch size ``<=>`` memory usage data,
+a validation layer that retrieves the batch size and the memory usage
+can be enabled.
+
+In the example below, once milabench has finished running it will
+generate a new scaling configuration with the data extracted from the run.
+
+
+.. code-block:: bash
+
+   export MILABENCH_SIZER_SAVE="newscaling.yaml"
+   MILABENCH_SIZER_BATCH_SIZE=64 milabench run
+