Update

[ghstack-poisoned]
pytorch · Jan 24, 2025 · 17e7e77 · 17e7e77
2 parents 17d162c + 860da26
commit 17e7e77
Show file tree

Hide file tree

Showing 41 changed files with 2,345 additions and 620 deletions.
diff --git a/.github/workflows/doc_build.yml b/.github/workflows/doc_build.yml
@@ -9,6 +9,9 @@ on:
     tags:
       - v[0-9]+.[0-9]+.[0-9]
       - v[0-9]+.[0-9]+.[0-9]+-rc[0-9]+
+    paths:
+      - 'docs/**'
+      - '!docs/**'
   pull_request:
   workflow_dispatch:
 

diff --git a/.github/workflows/float8_test.yml b/.github/workflows/float8_test.yml
@@ -25,9 +25,14 @@ jobs:
         include:
           - name: SM-89
             runs-on: linux.g6.4xlarge.experimental.nvidia.gpu
-            torch-spec: '--pre torch --index-url https://download.pytorch.org/whl/nightly/cu121'
+            torch-spec: '--pre torch==2.7.0.dev20250122 --index-url https://download.pytorch.org/whl/nightly/cu124'
             gpu-arch-type: "cuda"
-            gpu-arch-version: "12.1"
+            gpu-arch-version: "12.4"
+          - name: H100
+            runs-on: linux.aws.h100
+            torch-spec: '--pre torch torchvision torchaudio --index-url https://download.pytorch.org/whl/nightly/cu124'
+            gpu-arch-type: "cuda"
+            gpu-arch-version: "12.4"
 
     permissions:
       id-token: write

diff --git a/.github/workflows/float8nocompile_test.yaml b/.github/workflows/float8nocompile_test.yaml
@@ -0,0 +1,55 @@
+name: Run Float8nocompile Tests
+
+on:
+  push:
+    branches:
+      - main
+      - 'gh/**'
+    paths:
+      - 'torchao/prototype/float8nocompile/**'
+      - '!torchao/prototype/float8nocompile/**'
+  pull_request:
+    branches:
+      - main
+      - 'gh/**'
+    paths:
+      - 'torchao/prototype/float8nocompile/**'
+      - '!torchao/prototype/float8nocompile/**'
+
+concurrency:
+  group: floatnocompile_test-${{ github.workflow }}-${{ github.ref == 'refs/heads/main' && github.run_number || github.ref }}
+  cancel-in-progress: true
+
+env:
+  HF_TOKEN: ${{ secrets.HF_TOKEN }}
+
+jobs:
+  test:
+    strategy:
+      fail-fast: false
+      matrix:
+        include:
+          - name: SM-89
+            runs-on: linux.g6.4xlarge.experimental.nvidia.gpu
+            torch-spec: '--pre torch --index-url https://download.pytorch.org/whl/nightly/cu121'
+            gpu-arch-type: "cuda"
+            gpu-arch-version: "12.1"
+
+    uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
+    with:
+      timeout: 300
+      runner: ${{ matrix.runs-on }}
+      gpu-arch-type: ${{ matrix.gpu-arch-type }}
+      gpu-arch-version: ${{ matrix.gpu-arch-version }}
+      submodules: recursive
+      script: |
+        conda create -n venv python=3.9 -y
+        conda activate venv
+        export PATH=/opt/rh/devtoolset-10/root/usr/bin/:$PATH
+        python -m pip install --upgrade pip
+        pip install ${{ matrix.torch-spec }}
+        pip install -r dev-requirements.txt
+        pip install .
+        cd torchao/prototype/float8nocompile
+        pytest kernels/ --verbose -s
+        pytest test/train_test.py --verbose -s
diff --git a/.github/workflows/nightly_smoke_test.yml b/.github/workflows/nightly_smoke_test.yml
@@ -21,9 +21,9 @@ jobs:
         include:
           - name: CUDA Nightly
             runs-on: linux.g5.12xlarge.nvidia.gpu
-            torch-spec: '--pre torch --index-url https://download.pytorch.org/whl/nightly/cu121'
+            torch-spec: '--pre torch==2.7.0.dev20250122 --index-url https://download.pytorch.org/whl/nightly/cu124'
             gpu-arch-type: "cuda"
-            gpu-arch-version: "12.1"
+            gpu-arch-version: "12.4"
 
     permissions:
       id-token: write

diff --git a/.github/workflows/regression_test.yml b/.github/workflows/regression_test.yml
@@ -25,12 +25,12 @@ jobs:
         include:
           - name: CUDA Nightly
             runs-on: linux.g5.12xlarge.nvidia.gpu
-            torch-spec: '--pre torch --index-url https://download.pytorch.org/whl/nightly/cu124'
+            torch-spec: '--pre torch==2.7.0.dev20250122 --index-url https://download.pytorch.org/whl/nightly/cu124'
             gpu-arch-type: "cuda"
             gpu-arch-version: "12.4"
           - name: CPU Nightly
             runs-on: linux.4xlarge
-            torch-spec: '--pre torch --index-url https://download.pytorch.org/whl/nightly/cpu'
+            torch-spec: '--pre torch==2.7.0.dev20250122 --index-url https://download.pytorch.org/whl/nightly/cpu'
             gpu-arch-type: "cpu"
             gpu-arch-version: ""
 

diff --git a/.gitignore b/.gitignore
@@ -262,7 +262,7 @@ docs/dev
 docs/build
 docs/source/tutorials/*
 docs/source/gen_modules/*
-docs/source/sg_execution_times
+docs/source/sg_execution_times.rst
 
 # LevelDB files
 *.sst

diff --git a/docs/source/_templates/layout.html b/docs/source/_templates/layout.html
@@ -2,7 +2,7 @@
 
 {% block sidebartitle %}
     <div class="version">
-        <a href='tba'>{{ version }} &#x25BC</a>
+        <a href='index.html'>{{ version }} &#x25BC</a>
     </div>
     {% include "searchbox.html" %}
 {% endblock %}
@@ -22,7 +22,7 @@
     // to point to the torchao repo.
     var overwrite = function (_) {
       if ($(this).length > 0) {
-        $(this)[0].href = "https://github.com/pytorch-labs/ao"
+        $(this)[0].href = "https://github.com/pytorch/ao"
       }
     }
     // PC

diff --git a/docs/source/api_ref_dtypes.rst b/docs/source/api_ref_dtypes.rst
@@ -6,19 +6,42 @@ torchao.dtypes
 
 .. currentmodule:: torchao.dtypes
 
+Layouts and Tensor Subclasses
+-----------------------------
+.. autosummary::
+    :toctree: generated/
+    :nosignatures:
+
+    NF4Tensor
+    AffineQuantizedTensor
+    Layout
+    PlainLayout
+    SemiSparseLayout
+    TensorCoreTiledLayout
+    Float8Layout
+    FloatxTensor
+    FloatxTensorCoreLayout
+    MarlinSparseLayout
+    BlockSparseLayout
+    UintxLayout
+    MarlinQQQTensor
+    MarlinQQQLayout
+    Int4CPULayout
+    CutlassInt4PackedLayout
+
+Quantization techniques
+-----------------------
 .. autosummary::
     :toctree: generated/
     :nosignatures:
 
-    to_nf4
     to_affine_quantized_intx
     to_affine_quantized_intx_static
+    to_affine_quantized_fpx
     to_affine_quantized_floatx
     to_affine_quantized_floatx_static
-    to_affine_quantized_fpx
-    NF4Tensor
-    AffineQuantizedTensor
-
+    to_marlinqqq_quantized_intx
+    to_nf4
 ..
   _NF4Tensor - add after fixing torchao/dtypes/nf4tensor.py:docstring
   of torchao.dtypes.nf4tensor.NF4Tensor.dequantize_scalers:6:Unexpected indentation.
diff --git a/docs/source/api_ref_sparsity.rst b/docs/source/api_ref_sparsity.rst
@@ -12,7 +12,7 @@ torchao.sparsity
 
     WandaSparsifier
     PerChannelNormObserver
-    apply_sparse_semi_structured
     apply_fake_sparsity
-
-
+    sparsify_
+    semi_sparse_weight
+    int8_dynamic_activation_int8_semi_sparse_weight