diff --git a/torchao/_models/llama/generate.py b/torchao/_models/llama/generate.py index 7779927b9b..5635ed8d23 100644 --- a/torchao/_models/llama/generate.py +++ b/torchao/_models/llama/generate.py @@ -548,14 +548,15 @@ def ffn_or_attn_only(mod, fqn): precision == torch.float32 ), "int8_dynamic_activation_intx_weight requires fp32 precision" - # Build kernels in temp location, and load them in torch - # This requires an ARM CPU - from torchao.experimental.temp_build import temp_build_and_load_torchao_ops - - temp_build_and_load_torchao_ops( - cmake_lists_path=os.path.dirname(os.path.realpath(__file__)) - + "/../../experimental" - ) + try: + torch.ops.torchao._pack_8bit_act_4bit_weight + except AttributeError: + print( + "Unable to load experimental torchao kernels. Performance will be slow." + ) + print( + "To install the kernels, run `USE_CPP=1 pip install .` from ao on a machine with an ARM CPU" + ) # Quantize model _quant_args = quantization.split("-") diff --git a/torchao/experimental/tests/test_embedding_xbit_quantizer.py b/torchao/experimental/tests/test_embedding_xbit_quantizer.py index 98eaf9a411..40bfc6f53e 100644 --- a/torchao/experimental/tests/test_embedding_xbit_quantizer.py +++ b/torchao/experimental/tests/test_embedding_xbit_quantizer.py @@ -5,57 +5,17 @@ # LICENSE file in the root directory of this source tree. 
import copy -import glob -import os -import subprocess -import sys import tempfile import unittest import torch -sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "../../.."))) from torchao.experimental.quant_api import ( IntxWeightEmbeddingQuantizer, _IntxWeightQuantizedEmbeddingFallback, ) -def cmake_build_torchao_ops(temp_build_dir): - from distutils.sysconfig import get_python_lib - - print("Building torchao ops for ATen target") - cmake_prefix_path = get_python_lib() - dir_path = os.path.dirname(os.path.realpath(__file__)) - subprocess.run( - [ - "cmake", - "-DCMAKE_PREFIX_PATH=" + cmake_prefix_path, - "-DCMAKE_INSTALL_PREFIX=" + temp_build_dir.name, - "-S " + dir_path + "/../", - "-B " + temp_build_dir.name, - ] - ) - subprocess.run( - [ - "cmake", - "--build", - temp_build_dir.name, - "-j 16", - "--target install", - "--config Release", - ] - ) - - -temp_build_dir = tempfile.TemporaryDirectory() -cmake_build_torchao_ops(temp_build_dir) -libs = glob.glob(f"{temp_build_dir.name}/lib/libtorchao_ops_aten.*") -libs = list(filter(lambda l: (l.endswith("so") or l.endswith("dylib")), libs)) -assert len(libs) == 1 -torch.ops.load_library(libs[0]) - - class TestEmbeddingQuantizer(unittest.TestCase): def test_accuracy(self): group_size = 128 diff --git a/torchao/experimental/tests/test_linear_8bit_act_xbit_weight_quantizer.py b/torchao/experimental/tests/test_linear_8bit_act_xbit_weight_quantizer.py index 17f839979b..926d15e262 100644 --- a/torchao/experimental/tests/test_linear_8bit_act_xbit_weight_quantizer.py +++ b/torchao/experimental/tests/test_linear_8bit_act_xbit_weight_quantizer.py @@ -5,57 +5,17 @@ # LICENSE file in the root directory of this source tree. 
import copy -import glob -import os -import subprocess -import sys import tempfile import unittest import torch -sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "../../.."))) from torchao.experimental.quant_api import ( Int8DynActIntxWeightLinearQuantizer, _Int8DynActIntxWeightQuantizedLinearFallback, ) -def cmake_build_torchao_ops(temp_build_dir): - from distutils.sysconfig import get_python_lib - - print("Building torchao ops for ATen target") - cmake_prefix_path = get_python_lib() - dir_path = os.path.dirname(os.path.realpath(__file__)) - subprocess.run( - [ - "cmake", - "-DCMAKE_PREFIX_PATH=" + cmake_prefix_path, - "-DCMAKE_INSTALL_PREFIX=" + temp_build_dir.name, - "-S " + dir_path + "/../", - "-B " + temp_build_dir.name, - ] - ) - subprocess.run( - [ - "cmake", - "--build", - temp_build_dir.name, - "-j 16", - "--target install", - "--config Release", - ] - ) - - -temp_build_dir = tempfile.TemporaryDirectory() -cmake_build_torchao_ops(temp_build_dir) -libs = glob.glob(f"{temp_build_dir.name}/lib/libtorchao_ops_aten.*") -libs = list(filter(lambda l: (l.endswith("so") or l.endswith("dylib")), libs)) -assert len(libs) == 1 -torch.ops.load_library(libs[0]) - - class TestInt8DynActIntxWeightQuantizer(unittest.TestCase): def test_accuracy(self): group_size = 128 diff --git a/torchao/experimental/tests/test_linear_int8_dynamic_activation_intx_weight_subclass.py b/torchao/experimental/tests/test_linear_int8_dynamic_activation_intx_weight_subclass.py index e521982051..61f6c6cc01 100644 --- a/torchao/experimental/tests/test_linear_int8_dynamic_activation_intx_weight_subclass.py +++ b/torchao/experimental/tests/test_linear_int8_dynamic_activation_intx_weight_subclass.py @@ -5,17 +5,11 @@ # LICENSE file in the root directory of this source tree. 
import copy -import glob -import os -import subprocess -import sys import tempfile import unittest import torch -sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "../../.."))) - from torchao.experimental.quant_api import ( _Int8DynActIntxWeightQuantizedLinearFallback, int8_dynamic_activation_intx_weight, @@ -24,41 +18,6 @@ from torchao.utils import unwrap_tensor_subclass -def cmake_build_torchao_ops(temp_build_dir): - from distutils.sysconfig import get_python_lib - - print("Building torchao ops for ATen target") - cmake_prefix_path = get_python_lib() - dir_path = os.path.dirname(os.path.realpath(__file__)) - subprocess.run( - [ - "cmake", - "-DCMAKE_PREFIX_PATH=" + cmake_prefix_path, - "-DCMAKE_INSTALL_PREFIX=" + temp_build_dir.name, - "-S " + dir_path + "/../", - "-B " + temp_build_dir.name, - ] - ) - subprocess.run( - [ - "cmake", - "--build", - temp_build_dir.name, - "-j 16", - "--target install", - "--config Release", - ] - ) - - -temp_build_dir = tempfile.TemporaryDirectory() -cmake_build_torchao_ops(temp_build_dir) -libs = glob.glob(f"{temp_build_dir.name}/lib/libtorchao_ops_aten.*") -libs = list(filter(lambda l: (l.endswith("so") or l.endswith("dylib")), libs)) -assert len(libs) == 1 -torch.ops.load_library(libs[0]) - - class TestInt8DynamicActivationIntxWeight(unittest.TestCase): def test_accuracy(self): group_size = 128