diff --git a/torchao/_models/llama/generate.py b/torchao/_models/llama/generate.py index 7779927b9b..5635ed8d23 100644 --- a/torchao/_models/llama/generate.py +++ b/torchao/_models/llama/generate.py @@ -548,14 +548,15 @@ def ffn_or_attn_only(mod, fqn): precision == torch.float32 ), "int8_dynamic_activation_intx_weight requires fp32 precision" - # Build kernels in temp location, and load them in torch - # This requires an ARM CPU - from torchao.experimental.temp_build import temp_build_and_load_torchao_ops - - temp_build_and_load_torchao_ops( - cmake_lists_path=os.path.dirname(os.path.realpath(__file__)) - + "/../../experimental" - ) + try: + torch.ops.torchao._pack_8bit_act_4bit_weight + except AttributeError: + print( + "Unable to load experimental torchao kernels. Performance will be slow." + ) + print( + "To install the kernels, run `USE_CPP=1 pip install .` from ao on a machine with an ARM CPU" + ) # Quantize model _quant_args = quantization.split("-") diff --git a/torchao/experimental/tests/test_embedding_xbit_quantizer.py b/torchao/experimental/tests/test_embedding_xbit_quantizer.py index 98eaf9a411..40bfc6f53e 100644 --- a/torchao/experimental/tests/test_embedding_xbit_quantizer.py +++ b/torchao/experimental/tests/test_embedding_xbit_quantizer.py @@ -5,57 +5,17 @@ # LICENSE file in the root directory of this source tree. 
import copy -import glob -import os -import subprocess -import sys import tempfile import unittest import torch -sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "../../.."))) from torchao.experimental.quant_api import ( IntxWeightEmbeddingQuantizer, _IntxWeightQuantizedEmbeddingFallback, ) -def cmake_build_torchao_ops(temp_build_dir): - from distutils.sysconfig import get_python_lib - - print("Building torchao ops for ATen target") - cmake_prefix_path = get_python_lib() - dir_path = os.path.dirname(os.path.realpath(__file__)) - subprocess.run( - [ - "cmake", - "-DCMAKE_PREFIX_PATH=" + cmake_prefix_path, - "-DCMAKE_INSTALL_PREFIX=" + temp_build_dir.name, - "-S " + dir_path + "/../", - "-B " + temp_build_dir.name, - ] - ) - subprocess.run( - [ - "cmake", - "--build", - temp_build_dir.name, - "-j 16", - "--target install", - "--config Release", - ] - ) - - -temp_build_dir = tempfile.TemporaryDirectory() -cmake_build_torchao_ops(temp_build_dir) -libs = glob.glob(f"{temp_build_dir.name}/lib/libtorchao_ops_aten.*") -libs = list(filter(lambda l: (l.endswith("so") or l.endswith("dylib")), libs)) -assert len(libs) == 1 -torch.ops.load_library(libs[0]) - - class TestEmbeddingQuantizer(unittest.TestCase): def test_accuracy(self): group_size = 128 diff --git a/torchao/experimental/tests/test_linear_8bit_act_xbit_weight_quantizer.py b/torchao/experimental/tests/test_linear_8bit_act_xbit_weight_quantizer.py index 17f839979b..926d15e262 100644 --- a/torchao/experimental/tests/test_linear_8bit_act_xbit_weight_quantizer.py +++ b/torchao/experimental/tests/test_linear_8bit_act_xbit_weight_quantizer.py @@ -5,57 +5,17 @@ # LICENSE file in the root directory of this source tree. 
import copy -import glob -import os -import subprocess -import sys import tempfile import unittest import torch -sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "../../.."))) from torchao.experimental.quant_api import ( Int8DynActIntxWeightLinearQuantizer, _Int8DynActIntxWeightQuantizedLinearFallback, ) -def cmake_build_torchao_ops(temp_build_dir): - from distutils.sysconfig import get_python_lib - - print("Building torchao ops for ATen target") - cmake_prefix_path = get_python_lib() - dir_path = os.path.dirname(os.path.realpath(__file__)) - subprocess.run( - [ - "cmake", - "-DCMAKE_PREFIX_PATH=" + cmake_prefix_path, - "-DCMAKE_INSTALL_PREFIX=" + temp_build_dir.name, - "-S " + dir_path + "/../", - "-B " + temp_build_dir.name, - ] - ) - subprocess.run( - [ - "cmake", - "--build", - temp_build_dir.name, - "-j 16", - "--target install", - "--config Release", - ] - ) - - -temp_build_dir = tempfile.TemporaryDirectory() -cmake_build_torchao_ops(temp_build_dir) -libs = glob.glob(f"{temp_build_dir.name}/lib/libtorchao_ops_aten.*") -libs = list(filter(lambda l: (l.endswith("so") or l.endswith("dylib")), libs)) -assert len(libs) == 1 -torch.ops.load_library(libs[0]) - - class TestInt8DynActIntxWeightQuantizer(unittest.TestCase): def test_accuracy(self): group_size = 128 diff --git a/torchao/experimental/tests/test_linear_int8_dynamic_activation_intx_weight_subclass.py b/torchao/experimental/tests/test_linear_int8_dynamic_activation_intx_weight_subclass.py index e521982051..61f6c6cc01 100644 --- a/torchao/experimental/tests/test_linear_int8_dynamic_activation_intx_weight_subclass.py +++ b/torchao/experimental/tests/test_linear_int8_dynamic_activation_intx_weight_subclass.py @@ -5,17 +5,11 @@ # LICENSE file in the root directory of this source tree. 
import copy -import glob -import os -import subprocess -import sys import tempfile import unittest import torch -sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "../../.."))) - from torchao.experimental.quant_api import ( _Int8DynActIntxWeightQuantizedLinearFallback, int8_dynamic_activation_intx_weight, @@ -24,41 +18,6 @@ from torchao.utils import unwrap_tensor_subclass -def cmake_build_torchao_ops(temp_build_dir): - from distutils.sysconfig import get_python_lib - - print("Building torchao ops for ATen target") - cmake_prefix_path = get_python_lib() - dir_path = os.path.dirname(os.path.realpath(__file__)) - subprocess.run( - [ - "cmake", - "-DCMAKE_PREFIX_PATH=" + cmake_prefix_path, - "-DCMAKE_INSTALL_PREFIX=" + temp_build_dir.name, - "-S " + dir_path + "/../", - "-B " + temp_build_dir.name, - ] - ) - subprocess.run( - [ - "cmake", - "--build", - temp_build_dir.name, - "-j 16", - "--target install", - "--config Release", - ] - ) - - -temp_build_dir = tempfile.TemporaryDirectory() -cmake_build_torchao_ops(temp_build_dir) -libs = glob.glob(f"{temp_build_dir.name}/lib/libtorchao_ops_aten.*") -libs = list(filter(lambda l: (l.endswith("so") or l.endswith("dylib")), libs)) -assert len(libs) == 1 -torch.ops.load_library(libs[0]) - - class TestInt8DynamicActivationIntxWeight(unittest.TestCase): def test_accuracy(self): group_size = 128