Skip to content

Commit

Permalink
Add executorch parallel
Browse files Browse the repository at this point in the history
Differential Revision: D62711909

Pull Request resolved: pytorch#953
  • Loading branch information
metascroy authored Sep 26, 2024
1 parent ceec750 commit e83c35d
Show file tree
Hide file tree
Showing 9 changed files with 58 additions and 9 deletions.
6 changes: 5 additions & 1 deletion torchao/experimental/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -24,11 +24,15 @@ if(NOT TORCHAO_INCLUDE_DIRS)
set(TORCHAO_INCLUDE_DIRS ${TORCHAO_ROOT}/../..)
endif()

# TORCHAO_OP_TARGET has to be supplied by the caller; abort configuration
# right away when it is missing or evaluates to false.
if(NOT TORCHAO_OP_TARGET)
  message(
    FATAL_ERROR
    "TORCHAO_OP_TARGET is not set. Set it to ATEN or EXECUTORCH."
  )
endif()

if (NOT TORCHAO_PARALLEL_BACKEND)
# Derive a default parallel backend from TORCHAO_OP_TARGET. This runs only
# when the caller did not set TORCHAO_PARALLEL_BACKEND explicitly.
if(TORCHAO_OP_TARGET STREQUAL "ATEN")
  set(TORCHAO_PARALLEL_BACKEND "ATEN_OPENMP")
elseif(TORCHAO_OP_TARGET STREQUAL "EXECUTORCH")
  set(TORCHAO_PARALLEL_BACKEND "EXECUTORCH")
else()
  # No default exists for any other op target. Use FATAL_ERROR so that
  # configuration stops here instead of continuing with an empty backend
  # (a message() call without a valid mode keyword only prints a notice).
  message(FATAL_ERROR "TORCHAO_PARALLEL_BACKEND is not set. Please set it directly or set TORCHAO_OP_TARGET to get a default.")
endif()
Expand Down
8 changes: 8 additions & 0 deletions torchao/experimental/Utils.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,14 @@ function(target_link_torchao_parallel_backend target_name torchao_parallel_backe
target_compile_definitions(${target_name} PRIVATE TORCHAO_PARALLEL_ATEN=1 AT_PARALLEL_OPENMP=1 INTRA_OP_PARALLEL=1)
target_link_libraries(${target_name} PRIVATE ${TORCH_INSTALL_PREFIX}/lib/libomp${CMAKE_SHARED_LIBRARY_SUFFIX})

elseif(TORCHAO_PARALLEL_BACKEND_TOUPPER STREQUAL "EXECUTORCH")
message(STATUS "Building with TORCHAO_PARALLEL_BACKEND=TORCHAO_PARALLEL_EXECUTORCH")
message(STATUS "EXECUTORCH_INCLUDE_DIRS: ${EXECUTORCH_INCLUDE_DIRS}")
message(STATUS "EXECUTORCH_LIBRARIES: ${EXECUTORCH_LIBRARIES}")
target_include_directories(${target_name} PRIVATE "${EXECUTORCH_INCLUDE_DIRS}")
target_link_libraries(${target_name} PRIVATE "${EXECUTORCH_LIBRARIES}")
target_compile_definitions(${target_name} PRIVATE TORCHAO_PARALLEL_EXECUTORCH=1)

elseif(TORCHAO_PARALLEL_BACKEND_TOUPPER STREQUAL "OPENMP")
message(STATUS "Building with TORCHAO_PARALLEL_BACKEND=OPENMP. You must set the CMake variable OpenMP_ROOT to the OMP library location before compiling. Do not use this option if Torch was built with OPENMP; use ATEN_OPENMP instead.")
find_package(OpenMP REQUIRED)
Expand Down
4 changes: 2 additions & 2 deletions torchao/experimental/build_torchao_ops.sh
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,8 @@ export CMAKE_OUT=/tmp/cmake-out/torchao
# Configure the build in ${CMAKE_OUT}. "$1" selects TORCHAO_OP_TARGET.
# Every expansion is quoted so that values containing spaces or list
# separators reach CMake as a single argument, and each -D option is passed
# exactly once.
cmake -DCMAKE_PREFIX_PATH="${CMAKE_PREFIX_PATH}" \
    -DCMAKE_INSTALL_PREFIX="${CMAKE_OUT}" \
    -DTORCHAO_OP_TARGET="$1" \
    -DEXECUTORCH_LIBRARIES="${EXECUTORCH_LIBRARIES}" \
    -DEXECUTORCH_INCLUDE_DIRS="${EXECUTORCH_INCLUDE_DIRS}" \
    -S . \
    -B "${CMAKE_OUT}"
# Build the Release configuration and run the install target.
cmake --build "${CMAKE_OUT}" --target install --config Release
5 changes: 5 additions & 0 deletions torchao/experimental/kernels/cpu/aarch64/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -13,3 +13,8 @@ if (CMAKE_SYSTEM_PROCESSOR STREQUAL "arm64")
${TORCHAO_INCLUDE_DIRS}/torchao/experimental/kernels/cpu/aarch64/valpacking/interleave.cpp
)
endif()

# Install the aarch64 kernel library into <prefix>/lib.
# NOTE(review): torchao_kernels_aarch64 appears to be created only inside the
# arm64-only branch above - confirm. install(TARGETS) on a target that does
# not exist is a configure-time error, so the rule is registered only when
# the target was actually defined.
if(TARGET torchao_kernels_aarch64)
  install(
    TARGETS torchao_kernels_aarch64
    DESTINATION lib
  )
endif()
5 changes: 5 additions & 0 deletions torchao/experimental/ops/linear/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -10,3 +10,8 @@ include(${TORCHAO_ROOT}/Utils.cmake)

# Static library for the linear op. The selected parallel backend is part of
# the target name.
add_library(
  torchao_ops_linear_${TORCHAO_PARALLEL_BACKEND}
  STATIC
  channelwise_8bit_activation_groupwise_lowbit_weight.cpp
)

# Apply the backend-specific build settings through the helper function
# target_link_torchao_parallel_backend.
target_link_torchao_parallel_backend(
  torchao_ops_linear_${TORCHAO_PARALLEL_BACKEND}
  "${TORCHAO_PARALLEL_BACKEND}"
)

# Install the library into <prefix>/lib.
install(TARGETS torchao_ops_linear_${TORCHAO_PARALLEL_BACKEND} DESTINATION lib)
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ LinearTilingParams get_default_linear_tiling_params(

LinearTilingParams tiling_params;
auto num_threads = torchao::get_num_threads();
assert(num_threads >= 1);
TORCHAO_CHECK(num_threads >= 1, "num_threads must be >= 1");

tiling_params.mc_by_mr = 1;
int mc = tiling_params.mc_by_mr * ukernel_config.mr;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ if(TORCHAO_OP_TARGET STREQUAL "ATEN")
target_compile_definitions(linear_a8wxdq_${TORCHAO_OP_TARGET} PRIVATE USE_ATEN=1)
elseif(TORCHAO_OP_TARGET STREQUAL "EXECUTORCH")
message(STATUS "Building with TORCHAO_OP_TARGET=EXECUTORCH")
add_library(linear_a8wxdq_${TORCHAO_OP_TARGET} SHARED
add_library(linear_a8wxdq_${TORCHAO_OP_TARGET} STATIC
linear_a8wxdq_executorch/w2s.cpp
linear_a8wxdq_executorch/w2sz.cpp
linear_a8wxdq_executorch/w3s.cpp
Expand All @@ -29,9 +29,9 @@ elseif(TORCHAO_OP_TARGET STREQUAL "EXECUTORCH")
linear_a8wxdq_executorch/w5s.cpp
linear_a8wxdq_executorch/w5sz.cpp
)
target_include_directories(linear_a8wxdq_${TORCHAO_OP_TARGET} PRIVATE ${EXECUTORCH_INCLUDE_DIRS})
target_include_directories(linear_a8wxdq_${TORCHAO_OP_TARGET} PRIVATE "${EXECUTORCH_INCLUDE_DIRS}")
target_compile_definitions(linear_a8wxdq_${TORCHAO_OP_TARGET} PRIVATE USE_EXECUTORCH=1)
target_link_libraries(linear_a8wxdq_${TORCHAO_OP_TARGET} PRIVATE ${EXECUTORCH_LIBRARIES})
target_link_libraries(linear_a8wxdq_${TORCHAO_OP_TARGET} PRIVATE "${EXECUTORCH_LIBRARIES}")
target_link_libraries(linear_a8wxdq_${TORCHAO_OP_TARGET} PRIVATE torchao_kernels_aarch64)
target_link_libraries(linear_a8wxdq_${TORCHAO_OP_TARGET} PRIVATE torchao_ops_linear_${TORCHAO_PARALLEL_BACKEND})
else()
Expand Down
28 changes: 28 additions & 0 deletions torchao/experimental/ops/parallel-executorch-impl.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
// Copyright (c) Meta Platforms, Inc. and affiliates.
// All rights reserved.
//
// This source code is licensed under the license found in the
// LICENSE file in the root directory of this source tree.

#pragma once

#include <executorch/extension/threadpool/threadpool.h>

// Calls f(idx) once for every idx in the half-open range [begin, end),
// dispatching the iterations through the ExecuTorch threadpool's run().
//
// begin, end: bounds of the index range; nothing is executed when
//             end <= begin.
// f:          callable invoked with an int64_t index.
template <typename F>
void torchao::parallel_1d(const int64_t begin, const int64_t end, const F& f) {
  // An empty or inverted range has no work. Return before computing the
  // iteration count: a negative (end - begin) converted to size_t would
  // become a huge count instead of zero.
  if (end <= begin) {
    return;
  }
  const size_t range = static_cast<size_t>(end - begin);
  torch::executorch::threadpool::get_threadpool()->run(
      [&](size_t i) {
        // The pool hands out offsets in [0, range); shift back to the
        // caller's index space.
        int64_t idx = begin + static_cast<int64_t>(i);
        f(idx);
      },
      range);
}

// Requests num_threads threads by forwarding the value to the ExecuTorch
// threadpool's _unsafe_reset_threadpool().
// NOTE(review): the "_unsafe" prefix suggests this must not race with work
// running on the pool - confirm against the ExecuTorch threadpool docs.
inline void torchao::set_num_threads(int num_threads) {
  auto* pool = torch::executorch::threadpool::get_threadpool();
  pool->_unsafe_reset_threadpool(num_threads);
}

// Returns the thread count reported by the ExecuTorch threadpool, converted
// to int.
inline int torchao::get_num_threads() {
  auto* pool = torch::executorch::threadpool::get_threadpool();
  return pool->get_thread_count();
}
3 changes: 1 addition & 2 deletions torchao/experimental/ops/parallel.h
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,7 @@ int get_num_threads();
#ifdef TORCHAO_PARALLEL_EXECUTORCH
#pragma message( \
"TORCHAO_PARALLEL_EXECUTORCH is set. Using ExecuTorch parallel backend.")

#error "TORCHAO_PARALLEL_EXECUTORCH is not implemented yet"
#include <torchao/experimental/ops/parallel-executorch-impl.h>

#else
#ifdef TORCHAO_PARALLEL_PTHREADPOOL
Expand Down

0 comments on commit e83c35d

Please sign in to comment.