diff --git a/.gitmodules b/.gitmodules index 7207ef9b6..0777fa968 100644 --- a/.gitmodules +++ b/.gitmodules @@ -11,3 +11,6 @@ [submodule "third_party/libnop"] path = third_party/libnop url = https://github.com/google/libnop.git +[submodule "third_party/hipify"] + path = third_party/hipify + url = https://github.com/ROCmSoftwarePlatform/hipify-torch.git diff --git a/CMakeLists.txt b/CMakeLists.txt index 640054ee3..54dfc3ea4 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -10,7 +10,8 @@ project(tensorpipe LANGUAGES C CXX) set(CMAKE_CXX_STANDARD 14) -list(APPEND CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}/cmake") +# ROCm related +list(APPEND CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}/cmake" "${PROJECT_SOURCE_DIR}/third_party/hipify/cmake") # Expose build options. include(Options) @@ -21,6 +22,17 @@ include(Sanitize) # Misc checks to cope with various compiler modes. include(MiscCheck) +# ROCm related +if (TP_USE_ROCM) + include(Hip) + if(TP_HAVE_HIP) + include(Hipify) + hipify(CUDA_SOURCE_DIR ${PROJECT_SOURCE_DIR}) + else() + message(FATAL_ERROR "Not able to find HIP installation, so cant compile with ROCm support.") + endif() +endif() + add_subdirectory(tensorpipe) install(EXPORT TensorpipeTargets diff --git a/cmake/Hip.cmake b/cmake/Hip.cmake new file mode 100644 index 000000000..79a67a014 --- /dev/null +++ b/cmake/Hip.cmake @@ -0,0 +1,162 @@ +set(TP_HAVE_HIP FALSE) + +IF(NOT DEFINED ENV{ROCM_PATH}) + SET(ROCM_PATH /opt/rocm) +ELSE() + SET(ROCM_PATH $ENV{ROCM_PATH}) +ENDIF() + +# HIP_PATH +IF(NOT DEFINED ENV{HIP_PATH}) + SET(HIP_PATH ${ROCM_PATH}/hip) +ELSE() + SET(HIP_PATH $ENV{HIP_PATH}) +ENDIF() + +IF(NOT EXISTS ${HIP_PATH}) + return() +ENDIF() + +# HCC_PATH +IF(NOT DEFINED ENV{HCC_PATH}) + SET(HCC_PATH ${ROCM_PATH}/hcc) +ELSE() + SET(HCC_PATH $ENV{HCC_PATH}) +ENDIF() + +# HSA_PATH +IF(NOT DEFINED ENV{HSA_PATH}) + SET(HSA_PATH ${ROCM_PATH}/hsa) +ELSE() + SET(HSA_PATH $ENV{HSA_PATH}) +ENDIF() + +# ROCBLAS_PATH +IF(NOT DEFINED ENV{ROCBLAS_PATH}) + SET(ROCBLAS_PATH ${ROCM_PATH}/rocblas) +ELSE() + SET(ROCBLAS_PATH $ENV{ROCBLAS_PATH}) +ENDIF() + +# ROCSPARSE_PATH +IF(NOT DEFINED ENV{ROCSPARSE_PATH}) + SET(ROCSPARSE_PATH ${ROCM_PATH}/rocsparse) +ELSE() + SET(ROCSPARSE_PATH $ENV{ROCSPARSE_PATH}) +ENDIF() + +# ROCFFT_PATH +IF(NOT DEFINED ENV{ROCFFT_PATH}) + SET(ROCFFT_PATH ${ROCM_PATH}/rocfft) +ELSE() + SET(ROCFFT_PATH $ENV{ROCFFT_PATH}) +ENDIF() + +# HIPSPARSE_PATH +IF(NOT DEFINED ENV{HIPSPARSE_PATH}) + SET(HIPSPARSE_PATH ${ROCM_PATH}/hipsparse) +ELSE() + SET(HIPSPARSE_PATH $ENV{HIPSPARSE_PATH}) +ENDIF() + +# THRUST_PATH +IF(DEFINED ENV{THRUST_PATH}) + SET(THRUST_PATH $ENV{THRUST_PATH}) +ELSE() + SET(THRUST_PATH ${ROCM_PATH}/include) +ENDIF() + +# HIPRAND_PATH +IF(NOT DEFINED ENV{HIPRAND_PATH}) + SET(HIPRAND_PATH ${ROCM_PATH}/hiprand) +ELSE() + SET(HIPRAND_PATH $ENV{HIPRAND_PATH}) +ENDIF() + +# ROCRAND_PATH +IF(NOT DEFINED ENV{ROCRAND_PATH}) + SET(ROCRAND_PATH ${ROCM_PATH}/rocrand) +ELSE() + SET(ROCRAND_PATH $ENV{ROCRAND_PATH}) +ENDIF() + +# MIOPEN_PATH +IF(NOT DEFINED ENV{MIOPEN_PATH}) + SET(MIOPEN_PATH ${ROCM_PATH}/miopen) +ELSE() + SET(MIOPEN_PATH $ENV{MIOPEN_PATH}) +ENDIF() + +IF(NOT DEFINED ENV{TP_ROCM_ARCH}) + SET(TP_ROCM_ARCH gfx900;gfx906;gfx908) +ELSE() + SET(TP_ROCM_ARCH $ENV{TP_ROCM_ARCH}) +ENDIF() + +# Add HIP to the CMAKE Module Path +set(CMAKE_MODULE_PATH ${HIP_PATH}/cmake ${CMAKE_MODULE_PATH}) + +# Disable Asserts In Code (Can't use asserts on HIP stack.) +ADD_DEFINITIONS(-DNDEBUG) + +# Find the HIP Package +find_package(HIP 1.0) + +IF(HIP_FOUND) + set(TP_HAVE_HIP TRUE) + + if(HIP_COMPILER STREQUAL clang) + set(hip_library_name amdhip64) + else() + set(hip_library_name hip_hcc) + endif() + message("HIP library name: ${hip_library_name}") + + set(CMAKE_HCC_FLAGS_DEBUG ${CMAKE_CXX_FLAGS_DEBUG}) + set(CMAKE_HCC_FLAGS_RELEASE ${CMAKE_CXX_FLAGS_RELEASE}) + FIND_LIBRARY(TP_HIP_HCC_LIBRARIES ${hip_library_name} HINTS ${HIP_PATH}/lib) + + list(APPEND HIP_CXX_FLAGS -fPIC) + list(APPEND HIP_CXX_FLAGS -D__HIP_PLATFORM_HCC__=1) + list(APPEND HIP_CXX_FLAGS -D__HIP_NO_HALF_OPERATORS__=1) + list(APPEND HIP_CXX_FLAGS -D__HIP_NO_HALF_CONVERSIONS__=1) + list(APPEND HIP_CXX_FLAGS -DHIP_VERSION=${HIP_VERSION_MAJOR}) + list(APPEND HIP_CXX_FLAGS -Wno-macro-redefined) + list(APPEND HIP_CXX_FLAGS -Wno-inconsistent-missing-override) + list(APPEND HIP_CXX_FLAGS -Wno-exceptions) + list(APPEND HIP_CXX_FLAGS -Wno-shift-count-negative) + list(APPEND HIP_CXX_FLAGS -Wno-shift-count-overflow) + list(APPEND HIP_CXX_FLAGS -Wno-unused-command-line-argument) + list(APPEND HIP_CXX_FLAGS -Wno-duplicate-decl-specifier) + + set(HIP_CLANG_FLAGS ${HIP_CXX_FLAGS}) + # Ask hcc to generate device code during compilation so we can use + # host linker to link. + list(APPEND HIP_CLANG_FLAGS -fno-gpu-rdc) + list(APPEND HIP_CLANG_FLAGS -Wno-defaulted-function-deleted) + foreach(tp_rocm_arch ${TP_ROCM_ARCH}) + list(APPEND HIP_CLANG_FLAGS --amdgpu-target=${tp_rocm_arch}) + endforeach() + + set(hip_DIR ${HIP_PATH}/lib/cmake/hip) + set(hsa-runtime64_DIR ${ROCM_PATH}/lib/cmake/hsa-runtime64) + set(AMDDeviceLibs_DIR ${ROCM_PATH}/lib/cmake/AMDDeviceLibs) + set(amd_comgr_DIR ${ROCM_PATH}/lib/cmake/amd_comgr) + set(rocrand_DIR ${ROCRAND_PATH}/lib/cmake/rocrand) + set(hiprand_DIR ${HIPRAND_PATH}/lib/cmake/hiprand) + set(rocblas_DIR ${ROCBLAS_PATH}/lib/cmake/rocblas) + set(miopen_DIR ${MIOPEN_PATH}/lib/cmake/miopen) + set(rocfft_DIR ${ROCFFT_PATH}/lib/cmake/rocfft) + set(hipfft_DIR ${HIPFFT_PATH}/lib/cmake/hipfft) + set(hipsparse_DIR ${HIPSPARSE_PATH}/lib/cmake/hipsparse) + set(rccl_DIR ${RCCL_PATH}/lib/cmake/rccl) + set(rocprim_DIR ${ROCPRIM_PATH}/lib/cmake/rocprim) + set(hipcub_DIR ${HIPCUB_PATH}/lib/cmake/hipcub) + set(rocthrust_DIR ${ROCTHRUST_PATH}/lib/cmake/rocthrust) + set(ROCclr_DIR ${ROCM_PATH}/rocclr/lib/cmake/rocclr) + + find_package(hip REQUIRED) + + set(TP_HIP_INCLUDE ${ROCM_PATH}/include ${TP_HIP_INCLUDE}) + set(TP_HIP_INCLUDE ${hip_INCLUDE_DIRS} $ $ ${TP_HIP_INCLUDE}) +ENDIF() diff --git a/cmake/Options.cmake b/cmake/Options.cmake index 10e09c94e..6034d45bb 100644 --- a/cmake/Options.cmake +++ b/cmake/Options.cmake @@ -31,6 +31,12 @@ endmacro() # TODO: Default to ON if CUDA available. option(TP_USE_CUDA "Enable support for CUDA tensors" OFF) +option(TP_USE_ROCM "Enable support for ROCM tensors" OFF) + +# if both TP_USE_CUDA and TP_USE_ROCM is set then break +if(TP_USE_CUDA AND TP_USE_ROCM) + message(FATAL_ERROR "TensorPipe does not support building for CUDA and ROCM at the same time. Please unset either TP_USE_CUDA or TP_USE_ROCM.") +endif() # Optional features option(TP_BUILD_BENCHMARK "Build benchmarks" OFF) diff --git a/tensorpipe/CMakeLists.txt b/tensorpipe/CMakeLists.txt index 5c3606471..8df78e991 100644 --- a/tensorpipe/CMakeLists.txt +++ b/tensorpipe/CMakeLists.txt @@ -79,7 +79,7 @@ list(APPEND TP_PUBLIC_HDRS ### cma -tp_conditional_backend( +TP_CONDITIONAL_BACKEND( TP_ENABLE_CMA "Enable cross-memory attach channel" "LINUX") if(TP_ENABLE_CMA) list(APPEND TP_SRCS @@ -100,6 +100,7 @@ list(APPEND TP_SRCS list(APPEND TP_PUBLIC_HDRS channel/mpt/factory.h) + ## Transports ### uv @@ -124,7 +125,7 @@ list(APPEND TP_LINK_LIBRARIES uv::uv) ### shm -tp_conditional_backend( +TP_CONDITIONAL_BACKEND( TP_ENABLE_SHM "Enable shared-memory transport" "LINUX") if(TP_ENABLE_SHM) list(APPEND TP_SRCS @@ -143,7 +144,7 @@ endif() ### ibv -tp_conditional_backend( +TP_CONDITIONAL_BACKEND( TP_ENABLE_IBV "Enable InfiniBand transport" "LINUX") if(TP_ENABLE_IBV) list(APPEND TP_SRCS @@ -219,9 +220,9 @@ install(FILES ${CMAKE_CURRENT_BINARY_DIR}/config.h DESTINATION ${TP_INSTALL_INCLUDEDIR}/tensorpipe) -## CUDA +## CUDA AND ROCM -if(TP_USE_CUDA) +if(TP_USE_CUDA OR TP_USE_ROCM) # TP_SRCS is the list of source files that we need to build libtensorpipe. set(TP_CUDA_SRCS) @@ -234,9 +235,23 @@ if(TP_USE_CUDA) # TP_INCLUDE_DIRS is list of include path to be used set(TP_CUDA_INCLUDE_DIRS) - find_package(CUDA REQUIRED) - list(APPEND TP_CUDA_LINK_LIBRARIES ${CUDA_LIBRARIES}) - list(APPEND TP_CUDA_INCLUDE_DIRS ${CUDA_INCLUDE_DIRS}) + # TP_CUDA_COMPILE_DEFS is list of compiler definitions to be used + set(TP_CUDA_COMPILE_DEFS) + + if (TP_USE_CUDA) + set(TP_GPU_LIB_NAME "tensorpipe_cuda") + find_package(CUDA REQUIRED) + list(APPEND TP_CUDA_LINK_LIBRARIES ${CUDA_LIBRARIES}) + list(APPEND TP_CUDA_INCLUDE_DIRS ${CUDA_INCLUDE_DIRS}) + elseif (TP_USE_ROCM) + set(TP_GPU_LIB_NAME "tensorpipe_hip") + # Finding of HIP package is already before hipifying the files + list(APPEND TP_CUDA_LINK_LIBRARIES ${TP_HIP_HCC_LIBRARIES}) + list(APPEND TP_CUDA_INCLUDE_DIRS ${TP_HIP_INCLUDE}) + list(APPEND TP_CUDA_COMPILE_DEFS TP_USE_ROCM) + option(TP_ROCM_TMP_COMPILE "This variable enables temp code for compilation on ROCm platform" OFF) + list(APPEND TP_CUDA_COMPILE_DEFS TP_ROCM_TMP_COMPILE) + endif() list(APPEND TP_CUDA_SRCS common/cuda_buffer.cc) @@ -246,12 +261,16 @@ if(TP_USE_CUDA) ### cuda_xth - list(APPEND TP_CUDA_SRCS - channel/cuda_xth/channel_impl.cc - channel/cuda_xth/context_impl.cc - channel/cuda_xth/factory.cc) - list(APPEND TP_CUDA_PUBLIC_HDRS - channel/cuda_xth/factory.h) + TP_CONDITIONAL_BACKEND( + TP_ENABLE_HIP_XTH "Enable HIP XTH communication channel" "TP_USE_ROCM") + if(TP_ENABLE_HIP_XTH OR TP_USE_CUDA) + list(APPEND TP_CUDA_SRCS + channel/cuda_xth/channel_impl.cc + channel/cuda_xth/context_impl.cc + channel/cuda_xth/factory.cc) + list(APPEND TP_CUDA_PUBLIC_HDRS + channel/cuda_xth/factory.h) + endif() ### cuda_basic @@ -265,9 +284,11 @@ if(TP_USE_CUDA) ### cuda_ipc - tp_conditional_backend( + TP_CONDITIONAL_BACKEND( TP_ENABLE_CUDA_IPC "Enable CUDA inter-process communication channel" "TP_USE_CUDA") - if(TP_ENABLE_CUDA_IPC) + TP_CONDITIONAL_BACKEND( + TP_ENABLE_HIP_IPC "Enable HIP inter-process communication channel" "TP_USE_ROCM") + if(TP_ENABLE_CUDA_IPC OR TP_ENABLE_HIP_IPC) list(APPEND TP_CUDA_SRCS channel/cuda_ipc/channel_impl.cc channel/cuda_ipc/context_impl.cc @@ -279,9 +300,11 @@ if(TP_USE_CUDA) ### cuda_gdr - tp_conditional_backend( + TP_CONDITIONAL_BACKEND( TP_ENABLE_CUDA_GDR "Enable CUDA GpuDirect (InfiniBand) channel" "LINUX") - if(TP_ENABLE_CUDA_GDR) + TP_CONDITIONAL_BACKEND( + TP_ENABLE_HIP_GDR "Enable HIP GpuDirect (InfiniBand) channel" "LINUX") + if((TP_ENABLE_CUDA_GDR AND TP_USE_CUDA) OR (TP_ENABLE_HIP_GDR AND TP_USE_ROCM)) list(APPEND TP_CUDA_SRCS common/ibv.cc channel/cuda_gdr/channel_impl.cc @@ -293,19 +316,26 @@ if(TP_USE_CUDA) set(TENSORPIPE_HAS_CUDA_GDR_CHANNEL 1) endif() + if(TP_USE_ROCM) + # Replace the cuda file names in TP_CUDA_SRCS & TP_CUDA_PUBLIC_HDRS file lists with hipified file names + get_hipified_list("${TP_CUDA_SRCS}" TP_CUDA_SRCS) + get_hipified_list("${TP_CUDA_PUBLIC_HDRS}" TP_CUDA_PUBLIC_HDRS) + endif() + configure_file(config_cuda.h.in config_cuda.h) - add_library(tensorpipe_cuda ${TP_STATIC_OR_SHARED} ${TP_CUDA_SRCS}) + add_library(${TP_GPU_LIB_NAME} ${TP_STATIC_OR_SHARED} ${TP_CUDA_SRCS}) if(BUILD_SHARED_LIBS) - set_target_properties(tensorpipe_cuda PROPERTIES POSITION_INDEPENDENT_CODE 1) + set_target_properties(${TP_GPU_LIB_NAME} PROPERTIES POSITION_INDEPENDENT_CODE 1) endif() - target_link_libraries(tensorpipe_cuda PUBLIC tensorpipe) - target_link_libraries(tensorpipe_cuda PRIVATE ${TP_CUDA_LINK_LIBRARIES}) - target_include_directories(tensorpipe_cuda PUBLIC ${TP_CUDA_INCLUDE_DIRS}) + target_link_libraries(${TP_GPU_LIB_NAME} PUBLIC tensorpipe) + target_link_libraries(${TP_GPU_LIB_NAME} PRIVATE ${TP_CUDA_LINK_LIBRARIES}) + target_include_directories(${TP_GPU_LIB_NAME} PUBLIC ${TP_CUDA_INCLUDE_DIRS}) + target_compile_definitions(${TP_GPU_LIB_NAME} PUBLIC ${TP_CUDA_COMPILE_DEFS}) - install(TARGETS tensorpipe_cuda + install(TARGETS ${TP_GPU_LIB_NAME} EXPORT TensorpipeTargets LIBRARY DESTINATION ${TP_INSTALL_LIBDIR} ARCHIVE DESTINATION ${TP_INSTALL_LIBDIR}) diff --git a/tensorpipe/benchmark/CMakeLists.txt b/tensorpipe/benchmark/CMakeLists.txt index d12c030aa..034cf6287 100644 --- a/tensorpipe/benchmark/CMakeLists.txt +++ b/tensorpipe/benchmark/CMakeLists.txt @@ -6,8 +6,34 @@ # TODO: Make those separate CMake projects. -add_executable(benchmark_transport benchmark_transport.cc options.cc transport_registry.cc channel_registry.cc) +set(TP_BM_TRANS_SRCS) + +set(TP_BM_PIPE_SRCS) + +list(APPEND TP_BM_TRANS_SRCS + benchmark_transport + benchmark_transport.cc + options.cc + transport_registry.cc + channel_registry.cc) + +list(APPEND TP_BM_PIPE_SRCS + benchmark_pipe.cc + options.cc + transport_registry.cc + channel_registry.cc) + +if(TP_USE_ROCM) + # Replace the cuda file names in TP_BM_TRANS_SRCS & TP_BM_PIPE_SRCS file lists with hipified file names + get_hipified_list("${TP_BM_TRANS_SRCS}" TP_BM_TRANS_SRCS) + get_hipified_list("${TP_BM_PIPE_SRCS}" TP_BM_PIPE_SRCS) + set(TP_GPU_LIB "tensorpipe_hip") +elseif(TP_USE_CUDA) + set(TP_GPU_LIB "tensorpipe_cuda") +endif() + +add_executable(benchmark_transport ${TP_BM_TRANS_SRCS}) target_link_libraries(benchmark_transport PRIVATE tensorpipe) -add_executable(benchmark_pipe benchmark_pipe.cc options.cc transport_registry.cc channel_registry.cc) -target_link_libraries(benchmark_pipe PRIVATE tensorpipe tensorpipe_cuda) +add_executable(benchmark_pipe ${TP_BM_PIPE_SRCS}) +target_link_libraries(benchmark_pipe PRIVATE tensorpipe ${TP_GPU_LIB}) diff --git a/tensorpipe/common/cuda_loop.h b/tensorpipe/common/cuda_loop.h index 6fc17f0f2..87f2a71af 100644 --- a/tensorpipe/common/cuda_loop.h +++ b/tensorpipe/common/cuda_loop.h @@ -20,6 +20,10 @@ #include +#ifdef TP_USE_ROCM +#define CUDART_CB +#endif + namespace tensorpipe { class CudaLoop { diff --git a/tensorpipe/test/CMakeLists.txt b/tensorpipe/test/CMakeLists.txt index 43ae51a56..29e53a423 100644 --- a/tensorpipe/test/CMakeLists.txt +++ b/tensorpipe/test/CMakeLists.txt @@ -68,11 +68,22 @@ if(TP_ENABLE_CMA) add_subdirectory(channel/cma) endif() -if(TP_USE_CUDA) - find_package(CUDA REQUIRED) - list(APPEND TP_TEST_LINK_LIBRARIES ${CUDA_LIBRARIES}) - list(APPEND TP_TEST_INCLUDE_DIRS ${CUDA_INCLUDE_DIRS}) - list(APPEND TP_TEST_COMPILE_DEFS TP_USE_CUDA) +if(TP_USE_CUDA OR TP_USE_ROCM) + if(TP_USE_CUDA) + set(TP_GPU_KERNEL_LIB_NAME "tensorpipe_cuda_kernel") + set(TP_GPU_LIB_NAME "tensorpipe_cuda") + find_package(CUDA REQUIRED) + list(APPEND TP_TEST_LINK_LIBRARIES ${CUDA_LIBRARIES}) + list(APPEND TP_TEST_INCLUDE_DIRS ${CUDA_INCLUDE_DIRS}) + list(APPEND TP_TEST_COMPILE_DEFS TP_USE_CUDA) + elseif(TP_USE_ROCM) + set(TP_GPU_KERNEL_LIB_NAME "tensorpipe_hip_kernel") + set(TP_GPU_LIB_NAME "tensorpipe_hip") + # Finding of HIP package is already before hipifying the files + list(APPEND TP_TEST_LINK_LIBRARIES ${TP_HIP_HCC_LIBRARIES}) + list(APPEND TP_TEST_INCLUDE_DIRS ${TP_HIP_INCLUDE}) + list(APPEND TP_TEST_COMPILE_DEFS TP_USE_ROCM) + endif() list(APPEND TP_TEST_SRCS channel/channel_test_cuda.cc @@ -82,27 +93,40 @@ if(TP_USE_CUDA) core/pipe_cuda_test.cc ) - list(APPEND TP_TEST_SRCS - channel/cuda_xth/cuda_xth_test.cc - channel/cuda_basic/cuda_basic_test.cc - ) + if(TP_ENABLE_HIP_XTH OR TP_USE_CUDA) + list(APPEND TP_TEST_SRCS + channel/cuda_xth/cuda_xth_test.cc + channel/cuda_basic/cuda_basic_test.cc + ) + endif() - if(TP_ENABLE_CUDA_IPC) + if(TP_ENABLE_CUDA_IPC OR TP_ENABLE_HIP_IPC) list(APPEND TP_TEST_SRCS channel/cuda_ipc/cuda_ipc_test.cc ) endif() - list(APPEND TP_TEST_SRCS - channel/cuda_gdr/cuda_gdr_test.cc - ) + if((TP_ENABLE_CUDA_GDR AND TP_USE_CUDA) OR (TP_ENABLE_HIP_GDR AND TP_USE_ROCM)) + list(APPEND TP_TEST_SRCS + channel/cuda_gdr/cuda_gdr_test.cc + ) + endif() - cuda_add_library(tensorpipe_cuda_kernel channel/kernel.cu) - list(APPEND TP_TEST_LINK_LIBRARIES tensorpipe_cuda_kernel) + if(TP_USE_CUDA) + cuda_add_library(${TP_GPU_KERNEL_LIB_NAME} channel/kernel.cu) + elseif(TP_USE_ROCM) + # Replace the cuda file names in TP_TEST_SRCS file list with hipified file names + get_hipified_list("${TP_TEST_SRCS}" TP_TEST_SRCS) + hip_add_library(${TP_GPU_KERNEL_LIB_NAME} channel/kernel.hip) + target_compile_options(${TP_GPU_KERNEL_LIB_NAME} PUBLIC ${HIP_CLANG_FLAGS}) + endif() + + list(APPEND TP_TEST_LINK_LIBRARIES ${TP_GPU_KERNEL_LIB_NAME}) - list(APPEND TP_TEST_LINK_LIBRARIES tensorpipe_cuda) + list(APPEND TP_TEST_LINK_LIBRARIES ${TP_GPU_LIB_NAME}) endif() + add_subdirectory(${PROJECT_SOURCE_DIR}/third_party/googletest ${PROJECT_BINARY_DIR}/third_party/googletest EXCLUDE_FROM_ALL)