Skip to content

Commit

Permalink
Adding the beginning of CUDA capability
Browse files Browse the repository at this point in the history
  • Loading branch information
Nicholas Carrara committed Jul 11, 2023
1 parent ffcad6d commit 5f4abab
Show file tree
Hide file tree
Showing 4 changed files with 344 additions and 0 deletions.
28 changes: 28 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,34 @@ set(CMAKE_POSITION_INDEPENDENT_CODE ON)
include(GNUInstallDirs)
# include(CMakePackageConfigHelpers)

#-------------------------------------------------------------------#
# Cuda and Python configuration
#-------------------------------------------------------------------#

option(BUILD_CUDA, "Compile CUDA wrappers for LArNEST" OFF)
if(BUILD_CUDA)
enable_language(CUDA)
if(NOT DEFINED CMAKE_CUDA_STANDARD)
set(CMAKE_CUDA_STANDARD 11)
set(CMAKE_CUDA_STANDARD_REQUIRED ON)
endif()

# we also need extra stuff to make sure compile flags are correctly
# passed to nvcc / host compiler
include(protect_nvcc_flags)
include(protect_pthread_flag)
message(" CUDA compiler ID : ${CMAKE_CUDA_COMPILER_ID}")
message(" CUDA compiler Version : ${CMAKE_CUDA_COMPILER_VERSION}")
message(" C++ Compiler : ${CMAKE_CXX_COMPILER_ID} "
"${CMAKE_CXX_COMPILER_VERSION} "
"${CMAKE_CXX_COMPILER_WRAPPER}")
message(" ${CMAKE_CXX_COMPILER}")
message(" CUDA Compiler : ${CMAKE_CUDA_COMPILER}")
message(" CUDA Compiler exec : ${CUDA_NVCC_EXECUTABLE}")
message(" CUDA Compile flags : ${CMAKE_CUDA_FLAGS}")
message(" CUDA toolkit inc : ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES}")
endif()

set(LArNEST_CORE_SOURCES
${CMAKE_CURRENT_SOURCE_DIR}/src/LArNEST.cxx
${CMAKE_CURRENT_SOURCE_DIR}/src/Logger.cxx
Expand Down
193 changes: 193 additions & 0 deletions cmake/cuda/FindCUDALibs.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,193 @@
# ==============================================
# the following code is directly borrowed from
# https://github.com/svenevs/cmake-cuda-targets
# ==============================================

# TODO: make this an actual find module...allow VERSION, QUIET, etc

# TODO: don't use find_package(CUDA)? But the thread specifically states that
# we should *NOT* require that enable_language(CUDA) has been done,
# meaning that e.g. CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES may not exist?
#
# Solution?
# include(CheckLanguage)
# check_language(CUDA)
# if (NOT CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES)
# --> then we cannot win?
find_package(CUDA REQUIRED)

# Populate the list of default locations to search for the CUDA libraries.
# TODO: allow user bypass of this?
list(APPEND CUDALibs_HINTS "${CUDA_TOOLKIT_ROOT_DIR}/lib64")
list(APPEND CUDALibs_HINTS "${CUDA_TOOLKIT_ROOT_DIR}/lib")
list(APPEND CUDALibs_HINTS "${CUDA_TOOLKIT_ROOT_DIR}")

function(find_and_add_cuda_import_lib lib_name)
string(TOUPPER ${lib_name} LIB_NAME)
find_library(CUDA_${LIB_NAME} ${lib_name} HINTS ${CUDALibs_HINTS})
if (NOT CUDA_${LIB_NAME} STREQUAL CUDA_${LIB_NAME}-NOTFOUND)
add_library(CUDA::${lib_name} IMPORTED INTERFACE)
set_target_properties(CUDA::${lib_name}
PROPERTIES
INTERFACE_INCLUDE_DIRECTORIES
"${CUDA_INCLUDE_DIRS}"
INTERFACE_LINK_LIBRARIES
"${CUDA_${LIB_NAME}}"
)
endif()
endfunction()

# TODO: how to make sure `dependency` can actually be used
# TODO: if dependency cannot be used, is it possible to
# delete CUDA::${lib_name}?
function(add_cuda_link_dependency lib_name dependency)
set_property(
TARGET CUDA::${lib_name}
APPEND
PROPERTY
INTERFACE_LINK_LIBRARIES ${dependency}
)
endfunction()

# Find the main CUDA runtime dynamic and static libraries.
# These are a hard dependency for all other libraries, and
# must be found.
# TODO: right way to error out?
find_and_add_cuda_import_lib(cudart)
find_and_add_cuda_import_lib(cudart_static)

# TODO: what about windows???
if (UNIX)
foreach (lib dl pthread rt)
add_cuda_link_dependency(cudart_static ${lib})
endforeach()
endif()

# TODO: nvBLAS and example. Depends on cuBLAS, but not sure how it works.
# Testing executable may need to find_package(BLAS)? It seems like the idea is
# you write a standard BLAS level 3 operation, and at link time nvBLAS will take
# over somehow?
# cuBLAS treated specially, static library needs to link against a BLAS
# library for *gemm_ symbols. So both dynamic and static library are only added
# if BLAS can be found for consistency (missing dynamic case will be a library
# load error at runtime).
# Find dynamic blas for cusolver dynamic target
set(BLA_STATIC OFF)
find_package(BLAS)
if (BLAS_FOUND)
find_and_add_cuda_import_lib(cublas)
add_cuda_link_dependency(cublas CUDA::cudart)
add_cuda_link_dependency(cublas ${BLAS_LIBRARIES})
endif()

# Find static blas for cublas static target
set(BLAS_FOUND OFF)
set(BLA_STATIC ON)
find_package(BLAS)
if (BLAS_FOUND)
find_and_add_cuda_import_lib(cublas_static)
add_cuda_link_dependency(cublas_static CUDA::cudart_static)
add_cuda_link_dependency(cublas_static ${BLAS_LIBRARIES})
endif()

# TODO: (nppi* nvblas)
foreach (cuda_lib cufft cufftw curand cusolver cusparse nvgraph nvjpeg)
# find the dynamic library
find_and_add_cuda_import_lib(${cuda_lib})
add_cuda_link_dependency(${cuda_lib} CUDA::cudart)

# TODO: if UNIX and VERSION >= 6.5
# find the static library
find_and_add_cuda_import_lib(${cuda_lib}_static)
add_cuda_link_dependency(${cuda_lib}_static CUDA::cudart_static)
endforeach()



# NVRTC (Runtime Compilation) is a shared library only.
# TODO: nvrtc needs -lcuda (*NOT* cudart), but -lcuda (at least on this system)
# is going to point to /lib64/libcuda.so.
#
# Since this is not in the HINTS paths searched above, what is the right
# way to create the CUDA::cuda target?
find_and_add_cuda_import_lib(nvrtc)
add_cuda_link_dependency(nvrtc cuda)

# NVTX is a shared library only.
# TODO: is this even useful outside of NSight Eclipse?
find_and_add_cuda_import_lib(nvToolsExt)
add_cuda_link_dependency(nvToolsExt CUDA::cudart)

# cuLIBOS is a static only library, see
#
# https://devblogs.nvidia.com/10-ways-cuda-6-5-improves-performance-productivity
#
# > Static CUDA Libraries
# > CUDA 6.5 (on Linux and Mac OS) now includes static library versions of the
# > cuBLAS, cuSPARSE, cuFFT, cuRAND, and NPP libraries. This can reduce the
# > number of dynamic library dependencies you need to include with your
# > deployed applications. These new static libraries depend on a common thread
# > abstraction layer library cuLIBOS (libculibos.a) distributed as part of the
# > CUDA toolkit.
find_and_add_cuda_import_lib(culibos)
# foreach (cuda_lib cublas cusparse cufft) # curand npp
foreach (cuda_lib cublas cufft cusparse curand nvjpeg)# npp
add_cuda_link_dependency(${cuda_lib}_static CUDA::culibos)
endforeach()

# cuSOLVER depends on cuBLAS and cuSPARSE
# NOTE: nvGRAPH relies on this, make sure it happens before nvGRAPH dependencies.
foreach (dep cublas cusparse)
add_cuda_link_dependency(cusolver CUDA::${dep})
add_cuda_link_dependency(cusolver_static CUDA::${dep}_static)
endforeach()

# nvGRAPH depends on cuBLAS, cuRAND, cuSPARSE, and cuSOLVER.
# NOTE: rely on link dependencies of cuSOLVER, this must happen after cusolver target.
foreach (dep cusolver curand)
add_cuda_link_dependency(nvgraph CUDA::${dep})
add_cuda_link_dependency(nvgraph_static CUDA::${dep}_static)
endforeach()

# NPP libraries and dependencies. See: https://docs.nvidia.com/cuda/npp/index.html
# TODO: document what nppc is (seems to be the underlying implementation for most of NPP?)
find_and_add_cuda_import_lib(nppc)
find_and_add_cuda_import_lib(nppc_static)

# Process the majority of the NPP libraries.
foreach (cuda_lib nppial nppicc nppidei nppif nppig nppim nppist nppitc npps)
# Find the libraries.
find_and_add_cuda_import_lib(${cuda_lib})
find_and_add_cuda_import_lib(${cuda_lib}_static)

# Designate dynamic link dependencies.
add_cuda_link_dependency(${cuda_lib} CUDA::cudart)
# TODO: add this in since it is needed in static or rely on existing dynamic links?
# add_cuda_link_dependency(${cuda_lib} CUDA::nppc)

# Designate static link dependencies.
add_cuda_link_dependency(${cuda_lib}_static CUDA::cudart_static)
add_cuda_link_dependency(${cuda_lib}_static CUDA::nppc_static)
add_cuda_link_dependency(${cuda_lib}_static CUDA::culibos)
endforeach()

# nppicom: JPEG compression and decompression functions in nppi_compression_functions.h
find_and_add_cuda_import_lib(nppicom)
find_and_add_cuda_import_lib(nppicom_static)

# nppisu: memory support functions in nppi_support_functions.h
find_and_add_cuda_import_lib(nppisu)
find_and_add_cuda_import_lib(nppisu_static)
add_cuda_link_dependency(nppisu CUDA::cudart)
add_cuda_link_dependency(nppisu_static CUDA::cudart_static)

# TODO: mysterious extra static libraries...what are they for?
find_and_add_cuda_import_lib(cudadevrt)
find_and_add_cuda_import_lib(cublas_device)

# TODO: VERSION 9.2, search libcufft_static_nocallback.a
# https://docs.nvidia.com/cuda/cufft/index.html#oned-complex-to-complex-transforms

# Do not expose these functions externally.
unset(find_and_add_cuda_import_lib)
unset(add_cuda_link_dependency)
93 changes: 93 additions & 0 deletions cmake/cuda/protect_nvcc_flags.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
# Distributed under the OSI-approved BSD 3-Clause License. See accompanying
# file Copyright.txt or https://cmake.org/licensing for details.

#[=======================================================================[.rst:
Protect flags
-------------
.. only:: html
.. contents::
CUDA Utilities
^^^^^^^^^^^^^^
This part of the protect flags module provides a set of utilities to assist users with CUDA as a language.
It adds:
.. command:: cmake_cuda_convert_flags
Take a list of flags or a target and convert the flags to pass through the CUDA compiler to
the host compiler by adding a LANGUAGE requirement.
This will make the flags are only used when the language is not CUDA.
``PROTECT_ONLY``
Just protect the flags, rather than passing them through to the host compiler.
``INTERFACE_TARGET <name>``
A target to take flags from to convert
``LIST <name>``
A list of flags to protect (in place).
#]=======================================================================]

# This is a private function that just converts a list
# It takes a name of a variable to modify in place
function(_CUDA_CONVERT_FLAGS flags_name)
set(old_flags "${${flags_name}}")

if(NOT "${old_flags}" STREQUAL "")
# Use old flags for non-CUDA targets
set(protected_flags "$<$<BUILD_INTERFACE:$<NOT:$<COMPILE_LANGUAGE:CUDA>>>:${old_flags}>")
# Add -Xcompiler wrapped flags for CUDA
if(NOT CCF_PROTECT_ONLY)
# These need to be comma separated now
string(REPLACE ";" "," cuda_flags "${old_flags}")
string(APPEND protected_flags "$<$<BUILD_INTERFACE:$<COMPILE_LANGUAGE:CUDA>>:-Xcompiler=${cuda_flags}>")
endif()
set(${flags_name} "${protected_flags}" PARENT_SCOPE)
endif()
endfunction()


function(CMAKE_CUDA_CONVERT_FLAGS)
cmake_parse_arguments(
CCF
"PROTECT_ONLY"
""
"INTERFACE_TARGET;LIST"
${ARGN})

foreach(EXISTING_TARGET IN LISTS CCF_INTERFACE_TARGET)
get_property(old_flags TARGET ${EXISTING_TARGET} PROPERTY INTERFACE_INCLUDE_DIRECTORIES)
_cuda_convert_flags(old_flags "${CCF_PROTECT_ONLY}")
set_property(TARGET ${EXISTING_TARGET} PROPERTY INTERFACE_INCLUDE_DIRECTORIES "${old_flags}")
message("DEBUG : ${old_flags}")
endforeach()

foreach(EXISTING_LIST IN LISTS CCF_LIST)
set(LOCAL_LIST "${${EXITING_LIST}}")
_cuda_convert_flags(LOCAL_LIST "${CCF_PROTECT_ONLY}")
set(${EXISTING_LIST} "${LOCAL_LIST}" PARENT_SCOPE)
endforeach()
endfunction()

function(CUDA_CONVERT_FLAGS EXISTING_TARGET)
get_property(old_flags TARGET ${EXISTING_TARGET} PROPERTY INTERFACE_INCLUDE_DIRECTORIES)
if(NOT "${old_flags}" STREQUAL "")
string(REPLACE ";" "," CUDA_flags "${old_flags}")
set_property(TARGET ${EXISTING_TARGET} PROPERTY INTERFACE_INCLUDE_DIRECTORIES
"$<$<BUILD_INTERFACE:$<NOT:$<COMPILE_LANGUAGE:CUDA>>>:${old_flags}>$<$<BUILD_INTERFACE:$<COMPILE_LANGUAGE:CUDA>>:-Xcompiler=${CUDA_flags}>"
)
endif()
# debug
#get_property(new_flags TARGET ${EXISTING_TARGET} PROPERTY INTERFACE_INCLUDE_DIRECTORIES)
#message("DEBUG : ${new_flags}")
endfunction()
30 changes: 30 additions & 0 deletions cmake/cuda/protect_pthread_flag.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
#
# flags '-pthreads' is not support by nvcc, replace with
# '-Xcompiler -pthread'
#

function(CUDA_PROTECT_PTHREAD_FLAG EXISTING_TARGET)

get_property(olds_flags TARGET ${EXISTING_TARGET} PROPERTY INTERFACE_INCLUDE_DIRECTORIES)
if(NOT "${old_flags}" STREQUAL "")
string(REPLACE "-pthread" "-Xcompiler -pthread" new_flags "${old_flags}")
set_property(TARGET ${EXISTING_TARGET} PROPERTY INTERFACE_INCLUDE_DIRECTORIES
"$<$<BUILD_INTERFACE:$<NOT:$<COMPILE_LANGUAGE:CUDA>>>:${old_flags}>$<$<BUILD_INTERFACE:$<COMPILE_LANGUAGE:CUDA>>:-Xcompiler=${new_flags}>"
)
endif()

get_property(olds_flags TARGET ${EXISTING_TARGET} PROPERTY INTERFACE_COMPILE_OPTIONS)
if(NOT "${old_flags}" STREQUAL "")
string(REPLACE "-pthread" "-Xcompiler -pthread" new_flags "${old_flags}")
set_property(TARGET ${EXISTING_TARGET} PROPERTY INTERFACE_COMPILE_OPTIONS
"$<$<BUILD_INTERFACE:$<NOT:$<COMPILE_LANGUAGE:CUDA>>>:${old_flags}>$<$<BUILD_INTERFACE:$<COMPILE_LANGUAGE:CUDA>>:-Xcompiler=${new_flags}>"
)
endif()

# debug
get_property(current_flags TARGET ${EXISTING_TARGET} PROPERTY INTERFACE_INCLUDE_DIRECTORIES)
message("DEBUG : TARGET=${EXISTING_TARGET} compile flags=${current_flags}")
get_property(current_flags TARGET ${EXISTING_TARGET} PROPERTY INTERFACE_COMPILE_OPTIONS)
message("DEBUG : TARGET=${EXISTING_TARGET} compile flags=${current_flags}")

endfunction()

0 comments on commit 5f4abab

Please sign in to comment.