Adding the beginning of CUDA capability

NESTCollaboration · Jul 11, 2023 · 5f4abab · 5f4abab
1 parent ffcad6d
commit 5f4abab
Show file tree

Hide file tree

Showing 4 changed files with 344 additions and 0 deletions.
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -41,6 +41,34 @@ set(CMAKE_POSITION_INDEPENDENT_CODE ON)
 include(GNUInstallDirs)
 # include(CMakePackageConfigHelpers)
 
+#-------------------------------------------------------------------#
+# Cuda and Python configuration
+#-------------------------------------------------------------------#
+
+# Opt-in switch for the CUDA wrappers. It is OFF by default, so a plain C++
+# build does not enable the CUDA language.
+option(BUILD_CUDA "Compile CUDA wrappers for LArNEST" OFF)
+if(BUILD_CUDA)
+  enable_language(CUDA)
+  # Only choose a default CUDA standard when none has been set already.
+  if(NOT DEFINED CMAKE_CUDA_STANDARD)
+    set(CMAKE_CUDA_STANDARD 11)
+    set(CMAKE_CUDA_STANDARD_REQUIRED ON)
+  endif()
+
+  # we also need extra stuff to make sure compile flags are correctly
+  # passed to nvcc / host compiler.
+  # The helper modules are included by explicit path so that this works
+  # whether or not cmake/cuda has been added to CMAKE_MODULE_PATH.
+  include("${CMAKE_CURRENT_SOURCE_DIR}/cmake/cuda/protect_nvcc_flags.cmake")
+  include("${CMAKE_CURRENT_SOURCE_DIR}/cmake/cuda/protect_pthread_flag.cmake")
+
+  # Configuration summary, reported as regular status output.
+  message(STATUS "  CUDA compiler ID      : ${CMAKE_CUDA_COMPILER_ID}")
+  message(STATUS "  CUDA compiler Version : ${CMAKE_CUDA_COMPILER_VERSION}")
+  message(STATUS "  C++ Compiler : ${CMAKE_CXX_COMPILER_ID} "
+    "${CMAKE_CXX_COMPILER_VERSION} "
+    "${CMAKE_CXX_COMPILER_WRAPPER}")
+  message(STATUS "    ${CMAKE_CXX_COMPILER}")
+  message(STATUS "  CUDA Compiler      : ${CMAKE_CUDA_COMPILER}")
+  # NOTE(review): CUDA_NVCC_EXECUTABLE is a variable of the legacy FindCUDA
+  # module; nothing in this block sets it, so this line may print an empty
+  # value unless find_package(CUDA) has run elsewhere.
+  message(STATUS "  CUDA Compiler exec : ${CUDA_NVCC_EXECUTABLE}")
+  message(STATUS "  CUDA Compile flags : ${CMAKE_CUDA_FLAGS}")
+  message(STATUS "  CUDA toolkit inc   : ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES}")
+endif()
+
 set(LArNEST_CORE_SOURCES
   ${CMAKE_CURRENT_SOURCE_DIR}/src/LArNEST.cxx
   ${CMAKE_CURRENT_SOURCE_DIR}/src/Logger.cxx

diff --git a/cmake/cuda/FindCUDALibs.cmake b/cmake/cuda/FindCUDALibs.cmake
@@ -0,0 +1,193 @@
+# ==============================================
+# the following code is directly borrowed from
+# https://github.com/svenevs/cmake-cuda-targets
+# ==============================================
+
+# TODO: make this an actual find module...allow VERSION, QUIET, etc
+
+# TODO: don't use find_package(CUDA)?  But the thread specifically states that
+#       we should *NOT* require that enable_language(CUDA) has been done,
+#       meaning that e.g. CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES may not exist?
+#
+#       Solution?
+#       include(CheckLanguage)
+#       check_language(CUDA)
+#       if (NOT CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES)
+#       --> then we cannot win?
+find_package(CUDA REQUIRED)
+
+# Directories passed to find_library() as HINTS when searching for the CUDA
+# libraries. CUDA_TOOLKIT_ROOT_DIR is expected to come from the
+# find_package(CUDA) call above.
+# TODO: allow user bypass of this?
+list(APPEND CUDALibs_HINTS
+  "${CUDA_TOOLKIT_ROOT_DIR}/lib64"
+  "${CUDA_TOOLKIT_ROOT_DIR}/lib"
+  "${CUDA_TOOLKIT_ROOT_DIR}"
+)
+
+# Locate one CUDA library and wrap it in an imported interface target.
+#
+# Arguments:
+#   lib_name - bare library name (e.g. cudart), searched with find_library()
+#              using the directories in CUDALibs_HINTS.
+#
+# Results:
+#   CUDA_<LIB_NAME> - find_library() result variable (name upper-cased).
+#   CUDA::<lib_name> - imported INTERFACE target carrying CUDA_INCLUDE_DIRS and
+#                      the library path. It is created only when the library
+#                      was found and no target of that name exists yet, so
+#                      including this file twice does not fail in add_library().
+function(find_and_add_cuda_import_lib lib_name)
+  string(TOUPPER "${lib_name}" LIB_NAME)
+  find_library(CUDA_${LIB_NAME} ${lib_name} HINTS ${CUDALibs_HINTS})
+  # if(<variable>) is false for values ending in -NOTFOUND.
+  if (CUDA_${LIB_NAME} AND NOT TARGET CUDA::${lib_name})
+    add_library(CUDA::${lib_name} IMPORTED INTERFACE)
+    set_target_properties(CUDA::${lib_name}
+      PROPERTIES
+        INTERFACE_INCLUDE_DIRECTORIES
+          "${CUDA_INCLUDE_DIRS}"
+        INTERFACE_LINK_LIBRARIES
+          "${CUDA_${LIB_NAME}}"
+    )
+  endif()
+endfunction()
+
+# TODO: how to make sure `dependency` can actually be used
+# TODO: if dependency cannot be used, is it possible to
+#       delete CUDA::${lib_name}?
+# Append link dependencies to the CUDA::<lib_name> imported target.
+#
+# Arguments:
+#   lib_name   - bare library name; the target modified is CUDA::<lib_name>.
+#   dependency - first dependency (target name or library) to append.
+#   ARGN       - any further dependencies; they are appended as well, so an
+#                unquoted multi-element list such as ${BLAS_LIBRARIES} is
+#                forwarded completely instead of being cut to its first item.
+#
+# When CUDA::<lib_name> does not exist (the library was not found) the call is
+# a no-op rather than a configure error from set_property().
+function(add_cuda_link_dependency lib_name dependency)
+  if (NOT TARGET CUDA::${lib_name})
+    return()
+  endif()
+  set_property(
+    TARGET CUDA::${lib_name}
+    APPEND
+    PROPERTY
+      INTERFACE_LINK_LIBRARIES ${dependency} ${ARGN}
+  )
+endfunction()
+
+# Find the main CUDA runtime dynamic and static libraries.
+# These are a hard dependency for all other libraries, and
+# must be found.
+# TODO: right way to error out?
+# Shared and static flavours of the CUDA runtime.
+find_and_add_cuda_import_lib(cudart)
+find_and_add_cuda_import_lib(cudart_static)
+
+# On UNIX the static runtime additionally gets the system libraries
+# dl, pthread and rt as link dependencies.
+# TODO: what about windows???
+if(UNIX)
+  foreach(system_lib IN ITEMS dl pthread rt)
+    add_cuda_link_dependency(cudart_static ${system_lib})
+  endforeach()
+endif()
+
+# TODO: nvBLAS and example.  Depends on cuBLAS, but not sure how it works.
+#       Testing executable may need to find_package(BLAS)?  It seems like the idea is
+#       you write a standard BLAS level 3 operation, and at link time nvBLAS will take
+#       over somehow?
+# cuBLAS treated specially, static library needs to link against a BLAS
+# library for *gemm_ symbols.  So both dynamic and static library are only added
+# if BLAS can be found for consistency (missing dynamic case will be a library
+# load error at runtime).
+# Find dynamic blas for cusolver dynamic target
+# First pass: search for BLAS with BLA_STATIC switched off.
+set(BLA_STATIC OFF)
+find_package(BLAS)
+if (BLAS_FOUND)
+  # The cublas target is only looked up when a BLAS was found; it is then
+  # given the CUDA runtime target and the contents of BLAS_LIBRARIES as
+  # link dependencies.
+  find_and_add_cuda_import_lib(cublas)
+  add_cuda_link_dependency(cublas CUDA::cudart)
+  add_cuda_link_dependency(cublas ${BLAS_LIBRARIES})
+endif()
+
+# Find static blas for cublas static target
+# BLAS_FOUND is reset before the second search so that the check below
+# does not simply reuse the value left by the first pass.
+# NOTE(review): whether find_package(BLAS) really performs a fresh static
+# search here (rather than reusing cached results) is not visible from this
+# file - confirm.
+set(BLAS_FOUND OFF)
+set(BLA_STATIC ON)
+find_package(BLAS)
+if (BLAS_FOUND)
+  # Same wiring as above, using the static cuBLAS and static runtime targets.
+  find_and_add_cuda_import_lib(cublas_static)
+  add_cuda_link_dependency(cublas_static CUDA::cudart_static)
+  add_cuda_link_dependency(cublas_static ${BLAS_LIBRARIES})
+endif()
+
+# TODO: (nppi* nvblas)
+foreach(lib_stem IN ITEMS cufft cufftw curand cusolver cusparse nvgraph nvjpeg)
+  # Shared flavour: look it up and make it depend on the shared runtime.
+  find_and_add_cuda_import_lib(${lib_stem})
+  add_cuda_link_dependency(${lib_stem} CUDA::cudart)
+
+  # Static flavour: look it up and make it depend on the static runtime.
+  # TODO: if UNIX and VERSION >= 6.5
+  find_and_add_cuda_import_lib(${lib_stem}_static)
+  add_cuda_link_dependency(${lib_stem}_static CUDA::cudart_static)
+endforeach()
+
+
+
+# NVRTC (Runtime Compilation) only ships as a shared library.
+# TODO: nvrtc needs -lcuda (*NOT* cudart), but -lcuda (at least on this system)
+#       is going to point to /lib64/libcuda.so.
+#
+#       Since this is not in the HINTS paths searched above, what is the right
+#       way to create the CUDA::cuda target?
+find_and_add_cuda_import_lib(nvrtc)
+add_cuda_link_dependency(nvrtc cuda)
+
+# NVTX only ships as a shared library.
+# TODO: is this even useful outside of NSight Eclipse?
+find_and_add_cuda_import_lib(nvToolsExt)
+add_cuda_link_dependency(nvToolsExt CUDA::cudart)
+
+# cuLIBOS is a static only library, see
+#
+# https://devblogs.nvidia.com/10-ways-cuda-6-5-improves-performance-productivity
+#
+# > Static CUDA Libraries
+# > CUDA 6.5 (on Linux and Mac OS) now includes static library versions of the
+# > cuBLAS, cuSPARSE, cuFFT, cuRAND, and NPP libraries. This can reduce the
+# > number of dynamic library dependencies you need to include with your
+# > deployed applications. These new static libraries depend on a common thread
+# > abstraction layer library cuLIBOS (libculibos.a) distributed as part of the
+# > CUDA toolkit.
+find_and_add_cuda_import_lib(culibos)
+# Each of these static libraries gets CUDA::culibos as a link dependency.
+# (npp is not handled here.)
+foreach(static_stem IN ITEMS cublas cufft cusparse curand nvjpeg)
+  add_cuda_link_dependency(${static_stem}_static CUDA::culibos)
+endforeach()
+
+# cuSOLVER depends on cuBLAS and cuSPARSE (shared -> shared, static -> static).
+# NOTE: nvGRAPH relies on this, make sure it happens before nvGRAPH dependencies.
+add_cuda_link_dependency(cusolver CUDA::cublas)
+add_cuda_link_dependency(cusolver_static CUDA::cublas_static)
+add_cuda_link_dependency(cusolver CUDA::cusparse)
+add_cuda_link_dependency(cusolver_static CUDA::cusparse_static)
+
+# nvGRAPH depends on cuBLAS, cuRAND, cuSPARSE, and cuSOLVER.
+# NOTE: rely on link dependencies of cuSOLVER, this must happen after cusolver target.
+add_cuda_link_dependency(nvgraph CUDA::cusolver)
+add_cuda_link_dependency(nvgraph_static CUDA::cusolver_static)
+add_cuda_link_dependency(nvgraph CUDA::curand)
+add_cuda_link_dependency(nvgraph_static CUDA::curand_static)
+
+# NPP libraries and dependencies.  See: https://docs.nvidia.com/cuda/npp/index.html
+# TODO: document what nppc is (seems to be the underlying implementation for most of NPP?)
+find_and_add_cuda_import_lib(nppc)
+find_and_add_cuda_import_lib(nppc_static)
+
+# The bulk of the NPP libraries share one wiring pattern.
+foreach(npp_stem IN ITEMS nppial nppicc nppidei nppif nppig nppim nppist nppitc npps)
+  # Look up both flavours.
+  find_and_add_cuda_import_lib(${npp_stem})
+  find_and_add_cuda_import_lib(${npp_stem}_static)
+
+  # Shared flavour: depends on the shared runtime.
+  add_cuda_link_dependency(${npp_stem} CUDA::cudart)
+  # TODO: add this in since it is needed in static or rely on existing dynamic links?
+  # add_cuda_link_dependency(${npp_stem} CUDA::nppc)
+
+  # Static flavour: depends on the static runtime, nppc_static and culibos.
+  add_cuda_link_dependency(${npp_stem}_static CUDA::cudart_static)
+  add_cuda_link_dependency(${npp_stem}_static CUDA::nppc_static)
+  add_cuda_link_dependency(${npp_stem}_static CUDA::culibos)
+endforeach()
+
+# nppicom: JPEG compression and decompression functions in nppi_compression_functions.h
+# (only looked up; no link dependencies are attached here)
+find_and_add_cuda_import_lib(nppicom)
+find_and_add_cuda_import_lib(nppicom_static)
+
+# nppisu: memory support functions in nppi_support_functions.h
+find_and_add_cuda_import_lib(nppisu)
+find_and_add_cuda_import_lib(nppisu_static)
+add_cuda_link_dependency(nppisu CUDA::cudart)
+add_cuda_link_dependency(nppisu_static CUDA::cudart_static)
+
+# TODO: mysterious extra static libraries...what are they for?
+find_and_add_cuda_import_lib(cudadevrt)
+find_and_add_cuda_import_lib(cublas_device)
+
+# TODO: VERSION 9.2, search libcufft_static_nocallback.a
+#       https://docs.nvidia.com/cuda/cufft/index.html#oned-complex-to-complex-transforms
+
+# Intended to keep the helper functions from being exposed externally.
+# NOTE(review): unset() removes variables (normal, cache or environment); it
+# does not undefine commands, so both helper functions stay callable after
+# this file has been included and the two calls below have no effect on them.
+unset(find_and_add_cuda_import_lib)
+unset(add_cuda_link_dependency)
diff --git a/cmake/cuda/protect_nvcc_flags.cmake b/cmake/cuda/protect_nvcc_flags.cmake
@@ -0,0 +1,93 @@
+# Distributed under the OSI-approved BSD 3-Clause License.  See accompanying
+# file Copyright.txt or https://cmake.org/licensing for details.
+
+#[=======================================================================[.rst:
+
+Protect flags
+-------------
+
+.. only:: html
+
+   .. contents::
+
+CUDA Utilities
+^^^^^^^^^^^^^^
+
+This part of the protect flags module provides a set of utilities to assist users with CUDA as a language.
+
+
+It adds:
+
+
+.. command:: cmake_cuda_convert_flags
+    
+  Take a list of flags or a target and convert the flags to pass through the CUDA compiler to 
+  the host compiler by adding a LANGUAGE requirement.
+  This ensures the unmodified flags are only used when the compile language is not CUDA.
+
+  ``PROTECT_ONLY``
+    Just protect the flags, rather than passing them through to the host compiler.
+  
+  ``INTERFACE_TARGET <name>``
+    A target to take flags from to convert
+
+  ``LIST <name>``
+    A list of flags to protect (in place).
+
+
+
+#]=======================================================================]
+
+# This is a private function that just converts a list
+# It takes a name of a variable to modify in place
+# Private helper: protect one list of flags in place.
+#
+# Arguments:
+#   flags_name     - name of the variable (in the caller's scope) holding the
+#                    flags; it is overwritten with the protected expression.
+#   [protect_only] - optional boolean second argument. When true, only the
+#                    non-CUDA guard is emitted and no -Xcompiler part is
+#                    added. When omitted, the value of CCF_PROTECT_ONLY
+#                    visible from the calling scope is used, which is what
+#                    this helper relied on before.
+#
+# An empty flag list is left untouched.
+function(_CUDA_CONVERT_FLAGS flags_name)
+    # ARGV1 is only meaningful when a second argument was really passed.
+    if(ARGC GREATER 1)
+        set(protect_only "${ARGV1}")
+    else()
+        set(protect_only "${CCF_PROTECT_ONLY}")
+    endif()
+
+    set(old_flags "${${flags_name}}")
+
+    if(NOT "${old_flags}" STREQUAL "")
+        # Use old flags for non-CUDA targets
+        set(protected_flags "$<$<BUILD_INTERFACE:$<NOT:$<COMPILE_LANGUAGE:CUDA>>>:${old_flags}>")
+        # Add -Xcompiler wrapped flags for CUDA
+        if(NOT protect_only)
+            # -Xcompiler takes a single comma-separated argument
+            string(REPLACE ";" "," cuda_flags "${old_flags}")
+            string(APPEND protected_flags "$<$<BUILD_INTERFACE:$<COMPILE_LANGUAGE:CUDA>>:-Xcompiler=${cuda_flags}>")
+        endif()
+        set(${flags_name} "${protected_flags}" PARENT_SCOPE)
+    endif()
+endfunction()
+
+
+# Public entry point: protect flags held by targets and/or list variables.
+#
+# Keyword arguments (parsed with cmake_parse_arguments, prefix CCF):
+#   PROTECT_ONLY             - option; forwarded to _cuda_convert_flags.
+#   INTERFACE_TARGET <t>...  - targets whose property is rewritten in place.
+#   LIST <var>...            - names of list variables in the caller's scope
+#                              that are rewritten in place.
+function(CMAKE_CUDA_CONVERT_FLAGS)
+    cmake_parse_arguments(
+        CCF
+        "PROTECT_ONLY"
+        ""
+        "INTERFACE_TARGET;LIST"
+        ${ARGN})
+
+    foreach(EXISTING_TARGET IN LISTS CCF_INTERFACE_TARGET)
+        # NOTE(review): this rewrites INTERFACE_INCLUDE_DIRECTORIES although the
+        # module documentation talks about flags; confirm whether
+        # INTERFACE_COMPILE_OPTIONS was intended.
+        get_property(old_flags TARGET ${EXISTING_TARGET} PROPERTY INTERFACE_INCLUDE_DIRECTORIES)
+        _cuda_convert_flags(old_flags "${CCF_PROTECT_ONLY}")
+        set_property(TARGET ${EXISTING_TARGET} PROPERTY INTERFACE_INCLUDE_DIRECTORIES "${old_flags}")
+        message("DEBUG : ${old_flags}")
+    endforeach()
+
+    foreach(EXISTING_LIST IN LISTS CCF_LIST)
+        # Work on a local copy of the caller's list, then publish the result
+        # back under the same name. Reading the misspelled loop variable name
+        # here would yield an empty copy and wipe the caller's list.
+        set(LOCAL_LIST "${${EXISTING_LIST}}")
+        _cuda_convert_flags(LOCAL_LIST "${CCF_PROTECT_ONLY}")
+        set(${EXISTING_LIST} "${LOCAL_LIST}" PARENT_SCOPE)
+    endforeach()
+endfunction()
+
+# Rewrite the INTERFACE_INCLUDE_DIRECTORIES property of EXISTING_TARGET so
+# that, in the build interface, the stored value is used unchanged for
+# non-CUDA compiles and as one comma-joined -Xcompiler= argument for CUDA
+# compiles. A target whose property is empty is left untouched.
+# NOTE(review): the property handled is INTERFACE_INCLUDE_DIRECTORIES even
+# though the function name refers to flags - confirm this is intended.
+function(CUDA_CONVERT_FLAGS EXISTING_TARGET)
+    get_property(current_value TARGET ${EXISTING_TARGET} PROPERTY INTERFACE_INCLUDE_DIRECTORIES)
+    if("${current_value}" STREQUAL "")
+        return()
+    endif()
+
+    # -Xcompiler expects its payload as a single comma-separated argument.
+    string(REPLACE ";" "," comma_joined "${current_value}")
+    set(host_part "$<$<BUILD_INTERFACE:$<NOT:$<COMPILE_LANGUAGE:CUDA>>>:${current_value}>")
+    set(cuda_part "$<$<BUILD_INTERFACE:$<COMPILE_LANGUAGE:CUDA>>:-Xcompiler=${comma_joined}>")
+    set_property(TARGET ${EXISTING_TARGET} PROPERTY INTERFACE_INCLUDE_DIRECTORIES
+        "${host_part}${cuda_part}"
+        )
+endfunction()
diff --git a/cmake/cuda/protect_pthread_flag.cmake b/cmake/cuda/protect_pthread_flag.cmake
@@ -0,0 +1,30 @@
+#
+# The '-pthread' flag is not supported by nvcc; replace it with
+#  '-Xcompiler -pthread'
+#
+
+# Make a -pthread flag carried by EXISTING_TARGET safe for CUDA compiles.
+#
+# Arguments:
+#   EXISTING_TARGET - target whose INTERFACE_INCLUDE_DIRECTORIES and
+#                     INTERFACE_COMPILE_OPTIONS properties are inspected.
+#
+# For each of the two properties, when the value contains -pthread and has
+# not been wrapped with -Xcompiler yet, it is replaced (build interface only)
+# by an expression that keeps the original value for non-CUDA compiles and
+# uses -Xcompiler=-pthread in place of -pthread for CUDA compiles.
+# Properties without -pthread are left exactly as they are, and calling the
+# function twice on the same target does not wrap the flag a second time.
+function(CUDA_PROTECT_PTHREAD_FLAG EXISTING_TARGET)
+
+  foreach(prop IN ITEMS INTERFACE_INCLUDE_DIRECTORIES INTERFACE_COMPILE_OPTIONS)
+    get_property(old_flags TARGET ${EXISTING_TARGET} PROPERTY ${prop})
+    # Skip values that have nothing to protect or that already route the
+    # flag through -Xcompiler.
+    if("${old_flags}" MATCHES "-pthread" AND NOT "${old_flags}" MATCHES "-Xcompiler")
+      # Single-token form, so the option is not split or quoted on its way
+      # to the command line.
+      string(REPLACE "-pthread" "-Xcompiler=-pthread" cuda_flags "${old_flags}")
+      set_property(TARGET ${EXISTING_TARGET} PROPERTY ${prop}
+        "$<$<BUILD_INTERFACE:$<NOT:$<COMPILE_LANGUAGE:CUDA>>>:${old_flags}>$<$<BUILD_INTERFACE:$<COMPILE_LANGUAGE:CUDA>>:${cuda_flags}>"
+        )
+    endif()
+  endforeach()
+
+  # debug
+  get_property(current_flags TARGET ${EXISTING_TARGET} PROPERTY INTERFACE_INCLUDE_DIRECTORIES)
+  message("DEBUG : TARGET=${EXISTING_TARGET} compile flags=${current_flags}")
+  get_property(current_flags TARGET ${EXISTING_TARGET} PROPERTY INTERFACE_COMPILE_OPTIONS)
+  message("DEBUG : TARGET=${EXISTING_TARGET} compile flags=${current_flags}")
+
+endfunction()