From 25133d76294f3944592d671c612eb55e3a5129d3 Mon Sep 17 00:00:00 2001 From: David Beckingsale Date: Mon, 23 Dec 2024 10:33:52 -0800 Subject: [PATCH] Fix multiple annotations with RDC (#78) Co-authored-by: Giorgis Georgakoudis --- lib/JitEngineDevice.hpp | 13 +++++++++++-- pass/ProteusPass.cpp | 8 ++++++-- tests/gpu/CMakeLists.txt | 2 ++ tests/gpu/file1_kernel_launcher.cpp | 28 ++++++++++++++++++++++++++++ tests/gpu/file2_kernel_launcher.cpp | 6 ++++++ tests/gpu/gpu_common.h | 2 +- tests/gpu/launcher.hpp | 16 ++++++++++++++++ 7 files changed, 70 insertions(+), 5 deletions(-) create mode 100644 tests/gpu/file1_kernel_launcher.cpp create mode 100644 tests/gpu/file2_kernel_launcher.cpp create mode 100644 tests/gpu/launcher.hpp diff --git a/lib/JitEngineDevice.hpp b/lib/JitEngineDevice.hpp index aa253171..1de7ad31 100644 --- a/lib/JitEngineDevice.hpp +++ b/lib/JitEngineDevice.hpp @@ -583,8 +583,17 @@ void JitEngineDevice::registerFunction(void *Handle, void *Kernel, int32_t NumRCs) { DBG(Logger::logs("proteus") << "Register function " << Kernel << " To Handle " << Handle << "\n"); - assert(!KernelToHandleMap.contains(Kernel) && - "Expected kernel inserted only once in the map"); + // NOTE: HIP RDC might call multiple times the registerFunction for the same + // kernel, which has weak linkage, when it comes from different translation + // units. Either the first or the second call can prevail and should be + // equivalent. We let the first one prevail. + if (KernelToHandleMap.contains(Kernel)) { + DBG(Logger::logs("proteus") + << "Warning: duplicate register function for kernel " + + std::string(KernelName) + << "\n"); + return; + } KernelToHandleMap[Kernel] = Handle; JITKernelInfoMap[Kernel] = diff --git a/pass/ProteusPass.cpp b/pass/ProteusPass.cpp index 6eb243de..8804da6a 100644 --- a/pass/ProteusPass.cpp +++ b/pass/ProteusPass.cpp @@ -249,8 +249,12 @@ class ProteusJitPassImpl { " to be a kernel function!"); } - if (JitFunctionInfoMap.contains(Fn)) - FATAL_ERROR("Duplicate jit annotation for Fn " + Fn->getName()); + if (JitFunctionInfoMap.contains(Fn)) { + DEBUG(Logger::logs("proteus-pass") + << "Warning: Duplicate jit annotation for Fn " + Fn->getName() + + "\n"); + continue; + } DEBUG(Logger::logs("proteus-pass") << "JIT Function " << Fn->getName() << "\n"); diff --git a/tests/gpu/CMakeLists.txt b/tests/gpu/CMakeLists.txt index f9adb727..d750e6d9 100644 --- a/tests/gpu/CMakeLists.txt +++ b/tests/gpu/CMakeLists.txt @@ -182,6 +182,7 @@ CREATE_GPU_TEST(kernel_unused_gvar kernel_unused_gvar.cpp kernel_unused_gvar_def CREATE_GPU_TEST(kernel_repeat kernel_repeat.cpp) CREATE_GPU_TEST(kernel_launch_exception kernel_launch_exception.cpp) CREATE_GPU_TEST(kernel_preset_bounds kernel_preset_bounds.cpp) +CREATE_GPU_TEST(multi_file_launcher file1_kernel_launcher.cpp file2_kernel_launcher.cpp) CREATE_GPU_TEST_RDC(kernel kernel.cpp) CREATE_GPU_TEST_RDC(kernel_cache kernel_cache.cpp) @@ -203,6 +204,7 @@ CREATE_GPU_TEST_RDC(kernel_calls_func kernel_calls_func.cpp device_func.cpp) CREATE_GPU_TEST_RDC(kernel_repeat kernel_repeat.cpp) CREATE_GPU_TEST_RDC(kernel_launch_exception kernel_launch_exception.cpp) CREATE_GPU_TEST_RDC(kernel_preset_bounds kernel_preset_bounds.cpp) +CREATE_GPU_TEST_RDC(multi_file_launcher file1_kernel_launcher.cpp file2_kernel_launcher.cpp) CREATE_GPU_LIBRARY(device_func_lib device_func.cpp) CREATE_GPU_TEST_RDC_LIBS(kernel_calls_func_lib device_func_lib kernel_calls_func_lib.cpp) diff --git a/tests/gpu/file1_kernel_launcher.cpp b/tests/gpu/file1_kernel_launcher.cpp new file mode 100644 index 00000000..f8961f0e --- /dev/null +++ b/tests/gpu/file1_kernel_launcher.cpp @@ -0,0 +1,28 @@ +// clang-format off +// RUN: ./multi_file_launcher.%ext | FileCheck %s --check-prefixes=CHECK,CHECK-FIRST +// Second run uses the object cache. +// RUN: ./multi_file_launcher.%ext | FileCheck %s --check-prefixes=CHECK,CHECK-SECOND +// clang-format on +#include +#include + +#include "gpu_common.h" +#include "launcher.hpp" + +void foo(); + +int main() { + gpuErrCheck(launcher(kernel_body)); + gpuErrCheck(gpuDeviceSynchronize()); + foo(); + gpuErrCheck(gpuDeviceSynchronize()); + return 0; +} + +// CHECK: Kernel body +// CHECK: Kernel body +// CHECK: JitCache hits 0 total 2 +// CHECK: HashValue {{[0-9]+}} NumExecs 1 NumHits 0 +// CHECK: HashValue {{[0-9]+}} NumExecs 1 NumHits 0 +// CHECK-FIRST: JitStorageCache hits 0 total 2 +// CHECK-SECOND: JitStorageCache hits 2 total 2 diff --git a/tests/gpu/file2_kernel_launcher.cpp b/tests/gpu/file2_kernel_launcher.cpp new file mode 100644 index 00000000..7c56b76f --- /dev/null +++ b/tests/gpu/file2_kernel_launcher.cpp @@ -0,0 +1,6 @@ +#include + +#include "gpu_common.h" +#include "launcher.hpp" + +void foo() { gpuErrCheck(launcher(kernel_body)); } diff --git a/tests/gpu/gpu_common.h b/tests/gpu/gpu_common.h index 7fbf296b..71c82ad2 100644 --- a/tests/gpu/gpu_common.h +++ b/tests/gpu/gpu_common.h @@ -28,4 +28,4 @@ gpuGetErrorString(err)); \ abort(); \ } \ - } \ No newline at end of file + } diff --git a/tests/gpu/launcher.hpp b/tests/gpu/launcher.hpp new file mode 100644 index 00000000..70ddaf69 --- /dev/null +++ b/tests/gpu/launcher.hpp @@ -0,0 +1,16 @@ +struct kernel_body_t { + __device__ void operator()() { printf("Kernel body\n"); } +}; + +const kernel_body_t kernel_body{}; + +template +__global__ __attribute__((annotate("jit"))) void kernel(LB lb) { + lb(); +} + +template gpuError_t __attribute__((always_inline)) launcher(T lb) { + auto func = reinterpret_cast(&kernel); + void *args[] = {(void *)&lb}; + return gpuLaunchKernel(func, 1, 1, args, 0, 0); +}