From 9b62871b877882242a4f910b84d1a5086f109097 Mon Sep 17 00:00:00 2001 From: Giorgis Georgakoudis Date: Fri, 20 Dec 2024 11:55:39 -0800 Subject: [PATCH] Refactor --- lib/JitEngineDevice.hpp | 15 +++++++++++---- pass/ProteusPass.cpp | 6 ++++-- tests/gpu/file1_kernel_launcher.cpp | 10 ++++++---- tests/gpu/file2_kernel_launcher.cpp | 4 +--- tests/gpu/gpu_common.h | 6 ------ tests/gpu/launcher.hpp | 19 +++++++++++-------- 6 files changed, 33 insertions(+), 27 deletions(-) diff --git a/lib/JitEngineDevice.hpp b/lib/JitEngineDevice.hpp index c77fdd4c..1de7ad31 100644 --- a/lib/JitEngineDevice.hpp +++ b/lib/JitEngineDevice.hpp @@ -583,10 +583,17 @@ void JitEngineDevice::registerFunction(void *Handle, void *Kernel, int32_t NumRCs) { DBG(Logger::logs("proteus") << "Register function " << Kernel << " To Handle " << Handle << "\n"); - assert(!KernelToHandleMap.contains(Kernel) && - "Expected kernel inserted only once in the map"); - //if (KernelToHandleMap.contains(Kernel)) - // return; + // NOTE: HIP RDC might call multiple times the registerFunction for the same + // kernel, which has weak linkage, when it comes from different translation + // units. Either the first or the second call can prevail and should be + // equivalent. We let the first one prevail. + if (KernelToHandleMap.contains(Kernel)) { + DBG(Logger::logs("proteus") + << "Warning: duplicate register function for kernel " + + std::string(KernelName) + << "\n"); + return; + } KernelToHandleMap[Kernel] = Handle; JITKernelInfoMap[Kernel] = diff --git a/pass/ProteusPass.cpp b/pass/ProteusPass.cpp index 3025c8da..8804da6a 100644 --- a/pass/ProteusPass.cpp +++ b/pass/ProteusPass.cpp @@ -250,8 +250,10 @@ class ProteusJitPassImpl { } if (JitFunctionInfoMap.contains(Fn)) { - // continue; - FATAL_ERROR("Duplicate jit annotation for Fn " + Fn->getName()); + DEBUG(Logger::logs("proteus-pass") + << "Warning: Duplicate jit annotation for Fn " + Fn->getName() + + "\n"); + continue; } DEBUG(Logger::logs("proteus-pass") diff --git a/tests/gpu/file1_kernel_launcher.cpp b/tests/gpu/file1_kernel_launcher.cpp index 411c752f..f8961f0e 100644 --- a/tests/gpu/file1_kernel_launcher.cpp +++ b/tests/gpu/file1_kernel_launcher.cpp @@ -1,24 +1,26 @@ +// clang-format off // RUN: ./multi_file_launcher.%ext | FileCheck %s --check-prefixes=CHECK,CHECK-FIRST // Second run uses the object cache. // RUN: ./multi_file_launcher.%ext | FileCheck %s --check-prefixes=CHECK,CHECK-SECOND +// clang-format on #include #include #include "gpu_common.h" #include "launcher.hpp" - void foo(); int main() { - launcher(my_kernel_body); + gpuErrCheck(launcher(kernel_body)); gpuErrCheck(gpuDeviceSynchronize()); foo(); + gpuErrCheck(gpuDeviceSynchronize()); return 0; } -// CHECK: File1 Kernel -// CHECK: File2 Kernel +// CHECK: Kernel body +// CHECK: Kernel body // CHECK: JitCache hits 0 total 2 // CHECK: HashValue {{[0-9]+}} NumExecs 1 NumHits 0 // CHECK: HashValue {{[0-9]+}} NumExecs 1 NumHits 0 diff --git a/tests/gpu/file2_kernel_launcher.cpp b/tests/gpu/file2_kernel_launcher.cpp index 62b8e453..7c56b76f 100644 --- a/tests/gpu/file2_kernel_launcher.cpp +++ b/tests/gpu/file2_kernel_launcher.cpp @@ -3,6 +3,4 @@ #include "gpu_common.h" #include "launcher.hpp" -void foo() { - launcher(my_kernel_body); -} +void foo() { gpuErrCheck(launcher(kernel_body)); } diff --git a/tests/gpu/gpu_common.h b/tests/gpu/gpu_common.h index b636461b..71c82ad2 100644 --- a/tests/gpu/gpu_common.h +++ b/tests/gpu/gpu_common.h @@ -29,9 +29,3 @@ abort(); \ } \ } - -struct kernel_body { - __device__ void operator()() { printf("Kernel"); } -}; - -const kernel_body my_kernel_body{}; diff --git a/tests/gpu/launcher.hpp b/tests/gpu/launcher.hpp index 427e921a..9cebbb49 100644 --- a/tests/gpu/launcher.hpp +++ b/tests/gpu/launcher.hpp @@ -1,13 +1,16 @@ -template -__global__ -__attribute__((annotate("jit"))) -void kernel(LB lb) { +struct kernel_body_t { + __device__ void operator()() { printf("Kernel body"); } +}; + +const kernel_body_t kernel_body{}; + +template +__global__ __attribute__((annotate("jit"))) void kernel(LB lb) { lb(); } -template -gpuError_t launcher(T lb) { - auto func = reinterpret_cast( & kernel ); - void *args[] = {(void*)&lb }; +template gpuError_t launcher(T lb) { + auto func = reinterpret_cast(&kernel); + void *args[] = {(void *)&lb}; return gpuLaunchKernel(func, 1, 1, args, 0, 0); }