diff --git a/sycl/source/detail/graph_impl.cpp b/sycl/source/detail/graph_impl.cpp index 308fa742d2047..8e6b6e77f373b 100644 --- a/sycl/source/detail/graph_impl.cpp +++ b/sycl/source/detail/graph_impl.cpp @@ -1268,7 +1268,6 @@ void exec_graph_impl::updateImpl(std::shared_ptr Node) { EliminatedArgMask = SyclKernelImpl->getKernelArgMask(); } else if (Kernel != nullptr) { PiKernel = Kernel->getHandleRef(); - auto SyclProg = Kernel->getProgramImpl(); EliminatedArgMask = Kernel->getKernelArgMask(); } else { std::tie(PiKernel, std::ignore, EliminatedArgMask, std::ignore) = diff --git a/sycl/source/detail/kernel_bundle_impl.hpp b/sycl/source/detail/kernel_bundle_impl.hpp index 2861acedef3d8..26fa707a307f9 100644 --- a/sycl/source/detail/kernel_bundle_impl.hpp +++ b/sycl/source/detail/kernel_bundle_impl.hpp @@ -563,9 +563,9 @@ class kernel_bundle_impl { MContext, KernelID.get_name(), /*PropList=*/{}, SelectedImage->get_program_ref()); - std::shared_ptr KernelImpl = - std::make_shared(Kernel, detail::getSyclObjImpl(MContext), - SelectedImage, Self, ArgMask, CacheMutex); + std::shared_ptr KernelImpl = std::make_shared( + Kernel, detail::getSyclObjImpl(MContext), SelectedImage, Self, ArgMask, + SelectedImage->get_program_ref(), CacheMutex); return detail::createSyclObjFromImpl(KernelImpl); } diff --git a/sycl/source/detail/kernel_impl.cpp b/sycl/source/detail/kernel_impl.cpp index 89b240b816ff9..9bb58277dcea9 100644 --- a/sycl/source/detail/kernel_impl.cpp +++ b/sycl/source/detail/kernel_impl.cpp @@ -41,7 +41,7 @@ kernel_impl::kernel_impl(sycl::detail::pi::PiKernel Kernel, KernelBundleImplPtr KernelBundleImpl, const KernelArgMask *ArgMask) : MKernel(Kernel), MContext(ContextImpl), - MProgramImpl(std::move(ProgramImpl)), + MProgram(ProgramImpl->getHandleRef()), MCreatedFromSource(IsCreatedFromSource), MKernelBundleImpl(std::move(KernelBundleImpl)), MKernelArgMaskPtr{ArgMask} { @@ -55,15 +55,16 @@ kernel_impl::kernel_impl(sycl::detail::pi::PiKernel Kernel, "Input context must be the same as the context of cl_kernel", PI_ERROR_INVALID_CONTEXT); - MIsInterop = MProgramImpl->isInterop(); + MIsInterop = ProgramImpl->isInterop(); } kernel_impl::kernel_impl(sycl::detail::pi::PiKernel Kernel, ContextImplPtr ContextImpl, DeviceImageImplPtr DeviceImageImpl, KernelBundleImplPtr KernelBundleImpl, - const KernelArgMask *ArgMask, std::mutex *CacheMutex) - : MKernel(Kernel), MContext(std::move(ContextImpl)), MProgramImpl(nullptr), + const KernelArgMask *ArgMask, PiProgram ProgramPI, + std::mutex *CacheMutex) + : MKernel(Kernel), MContext(std::move(ContextImpl)), MProgram(ProgramPI), MCreatedFromSource(false), MDeviceImageImpl(std::move(DeviceImageImpl)), MKernelBundleImpl(std::move(KernelBundleImpl)), MKernelArgMaskPtr{ArgMask}, MCacheMutex{CacheMutex} { @@ -71,7 +72,7 @@ kernel_impl::kernel_impl(sycl::detail::pi::PiKernel Kernel, } kernel_impl::kernel_impl(ContextImplPtr Context, ProgramImplPtr ProgramImpl) - : MContext(Context), MProgramImpl(std::move(ProgramImpl)) {} + : MContext(Context), MProgram(ProgramImpl->getHandleRef()) {} kernel_impl::~kernel_impl() { // TODO catch an exception and put it to list of asynchronous exceptions diff --git a/sycl/source/detail/kernel_impl.hpp b/sycl/source/detail/kernel_impl.hpp index e5952fd4d22c7..4a23fb4e39ef4 100644 --- a/sycl/source/detail/kernel_impl.hpp +++ b/sycl/source/detail/kernel_impl.hpp @@ -32,6 +32,7 @@ class kernel_bundle_impl; using ContextImplPtr = std::shared_ptr; using ProgramImplPtr = std::shared_ptr; using KernelBundleImplPtr = std::shared_ptr; +using sycl::detail::pi::PiProgram; class kernel_impl { public: /// Constructs a SYCL kernel instance from a PiKernel @@ -74,7 +75,8 @@ class kernel_impl { kernel_impl(sycl::detail::pi::PiKernel Kernel, ContextImplPtr ContextImpl, DeviceImageImplPtr DeviceImageImpl, KernelBundleImplPtr KernelBundleImpl, - const KernelArgMask *ArgMask, std::mutex *CacheMutex); + const KernelArgMask *ArgMask, PiProgram ProgramPI, + std::mutex *CacheMutex); /// Constructs a SYCL kernel for host device /// @@ -179,7 +181,7 @@ class kernel_impl { bool isInterop() const { return MIsInterop; } - ProgramImplPtr getProgramImpl() const { return MProgramImpl; } + PiProgram getProgramRef() const { return MProgram; } ContextImplPtr getContextImplPtr() const { return MContext; } std::mutex &getNoncacheableEnqueueMutex() { @@ -192,7 +194,7 @@ class kernel_impl { private: sycl::detail::pi::PiKernel MKernel; const ContextImplPtr MContext; - const ProgramImplPtr MProgramImpl; + const PiProgram MProgram = nullptr; bool MCreatedFromSource = true; const DeviceImageImplPtr MDeviceImageImpl; const KernelBundleImplPtr MKernelBundleImpl; diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index 7781fb7e1cd1e..056a4239fde1c 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -1993,8 +1993,7 @@ void instrumentationAddExtraKernelMetadata( EliminatedArgMask = KernelImpl->getKernelArgMask(); Program = KernelImpl->getDeviceImage()->get_program_ref(); } else if (nullptr != SyclKernel) { - auto SyclProg = SyclKernel->getProgramImpl(); - Program = SyclProg->getHandleRef(); + Program = SyclKernel->getProgramRef(); if (!SyclKernel->isCreatedFromSource()) EliminatedArgMask = SyclKernel->getKernelArgMask(); } else { @@ -2489,8 +2488,7 @@ pi_int32 enqueueImpCommandBufferKernel( EliminatedArgMask = SyclKernelImpl->getKernelArgMask(); } else if (Kernel != nullptr) { PiKernel = Kernel->getHandleRef(); - auto SyclProg = Kernel->getProgramImpl(); - PiProgram = SyclProg->getHandleRef(); + PiProgram = Kernel->getProgramRef(); EliminatedArgMask = Kernel->getKernelArgMask(); } else { std::tie(PiKernel, std::ignore, EliminatedArgMask, PiProgram) = @@ -2603,8 +2601,8 @@ pi_int32 enqueueImpKernel( assert(MSyclKernel->get_info() == Queue->get_context()); Kernel = MSyclKernel->getHandleRef(); - auto SyclProg = MSyclKernel->getProgramImpl(); - Program = SyclProg->getHandleRef(); + Program = MSyclKernel->getProgramRef(); + // Non-cacheable kernels use mutexes from kernel_impls. // TODO this can still result in a race condition if multiple SYCL // kernels are created with the same native handle. To address this, diff --git a/sycl/test-e2e/KernelAndProgram/kernel-bundle-find-run.cpp b/sycl/test-e2e/KernelAndProgram/kernel-bundle-find-run.cpp new file mode 100644 index 0000000000000..6a36fc3ad2bc2 --- /dev/null +++ b/sycl/test-e2e/KernelAndProgram/kernel-bundle-find-run.cpp @@ -0,0 +1,74 @@ +// RUN: %{build} -o %t.out +// RUN: %{run} %t.out + +// This test finds a known kernel and runs it. + +#include + +using namespace sycl; + +// Kernel finder +class KernelFinder { + queue &Queue; + std::vector AllKernelIDs; + +public: + KernelFinder(queue &Q) : Queue(Q) { + // Obtain kernel bundle + kernel_bundle Bundle = + get_kernel_bundle(Queue.get_context()); + std::cout << "Bundle obtained\n"; + AllKernelIDs = sycl::get_kernel_ids(); + std::cout << "Number of kernels = " << AllKernelIDs.size() << std::endl; + for (auto K : AllKernelIDs) { + std::cout << "Kernel obtained: " << K.get_name() << std::endl; + } + } + + kernel get_kernel(const char *name) { + kernel_bundle Bundle = + get_kernel_bundle(Queue.get_context()); + for (auto K : AllKernelIDs) { + auto Kname = K.get_name(); + if (strcmp(name, Kname) == 0) { + kernel Kernel = Bundle.get_kernel(K); + std::cout << "Found kernel\n"; + return Kernel; + } + } + std::cout << "No kernel found\n"; + exit(1); + } +}; + +void sycl_kernel(queue Queue) { + range<1> R1{1}; + Queue.submit([&](handler &CGH) { + CGH.parallel_for(R1, [=](id<1> WIid) {}); + }); + Queue.wait(); +} + +int test_sycl_kernel(queue Queue) { + KernelFinder KF(Queue); + + kernel Kernel = KF.get_kernel("_ZTSZZ11sycl_kernelN4sycl3_V15queueEENKUlRNS0_" + "7handlerEE_clES3_E7KernelB"); + + range<1> R1{1}; + Queue.submit([&](handler &Handler) { Handler.parallel_for(R1, Kernel); }); + Queue.wait(); + + return 0; +} + +int main() { + queue Queue; + + sycl_kernel(Queue); + std::cout << "sycl_kernel done\n"; + test_sycl_kernel(Queue); + std::cout << "test_sycl_kernel done\n"; + + return 0; +} \ No newline at end of file