diff --git a/sycl/doc/syclcompat/README.md b/sycl/doc/syclcompat/README.md index 8c54784070779..a5110b2036931 100644 --- a/sycl/doc/syclcompat/README.md +++ b/sycl/doc/syclcompat/README.md @@ -1237,9 +1237,11 @@ expressions that might throw `sycl::exception` and `std::runtime_error`. If no exceptions are thrown, it returns `syclcompat::error_code::SUCCESS`. If a `sycl::exception` is caught, it returns `syclcompat::error_code::BACKEND_ERROR`. If a `std::runtime_error` exception is caught, + `syclcompat::error_code::DEFAULT_ERROR` is returned instead. For both cases, it prints the error message to the standard error stream. + ``` c++ namespace syclcompat { @@ -1268,6 +1270,7 @@ template class syclcompat_kernel_scalar; #define SYCLCOMPAT_EXPORT #endif + namespace syclcompat { enum error_code { SUCCESS = 0, BACKEND_ERROR = 1, DEFAULT_ERROR = 999 }; } @@ -1287,6 +1290,15 @@ to get the kernel information. Overloads are provided to allow either returning a `kernel_function_info` object, or to return by pointer argument. In the current version, `kernel_function_info` describes only maximum work-group size. +SYCLcompat also provides the `kernel_library` and `kernel_function` classes. +`kernel_library` facilitates the loading and unloading of kernel libraries. +`kernel_function` represents a specific kernel function within a loaded librariy +and can be invoked with specified arguments. +`load_kernel_library`, `load_kernel_library_mem`, and `unload_kernel_library` are +free functions to handle the loading and unloading of `kernel_library` objects. +`get_kernel_function`, and `invoke_kernel_function` offer a similar functionality +for `kernel_function` objects. + ``` c++ namespace syclcompat { @@ -1297,6 +1309,34 @@ struct kernel_function_info { static void get_kernel_function_info(kernel_function_info *kernel_info, const void *function); static kernel_function_info get_kernel_function_info(const void *function); + +class kernel_library { + kernel_library(); + kernel_library(void *ptr); + operator void *() const; +}; + +static kernel_library load_kernel_library(const std::string &name); +static kernel_library load_kernel_library_mem(char const *const image); +static void unload_kernel_library(const kernel_library &library); + +class kernel_function { + kernel_function(); + kernel_function(kernel_functor ptr); + operator void *() const; + void operator()(sycl::queue &q, const sycl::nd_range<3> &range, + unsigned int local_mem_size, void **args, void **extra); +}; + +static kernel_function get_kernel_function(kernel_library &library, + const std::string &name); +static void invoke_kernel_function(kernel_function &function, + sycl::queue &queue, + sycl::range<3> group_range, + sycl::range<3> local_range, + unsigned int local_mem_size, + void **kernel_params, void **extra); + } // namespace syclcompat ``` diff --git a/sycl/include/syclcompat/kernel.hpp b/sycl/include/syclcompat/kernel.hpp index 0132e98e43841..420832d561827 100644 --- a/sycl/include/syclcompat/kernel.hpp +++ b/sycl/include/syclcompat/kernel.hpp @@ -31,6 +31,25 @@ #pragma once +#ifdef _WIN32 +#include +#include +#else +#include +#endif + +#if defined(__has_include) && __has_include() +#include +#elif defined(__has_include) && __has_include() +#include +#else +#error "SYCLomatic runtime requires C++ filesystem support" +#endif + +#include +#include +#include + #include #include #include @@ -44,14 +63,16 @@ struct kernel_function_info { int max_work_group_size = 0; }; -static void get_kernel_function_info(kernel_function_info *kernel_info, - const void *function) { +static inline void get_kernel_function_info(kernel_function_info *kernel_info, + const void *function) { kernel_info->max_work_group_size = detail::dev_mgr::instance() .current_device() .get_info(); } -static kernel_function_info get_kernel_function_info(const void *function) { + +static inline kernel_function_info +get_kernel_function_info(const void *function) { kernel_function_info kernel_info; kernel_info.max_work_group_size = detail::dev_mgr::instance() @@ -60,4 +81,389 @@ static kernel_function_info get_kernel_function_info(const void *function) { return kernel_info; } +namespace detail { + +#if defined(__has_include) && __has_include() +namespace fs = std::filesystem; +#else +namespace fs = std::experimental::filesystem; +#endif + +/// Write data to temporary file and return absolute path to temporary file. +/// Temporary file is created in a temporary directory both of which have random +/// names with only the user having access permissions. Only one temporary file +/// will be created in the temporary directory. +static inline fs::path write_data_to_file(char const *const data, size_t size) { + std::error_code ec; + + if (sizeof(size_t) >= sizeof(std::streamsize) && + size > (std::numeric_limits::max)()) + throw std::runtime_error("[SYCLcompat] data file too large"); + + // random number generator + std::random_device dev; + std::mt19937 prng(dev()); + std::uniform_int_distribution rand(0); + + // find temporary directory + auto tmp_dir = fs::temp_directory_path(ec); + if (ec) + throw std::runtime_error("[SYCLcompat] could not find temporary directory"); + + // create private directory + std::stringstream directory; + fs::path directory_path; + constexpr int max_attempts = 5; + int i; + + for (i = 0; i < max_attempts; i++) { + directory << std::hex << rand(prng); + directory_path = tmp_dir / directory.str(); + if (fs::create_directory(directory_path)) { + break; + } + } + if (i == max_attempts) + throw std::runtime_error("[SYCLcompat] could not create directory"); + + // only allow owner permissions to private directory + fs::permissions(directory_path, fs::perms::owner_all, ec); + if (ec) + throw std::runtime_error( + "[SYCLcompat] could not set directory permissions"); + + // random filename in private directory + std::stringstream filename; + filename << std::hex << rand(prng); +#ifdef _WIN32 + auto filepath = directory_path / (filename.str() + ".dll"); +#else + auto filepath = directory_path / filename.str(); +#endif + + // write data to temporary file + auto outfile = std::ofstream(filepath, std::ios::out | std::ios::binary); + if (outfile) { + // only allow program to write file + fs::permissions(filepath, fs::perms::owner_write, ec); + if (ec) + throw std::runtime_error("[SYCLcompat] could not set permissions"); + + outfile.write(data, size); + if (!outfile.good()) + throw std::runtime_error("[SYCLcompat] could not write data"); + outfile.close(); + + // only allow program to read/execute file + fs::permissions(filepath, fs::perms::owner_read | fs::perms::owner_exec, + ec); + if (ec) + throw std::runtime_error("[SYCLcompat] could not set permissions"); + } else + throw std::runtime_error("[SYCLcompat] could not write data"); + + // check temporary file contents + auto infile = std::ifstream(filepath, std::ios::in | std::ios::binary); + if (infile) { + bool mismatch = false; + size_t cnt = 0; + + while (1) { + char c; + infile.get(c); + if (infile.eof()) + break; + if (c != data[cnt++]) + mismatch = true; + } + if (cnt != size || mismatch) + throw std::runtime_error( + "[SYCLcompat] file contents not written correctly"); + } else + throw std::runtime_error("[SYCLcompat] could not validate file"); + + if (!filepath.is_absolute()) + throw std::runtime_error("[SYCLcompat] temporary filepath is not absolute"); + + return filepath; +} + +static inline uint16_t extract16(unsigned char const *const ptr) { + uint16_t ret = 0; + + ret |= static_cast(ptr[0]) << 0; + ret |= static_cast(ptr[1]) << 8; + + return (ret); +} + +static inline uint32_t extract32(unsigned char const *const ptr) { + uint32_t ret = 0; + + ret |= static_cast(ptr[0]) << 0; + ret |= static_cast(ptr[1]) << 8; + ret |= static_cast(ptr[2]) << 16; + ret |= static_cast(ptr[3]) << 24; + + return (ret); +} + +static inline uint64_t extract64(unsigned char const *const ptr) { + uint64_t ret = 0; + + ret |= static_cast(ptr[0]) << 0; + ret |= static_cast(ptr[1]) << 8; + ret |= static_cast(ptr[2]) << 16; + ret |= static_cast(ptr[3]) << 24; + ret |= static_cast(ptr[4]) << 32; + ret |= static_cast(ptr[5]) << 40; + ret |= static_cast(ptr[6]) << 48; + ret |= static_cast(ptr[7]) << 56; + + return (ret); +} + +static inline uint64_t get_lib_size(char const *const blob) { +#ifdef _WIN32 + /////////////////////////////////////////////////////////////////////// + // Analyze DOS stub + unsigned char const *const ublob = + reinterpret_cast(blob); + if (ublob[0] != 0x4d || ublob[1] != 0x5a) { + throw std::runtime_error("[SYCLcompat] blob is not a Windows DLL."); + } + uint32_t pe_header_offset = extract32(ublob + 0x3c); + + /////////////////////////////////////////////////////////////////////// + // Ananlyze PE-header + unsigned char const *const pe_header = ublob + pe_header_offset; + + // signature + uint32_t pe_signature = extract32(pe_header + 0); + if (pe_signature != 0x00004550) { + throw std::runtime_error( + "[SYCLcompat] PE-header signature is not 0x00004550"); + } + + // machine + uint16_t machine = extract16(pe_header + 4); + if (machine != 0x8664) { + throw std::runtime_error("[SYCLcompat] only DLLs for x64 supported"); + } + + // number of sections + uint16_t number_of_sections = extract16(pe_header + 6); + + // sizeof optional header + uint16_t sizeof_optional_header = extract16(pe_header + 20); + + // magic + uint16_t magic = extract16(pe_header + 24); + if (magic != 0x10b && magic != 0x20b) { + throw std::runtime_error("[SYCLcompat] MAGIC is not 0x010b or 0x020b"); + } + + /////////////////////////////////////////////////////////////////////// + // Analyze tail of optional header + constexpr int coff_header_size = 24; + + unsigned char const *const tail_of_optional_header = + pe_header + coff_header_size + sizeof_optional_header; + if (extract64(tail_of_optional_header - 8) != 0) { + throw std::runtime_error("Optional header not zero-padded"); + } + + /////////////////////////////////////////////////////////////////////// + // Analyze last section header + constexpr int section_header_size = 40; + unsigned char const *const last_section_header = + tail_of_optional_header + section_header_size * (number_of_sections - 1); + + uint32_t sizeof_raw_data = extract32(last_section_header + 16); + uint32_t pointer_to_raw_data = extract32(last_section_header + 20); + + return sizeof_raw_data + pointer_to_raw_data; +#else + if (blob[0] != 0x7F || blob[1] != 'E' || blob[2] != 'L' || blob[3] != 'F') + throw std::runtime_error("[SYCLcompat] blob is not in ELF format"); + + if (blob[4] != 0x02) + throw std::runtime_error("[SYCLcompat] only 64-bit headers are supported"); + + if (blob[5] != 0x01) + throw std::runtime_error( + "[SYCLcompat] only little-endian headers are supported"); + + unsigned char const *const ublob = + reinterpret_cast(blob); + uint64_t e_shoff = extract64(ublob + 0x28); + uint16_t e_shentsize = extract16(ublob + 0x3A); + uint16_t e_shnum = extract16(ublob + 0x3C); + + return e_shoff + (e_shentsize * e_shnum); +#endif +} + +#ifdef _WIN32 +class path_lib_record { +public: + void operator=(const path_lib_record &) = delete; + ~path_lib_record() { + for (auto entry : lib_to_path) { + FreeLibrary(static_cast(entry.first)); + fs::permissions(entry.second, fs::perms::owner_all); + fs::remove_all(entry.second.remove_filename()); + } + } + static void record_lib_path(fs::path path, void *library) { + lib_to_path[library] = path; + } + static void remove_lib(void *library) { + auto path = lib_to_path[library]; + std::error_code ec; + + FreeLibrary(static_cast(library)); + fs::permissions(path, fs::perms::owner_all); + if (fs::remove_all(path.remove_filename(), ec) != 2 || ec) + // one directory and one temporary file should have been deleted + throw std::runtime_error("[SYCLcompat] directory delete failed"); + + lib_to_path.erase(library); + } + +private: + static inline std::unordered_map lib_to_path; +}; +#endif + +} // namespace detail + +class kernel_library { +public: + kernel_library() : ptr{nullptr} {} + kernel_library(void *ptr) : ptr{ptr} {} + + operator void *() const { return ptr; } + +private: + void *ptr; +#ifdef _WIN32 + static inline detail::path_lib_record single_instance_to_trigger_destructor; +#endif +}; + +namespace detail { + +static inline kernel_library load_dl_from_data(char const *const data, + size_t size) { + fs::path filename = write_data_to_file(data, size); +#ifdef _WIN32 + void *so = LoadLibraryW(filename.wstring().c_str()); +#else + void *so = dlopen(filename.c_str(), RTLD_LAZY); +#endif + if (so == nullptr) + throw std::runtime_error("[SYCLcompat] failed to load kernel library"); + +#ifdef _WIN32 + detail::path_lib_record::record_lib_path(filename, so); +#else + std::error_code ec; + + // Windows DLL cannot be deleted while in use + if (fs::remove_all(filename.remove_filename(), ec) != 2 || ec) + // one directory and one temporary file should have been deleted + throw std::runtime_error("[SYCLcompat] directory delete failed"); +#endif + + return so; +} + +} // namespace detail + +/// Load kernel library and return a handle to use the library. +/// \param [in] name The name of the library. +static inline kernel_library load_kernel_library(const std::string &name) { + std::ifstream ifs; + ifs.open(name, std::ios::in | std::ios::binary); + + std::stringstream buffer; + buffer << ifs.rdbuf(); + + const std::string buffer_string = buffer.str(); + return detail::load_dl_from_data(buffer_string.c_str(), buffer_string.size()); +} + +/// Load kernel library whose image is alreay in memory and return a handle to +/// use the library. +/// \param [in] image A pointer to the image in memory. +static inline kernel_library load_kernel_library_mem(char const *const image) { + const size_t size = detail::get_lib_size(image); + + return detail::load_dl_from_data(image, size); +} + +/// Unload kernel library. +/// \param [in,out] library Handle to the library to be closed. +static inline void unload_kernel_library(const kernel_library &library) { +#ifdef _WIN32 + detail::path_lib_record::remove_lib(library); +#else + dlclose(library); +#endif +} + +class kernel_function { +public: + kernel_function() : ptr{nullptr} {} + kernel_function(kernel_functor ptr) : ptr{ptr} {} + + operator void *() const { return ((void *)ptr); } + + void operator()(sycl::queue &q, const sycl::nd_range<3> &range, + unsigned int local_mem_size, void **args, void **extra) { + ptr(q, range, local_mem_size, args, extra); + } + +private: + kernel_functor ptr; +}; + +/// Find kernel function in a kernel library and return its address. +/// \param [in] library Handle to the kernel library. +/// \param [in] name Name of the kernel function. +static inline kernel_function get_kernel_function(kernel_library &library, + const std::string &name) { +#ifdef _WIN32 + kernel_functor fn = reinterpret_cast( + GetProcAddress(static_cast(static_cast(library)), + (name + std::string("_wrapper")).c_str())); +#else + kernel_functor fn = reinterpret_cast( + dlsym(library, (name + std::string("_wrapper")).c_str())); +#endif + if (fn == nullptr) + throw std::runtime_error("[SYCLcompat] failed to get function"); + return fn; +} + +/// Invoke a kernel function. +/// \param [in] function kernel function. +/// \param [in] queue SYCL queue used to execute kernel +/// \param [in] group_range SYCL group range +/// \param [in] local_range SYCL local range +/// \param [in] local_mem_size The size of local memory required by the kernel +/// function. +/// \param [in] kernel_params Array of pointers to kernel arguments. +/// \param [in] extra Extra arguments. +static inline void invoke_kernel_function(kernel_function &function, + sycl::queue &queue, + sycl::range<3> group_range, + sycl::range<3> local_range, + unsigned int local_mem_size, + void **kernel_params, void **extra) { + function(queue, sycl::nd_range<3>(group_range * local_range, local_range), + local_mem_size, kernel_params, extra); +} + } // namespace syclcompat diff --git a/sycl/test-e2e/syclcompat/kernel/Inputs/kernel_function.cpp b/sycl/test-e2e/syclcompat/kernel/Inputs/kernel_function.cpp index 409bb8603adba..d3b4502df0544 100644 --- a/sycl/test-e2e/syclcompat/kernel/Inputs/kernel_function.cpp +++ b/sycl/test-e2e/syclcompat/kernel/Inputs/kernel_function.cpp @@ -40,8 +40,10 @@ #include +#include #include #include +#include template void testTemplateKernel(T *data) {} @@ -62,76 +64,98 @@ int getFuncAttrs() { return threadPerBlock; } -#ifdef WIN32 +void test_get_func_attrs() { + std::cout << __PRETTY_FUNCTION__ << std::endl; + syclcompat::device_ext &dev_ct1 = syclcompat::get_current_device(); -#define DECLARE_MODULE_VAR(var) HMODULE var -#define LOAD_LIB(lib) LoadLibraryA(lib) -#define LOAD_FUNCTOR(module, name) GetProcAddress(module, name) -#define FREE_LIB(module) FreeLibrary(module) + int size = dev_ct1.get_info(); + assert(getTemplateFuncAttrs() == size); + assert(getFuncAttrs() == size); +} -#else // LINUX +void call_library_func(syclcompat::kernel_library kernel_lib) { + sycl::queue q_ct1 = syclcompat::get_default_queue(); -#define DECLARE_MODULE_VAR(var) void *var -#define LOAD_LIB(lib) dlopen(lib, RTLD_LAZY) -#define LOAD_FUNCTOR(module, name) dlsym(module, name) -#define FREE_LIB(module) dlclose(module) + std::string FunctionName = "foo"; + syclcompat::kernel_function func; + SYCLCOMPAT_CHECK_ERROR( + func = syclcompat::get_kernel_function(kernel_lib, FunctionName.c_str())); -#endif + if (func == nullptr) { + std::cout << "Could not load function pointer" << std::endl << std::flush; + syclcompat::unload_kernel_library(kernel_lib); + assert(false); // FAIL + } -void test_kernel_functor_ptr() { - std::cout << __PRETTY_FUNCTION__ << std::endl; + int sharedSize = 10; + void **param = nullptr, **extra = nullptr; - syclcompat::device_ext &dev_ct1 = syclcompat::get_current_device(); - sycl::queue *q_ct1 = dev_ct1.default_queue(); + constexpr size_t NUM_ELEMENTS = 16; + int *dev = syclcompat::malloc(NUM_ELEMENTS); + syclcompat::fill(dev, 0, NUM_ELEMENTS); - int Size = dev_ct1.get_info(); - assert(getTemplateFuncAttrs() == Size); - assert(getFuncAttrs() == Size); + param = (void **)(&dev); + SYCLCOMPAT_CHECK_ERROR(syclcompat::invoke_kernel_function( + func, q_ct1, sycl::range<3>(1, 1, 2), sycl::range<3>(1, 1, 8), sharedSize, + param, extra)); + syclcompat::wait_and_throw(); + + int *host_mem = syclcompat::malloc_host(NUM_ELEMENTS); + syclcompat::memcpy(host_mem, dev, NUM_ELEMENTS); + for (int i = 0; i < NUM_ELEMENTS; i++) { + assert(host_mem[i] == i); + } - DECLARE_MODULE_VAR(M); - M = LOAD_LIB(TEST_SHARED_LIB); + SYCLCOMPAT_CHECK_ERROR(syclcompat::unload_kernel_library(kernel_lib)); - if (M == NULL) { + syclcompat::free(dev); + syclcompat::free(host_mem); +} + +void test_kernel_functor_ptr() { + std::cout << __PRETTY_FUNCTION__ << std::endl; + + syclcompat::kernel_library kernel_lib; + SYCLCOMPAT_CHECK_ERROR(kernel_lib = + syclcompat::load_kernel_library(TEST_SHARED_LIB)); + + if (kernel_lib == nullptr) { std::cout << "Could not load the library" << std::endl; std::cout << " " << TEST_SHARED_LIB << std::endl << std::flush; assert(false); // FAIL } - std::string FunctionName = "foo_wrapper"; - syclcompat::kernel_functor F; - F = (syclcompat::kernel_functor)LOAD_FUNCTOR(M, FunctionName.c_str()); + call_library_func(kernel_lib); +} - if (F == NULL) { - std::cout << "Could not load function pointer" << std::endl << std::flush; - FREE_LIB(M); - assert(false); // FAIL - } +void test_kernel_functor_ptr_memory() { + std::cout << __PRETTY_FUNCTION__ << std::endl; - int sharedSize = 10; - void **param = nullptr, **extra = nullptr; - if (!q_ct1->get_device().has(sycl::aspect::usm_shared_allocations)) - return; - int *dev = sycl::malloc_shared(16, *q_ct1); - for (int i = 0; i < 16; i++) { - dev[i] = 0; - } - param = (void **)(&dev); - F(*q_ct1, - sycl::nd_range<3>(sycl::range<3>(1, 1, 2) * sycl::range<3>(1, 1, 8), - sycl::range<3>(1, 1, 8)), - sharedSize, param, extra); - q_ct1->wait_and_throw(); - - for (int i = 0; i < 16; i++) { - assert(dev[i] == i); + sycl::queue q_ct1 = syclcompat::get_default_queue(); + + std::ifstream ifs; + ifs.open(TEST_SHARED_LIB, std::ios::in | std::ios::binary); + + std::stringstream buffer; + buffer << ifs.rdbuf(); + + syclcompat::kernel_library kernel_lib; + SYCLCOMPAT_CHECK_ERROR( + kernel_lib = syclcompat::load_kernel_library_mem(buffer.str().c_str())); + + if (kernel_lib == nullptr) { + std::cout << "Could not load the library" << std::endl; + std::cout << " " << TEST_SHARED_LIB << std::endl << std::flush; + assert(false); } - sycl::free(dev, *q_ct1); - FREE_LIB(M); + call_library_func(kernel_lib); } int main() { + test_get_func_attrs(); test_kernel_functor_ptr(); + test_kernel_functor_ptr_memory(); return 0; } diff --git a/sycl/test-e2e/syclcompat/kernel/Inputs/kernel_module.cpp b/sycl/test-e2e/syclcompat/kernel/Inputs/kernel_module.cpp index 85eaec5d7d632..77afc2c15a889 100644 --- a/sycl/test-e2e/syclcompat/kernel/Inputs/kernel_module.cpp +++ b/sycl/test-e2e/syclcompat/kernel/Inputs/kernel_module.cpp @@ -32,25 +32,22 @@ #include -#ifdef _WIN32 -#define DLL_EXPORT __declspec(dllexport) -#else -#define DLL_EXPORT -#endif +#include void foo(int *k, sycl::nd_item<3> item_ct1, uint8_t *local_mem) { k[item_ct1.get_global_linear_id()] = item_ct1.get_global_linear_id(); } extern "C" { -DLL_EXPORT void foo_wrapper(sycl::queue &queue, const sycl::nd_range<3> &nr, - unsigned int localMemSize, void **kernelParams, - void **extra) { +SYCLCOMPAT_EXPORT void foo_wrapper(sycl::queue &queue, + const sycl::nd_range<3> &nr, + unsigned int local_mem_size, + void **kernel_params, void **extra) { int *k; - k = (int *)kernelParams[0]; + k = (int *)kernel_params[0]; queue.submit([&](sycl::handler &cgh) { - sycl::local_accessor local_acc_ct1(sycl::range<1>(localMemSize), - cgh); + sycl::local_accessor local_acc_ct1( + sycl::range<1>(local_mem_size), cgh); cgh.parallel_for(nr, [=](sycl::nd_item<3> item_ct1) { foo(k, item_ct1, local_acc_ct1.get_multi_ptr().get()); diff --git a/sycl/test-e2e/syclcompat/kernel/kernel_lin.cpp b/sycl/test-e2e/syclcompat/kernel/kernel_lin.cpp index ad341fabf9b64..eca55f738d83a 100644 --- a/sycl/test-e2e/syclcompat/kernel/kernel_lin.cpp +++ b/sycl/test-e2e/syclcompat/kernel/kernel_lin.cpp @@ -1,4 +1,6 @@ // REQUIRES: linux +// TODO: Supported for ROCM 5. Further development required to support AMDGPU. +// UNSUPPORTED: hip // RUN: %clangxx -fPIC -shared -fsycl -fsycl-targets=%{sycl_triple} %S/Inputs/kernel_module.cpp -o %t.so // RUN: %clangxx -DTEST_SHARED_LIB='"%t.so"' -ldl -fsycl -fsycl-targets=%{sycl_triple} %S/Inputs/kernel_function.cpp -o %t.out