diff --git a/src/runtime_src/core/common/api/hw_context_int.h b/src/runtime_src/core/common/api/hw_context_int.h index 245e7cffd0..b7b40e3753 100644 --- a/src/runtime_src/core/common/api/hw_context_int.h +++ b/src/runtime_src/core/common/api/hw_context_int.h @@ -1,5 +1,5 @@ // PDX-License-Identifier: Apache-2.0 -// Copyright (C) 2022 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (C) 2022-2025 Advanced Micro Devices, Inc. All rights reserved. #ifndef XRT_COMMON_API_HW_CONTEXT_INT_H #define XRT_COMMON_API_HW_CONTEXT_INT_H @@ -7,6 +7,7 @@ // This file defines implementation extensions to the XRT XCLBIN APIs. #include "core/include/xrt/xrt_hw_context.h" +#include "core/include/xrt/experimental/xrt_module.h" #include @@ -44,6 +45,12 @@ XRT_CORE_COMMON_EXPORT xrt::hw_context create_hw_context_from_implementation(void* hwctx_impl); +// Checks all the modules that are registered with given hw context +// and returns the module with the given kernel name +// throws if no module is found with given kernel name +xrt::module +get_module(const xrt::hw_context& hwctx, const std::string& kname); + }} // hw_context_int, xrt_core #endif diff --git a/src/runtime_src/core/common/api/module_int.h b/src/runtime_src/core/common/api/module_int.h index 8b5a2b189c..4a2ad15bef 100644 --- a/src/runtime_src/core/common/api/module_int.h +++ b/src/runtime_src/core/common/api/module_int.h @@ -1,5 +1,5 @@ // SPDX-License-Identifier: Apache-2.0 -// Copyright (C) 2023 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (C) 2023-2025 Advanced Micro Devices, Inc. All rights reserved. // // Xilinx Runtime (XRT) Experimental APIs @@ -7,12 +7,19 @@ #define _XRT_COMMON_MODULE_INT_H_ // This file defines implementation extensions to the XRT Kernel APIs. 
+#include "core/common/xclbin_parser.h" #include "core/include/xrt/xrt_bo.h" #include "core/include/xrt/experimental/xrt_module.h" +#include "ert.h" + #include namespace xrt_core::module_int { +struct kernel_info { + std::vector args; + xrt_core::xclbin::kernel_properties props; +}; // Fill in ERT command payload in ELF flow. The payload is after extra_cu_mask // and before CU arguments. @@ -31,9 +38,12 @@ patch(const xrt::module&, const std::string& argnm, size_t index, const xrt::bo& // Note that if size passed in is 0, real buffer size required will be returned // without any patching. This is useful if caller wishes to discover the exact size // of the control code buffer. +// New ELfs pack multiple control codes info in it, to identify which control code +// to run we use index XRT_CORE_COMMON_EXPORT void -patch(const xrt::module&, uint8_t*, size_t*, const std::vector>*); +patch(const xrt::module&, uint8_t*, size_t*, const std::vector>*, + uint32_t index = 0); // Patch scalar into control code at given argument XRT_CORE_COMMON_EXPORT @@ -54,6 +64,17 @@ get_ert_opcode(const xrt::module& module); void dump_scratchpad_mem(const xrt::module& module); +// Returns kernel info extracted from demangled kernel signature +// eg : DPU(void*, void*, void*) +// returns kernel name (DPU), kernel args and kernel properties +// throws exception if Elf passed has no kernel info +const kernel_info& +get_kernel_info(const xrt::module& module); + +// Get partition size if ELF has the info +uint32_t +get_partition_size(const xrt::module& module); + } // xrt_core::module_int #endif diff --git a/src/runtime_src/core/common/api/xrt_hw_context.cpp b/src/runtime_src/core/common/api/xrt_hw_context.cpp index de21cf2e9c..025802dbcb 100644 --- a/src/runtime_src/core/common/api/xrt_hw_context.cpp +++ b/src/runtime_src/core/common/api/xrt_hw_context.cpp @@ -1,13 +1,17 @@ // SPDX-License-Identifier: Apache-2.0 -// Copyright (C) 2022-2023 Advanced Micro Devices, Inc. All rights reserved. 
+// Copyright (C) 2022-2025 Advanced Micro Devices, Inc. All rights reserved. // This file implements XRT xclbin APIs as declared in // core/include/experimental/xrt_queue.h #define XRT_API_SOURCE // exporting xrt_hwcontext.h #define XCL_DRIVER_DLL_EXPORT // exporting xrt_xclbin.h #define XRT_CORE_COMMON_SOURCE // in same dll as coreutil + #include "core/include/xrt/xrt_hw_context.h" +#include "core/include/xrt/experimental/xrt_module.h" #include "hw_context_int.h" +#include "module_int.h" +#include "xclbin_int.h" #include "core/common/device.h" #include "core/common/trace.h" @@ -30,6 +34,8 @@ class hw_context_impl : public std::enable_shared_from_this std::shared_ptr m_core_device; xrt::xclbin m_xclbin; + std::map m_module_map; // map b/w kernel name and module + uint32_t m_partition_size = 0; cfg_param_type m_cfg_param; access_mode m_mode; std::unique_ptr m_hdl; @@ -53,6 +59,28 @@ class hw_context_impl : public std::enable_shared_from_this , m_hdl{m_core_device->create_hw_context(xclbin_id, m_cfg_param, m_mode)} {} + hw_context_impl(std::shared_ptr device, cfg_param_type cfg_param, access_mode mode) + : m_core_device{std::move(device)} + , m_cfg_param{std::move(cfg_param)} + , m_mode{mode} + {} + + hw_context_impl(std::shared_ptr device, const xrt::elf& elf, cfg_param_type cfg_param, access_mode mode) + : m_core_device{std::move(device)} + , m_cfg_param{std::move(cfg_param)} + , m_mode{mode} + { + // Create module object to parse Elf + auto module = xrt::module(elf); + // Get partition size and pass it to diver for hw ctx creation + m_partition_size = xrt_core::module_int::get_partition_size(module); + m_hdl = m_core_device->create_hw_context(m_partition_size, m_cfg_param, mode); + + // creation successful, store the module in the map + auto kernel_name = xrt_core::module_int::get_kernel_info(module).props.name; + m_module_map.emplace(std::move(kernel_name), std::move(module)); + } + std::shared_ptr get_shared_ptr() { @@ -86,6 +114,32 @@ class hw_context_impl : 
public std::enable_shared_from_this hw_context_impl& operator=(const hw_context_impl&) = delete; hw_context_impl& operator=(hw_context_impl&&) = delete; + void + add_config(const xrt::elf& elf) + { + auto module = xrt::module(elf); + auto kernel_name = xrt_core::module_int::get_kernel_info(module).props.name; + auto part_size = xrt_core::module_int::get_partition_size(module); + + // first config: create hw ctx handle, commit size and module only if creation succeeds + if (!m_hdl) { + m_hdl = m_core_device->create_hw_context(part_size, m_cfg_param, m_mode); + m_partition_size = part_size; + m_module_map.emplace(std::move(kernel_name), std::move(module)); + return; + } + + // add module only if partition size matches existing configuration + if (m_partition_size != part_size) + throw std::runtime_error("can not add config to ctx with different configuration\n"); + + // add module to map if kernel name is different, else throw + if (m_module_map.find(kernel_name) != m_module_map.end()) + throw std::runtime_error("config with kernel already exists, cannot add this config\n"); + + m_module_map.emplace(std::move(kernel_name), std::move(module)); + } + void update_qos(const qos_type& qos) { @@ -134,6 +188,15 @@ class hw_context_impl : public std::enable_shared_from_this { return m_usage_logger.get(); } + + xrt::module + get_module(const std::string& kname) const + { + if (auto itr = m_module_map.find(kname); itr != m_module_map.end()) + return itr->second; + + throw std::runtime_error("no module found with given kernel name in ctx"); + } }; } // xrt @@ -171,45 +234,58 @@ create_hw_context_from_implementation(void* hwctx_impl) return xrt::hw_context(impl_ptr->get_shared_ptr()); } +xrt::module +get_module(const xrt::hw_context& ctx, const std::string& kname) +{ + return ctx.get_handle()->get_module(kname); +} + } // xrt_core::hw_context_int //////////////////////////////////////////////////////////////// // xrt_hwcontext C++ API implmentations (xrt_hw_context.h) 
//////////////////////////////////////////////////////////////// namespace xrt { - +// common function called with hw ctx created from different ways static std::shared_ptr -alloc_hwctx_from_cfg(const xrt::device& device, const xrt::uuid& xclbin_id, const xrt::hw_context::cfg_param_type& cfg_param) +post_alloc_hwctx(const std::shared_ptr& handle) { - XRT_TRACE_POINT_SCOPE(xrt_hw_context); - auto handle = std::make_shared(device.get_handle(), xclbin_id, cfg_param); - // Update device is called with a raw pointer to dyanamically // link to callbacks that exist in XDP via a C-style interface // The create_hw_context_from_implementation function is then // called in XDP create a hw_context to the underlying implementation xrt_core::xdp::update_device(handle.get()); - handle->get_usage_logger()->log_hw_ctx_info(handle.get()); - return handle; } static std::shared_ptr -alloc_hwctx_from_mode(const xrt::device& device, const xrt::uuid& xclbin_id, xrt::hw_context::access_mode mode) +alloc_hwctx_from_cfg(const xrt::device& device, const xrt::uuid& xclbin_id, const xrt::hw_context::cfg_param_type& cfg_param) { XRT_TRACE_POINT_SCOPE(xrt_hw_context); - auto handle = std::make_shared(device.get_handle(), xclbin_id, mode); + return post_alloc_hwctx(std::make_shared(device.get_handle(), xclbin_id, cfg_param)); +} - // Update device is called with a raw pointer to dyanamically - // link to callbacks that exist in XDP via a C-style interface - // The create_hw_context_from_implementation function is then - // called in XDP create a hw_context to the underlying implementation - xrt_core::xdp::update_device(handle.get()); +static std::shared_ptr +alloc_hwctx_from_mode(const xrt::device& device, const xrt::uuid& xclbin_id, xrt::hw_context::access_mode mode) +{ + XRT_TRACE_POINT_SCOPE(xrt_hw_context); + return post_alloc_hwctx(std::make_shared(device.get_handle(), xclbin_id, mode)); +} - handle->get_usage_logger()->log_hw_ctx_info(handle.get()); +static std::shared_ptr 
+alloc_empty_hwctx(const xrt::device& device, const xrt::hw_context::cfg_param_type& cfg_param, xrt::hw_context::access_mode mode) +{ + XRT_TRACE_POINT_SCOPE(xrt_hw_context); + return post_alloc_hwctx(std::make_shared(device.get_handle(), cfg_param, mode)); +} - return handle; +static std::shared_ptr +alloc_hwctx_from_elf(const xrt::device& device, const xrt::elf& elf, const xrt::hw_context::cfg_param_type& cfg_param, + xrt::hw_context::access_mode mode) +{ + XRT_TRACE_POINT_SCOPE(xrt_hw_context); + return post_alloc_hwctx(std::make_shared(device.get_handle(), elf, cfg_param, mode)); } hw_context:: @@ -222,6 +298,23 @@ hw_context(const xrt::device& device, const xrt::uuid& xclbin_id, access_mode mo : detail::pimpl(alloc_hwctx_from_mode(device, xclbin_id, mode)) {} +hw_context:: +hw_context(const xrt::device& device, const xrt::elf& elf, const cfg_param_type& cfg_param, access_mode mode) + : detail::pimpl(alloc_hwctx_from_elf(device, elf, cfg_param, mode)) +{} + +hw_context:: +hw_context(const xrt::device& device, const cfg_param_type& cfg_param, access_mode mode) + : detail::pimpl(alloc_empty_hwctx(device, cfg_param, mode)) +{} + +void +hw_context:: +add_config(const xrt::elf& elf) +{ + get_handle()->add_config(elf); +} + void hw_context:: update_qos(const qos_type& qos) diff --git a/src/runtime_src/core/common/api/xrt_kernel.cpp b/src/runtime_src/core/common/api/xrt_kernel.cpp index cd72df3fe3..d25e7eae8f 100644 --- a/src/runtime_src/core/common/api/xrt_kernel.cpp +++ b/src/runtime_src/core/common/api/xrt_kernel.cpp @@ -1,6 +1,6 @@ // SPDX-License-Identifier: Apache-2.0 // Copyright (C) 2020-2022 Xilinx, Inc. All rights reserved. -// Copyright (C) 2023-2024 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (C) 2023-2025 Advanced Micro Devices, Inc. All rights reserved. 
// This file implements XRT kernel APIs as declared in // core/include/experimental/xrt_kernel.h @@ -1318,6 +1318,7 @@ class kernel_impl : public std::enable_shared_from_this size_t num_cumasks = 1; // Required number of command cu masks control_type protocol = control_type::none; // Default opcode uint32_t uid; // Internal unique id for debug + uint32_t m_ctrl_code_index = 0; // Index to identify which ctrl code to load in elf std::shared_ptr m_usage_logger = xrt_core::usage_metrics::get_usage_metrics_logger(); @@ -1506,6 +1507,16 @@ class kernel_impl : public std::enable_shared_from_this return data; // no skipping } + static uint32_t + get_ctrlcode_idx(const std::string& name) + { + // kernel name will be of format - : + if (auto i = name.find(":"); i != std::string::npos) + return std::stoul(name.substr(i+1, name.size()-i-1)); + + return 0; // default case + } + static uint32_t create_uid() { @@ -1581,10 +1592,26 @@ class kernel_impl : public std::enable_shared_from_this m_usage_logger->log_kernel_info(device->core_device.get(), hwctx, name, args.size()); } - // Delegating constructor with no module kernel_impl(std::shared_ptr dev, xrt::hw_context ctx, const std::string& nm) - : kernel_impl{std::move(dev), std::move(ctx), {}, nm} - {} + : name(nm.substr(0, nm.find(":"))) // kernel name + , device(std::move(dev)) // share ownership + , hwctx(std::move(ctx)) // hw context + , hwqueue(hwctx) // hw queue + , m_module(xrt_core::hw_context_int::get_module(hwctx, nm.substr(0, nm.find(":")))) + , properties(xrt_core::module_int::get_kernel_info(m_module).props) // kernel info present in Elf + , uid(create_uid()) + , m_ctrl_code_index(get_ctrlcode_idx(nm)) // control code index + { + XRT_DEBUGF("kernel_impl::kernel_impl(%d)\n", uid); + + // get kernel info from module and initialize kernel args + for (auto& arg : xrt_core::module_int::get_kernel_info(m_module).args) + args.emplace_back(arg); + + // amend args with computed data based on kernel protocol + amend_args(); + 
m_usage_logger->log_kernel_info(device->core_device.get(), hwctx, name, args.size()); + } std::shared_ptr get_shared_ptr() @@ -1648,6 +1675,12 @@ class kernel_impl : public std::enable_shared_from_this return name; } + uint32_t + get_ctrl_code_index() const + { + return m_ctrl_code_index; + } + xrt::xclbin get_xclbin() const { @@ -1942,15 +1975,18 @@ class run_impl return count++; } - // This function copies the module into a hw_context. The module + // This function copies the module into a hw_context. The module // will be associated with hwctx specific memory. + // If module has multiple control codes, index is used to identify + // the control code that needs to be run. + // By default control code at zeroth index is picked static xrt::module - copy_module(const xrt::module& module, const xrt::hw_context& hwctx) + copy_module(const xrt::module& module, const xrt::hw_context& hwctx, uint32_t ctrl_code_idx) { if (!module) return {}; - return {module, hwctx}; + return {module, hwctx, ctrl_code_idx}; } virtual std::unique_ptr @@ -2013,6 +2049,10 @@ class run_impl xrt::bo validate_bo_at_index(size_t index, const xrt::bo& bo) { + // ELF flow doesn't have arg connectivity, so skip validation + if (!kernel->get_xclbin()) + return bo; + xcl_bo_flags grp {xrt_core::bo::group_id(bo)}; if (validate_ip_arg_connectivity(index, grp.bank)) return bo; @@ -2062,15 +2102,14 @@ class run_impl { auto kcmd = pkt->get_ert_cmd(); auto payload = kernel->initialize_command(pkt); - - if (kcmd->opcode == ERT_START_DPU || kcmd->opcode == ERT_START_NPU || kcmd->opcode == ERT_START_NPU_PREEMPT) { + if (kcmd->opcode == ERT_START_DPU || kcmd->opcode == ERT_START_NPU || kcmd->opcode == ERT_START_NPU_PREEMPT || + kcmd->opcode == ERT_START_NPU_PREEMPT_ELF) { auto payload_past_dpu = initialize_dpu(payload); // adjust count to include the prepended ert_dpu_data structures kcmd->count += payload_past_dpu - payload; payload = payload_past_dpu; } - return payload; } @@ -2127,7 +2166,7 @@ class 
run_impl explicit run_impl(std::shared_ptr k) : kernel(std::move(k)) - , m_module{copy_module(kernel->get_module(), kernel->get_hw_context())} + , m_module{copy_module(kernel->get_module(), kernel->get_hw_context(), kernel->get_ctrl_code_index())} , m_hwqueue(kernel->get_hw_queue()) , ips(kernel->get_ips()) , cumask(kernel->get_cumask()) @@ -3436,7 +3475,7 @@ alloc_kernel(const std::shared_ptr& dev, xrt::kernel::cu_access_mode mode) { auto amode = hwctx_access_mode(mode); // legacy access mode to hwctx qos - return std::make_shared(dev, xrt::hw_context{dev->get_xrt_device(), xclbin_id, amode}, name); + return std::make_shared(dev, xrt::hw_context{dev->get_xrt_device(), xclbin_id, amode}, xrt::module{}, name); } static std::shared_ptr @@ -3444,7 +3483,8 @@ alloc_kernel_from_ctx(const std::shared_ptr& dev, const xrt::hw_context& hwctx, const std::string& name) { - return std::make_shared(dev, hwctx, name); + // Delegating constructor with no module + return std::make_shared(dev, hwctx, xrt::module{}, name); } static std::shared_ptr @@ -3456,6 +3496,14 @@ alloc_kernel_from_module(const std::shared_ptr& dev, return std::make_shared(dev, hwctx, module, name); } +static std::shared_ptr +alloc_kernel_from_name(const std::shared_ptr& dev, + const xrt::hw_context& hwctx, + const std::string& name) +{ + return std::make_shared(dev, hwctx, name); +} + static std::shared_ptr get_mailbox_impl(const xrt::run& run) { @@ -4141,6 +4189,10 @@ kernel(const xrt::hw_context& ctx, const xrt::module& mod, const std::string& na : xrt::kernel::kernel{alloc_kernel_from_module(get_device(ctx.get_device()), ctx, mod, name)} {} +kernel:: +kernel(const xrt::hw_context& ctx, const std::string& name) + : xrt::kernel::kernel{alloc_kernel_from_name(get_device(ctx.get_device()), ctx, name)} +{} } // xrt::ext //////////////////////////////////////////////////////////////// diff --git a/src/runtime_src/core/common/api/xrt_module.cpp b/src/runtime_src/core/common/api/xrt_module.cpp index 
57fd2cbc3f..74b16f1e30 100644 --- a/src/runtime_src/core/common/api/xrt_module.cpp +++ b/src/runtime_src/core/common/api/xrt_module.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2023 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (C) 2023-2025 Advanced Micro Devices, Inc. All rights reserved. // SPDX-License-Identifier: Apache-2.0 #define XCL_DRIVER_DLL_EXPORT // exporting xrt_module.h #define XRT_API_SOURCE // exporting xrt_module.h @@ -35,6 +35,14 @@ #include #include #include +#include + +#ifdef _WIN32 +#include +#pragma comment(lib, "dbghelp.lib") +#else +#include +#endif #ifndef AIE_COLUMN_PAGE_SIZE # define AIE_COLUMN_PAGE_SIZE 8192 // NOLINT @@ -47,8 +55,9 @@ namespace // 0 if no padding is required. The page size should be // embedded as ELF metadata in the future. static constexpr size_t elf_page_size = AIE_COLUMN_PAGE_SIZE; -static constexpr uint8_t Elf_Amd_Aie2p = 69; -static constexpr uint8_t Elf_Amd_Aie2ps = 64; +static constexpr uint8_t Elf_Amd_Aie2p = 69; +static constexpr uint8_t Elf_Amd_Aie2ps = 64; +static constexpr uint8_t Elf_Amd_Aie2p_config = 70; // In aie2p max bd data words is 8 and in aie4/aie2ps its 9 // using max bd words as 9 to cover all cases @@ -101,6 +110,13 @@ struct buf m_data.resize(pad); } + + static const buf& + get_empty_buf() + { + static const buf b = {}; + return b; + } }; using instr_buf = buf; @@ -120,13 +136,14 @@ struct patcher { enum class symbol_type { uc_dma_remote_ptr_symbol_kind = 1, - shim_dma_base_addr_symbol_kind = 2, // patching scheme needed by AIE2PS firmware + shim_dma_base_addr_symbol_kind = 2, // patching scheme needed by AIE2PS firmware scalar_32bit_kind = 3, - control_packet_48 = 4, // patching scheme needed by firmware to patch control packet - shim_dma_48 = 5, // patching scheme needed by firmware to patch instruction buffer + control_packet_48 = 4, // patching scheme needed by firmware to patch control packet + shim_dma_48 = 5, // patching scheme needed by firmware to patch instruction buffer 
shim_dma_aie4_base_addr_symbol_kind = 6, // patching scheme needed by AIE4 firmware - control_packet_57 = 7, // patching scheme needed by firmware to patch control packet for aie2ps - unknown_symbol_kind = 8 + control_packet_57 = 7, // patching scheme needed by firmware to patch control packet for aie2ps + address_64 = 8, // patching scheme needed to patch pdi address + unknown_symbol_kind = 9 }; enum class buf_type { @@ -175,10 +192,16 @@ struct patcher , m_ctrlcode_patchinfo(std::move(ctrlcode_offset)) {} + void + patch64(uint32_t* data_to_patch, uint64_t addr) + { + *data_to_patch = static_cast(addr & 0xffffffff); + *(data_to_patch + 1) = static_cast((addr >> 32) & 0xffffffff); + } + // Replace certain bits of *data_to_patch with register_value. Which bits to be replaced is specified by mask // For *data_to_patch be 0xbb11aaaa and mask be 0x00ff0000 // To make *data_to_patch be 0xbb55aaaa, register_value must be 0x00550000 - void patch32(uint32_t* data_to_patch, uint64_t register_value, uint32_t mask) const { @@ -246,7 +269,8 @@ struct patcher bd_data_ptr[3] = (bd_data_ptr[3] & 0xFFFF0000) | (base_address >> 32); // NOLINT } - void patch_shim48(uint32_t* bd_data_ptr, uint64_t patch) const + void + patch_shim48(uint32_t* bd_data_ptr, uint64_t patch) const { // This patching scheme is originated from NPU firmware constexpr uint64_t ddr_aie_addr_offset = 0x80000000; @@ -276,6 +300,10 @@ struct patcher } switch (m_symbol_type) { + case symbol_type::address_64: + // new_value is a 64bit address + patch64(bd_data_ptr, new_value); + break; case symbol_type::scalar_32bit_kind: // new_value is a register value if (item.mask) @@ -308,33 +336,55 @@ struct patcher } }; - XRT_CORE_UNUSED void - dump_bo(xrt::bo& bo, const std::string& filename) - { - std::ofstream ofs(filename, std::ios::out | std::ios::binary); - if (!ofs.is_open()) - throw std::runtime_error("Failure opening file " + filename + " for writing!"); +XRT_CORE_UNUSED void +dump_bo(xrt::bo& bo, const std::string& 
filename) +{ + std::ofstream ofs(filename, std::ios::out | std::ios::binary); + if (!ofs.is_open()) + throw std::runtime_error("Failure opening file " + filename + " for writing!"); - auto buf = bo.map(); - ofs.write(buf, static_cast(bo.size())); - } + auto buf = bo.map(); + ofs.write(buf, static_cast(bo.size())); +} - XRT_CORE_UNUSED std::string - generate_key_string(const std::string& argument_name, patcher::buf_type type) - { - std::string buf_string = std::to_string(static_cast(type)); - return argument_name + buf_string; - } +XRT_CORE_UNUSED std::string +generate_key_string(const std::string& argument_name, patcher::buf_type type, uint32_t index) +{ + std::string buf_string = std::to_string(static_cast(type)); + return argument_name + buf_string + std::to_string(index); +} + +static std::string +demangle(const std::string& mangled_name) +{ +#ifdef _WIN32 + char demangled_name[1024]; + if (UnDecorateSymbolName(mangled_name.c_str(), demangled_name, sizeof(demangled_name), UNDNAME_COMPLETE)) + return std::string(demangled_name); + else + throw std::runtime_error("Error demangling kernel signature"); +#else + int status = 0; + char* demangled_name = abi::__cxa_demangle(mangled_name.c_str(), nullptr, nullptr, &status); + + if (status) + throw std::runtime_error("Error demangling kernel signature"); + + std::string result {demangled_name}; + std::free(demangled_name); // Free the allocated memory by api + return result; +#endif +} } // namespace namespace xrt { - // class module_impl - Base class for different implementations class module_impl { xrt::uuid m_cfg_uuid; // matching hw configuration id + public: explicit module_impl(xrt::uuid cfg_uuid) : m_cfg_uuid(std::move(cfg_uuid)) @@ -367,32 +417,44 @@ class module_impl throw std::runtime_error("Not supported"); } - virtual const instr_buf& - get_instr() const + virtual std::pair + get_instr(uint32_t /*index*/ = 0) const { throw std::runtime_error("Not supported"); } - virtual const buf& + virtual std::pair 
get_preempt_save() const { - throw std::runtime_error("Not supported"); + throw std::runtime_error("Not supported"); } - virtual const buf& + virtual std::pair get_preempt_restore() const { - throw std::runtime_error("Not supported"); + throw std::runtime_error("Not supported"); + } + + virtual const buf& + get_pdi(const std::string& /*pdi_name*/) const + { + throw std::runtime_error("Not supported"); + } + + virtual const std::unordered_set& + get_patch_pdis(uint32_t /*index*/ = 0) const + { + throw std::runtime_error("Not supported"); } virtual size_t get_scratch_pad_mem_size() const { - throw std::runtime_error("Not supported"); + throw std::runtime_error("Not supported"); } - virtual const control_packet& - get_ctrlpkt() const + virtual std::pair + get_ctrlpkt(uint32_t /*index*/ = 0) const { throw std::runtime_error("Not supported"); } @@ -443,8 +505,9 @@ class module_impl // @param index - argument index // @param bo - global argument to patch into ctrlcode // @param buf_type - whether it is control-code, control-packet, preempt-save or preempt-restore + // @param sec_index - index of section to be patched virtual void - patch_instr(xrt::bo&, const std::string&, size_t, const xrt::bo&, patcher::buf_type) + patch_instr(xrt::bo&, const std::string&, size_t, const xrt::bo&, patcher::buf_type, uint32_t) { throw std::runtime_error("Not supported "); } @@ -478,9 +541,10 @@ class module_impl // @param index - argument index // @param patch - patch value // @param buf_type - whether it is control-code, control-packet, preempt-save or preempt-restore - // @Return true if symbol was patched, false otherwise // + // @param sec_index - index of section to be patched + // @Return true if symbol was patched, false otherwise virtual bool - patch_it(uint8_t*, const std::string&, size_t, uint64_t, patcher::buf_type) + patch_it(uint8_t*, const std::string&, size_t, uint64_t, patcher::buf_type, uint32_t) { throw std::runtime_error("Not supported"); } @@ -508,6 +572,66 @@ class 
module_impl { throw std::runtime_error("Not supported"); } + + // get partition size if elf has the info + virtual uint32_t + get_partition_size() const + { + throw std::runtime_error("Not supported"); + } + + // get kernel info (name, properties and args) if elf has the info + virtual const xrt_core::module_int::kernel_info& + get_kernel_info() const + { + throw std::runtime_error("Not supported"); + } +}; + +// class module_userptr - Opaque userptr provided by application +class module_userptr : public module_impl +{ + std::vector m_ctrlcode; + instr_buf m_instr_buf; + control_packet m_ctrl_pkt; + + // Create a ctrlcode object from the userptr. + static std::vector + initialize_ctrlcode(const char* userptr, size_t sz) + { + std::vector ctrlcodes; + ctrlcodes.resize(1); + ctrlcodes[0].append_section_data(reinterpret_cast(userptr), sz); + return ctrlcodes; + } + +public: + module_userptr(const char* userptr, size_t sz, const xrt::uuid& uuid) + : module_impl{ uuid } + , m_ctrlcode{ initialize_ctrlcode(userptr, sz) } + {} + + module_userptr(const void* userptr, size_t sz, const xrt::uuid& uuid) + : module_userptr(static_cast(userptr), sz, uuid) + {} + + const std::vector& + get_data() const override + { + return m_ctrlcode; + } + + std::pair + get_instr(uint32_t /*index*/) const override + { + return {0, m_instr_buf}; + } + + std::pair + get_ctrlpkt(uint32_t /*index*/) const override + { + return {0, m_ctrl_pkt}; + } }; // class module_elf - Elf provided by application @@ -524,125 +648,302 @@ class module_impl // of a scalar object used as an argument. The relocations are used to // construct patcher objects for each argument. 
class module_elf : public module_impl +{ +protected: + const ELFIO::elfio& m_elfio; // we should not modify underlying elf + uint8_t m_os_abi = Elf_Amd_Aie2p; + std::map m_arg2patcher; + + explicit module_elf(xrt::elf elf) + : module_impl{ elf.get_cfg_uuid() } + , m_elfio(xrt_core::elf_int::get_elfio(elf)) + , m_os_abi(m_elfio.get_os_abi()) + {} + +public: + bool + patch_it(uint8_t* base, const std::string& argnm, size_t index, uint64_t patch, + patcher::buf_type type, uint32_t sec_index) override + { + const auto key_string = generate_key_string(argnm, type, sec_index); + auto it = m_arg2patcher.find(key_string); + auto not_found_use_argument_name = (it == m_arg2patcher.end()); + if (not_found_use_argument_name) {// Search using index + auto index_string = std::to_string(index); + const auto key_index_string = generate_key_string(index_string, type, sec_index); + it = m_arg2patcher.find(key_index_string); + if (it == m_arg2patcher.end()) + return false; + } + + it->second.patch_it(base, patch); + if (xrt_core::config::get_xrt_debug()) { + if (not_found_use_argument_name) { + std::stringstream ss; + ss << "Patched " << patcher::to_string(type) << " using argument index " << index << " with value " << std::hex << patch; + xrt_core::message::send( xrt_core::message::severity_level::debug, "xrt_module", ss.str()); + } + else { + std::stringstream ss; + ss << "Patched " << patcher::to_string(type) << " using argument name " << argnm << " with value " << std::hex << patch; + xrt_core::message::send( xrt_core::message::severity_level::debug, "xrt_module", ss.str()); + } + } + return true; + } + + uint8_t + get_os_abi() const override + { + return m_os_abi; + } + + size_t + number_of_arg_patchers() const override + { + return m_arg2patcher.size(); + } +}; + +// module class for ELFs with os_abi - Elf_Amd_Aie2p & ELF_Amd_Aie2p_config +class module_elf_aie2p : public module_elf { // rela->addend have offset to base-bo-addr info along with schema // [0:3] bit are used for 
patching schema, [4:31] used for base-bo-addr constexpr static uint32_t addend_shift = 4; constexpr static uint32_t addend_mask = ~((uint32_t)0) << addend_shift; constexpr static uint32_t schema_mask = ~addend_mask; - xrt::elf m_elf; - uint8_t m_os_abi = Elf_Amd_Aie2p; - std::vector m_ctrlcodes; - std::map m_arg2patcher; - instr_buf m_instr_buf; - control_packet m_ctrl_packet; - bool m_ctrl_packet_exist = false; + + // New Elf of Aie2p contain multiple ctrltext, ctrldata sections + // sections will be of format .ctrltext.* where .* has index of that section type + // Below maps has this index as key and value is pair of
+ std::map> m_instr_buf_map; + std::map> m_ctrl_packet_map; + + // Also these new Elfs have multiple PDI sections of format .pdi.* + // Below map has pdi section symbol name as key and section data as value + std::map m_pdi_buf_map; + // map storing pdi symbols that needs patching in ctrl codes + std::map> m_ctrl_pdi_map; + buf m_save_buf; + uint32_t m_save_buf_sec_idx = UINT32_MAX; bool m_save_buf_exist = false; + buf m_restore_buf; + uint32_t m_restore_buf_sec_idx = UINT32_MAX; bool m_restore_buf_exist = false; + size_t m_scratch_pad_mem_size = 0; + uint32_t m_partition_size = UINT32_MAX; std::set m_ctrlpkt_pm_dynsyms; // preemption dynsyms in elf std::map m_ctrlpkt_pm_bufs; // preemption buffers map - // The ELF sections embed column and page information in their - // names. Extract the column and page information from the - // section name, default to single column and page when nothing - // is specified. Note that in some usecases the extracted column - // is actually the index of column microblase controller; the term - // column and uC index is used interchangably in such cases. - static std::pair - get_column_and_page(const std::string& name) + xrt_core::module_int::kernel_info m_kernel_info; + + static uint32_t + get_section_name_index(const std::string& name) { - constexpr size_t first_dot = 9; // .ctrltext.. - auto dot1 = name.find_first_of(".", first_dot); - auto dot2 = name.find_first_of(".", first_dot + 1); - auto col = dot1 != std::string::npos - ? std::stoi(name.substr(dot1 + 1, dot2)) - : 0; - auto page = dot2 != std::string::npos - ? std::stoi(name.substr(dot2 + 1)) - : 0; - return { col, page }; + // Elf_Amd_Aie2p has sections .sec_name (no index suffix, index is 0) + // Elf_Amd_Aie2p_config has sections .sec_name.N, parse all digits of N (N may exceed 9) + auto pos = name.find_last_of("."); + return (pos == 0 || pos == std::string::npos) ? 0 : std::stoul(name.substr(pos + 1)); } // Extract instruction buffer from ELF sections without assuming anything // about order of sections in the ELF file. 
- instr_buf - initialize_instr_buf(const ELFIO::elfio& elf) + void + initialize_partition_size() + { + static constexpr const char* partition_section_name {".note.xrt.configuration"}; + // note 0 in .note.xrt.configuration section has partition size + static constexpr ELFIO::Elf_Word partition_note_num = 0; + + auto partition_section = m_elfio.sections[partition_section_name]; + if (!partition_section) + return; // elf doesn't have partition info section, partition size holds UINT32_MAX + + ELFIO::note_section_accessor accessor(m_elfio, partition_section); + ELFIO::Elf_Word type; + std::string name; + char* desc; + ELFIO::Elf_Word desc_size; + if (!accessor.get_note(partition_note_num, type, name, desc, desc_size)) + throw std::runtime_error("Failed to get partition info, partition note not found\n"); + m_partition_size = std::stoul(std::string{static_cast(desc), desc_size}); + } + + std::string + get_kernel_signature() + { + static constexpr const char* symtab_section_name {".symtab"}; + std::string kernel_signature = ""; + + ELFIO::section* symtab = m_elfio.sections[symtab_section_name]; + if (!symtab) + return kernel_signature; // elf doesn't have .symtab section, kernel_signature will be empty string + + // Get the symbol table + const ELFIO::symbol_section_accessor symbols(m_elfio, symtab); + // Iterate over all symbols + for (ELFIO::Elf_Xword i = 0; i < symbols.get_symbols_num(); ++i) { + std::string name; + ELFIO::Elf64_Addr value; + ELFIO::Elf_Xword size; + unsigned char bind; + unsigned char type; + ELFIO::Elf_Half section_index; + unsigned char other; + + // Read symbol data + if (symbols.get_symbol(i, name, value, size, bind, type, section_index, other)) { + // there will be only 1 kernel signature symbol entry in .symtab section whose + // type is FUNC + if (type == ELFIO::STT_FUNC) { + kernel_signature = demangle(name); + break; + } + } + } + return kernel_signature; + } + + static std::vector + split(const std::string& s, char delimiter) { - instr_buf 
instrbuf; + std::vector tokens; + std::stringstream ss(s); + std::string item; - for (const auto& sec : elf.sections) { - auto name = sec->get_name(); - // intr buffer is in the .ctrltext section. - if (name.find(patcher::to_string(patcher::buf_type::ctrltext)) == std::string::npos) - continue; - - instrbuf.append_section_data(sec.get()); - break; + while (getline(ss, item, delimiter)) + tokens.push_back(item); + + return tokens; + } + + static std::vector + construct_kernel_args(const std::string& signature) + { + std::vector args; + + // kernel signature - name(argtype, argtype ...) + size_t start_pos = signature.find('('); + size_t end_pos = signature.find(')', start_pos); + + if (start_pos == std::string::npos || end_pos == std::string::npos || start_pos > end_pos) + throw std::runtime_error("Failed to construct kernel args"); + + std::string argstring = signature.substr(start_pos + 1, end_pos - start_pos - 1); + std::vector argstrings = split(argstring, ','); + + size_t count = 0; + size_t offset = 0; + for (const std::string& str : argstrings) { + xrt_core::xclbin::kernel_argument arg; + arg.name = "argv" + std::to_string(count); + arg.hosttype = "no-type"; + arg.port = "no-port"; + arg.index = count; + arg.offset = offset; + arg.dir = xrt_core::xclbin::kernel_argument::direction::input; + // if arg has pointer(*) in its name (eg: char*, void*) it is of type global otherwise scalar + arg.type = (str.find('*') != std::string::npos) + ? 
xrt_core::xclbin::kernel_argument::argtype::global + : xrt_core::xclbin::kernel_argument::argtype::scalar; + + // At present only global args are supported + // TODO : Add support for scalar args in ELF flow + if (arg.type == xrt_core::xclbin::kernel_argument::argtype::scalar) + throw std::runtime_error("scalar args are not yet supported for this kind of kernel"); + else { + // global arg + static constexpr size_t global_arg_size = 0x8; + arg.size = global_arg_size; + + offset += global_arg_size; + } + + args.emplace_back(arg); + count++; } + return args; + } + + void + initialize_kernel_info() + { + auto kernel_signature = get_kernel_signature(); + // extract kernel name + size_t pos = kernel_signature.find('('); + if (pos == std::string::npos) + return; // Elf doesn't contain kernel info aie2p type + std::string kernel_name = kernel_signature.substr(0, pos); + + // construct kernel args and properties and cache them + // this info is used at the time of xrt::kernel object creation + m_kernel_info.args = construct_kernel_args(kernel_signature); - return instrbuf; + // fill kernel properties + m_kernel_info.props.name = kernel_name; + m_kernel_info.props.type = xrt_core::xclbin::kernel_properties::kernel_type::dpu; } - // Extract control-packet buffer from ELF sections without assuming anything + // Extract buffer from ELF sections without assuming anything // about order of sections in the ELF file. - bool - initialize_ctrl_packet(const ELFIO::elfio& elf, control_packet& ctrlpacket) + template + void + initialize_buf(patcher::buf_type type, std::map>& map) { - for (const auto& sec : elf.sections) { + for (const auto& sec : m_elfio.sections) { auto name = sec->get_name(); - // ctrl packet is in the .ctrldata section. - if (name.find(patcher::to_string(patcher::buf_type::ctrldata)) == std::string::npos) + auto sec_index = sec->get_index(); + buf_type buf; + // Instruction, control pkt buffers are in section of type .ctrltext.* .ctrldata.*. 
+ if (name.find(patcher::to_string(type)) == std::string::npos) continue; - - ctrlpacket.append_section_data(sec.get()); - return true; + + uint32_t index = get_section_name_index(name); + buf.append_section_data(sec.get()); + map.emplace(std::make_pair(index, std::make_pair(sec_index, buf))); } - return false; } - // Extract preempt_save buffer from ELF sections - // return true if section exist - bool - initialize_save_buf(const ELFIO::elfio& elf, buf& save_buf) + void + initialize_pdi_buf() { - for (const auto& sec : elf.sections) { + for (const auto& sec : m_elfio.sections) { auto name = sec->get_name(); - if (name.find(patcher::to_string(patcher::buf_type::preempt_save)) == std::string::npos) + if (name.find(patcher::to_string(patcher::buf_type::pdi)) == std::string::npos) continue; - - save_buf.append_section_data(sec.get()); - return true; + + buf pdi_buf; + pdi_buf.append_section_data(sec.get()); + m_pdi_buf_map.emplace(std::make_pair(name, pdi_buf)); } - return false; } - // Extract preempt_restore buffer from ELF sections + // Extract preempt_save/preempt_restore buffer from ELF sections // return true if section exist bool - initialize_restore_buf(const ELFIO::elfio& elf, buf& restore_buf) + initialize_save_restore_buf(buf& buf, uint32_t& index, patcher::buf_type type) { - for (const auto& sec : elf.sections) { + for (const auto& sec : m_elfio.sections) { auto name = sec->get_name(); - if (name.find(patcher::to_string(patcher::buf_type::preempt_restore)) == std::string::npos) + if (name.find(patcher::to_string(type)) == std::string::npos) continue; - restore_buf.append_section_data(sec.get()); + buf.append_section_data(sec.get()); + index = sec->get_index(); return true; } - return false; } // Extract ctrlpkt preemption buffers from ELF sections // and store it in map with section name as key void - initialize_ctrlpkt_pm_bufs(const ELFIO::elfio& elf) + initialize_ctrlpkt_pm_bufs() { - for (const auto& sec : elf.sections) { + for (const auto& sec : 
m_elfio.sections) { auto name = sec->get_name(); if (name.find(patcher::to_string(patcher::buf_type::ctrlpkt_pm)) == std::string::npos) continue; @@ -651,216 +952,370 @@ class module_elf : public module_impl } } - // Extract control code from ELF sections without assuming anything - // about order of sections in the ELF file. Build helper data - // structures that manages the control code data per page for each - // microblaze controller (uC), then create ctrlcode objects from the - // data. - std::vector - initialize_column_ctrlcode(const ELFIO::elfio& elf, std::vector& pad_offsets) + std::pair + determine_section_type(const std::string& section_name) { - // Elf sections for a single page - struct elf_page - { - ELFIO::section* ctrltext = nullptr; - ELFIO::section* ctrldata = nullptr; - }; + if (section_name.find(patcher::to_string(patcher::buf_type::ctrltext)) != std::string::npos) { + auto index = get_section_name_index(section_name); + if (index >= m_instr_buf_map.size()) + throw std::runtime_error("Invalid section passed, section info is not cached\n"); + return { m_instr_buf_map[index].second.size(), patcher::buf_type::ctrltext}; + } + else if (!m_ctrl_packet_map.empty() && + section_name.find(patcher::to_string(patcher::buf_type::ctrldata)) != std::string::npos) { + auto index = get_section_name_index(section_name); + if (index >= m_ctrl_packet_map.size()) + throw std::runtime_error("Invalid section passed, section info is not cached\n"); + return { m_ctrl_packet_map[index].second.size(), patcher::buf_type::ctrldata}; + } + else if (m_save_buf_exist && (section_name == patcher::to_string(patcher::buf_type::preempt_save))) + return { m_save_buf.size(), patcher::buf_type::preempt_save }; + else if (m_restore_buf_exist && (section_name == patcher::to_string(patcher::buf_type::preempt_restore))) + return { m_restore_buf.size(), patcher::buf_type::preempt_restore }; + else if (!m_pdi_buf_map.empty() && + 
section_name.find(patcher::to_string(patcher::buf_type::pdi)) != std::string::npos) { + if (m_pdi_buf_map.find(section_name) == m_pdi_buf_map.end()) + throw std::runtime_error("Invalid pdi section passed, section info is not cached\n"); + return { m_pdi_buf_map[section_name].size(), patcher::buf_type::pdi }; + } + else + throw std::runtime_error("Invalid section name " + section_name); + } - // Elf sections for a single column, the column control code is - // divided into pages of some architecture defined size. - struct elf_sections - { - using page_index = uint32_t; - std::map pages; - }; + void + initialize_arg_patchers() + { + auto dynsym = m_elfio.sections[".dynsym"]; + auto dynstr = m_elfio.sections[".dynstr"]; + auto dynsec = m_elfio.sections[".rela.dyn"]; - // Elf ctrl code for a partition spanning multiple uC, where each - // uC has its own control code. For architectures where a - // partition is not divided into multiple controllers, there will - // be just one entry in the associative map. - // ucidx -> [page -> [ctrltext, ctrldata]] - using uc_index = uint32_t; - std::map uc_sections; + if (!dynsym || !dynstr || !dynsec) + return; - // Iterate sections in elf, collect ctrltext and ctrldata - // per column and page - for (const auto& sec : elf.sections) { - auto name = sec->get_name(); - if (name.find(patcher::to_string(patcher::buf_type::ctrltext)) != std::string::npos) { - auto [ucidx, page] = get_column_and_page(sec->get_name()); - uc_sections[ucidx].pages[page].ctrltext = sec.get(); + auto name = dynsec->get_name(); + + // Iterate over all relocations and construct a patcher for each + // relocation that refers to a symbol in the .dynsym section. 
+ auto begin = reinterpret_cast(dynsec->get_data()); + auto end = begin + dynsec->get_size() / sizeof(const ELFIO::Elf32_Rela); + for (auto rela = begin; rela != end; ++rela) { + auto symidx = ELFIO::get_sym_and_type::get_r_sym(rela->r_info); + auto type = ELFIO::get_sym_and_type::get_r_type(rela->r_info); + + auto dynsym_offset = symidx * sizeof(ELFIO::Elf32_Sym); + if (dynsym_offset >= dynsym->get_size()) + throw std::runtime_error("Invalid symbol index " + std::to_string(symidx)); + auto sym = reinterpret_cast(dynsym->get_data() + dynsym_offset); + + auto dynstr_offset = sym->st_name; + if (dynstr_offset >= dynstr->get_size()) + throw std::runtime_error("Invalid symbol name offset " + std::to_string(dynstr_offset)); + auto symname = dynstr->get_data() + dynstr_offset; + + if (!m_scratch_pad_mem_size && (strcmp(symname, Scratch_Pad_Mem_Symbol) == 0)) { + m_scratch_pad_mem_size = static_cast(sym->st_size); } - else if (name.find(patcher::to_string(patcher::buf_type::ctrldata)) != std::string::npos) { - auto [ucidx, page] = get_column_and_page(sec->get_name()); - uc_sections[ucidx].pages[page].ctrldata = sec.get(); + + static constexpr const char* ctrlpkt_pm_dynsym = "ctrlpkt-pm"; + if (std::string(symname).find(ctrlpkt_pm_dynsym) != std::string::npos) { + // store ctrlpkt preemption symbols which is later used for patching instr buf + m_ctrlpkt_pm_dynsyms.emplace(symname); } - } - // Create uC control code from the collected data. If page - // requirement, then pad to page size for page of a column so that - // embedded processor can load a page at a time. Note, that not - // all column uC need be used, so account for holes in - // uc_sections. Leverage that uc_sections is a std::map and that - // std::map stores its elements in ascending order of keys (this - // is asserted) - std::vector ctrlcodes; // indexed by uC index - static_assert(std::is_same_v>, "fix std::map assumption"); - ctrlcodes.resize(uc_sections.empty() ? 
0 : uc_sections.rbegin()->first + 1); - pad_offsets.resize(ctrlcodes.size()); + // Get control code section referenced by the symbol, col, and page + auto section = m_elfio.sections[sym->st_shndx]; + if (!section) + throw std::runtime_error("Invalid section index " + std::to_string(sym->st_shndx)); - for (auto& [ucidx, elf_sects] : uc_sections) { - for (auto& [page, page_sec] : elf_sects.pages) { - if (page_sec.ctrltext) - ctrlcodes[ucidx].append_section_data(page_sec.ctrltext); + auto offset = rela->r_offset; + auto [sec_size, buf_type] = determine_section_type(section->get_name()); + auto sec_index = section->get_index(); - if (page_sec.ctrldata) - ctrlcodes[ucidx].append_section_data(page_sec.ctrldata); + if (offset >= sec_size) + throw std::runtime_error("Invalid offset " + std::to_string(offset)); - ctrlcodes[ucidx].pad_to_page(page); + if (std::string(symname).find("pdi") != std::string::npos) { + // pdi symbol, add to map of which ctrl code needs it + auto idx = get_section_name_index(section->get_name()); + m_ctrl_pdi_map[idx].insert(symname); } - pad_offsets[ucidx] = ctrlcodes[ucidx].size(); - } - // Append pad section to the control code. 
- // This section may contain scratchpad/control-packet etc - for (const auto& sec : elf.sections) { - auto name = sec->get_name(); - if (name.find(patcher::to_string(patcher::buf_type::pad)) == std::string::npos) - continue; + patcher::symbol_type patch_scheme; + uint32_t add_end_addr; + auto abi_version = static_cast(m_elfio.get_abi_version()); + if (abi_version != 1) { + add_end_addr = rela->r_addend; + patch_scheme = static_cast(type); + } + else { + // rela addend have offset to base_bo_addr info along with schema + add_end_addr = (rela->r_addend & addend_mask) >> addend_shift; + patch_scheme = static_cast(rela->r_addend & schema_mask); + } - auto ucidx = get_col_idx(name); - ctrlcodes[ucidx].append_section_data(sec.get()); + std::string argnm{ symname, symname + std::min(strlen(symname), dynstr->get_size()) }; + patcher::patch_info pi = patch_scheme == patcher::symbol_type::scalar_32bit_kind ? + // st_size is is encoded using register value mask for scaler_32 + // for other pacthing scheme it is encoded using size of dma + patcher::patch_info{ offset, add_end_addr, static_cast(sym->st_size) } : + patcher::patch_info{ offset, add_end_addr, 0 }; + + auto key_string = generate_key_string(argnm, buf_type, sec_index); + + if (auto search = m_arg2patcher.find(key_string); search != m_arg2patcher.end()) + search->second.m_ctrlcode_patchinfo.emplace_back(pi); + else { + m_arg2patcher.emplace(std::move(key_string), patcher{patch_scheme, {pi}, buf_type}); + } } + } - return ctrlcodes; +public: + explicit module_elf_aie2p(const xrt::elf& elf) + : module_elf(elf) + { + initialize_partition_size(); + initialize_kernel_info(); + initialize_buf(patcher::buf_type::ctrltext, m_instr_buf_map); + initialize_buf(patcher::buf_type::ctrldata, m_ctrl_packet_map); + + m_save_buf_exist = initialize_save_restore_buf(m_save_buf, + m_save_buf_sec_idx, + patcher::buf_type::preempt_save); + m_restore_buf_exist = initialize_save_restore_buf(m_restore_buf, + m_restore_buf_sec_idx, + 
patcher::buf_type::preempt_restore); + if (m_save_buf_exist != m_restore_buf_exist) + throw std::runtime_error{ "Invalid elf because preempt save and restore is not paired" }; + + initialize_pdi_buf(); + initialize_ctrlpkt_pm_bufs(); + initialize_arg_patchers(); } - // This function returns the column number for which this arg belongs to - static int - get_col_idx(const std::string& name) + ert_cmd_opcode + get_ert_opcode() const override { - // arg name will be of format - .control_code-.* - std::regex expr("\\d+"); // Regular expression to match one or more digits - std::smatch match; - if (!(std::regex_search(name, match, expr))) - throw std::runtime_error("incorrect section name found when parsing ctrlpkt"); + if (!m_pdi_buf_map.empty()) + return ERT_START_NPU_PREEMPT_ELF; - return std::stoi(match.str()); + if (m_save_buf_exist && m_restore_buf_exist) + return ERT_START_NPU_PREEMPT; + + return ERT_START_NPU; } - std::pair - determine_section_type(const std::string& section_name) + const std::unordered_set& + get_patch_pdis(uint32_t index = 0) const override { - if (section_name == patcher::to_string(patcher::buf_type::ctrltext)) - return { m_instr_buf.size(), patcher::buf_type::ctrltext}; + static const std::unordered_set empty_set = {}; + auto it = m_ctrl_pdi_map.find(index); + if (it != m_ctrl_pdi_map.end()) + return it->second; - else if (m_ctrl_packet_exist && (section_name == patcher::to_string(patcher::buf_type::ctrldata))) - return { m_ctrl_packet.size(), patcher::buf_type::ctrldata}; + return empty_set; + } - else if (m_save_buf_exist && (section_name == patcher::to_string(patcher::buf_type::preempt_save))) - return { m_save_buf.size(), patcher::buf_type::preempt_save }; + const buf& + get_pdi(const std::string& pdi_name) const override + { + auto it = m_pdi_buf_map.find(pdi_name); + if (it != m_pdi_buf_map.end()) + return it->second; - else if (m_restore_buf_exist && (section_name == patcher::to_string(patcher::buf_type::preempt_restore))) - return { 
m_restore_buf.size(), patcher::buf_type::preempt_restore }; + return buf::get_empty_buf(); + } - else - throw std::runtime_error("Invalid section name " + section_name); + std::pair + get_instr(uint32_t index) const override + { + auto it = m_instr_buf_map.find(index); + if (it != m_instr_buf_map.end()) + return it->second; + return std::make_pair(UINT32_MAX, instr_buf::get_empty_buf()); } - std::map - initialize_arg_patchers(const ELFIO::elfio& elf) + std::pair + get_ctrlpkt(uint32_t index) const override { - auto dynsym = elf.sections[".dynsym"]; - auto dynstr = elf.sections[".dynstr"]; + auto it = m_ctrl_packet_map.find(index); + if (it != m_ctrl_packet_map.end()) + return it->second; + return std::make_pair(UINT32_MAX, control_packet::get_empty_buf()); + } - std::map arg2patchers; + const std::set& + get_ctrlpkt_pm_dynsyms() const override + { + return m_ctrlpkt_pm_dynsyms; + } - for (const auto& sec : elf.sections) { - auto name = sec->get_name(); - if (name.find(".rela.dyn") == std::string::npos) - continue; + const std::map& + get_ctrlpkt_pm_bufs() const override + { + return m_ctrlpkt_pm_bufs; + } - // Iterate over all relocations and construct a patcher for each - // relocation that refers to a symbol in the .dynsym section. 
- auto begin = reinterpret_cast(sec->get_data()); - auto end = begin + sec->get_size() / sizeof(const ELFIO::Elf32_Rela); - for (auto rela = begin; rela != end; ++rela) { - auto symidx = ELFIO::get_sym_and_type::get_r_sym(rela->r_info); - auto type = ELFIO::get_sym_and_type::get_r_type(rela->r_info); + size_t + get_scratch_pad_mem_size() const override + { + return m_scratch_pad_mem_size; + } - auto dynsym_offset = symidx * sizeof(ELFIO::Elf32_Sym); - if (dynsym_offset >= dynsym->get_size()) - throw std::runtime_error("Invalid symbol index " + std::to_string(symidx)); - auto sym = reinterpret_cast(dynsym->get_data() + dynsym_offset); + std::pair + get_preempt_save() const override + { + return {m_save_buf_sec_idx, m_save_buf}; + } - auto dynstr_offset = sym->st_name; - if (dynstr_offset >= dynstr->get_size()) - throw std::runtime_error("Invalid symbol name offset " + std::to_string(dynstr_offset)); - auto symname = dynstr->get_data() + dynstr_offset; + std::pair + get_preempt_restore() const override + { + return {m_restore_buf_sec_idx, m_restore_buf}; + } - if (!m_scratch_pad_mem_size && (strcmp(symname, Scratch_Pad_Mem_Symbol) == 0)) { - m_scratch_pad_mem_size = static_cast(sym->st_size); - } + virtual uint32_t + get_partition_size() const override + { + if (m_partition_size == UINT32_MAX) + throw std::runtime_error("No partition info available, wrong ELF passed\n"); + return m_partition_size; + } - static constexpr const char* ctrlpkt_pm_dynsym = "ctrlpkt-pm"; - if (std::string(symname).find(ctrlpkt_pm_dynsym) != std::string::npos) { - // store ctrlpkt preemption symbols which is later used for patching instr buf - m_ctrlpkt_pm_dynsyms.emplace(symname); - } + virtual const xrt_core::module_int::kernel_info& + get_kernel_info() const override + { + // sanity to check if kernel info is available by checking kernel name is empty + if (m_kernel_info.props.name.empty()) + throw std::runtime_error("No kernel info available, wrong ELF passed\n"); + return 
m_kernel_info; + } +}; - // Get control code section referenced by the symbol, col, and page - auto section = elf.sections[sym->st_shndx]; - if (!section) - throw std::runtime_error("Invalid section index " + std::to_string(sym->st_shndx)); +// module class for ELFs with os_abi - Elf_Amd_Aie2ps +class module_elf_aie2ps : public module_elf +{ + std::vector m_ctrlcodes; - auto offset = rela->r_offset; - auto [sec_size, buf_type] = determine_section_type(section->get_name()); + // The ELF sections embed column and page information in their + // names. Extract the column and page information from the + // section name, default to single column and page when nothing + // is specified. Note that in some usecases the extracted column + // is actually the index of column microblase controller; the term + // column and uC index is used interchangably in such cases. + static std::pair + get_column_and_page(const std::string& name) + { + constexpr size_t first_dot = 9; // .ctrltext.. + auto dot1 = name.find_first_of(".", first_dot); + auto dot2 = name.find_first_of(".", first_dot + 1); + auto col = dot1 != std::string::npos + ? std::stoi(name.substr(dot1 + 1, dot2)) + : 0; + auto page = dot2 != std::string::npos + ? std::stoi(name.substr(dot2 + 1)) + : 0; + return { col, page }; + } + + // Extract control code from ELF sections without assuming anything + // about order of sections in the ELF file. Build helper data + // structures that manages the control code data per page for each + // microblaze controller (uC), then create ctrlcode objects from the + // data. 
+ void + initialize_column_ctrlcode(std::vector& pad_offsets) + { + // Elf sections for a single page + struct elf_page + { + ELFIO::section* ctrltext = nullptr; + ELFIO::section* ctrldata = nullptr; + }; - if (offset >= sec_size) - throw std::runtime_error("Invalid offset " + std::to_string(offset)); + // Elf sections for a single column, the column control code is + // divided into pages of some architecture defined size. + struct elf_sections + { + using page_index = uint32_t; + std::map pages; + }; - patcher::symbol_type patch_scheme; - uint32_t add_end_addr; - auto abi_version = static_cast(elf.get_abi_version()); - if (abi_version != 1) { - add_end_addr = rela->r_addend; - patch_scheme = static_cast(type); - } - else { - // rela addend have offset to base_bo_addr info along with schema - add_end_addr = (rela->r_addend & addend_mask) >> addend_shift; - patch_scheme = static_cast(rela->r_addend & schema_mask); - } + // Elf ctrl code for a partition spanning multiple uC, where each + // uC has its own control code. For architectures where a + // partition is not divided into multiple controllers, there will + // be just one entry in the associative map. + // ucidx -> [page -> [ctrltext, ctrldata]] + using uc_index = uint32_t; + std::map uc_sections; - std::string argnm{ symname, symname + std::min(strlen(symname), dynstr->get_size()) }; - patcher::patch_info pi = patch_scheme == patcher::symbol_type::scalar_32bit_kind ? 
- // st_size is is encoded using register value mask for scaler_32 - // for other pacthing scheme it is encoded using size of dma - patcher::patch_info{ offset, add_end_addr, static_cast(sym->st_size) } : - patcher::patch_info{ offset, add_end_addr, 0 }; + // Iterate sections in elf, collect ctrltext and ctrldata + // per column and page + for (const auto& sec : m_elfio.sections) { + auto name = sec->get_name(); + if (name.find(patcher::to_string(patcher::buf_type::ctrltext)) != std::string::npos) { + auto [ucidx, page] = get_column_and_page(sec->get_name()); + uc_sections[ucidx].pages[page].ctrltext = sec.get(); + } + else if (name.find(patcher::to_string(patcher::buf_type::ctrldata)) != std::string::npos) { + auto [ucidx, page] = get_column_and_page(sec->get_name()); + uc_sections[ucidx].pages[page].ctrldata = sec.get(); + } + } + + // Create uC control code from the collected data. If page + // requirement, then pad to page size for page of a column so that + // embedded processor can load a page at a time. Note, that not + // all column uC need be used, so account for holes in + // uc_sections. Leverage that uc_sections is a std::map and that + // std::map stores its elements in ascending order of keys (this + // is asserted) + static_assert(std::is_same_v>, "fix std::map assumption"); + m_ctrlcodes.resize(uc_sections.empty() ? 
0 : uc_sections.rbegin()->first + 1); + pad_offsets.resize(m_ctrlcodes.size()); + for (auto& [ucidx, elf_sects] : uc_sections) { + for (auto& [page, page_sec] : elf_sects.pages) { + if (page_sec.ctrltext) + m_ctrlcodes[ucidx].append_section_data(page_sec.ctrltext); - auto key_string = generate_key_string(argnm, buf_type); + if (page_sec.ctrldata) + m_ctrlcodes[ucidx].append_section_data(page_sec.ctrldata); - if (auto search = arg2patchers.find(key_string); search != arg2patchers.end()) - search->second.m_ctrlcode_patchinfo.emplace_back(pi); - else { - arg2patchers.emplace(std::move(key_string), patcher{ patch_scheme, {pi}, buf_type}); - } + m_ctrlcodes[ucidx].pad_to_page(page); } + pad_offsets[ucidx] = m_ctrlcodes[ucidx].size(); } - return arg2patchers; + // Append pad section to the control code. + // This section may contain scratchpad/control-packet etc + for (const auto& sec : m_elfio.sections) { + auto name = sec->get_name(); + if (name.find(patcher::to_string(patcher::buf_type::pad)) == std::string::npos) + continue; + auto ucidx = get_col_idx(name); + m_ctrlcodes[ucidx].append_section_data(sec.get()); + } } - std::map - initialize_arg_patchers(const ELFIO::elfio& elf, const std::vector& ctrlcodes, const std::vector& pad_offsets) + // This function returns the column number for which this arg belongs to + static int + get_col_idx(const std::string& name) { - auto dynsym = elf.sections[".dynsym"]; - auto dynstr = elf.sections[".dynstr"]; + // arg name will be of format - .control_code-.* + std::regex expr("\\d+"); // Regular expression to match one or more digits + std::smatch match; + if (!(std::regex_search(name, match, expr))) + throw std::runtime_error("incorrect section name found when parsing ctrlpkt"); + return std::stoi(match.str()); + } - std::map arg2patcher; + void + initialize_arg_patchers(const std::vector& ctrlcodes, const std::vector& pad_offsets) + { + auto dynsym = m_elfio.sections[".dynsym"]; + auto dynstr = m_elfio.sections[".dynstr"]; - for 
(const auto& sec : elf.sections) { + for (const auto& sec : m_elfio.sections) { auto name = sec->get_name(); if (name.find(".rela.dyn") == std::string::npos) continue; @@ -876,7 +1331,6 @@ class module_elf : public module_impl if (dynsym_offset >= dynsym->get_size()) throw std::runtime_error("Invalid symbol index " + std::to_string(symidx)); auto sym = reinterpret_cast(dynsym->get_data() + dynsym_offset); - auto type = ELFIO::get_sym_and_type::get_r_type(rela->r_info); auto dynstr_offset = sym->st_name; if (dynstr_offset >= dynstr->get_size()) @@ -884,14 +1338,13 @@ class module_elf : public module_impl auto symname = dynstr->get_data() + dynstr_offset; // patching can be done to ctrlcode or ctrlpkt section - auto patch_sec = elf.sections[sym->st_shndx]; + auto patch_sec = m_elfio.sections[sym->st_shndx]; if (!patch_sec) throw std::runtime_error("Invalid section index " + std::to_string(sym->st_shndx)); auto patch_sec_name = patch_sec->get_name(); size_t abs_offset = 0; patcher::buf_type buf_type; - if (patch_sec_name.find(patcher::to_string(patcher::buf_type::pad)) != std::string::npos) { auto col = get_col_idx(patch_sec_name); for (int i = 0; i < col; ++i) @@ -908,7 +1361,6 @@ class module_elf : public module_impl auto sec_offset = page * elf_page_size + rela->r_offset + 16; // NOLINT magic number 16?? if (sec_offset >= column_ctrlcode_size) throw std::runtime_error("Invalid ctrlcode offset " + std::to_string(sec_offset)); - // The control code for all columns will be represented as one // contiguous buffer object. 
The patcher will need to know // the offset into the buffer object for the particular column @@ -924,115 +1376,36 @@ class module_elf : public module_impl // Construct the patcher for the argument with the symbol name std::string argnm{ symname, symname + std::min(strlen(symname), dynstr->get_size()) }; - //TODO consolidate all of this logic in baseclass which can be used for - //aie2p and other aie class devices - patcher::symbol_type patch_scheme; - uint32_t add_end_addr; - auto abi_version = static_cast(elf.get_abi_version()); - if (abi_version != 1) { - add_end_addr = rela->r_addend; - patch_scheme = static_cast(type); - } - else { - // rela addend have offset to base_bo_addr info along with schema - add_end_addr = (rela->r_addend & addend_mask) >> addend_shift; - patch_scheme = static_cast(rela->r_addend & schema_mask); - } - - auto key_string = generate_key_string(argnm, buf_type); - - // One arg may need to be patched at multiple offsets of control code - // arg2patcher map contains a key & value pair of arg & patcher object - // patcher object uses m_ctrlcode_patchinfo vector to store multiple offsets - // this vector size would be equal to number of places which needs patching - // On first occurrence of arg, Create a new patcher object and - // Initialize the m_ctrlcode_patchinfo vector of the single patch_info structure - // On all further occurences of arg, add patch_info structure to existing vector - - if (auto search = arg2patcher.find(key_string); search != arg2patcher.end()) - search->second.m_ctrlcode_patchinfo.emplace_back(patcher::patch_info{abs_offset, add_end_addr, 0}); + auto symbol_type = static_cast(rela->r_addend); + auto key_string = generate_key_string(argnm, buf_type, UINT32_MAX); + // One arg may need to be patched at multiple offsets of control code + // arg2patcher map contains a key & value pair of arg & patcher object + // patcher object uses m_ctrlcode_patchinfo vector to store multiple offsets + // this vector size would be equal 
to number of places which needs patching + // On first occurrence of arg, Create a new patcher object and + // Initialize the m_ctrlcode_patchinfo vector of the single patch_info structure + // On all further occurences of arg, add patch_info structure to existing vector + if (auto search = m_arg2patcher.find(key_string); search != m_arg2patcher.end()) + search->second.m_ctrlcode_patchinfo.emplace_back(patcher::patch_info{abs_offset, 0, 0}); else - arg2patcher.emplace(std::move(key_string), patcher{patch_scheme, {{abs_offset, add_end_addr}}, buf_type}); - } - } - - return arg2patcher; - } - - bool - patch_it(uint8_t* base, const std::string& argnm, size_t index, uint64_t patch, patcher::buf_type type) override - { - auto key_string = generate_key_string(argnm, type); - auto it = m_arg2patcher.find(key_string); - auto not_found_use_argument_name = (it == m_arg2patcher.end()); - if (not_found_use_argument_name) {// Search using index - auto index_string = std::to_string(index); - const std::string key_index_string = generate_key_string(index_string, type); - it = m_arg2patcher.find(key_index_string); - if (it == m_arg2patcher.end()) - return false; - } - - it->second.patch_it(base, patch); - if (xrt_core::config::get_xrt_debug()) { - if (not_found_use_argument_name) { - std::stringstream ss; - ss << "Patched " << patcher::to_string(type) << " using argument index " << index << " with value " << std::hex << patch; - xrt_core::message::send( xrt_core::message::severity_level::debug, "xrt_module", ss.str()); - } - else { - std::stringstream ss; - ss << "Patched " << patcher::to_string(type) << " using argument name " << argnm << " with value " << std::hex << patch; - xrt_core::message::send( xrt_core::message::severity_level::debug, "xrt_module", ss.str()); + m_arg2patcher.emplace(std::move(key_string), patcher{symbol_type, {{abs_offset, 0}}, buf_type}); } } - return true; } - uint8_t - get_os_abi() const override +public: + explicit module_elf_aie2ps(const xrt::elf& 
elf) + : module_elf(elf) { - return m_os_abi; + std::vector pad_offsets; + initialize_column_ctrlcode(pad_offsets); + initialize_arg_patchers(m_ctrlcodes, pad_offsets); } ert_cmd_opcode get_ert_opcode() const override { - if (m_os_abi == Elf_Amd_Aie2ps) - return ERT_START_DPU; - - if (m_os_abi != Elf_Amd_Aie2p) - throw std::runtime_error("ELF os_abi Not supported"); - - if (m_save_buf_exist && m_restore_buf_exist) - return ERT_START_NPU_PREEMPT; - - return ERT_START_NPU; - } - -public: - explicit module_elf(xrt::elf elf) - : module_impl{ elf.get_cfg_uuid() } - , m_elf(std::move(elf)) - , m_os_abi{ xrt_core::elf_int::get_elfio(m_elf).get_os_abi() } - { - if (m_os_abi == Elf_Amd_Aie2ps) { - std::vector pad_offsets; - m_ctrlcodes = initialize_column_ctrlcode(xrt_core::elf_int::get_elfio(m_elf), pad_offsets); - m_arg2patcher = initialize_arg_patchers(xrt_core::elf_int::get_elfio(m_elf), m_ctrlcodes, pad_offsets); - } - else if (m_os_abi == Elf_Amd_Aie2p) { - m_instr_buf = initialize_instr_buf(xrt_core::elf_int::get_elfio(m_elf)); - m_ctrl_packet_exist = initialize_ctrl_packet(xrt_core::elf_int::get_elfio(m_elf), m_ctrl_packet); - - m_save_buf_exist = initialize_save_buf(xrt_core::elf_int::get_elfio(m_elf), m_save_buf); - m_restore_buf_exist = initialize_restore_buf(xrt_core::elf_int::get_elfio(m_elf), m_restore_buf); - if (m_save_buf_exist != m_restore_buf_exist) - throw std::runtime_error{ "Invalid elf because preempt save and restore is not paired" }; - - initialize_ctrlpkt_pm_bufs(xrt_core::elf_int::get_elfio(m_elf)); - m_arg2patcher = initialize_arg_patchers(xrt_core::elf_int::get_elfio(m_elf)); - } + return ERT_START_DPU; } const std::vector& @@ -1040,103 +1413,9 @@ class module_elf : public module_impl { return m_ctrlcodes; } - - const instr_buf& - get_instr() const override - { - return m_instr_buf; - } - - const buf& - get_preempt_save() const override - { - return m_save_buf; - } - - const buf& - get_preempt_restore() const override - { - return m_restore_buf; 
- } - - size_t - get_scratch_pad_mem_size() const override - { - return m_scratch_pad_mem_size; - } - - const control_packet& - get_ctrlpkt() const override - { - return m_ctrl_packet; - } - - const std::set& - get_ctrlpkt_pm_dynsyms() const override - { - return m_ctrlpkt_pm_dynsyms; - } - - const std::map& - get_ctrlpkt_pm_bufs() const override - { - return m_ctrlpkt_pm_bufs; - } - - size_t - number_of_arg_patchers() const override - { - return m_arg2patcher.size(); - } -}; - -// class module_userptr - Opaque userptr provided by application -class module_userptr : public module_impl -{ - std::vector m_ctrlcode; - instr_buf m_instr_buf; - control_packet m_ctrl_pkt; - - // Create a ctrlcode object from the userptr. - static std::vector - initialize_ctrlcode(const char* userptr, size_t sz) - { - std::vector ctrlcodes; - ctrlcodes.resize(1); - ctrlcodes[0].append_section_data(reinterpret_cast(userptr), sz); - return ctrlcodes; - } - -public: - module_userptr(const char* userptr, size_t sz, const xrt::uuid& uuid) - : module_impl{ uuid } - , m_ctrlcode{ initialize_ctrlcode(userptr, sz) } - {} - - module_userptr(const void* userptr, size_t sz, const xrt::uuid& uuid) - : module_userptr(static_cast(userptr), sz, uuid) - {} - - const std::vector& - get_data() const override - { - return m_ctrlcode; - } - - const instr_buf& - get_instr() const override - { - return m_instr_buf; - } - - const control_packet& - get_ctrlpkt() const override - { - return m_ctrl_pkt; - } }; -// class module_sram - Create an hwct specific (sram) module from parent +// class module_sram - Create an hwctx specific (sram) module from parent // // Allocate a buffer object to hold the ctrlcodes for each column created // by parent module. 
The ctrlcodes are concatenated into a single buffer @@ -1145,6 +1424,9 @@ class module_sram : public module_impl { std::shared_ptr m_parent; xrt::hw_context m_hwctx; + // New ELFs have multiple ctrl sections + // we need index to identify which ctrl section to pick from parent module + uint32_t m_index; // The instruction buffer object contains the ctrlcodes for each // column. The ctrlcodes are concatenated into a single buffer @@ -1168,6 +1450,9 @@ class module_sram : public module_impl // payload to identify the ctrlcode for each column processor. std::vector> m_column_bo_address; + uint32_t m_instr_sec_idx; + uint32_t m_ctrlpkt_sec_idx; + // Arguments patched in the ctrlcode buffer object // Must match number of argument patchers in parent module std::set m_patched_args; @@ -1252,7 +1537,12 @@ class module_sram : public module_impl // find the control-code-* sym-name and patch it in instruction buffer // This name is an agreement between aiebu and XRT auto sym_name = std::string(Control_Code_Symbol) + "-" + std::to_string(i); - if (patch_instr_value(m_buffer, sym_name, std::numeric_limits::max() , m_buffer.address() + offset, patcher::buf_type::ctrltext)) + if (patch_instr_value(m_buffer, + sym_name, + std::numeric_limits::max(), + m_buffer.address() + offset, + patcher::buf_type::ctrltext, + UINT32_MAX /*section index is not considered in aie2ps*/)) m_patched_args.insert(sym_name); offset += col_data[i].size(); } @@ -1279,7 +1569,9 @@ class module_sram : public module_impl create_instr_buf(const module_impl* parent) { XRT_DEBUGF("-> module_sram::create_instr_buf()\n"); - const auto& data = parent->get_instr(); + auto instr_buf_info = parent->get_instr(m_index); + m_instr_sec_idx = instr_buf_info.first; + const instr_buf& data = instr_buf_info.second; size_t sz = data.size(); if (sz == 0) throw std::runtime_error("Invalid instruction buffer size"); @@ -1299,10 +1591,10 @@ class module_sram : public module_impl 
xrt_core::message::send(xrt_core::message::severity_level::debug, "xrt_module", ss.str()); } - const auto& preempt_save_data = parent->get_preempt_save(); + auto [save_sec_idx, preempt_save_data] = parent->get_preempt_save(); auto preempt_save_data_size = preempt_save_data.size(); - const auto& preempt_restore_data = parent->get_preempt_restore(); + auto [restore_sec_idx, preempt_restore_data] = parent->get_preempt_restore(); auto preempt_restore_data_size = preempt_restore_data.size(); if ((preempt_save_data_size > 0) && (preempt_restore_data_size > 0)) { @@ -1331,8 +1623,10 @@ class module_sram : public module_impl if ((preempt_save_data_size > 0) && (preempt_restore_data_size > 0)) { m_scratch_pad_mem = xrt::ext::bo{ m_hwctx, m_parent->get_scratch_pad_mem_size() }; - patch_instr(m_preempt_save_bo, Scratch_Pad_Mem_Symbol, 0, m_scratch_pad_mem, patcher::buf_type::preempt_save); - patch_instr(m_preempt_restore_bo, Scratch_Pad_Mem_Symbol, 0, m_scratch_pad_mem, patcher::buf_type::preempt_restore); + patch_instr(m_preempt_save_bo, Scratch_Pad_Mem_Symbol, 0, m_scratch_pad_mem, + patcher::buf_type::preempt_save, save_sec_idx); + patch_instr(m_preempt_restore_bo, Scratch_Pad_Mem_Symbol, 0, m_scratch_pad_mem, + patcher::buf_type::preempt_restore, restore_sec_idx); if (is_dump_preemption_codes()) { std::stringstream ss; @@ -1341,8 +1635,18 @@ class module_sram : public module_impl } } + // patch all pdi addresses + auto pdi_symbols = parent->get_patch_pdis(m_index); + for (const auto& symbol : pdi_symbols) { + const auto& pdi_data = parent->get_pdi(symbol); + auto pdi_bo = xrt::bo{ m_hwctx, pdi_data.size(), xrt::bo::flags::cacheable, 1 /* fix me */ }; + fill_bo_with_data(pdi_bo, pdi_data); + // patch instr buffer with pdi address + patch_instr(m_instr_bo, symbol, 0, pdi_bo, patcher::buf_type::ctrltext, m_instr_sec_idx); + } + if (m_ctrlpkt_bo) { - patch_instr(m_instr_bo, Control_Packet_Symbol, 0, m_ctrlpkt_bo, patcher::buf_type::ctrltext); + patch_instr(m_instr_bo, 
Control_Packet_Symbol, 0, m_ctrlpkt_bo, patcher::buf_type::ctrltext, m_instr_sec_idx); } // patch ctrlpkt pm buffers @@ -1354,7 +1658,7 @@ class module_sram : public module_impl auto bo_itr = m_ctrlpkt_pm_bos.find(sec_name); if (bo_itr == m_ctrlpkt_pm_bos.end()) throw std::runtime_error("Unable to find ctrlpkt pm buffer for symbol " + dynsym); - patch_instr(m_instr_bo, dynsym, 0, bo_itr->second, patcher::buf_type::ctrltext); + patch_instr(m_instr_bo, dynsym, 0, bo_itr->second, patcher::buf_type::ctrltext, m_instr_sec_idx); } XRT_DEBUGF("<- module_sram::create_instr_buf()\n"); @@ -1363,7 +1667,9 @@ class module_sram : public module_impl void create_ctrlpkt_buf(const module_impl* parent) { - const auto& data = parent->get_ctrlpkt(); + auto ctrl_pkt_info = parent->get_ctrlpkt(m_index); + m_ctrlpkt_sec_idx = ctrl_pkt_info.first; + const control_packet& data = ctrl_pkt_info.second; size_t sz = data.size(); if (sz == 0) { @@ -1376,12 +1682,12 @@ class module_sram : public module_impl fill_ctrlpkt_buf(m_ctrlpkt_bo, data); if (is_dump_control_packet()) { - std::string dump_file_name = "ctr_packet_pre_patch" + std::to_string(get_id()) + ".bin"; - dump_bo(m_ctrlpkt_bo, dump_file_name); + std::string dump_file_name = "ctr_packet_pre_patch" + std::to_string(get_id()) + ".bin"; + dump_bo(m_ctrlpkt_bo, dump_file_name); - std::stringstream ss; - ss << "dumped file " << dump_file_name; - xrt_core::message::send(xrt_core::message::severity_level::debug, "xrt_module", ss.str()); + std::stringstream ss; + ss << "dumped file " << dump_file_name; + xrt_core::message::send(xrt_core::message::severity_level::debug, "xrt_module", ss.str()); } } @@ -1417,30 +1723,31 @@ class module_sram : public module_impl } void - patch_instr(xrt::bo& bo_ctrlcode, const std::string& argnm, size_t index, const xrt::bo& bo, patcher::buf_type type) override + patch_instr(xrt::bo& bo_ctrlcode, const std::string& argnm, size_t index, const xrt::bo& bo, + patcher::buf_type type, uint32_t sec_idx) override { - 
patch_instr_value(bo_ctrlcode, argnm, index, bo.address(), type); + patch_instr_value(bo_ctrlcode, argnm, index, bo.address(), type, sec_idx); } void patch_value(const std::string& argnm, size_t index, uint64_t value) { bool patched = false; - if (m_parent->get_os_abi() == Elf_Amd_Aie2p) { + if (m_parent->get_os_abi() == Elf_Amd_Aie2p || m_parent->get_os_abi() == Elf_Amd_Aie2p_config) { // patch control-packet buffer if (m_ctrlpkt_bo) { - if (m_parent->patch_it(m_ctrlpkt_bo.map(), argnm, index, value, patcher::buf_type::ctrldata)) + if (m_parent->patch_it(m_ctrlpkt_bo.map(), argnm, index, value, patcher::buf_type::ctrldata, m_ctrlpkt_sec_idx)) patched = true; } // patch instruction buffer - if (m_parent->patch_it(m_instr_bo.map(), argnm, index, value, patcher::buf_type::ctrltext)) + if (m_parent->patch_it(m_instr_bo.map(), argnm, index, value, patcher::buf_type::ctrltext, m_instr_sec_idx)) patched = true; } else { - if (m_parent->patch_it(m_buffer.map(), argnm, index, value, patcher::buf_type::ctrltext)) + if (m_parent->patch_it(m_buffer.map(), argnm, index, value, patcher::buf_type::ctrltext, UINT32_MAX)) patched = true; - if (m_parent->patch_it(m_buffer.map(), argnm, index, value, patcher::buf_type::pad)) + if (m_parent->patch_it(m_buffer.map(), argnm, index, value, patcher::buf_type::pad, UINT32_MAX)) patched = true; } @@ -1450,32 +1757,17 @@ class module_sram : public module_impl } } - bool - patch_instr_value(xrt::bo& bo, const std::string& argnm, size_t index, uint64_t value, patcher::buf_type type) + bool + patch_instr_value(xrt::bo& bo, const std::string& argnm, size_t index, uint64_t value, + patcher::buf_type type, uint32_t sec_index) { - if (!m_parent->patch_it(bo.map(), argnm, index, value, type)) + if (!m_parent->patch_it(bo.map(), argnm, index, value, type, sec_index)) return false; m_dirty = true; return true; } - void - patch(const std::string& argnm, size_t index, const xrt::bo& bo) override - { - patch_value(argnm, index, bo.address()); - } - - 
void - patch(const std::string& argnm, size_t index, const void* value, size_t size) override - { - if (size > 8) // NOLINT - throw std::runtime_error{ "patch_value() only supports 64-bit values or less" }; - - auto arg_value = *static_cast(value); - patch_value(argnm, index, arg_value); - } - // Check that all arguments have been patched and sync the buffer // to device if it is dirty. void @@ -1493,7 +1785,7 @@ class module_sram : public module_impl } m_buffer.sync(XCL_BO_SYNC_BO_TO_DEVICE); } - else if (os_abi == Elf_Amd_Aie2p) { + else if (os_abi == Elf_Amd_Aie2p || os_abi == Elf_Amd_Aie2p_config) { m_instr_bo.sync(XCL_BO_SYNC_BO_TO_DEVICE); if (is_dump_control_codes()) { @@ -1544,31 +1836,33 @@ class module_sram : public module_impl } uint32_t* - fill_ert_aie2p(uint32_t *payload) const - { - if (m_preempt_save_bo && m_preempt_restore_bo) { - // npu preemption - auto npu = reinterpret_cast(payload); - npu->instruction_buffer = m_instr_bo.address(); - npu->instruction_buffer_size = static_cast(m_instr_bo.size()); - npu->save_buffer = m_preempt_save_bo.address(); - npu->save_buffer_size = static_cast(m_preempt_save_bo.size()); - npu->restore_buffer = m_preempt_restore_bo.address(); - npu->restore_buffer_size = static_cast(m_preempt_restore_bo.size()); - npu->instruction_prop_count = 0; // Reserved for future use - payload += sizeof(ert_npu_preempt_data) / sizeof(uint32_t); - - return payload; - } + fill_ert_aie2p_preempt_data(uint32_t *payload) const + { + // npu preemption in elf_flow + auto npu = reinterpret_cast(payload); + npu->instruction_buffer = m_instr_bo.address(); + npu->instruction_buffer_size = static_cast(m_instr_bo.size()); + npu->instruction_prop_count = 0; // Reserved for future use + if (m_preempt_save_bo && m_preempt_restore_bo) { + npu->save_buffer = m_preempt_save_bo.address(); + npu->save_buffer_size = static_cast(m_preempt_save_bo.size()); + npu->restore_buffer = m_preempt_restore_bo.address(); + npu->restore_buffer_size = 
static_cast(m_preempt_restore_bo.size()); + } + payload += sizeof(ert_npu_preempt_data) / sizeof(uint32_t); + return payload; + } - // npu non-preemption - auto npu = reinterpret_cast(payload); - npu->instruction_buffer = m_instr_bo.address(); - npu->instruction_buffer_size = static_cast(m_instr_bo.size()); - npu->instruction_prop_count = 0; // Reserved for future use - payload += sizeof(ert_npu_data) / sizeof(uint32_t); + uint32_t* + fill_ert_aie2p_non_preempt_data(uint32_t *payload) const + { + auto npu = reinterpret_cast(payload); + npu->instruction_buffer = m_instr_bo.address(); + npu->instruction_buffer_size = static_cast(m_instr_bo.size()); + npu->instruction_prop_count = 0; // Reserved for future use + payload += sizeof(ert_npu_data) / sizeof(uint32_t); - return payload; + return payload; } uint32_t* @@ -1592,10 +1886,11 @@ class module_sram : public module_impl } public: - module_sram(std::shared_ptr parent, xrt::hw_context hwctx) + module_sram(std::shared_ptr parent, xrt::hw_context hwctx, uint32_t index = 0) : module_impl{ parent->get_cfg_uuid() } , m_parent{ std::move(parent) } , m_hwctx{ std::move(hwctx) } + , m_index{ index } { if (xrt_core::config::get_xrt_debug()) { m_debug_mode.debug_flags.dump_control_codes = xrt_core::config::get_feature_toggle("Debug.dump_control_codes"); @@ -1607,8 +1902,8 @@ class module_sram : public module_impl auto os_abi = m_parent.get()->get_os_abi(); - if (os_abi == Elf_Amd_Aie2p) { - // make sure to create control-packet buffers first because we may + if (os_abi == Elf_Amd_Aie2p || os_abi == Elf_Amd_Aie2p_config) { + // make sure to create control-packet buffer first because we may // need to patch control-packet address to instruction buffer create_ctrlpkt_buf(m_parent.get()); create_ctrlpkt_pm_bufs(m_parent.get()); @@ -1626,10 +1921,19 @@ class module_sram : public module_impl { auto os_abi = m_parent.get()->get_os_abi(); - if (os_abi == Elf_Amd_Aie2p) - return fill_ert_aie2p(payload); - - return 
fill_ert_aie2ps(payload); + switch (os_abi) { + case Elf_Amd_Aie2p : + if (m_preempt_save_bo && m_preempt_restore_bo) + return fill_ert_aie2p_preempt_data(payload); + else + return fill_ert_aie2p_non_preempt_data(payload); + case Elf_Amd_Aie2p_config : + return fill_ert_aie2p_preempt_data(payload); + case Elf_Amd_Aie2ps : + return fill_ert_aie2ps(payload); + default : + throw std::runtime_error("unknown ELF type passed\n"); + } } xrt::bo& @@ -1657,6 +1961,22 @@ class module_sram : public module_impl msg.append(dump_file_name); xrt_core::message::send(xrt_core::message::severity_level::debug, "xrt_module", msg); } + + void + patch(const std::string& argnm, size_t index, const xrt::bo& bo) override + { + patch_value(argnm, index, bo.address()); + } + + void + patch(const std::string& argnm, size_t index, const void* value, size_t size) override + { + if (size > 8) // NOLINT + throw std::runtime_error{ "patch_value() only supports 64-bit values or less" }; + + auto arg_value = *static_cast(value); + patch_value(argnm, index, arg_value); + } }; } // namespace xrt @@ -1675,21 +1995,28 @@ fill_ert_dpu_data(const xrt::module& module, uint32_t* payload) void patch(const xrt::module& module, const std::string& argnm, size_t index, const xrt::bo& bo) { - module.get_handle()->patch(argnm, index, bo); + auto module_sram = std::dynamic_pointer_cast(module.get_handle()); + if (!module_sram) + throw std::runtime_error("Getting module_sram failed, wrong module object passed\n"); + module_sram->patch(argnm, index, bo); } void -patch(const xrt::module& module, uint8_t* ibuf, size_t* sz, const std::vector>* args) +patch(const xrt::module& module, uint8_t* ibuf, size_t* sz, const std::vector>* args, + uint32_t idx) { auto hdl = module.get_handle(); size_t orig_sz = *sz; const buf* inst = nullptr; + uint32_t patch_index = UINT32_MAX; + auto os_abi = hdl->get_os_abi(); - if (hdl->get_os_abi() == Elf_Amd_Aie2p) { - const auto& instr_buf = hdl->get_instr(); - inst = &instr_buf; + if 
(os_abi == Elf_Amd_Aie2p || os_abi == Elf_Amd_Aie2p_config) { + auto buf_info = hdl->get_instr(idx); + patch_index = buf_info.first; + inst = &(buf_info.second); } - else if(hdl->get_os_abi() == Elf_Amd_Aie2ps) { + else if(os_abi == Elf_Amd_Aie2ps) { const auto& instr_buf = hdl->get_data(); if (instr_buf.size() != 1) throw std::runtime_error{"Patch failed: only support patching single column"}; @@ -1709,7 +2036,7 @@ patch(const xrt::module& module, uint8_t* ibuf, size_t* sz, const std::vectorpatch_it(ibuf, arg_name, index, arg_addr, patcher::buf_type::ctrltext)) + if (!hdl->patch_it(ibuf, arg_name, index, arg_addr, patcher::buf_type::ctrltext, patch_index)) throw std::runtime_error{"Failed to patch " + arg_name}; index++; } @@ -1718,7 +2045,10 @@ patch(const xrt::module& module, uint8_t* ibuf, size_t* sz, const std::vectorpatch(argnm, index, value, size); + auto module_sram = std::dynamic_pointer_cast(module.get_handle()); + if (!module_sram) + throw std::runtime_error("Getting module_sram failed, wrong module object passed\n"); + module_sram->patch(argnm, index, value, size); } void @@ -1743,8 +2073,38 @@ dump_scratchpad_mem(const xrt::module& module) module_sram->dump_scratchpad_mem(); } +const xrt_core::module_int::kernel_info& +get_kernel_info(const xrt::module& module) +{ + return module.get_handle()->get_kernel_info(); +} + +uint32_t +get_partition_size(const xrt::module& module) +{ + return module.get_handle()->get_partition_size(); +} + } // xrt_core::module_int +namespace +{ +static std::shared_ptr +construct_module_elf(const xrt::elf& elf) +{ + auto os_abi = xrt_core::elf_int::get_elfio(elf).get_os_abi(); + switch (os_abi) { + case Elf_Amd_Aie2p : + case Elf_Amd_Aie2p_config : + return std::make_shared(elf); + case Elf_Amd_Aie2ps : + return std::make_shared(elf); + default : + throw std::runtime_error("unknown ELF type passed\n"); + } +} +} + //////////////////////////////////////////////////////////////// // xrt_module C++ API implementation 
(xrt_module.h) //////////////////////////////////////////////////////////////// @@ -1752,7 +2112,7 @@ namespace xrt { module:: module(const xrt::elf& elf) -: detail::pimpl{ std::make_shared(elf) } +: detail::pimpl(construct_module_elf(elf)) {} module:: @@ -1765,6 +2125,11 @@ module(const xrt::module& parent, const xrt::hw_context& hwctx) : detail::pimpl{ std::make_shared(parent.handle, hwctx) } {} +module:: +module(const xrt::module& parent, const xrt::hw_context& hwctx, uint32_t ctrl_code_idx) +: detail::pimpl{ std::make_shared(parent.handle, hwctx, ctrl_code_idx) } +{} + xrt::uuid module:: get_cfg_uuid() const diff --git a/src/runtime_src/core/common/ishim.h b/src/runtime_src/core/common/ishim.h index 981c917f8b..e27a509a13 100755 --- a/src/runtime_src/core/common/ishim.h +++ b/src/runtime_src/core/common/ishim.h @@ -1,6 +1,6 @@ // SPDX-License-Identifier: Apache-2.0 // Copyright (C) 2019-2022 Xilinx, Inc. All rights reserved. -// Copyright (C) 2022-2023 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (C) 2022-2025 Advanced Micro Devices, Inc. All rights reserved. #ifndef core_common_ishim_h #define core_common_ishim_h @@ -157,6 +157,15 @@ struct ishim const xrt::hw_context::cfg_param_type& /*cfg_params*/, xrt::hw_context::access_mode /*mode*/) const = 0; + // creates hw context using partition size + // Used in elf flow + // This function is not supported by all platforms + virtual std::unique_ptr + create_hw_context(uint32_t /*partition_size*/, + const xrt::hw_context::cfg_param_type& /*cfg_params*/, + xrt::hw_context::access_mode /*mode*/) const + { throw not_supported_error{__func__}; } + // Registers an xclbin with shim, but does not load it. 
// This is no-op for most platform shims virtual void diff --git a/src/runtime_src/core/include/shim_int.h b/src/runtime_src/core/include/shim_int.h index 13e4f29093..0b1cdf56ed 100644 --- a/src/runtime_src/core/include/shim_int.h +++ b/src/runtime_src/core/include/shim_int.h @@ -1,6 +1,6 @@ // SPDX-License-Identifier: Apache-2.0 // Copyright (C) 2021-2022 Xilinx, Inc. All rights reserved. -// Copyright (C) 2022-2024 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (C) 2022-2025 Advanced Micro Devices, Inc. All rights reserved. #ifndef SHIM_INT_H_ #define SHIM_INT_H_ @@ -75,6 +75,9 @@ create_hw_context(xclDeviceHandle handle, const xrt::hw_context::cfg_param_type& cfg_param, xrt::hw_context::access_mode mode); +std::unique_ptr +create_hw_context(xclDeviceHandle handle, uint32_t partition_size); + // get_hw_queue() - xrt_core::hwqueue_handle* get_hw_queue(xclDeviceHandle handle, xrt_core::hwctx_handle* ctxhdl); diff --git a/src/runtime_src/core/include/xrt/detail/ert.h b/src/runtime_src/core/include/xrt/detail/ert.h index b097bba75d..44077539e4 100644 --- a/src/runtime_src/core/include/xrt/detail/ert.h +++ b/src/runtime_src/core/include/xrt/detail/ert.h @@ -643,28 +643,29 @@ struct cu_cmd_state_timestamps { * @ERT_START_NPU_PREEMPT: instruction buffer command with preemption format on NPU */ enum ert_cmd_opcode { - ERT_START_CU = 0, - ERT_START_KERNEL = 0, - ERT_CONFIGURE = 2, - ERT_EXIT = 3, - ERT_ABORT = 4, - ERT_EXEC_WRITE = 5, - ERT_CU_STAT = 6, - ERT_START_COPYBO = 7, - ERT_SK_CONFIG = 8, - ERT_SK_START = 9, - ERT_SK_UNCONFIG = 10, - ERT_INIT_CU = 11, - ERT_START_FA = 12, - ERT_CLK_CALIB = 13, - ERT_MB_VALIDATE = 14, - ERT_START_KEY_VAL = 15, - ERT_ACCESS_TEST_C = 16, - ERT_ACCESS_TEST = 17, - ERT_START_DPU = 18, - ERT_CMD_CHAIN = 19, - ERT_START_NPU = 20, - ERT_START_NPU_PREEMPT = 21, + ERT_START_CU = 0, + ERT_START_KERNEL = 0, + ERT_CONFIGURE = 2, + ERT_EXIT = 3, + ERT_ABORT = 4, + ERT_EXEC_WRITE = 5, + ERT_CU_STAT = 6, + ERT_START_COPYBO = 7, 
+ ERT_SK_CONFIG = 8, + ERT_SK_START = 9, + ERT_SK_UNCONFIG = 10, + ERT_INIT_CU = 11, + ERT_START_FA = 12, + ERT_CLK_CALIB = 13, + ERT_MB_VALIDATE = 14, + ERT_START_KEY_VAL = 15, + ERT_ACCESS_TEST_C = 16, + ERT_ACCESS_TEST = 17, + ERT_START_DPU = 18, + ERT_CMD_CHAIN = 19, + ERT_START_NPU = 20, + ERT_START_NPU_PREEMPT = 21, + ERT_START_NPU_PREEMPT_ELF = 22, }; /** @@ -997,7 +998,12 @@ ert_valid_opcode(struct ert_packet *pkt) /* 1 mandatory cumask + extra_cu_masks + ert_npu_data */ valid = (skcmd->count >= 1+ skcmd->extra_cu_masks + sizeof(struct ert_npu_data) / sizeof(uint32_t)); break; - case ERT_START_NPU_PREEMPT: + case ERT_START_NPU_PREEMPT: + skcmd = to_start_krnl_pkg(pkt); + /* 1 mandatory cumask + extra_cu_masks + ert_npu_preempt_data */ + valid = (skcmd->count >= 1+ skcmd->extra_cu_masks + sizeof(struct ert_npu_preempt_data) / sizeof(uint32_t)); + break; + case ERT_START_NPU_PREEMPT_ELF: skcmd = to_start_krnl_pkg(pkt); /* 1 mandatory cumask + extra_cu_masks + ert_npu_preempt_data */ valid = (skcmd->count >= 1+ skcmd->extra_cu_masks + sizeof(struct ert_npu_preempt_data) / sizeof(uint32_t)); @@ -1111,6 +1117,15 @@ get_ert_npu_preempt_data(struct ert_start_kernel_cmd* pkt) return (struct ert_npu_preempt_data*) (pkt->data + pkt->extra_cu_masks); } +static inline struct ert_npu_preempt_data* +get_ert_npu_elf_data(struct ert_start_kernel_cmd* pkt) +{ + if (pkt->opcode != ERT_START_NPU_PREEMPT_ELF) + return NULL; + // past extra cu_masks embedded in the packet data + return (struct ert_npu_preempt_data*) (pkt->data + pkt->extra_cu_masks); +} + static inline uint32_t* get_ert_regmap_begin(struct ert_start_kernel_cmd* pkt) { @@ -1129,6 +1144,11 @@ get_ert_regmap_begin(struct ert_start_kernel_cmd* pkt) + sizeof(struct ert_npu_preempt_data) / sizeof(uint32_t) + get_ert_npu_preempt_data(pkt)->instruction_prop_count; + case ERT_START_NPU_PREEMPT_ELF: + return pkt->data + pkt->extra_cu_masks + + sizeof(struct ert_npu_preempt_data) / sizeof(uint32_t) + + 
get_ert_npu_elf_data(pkt)->instruction_prop_count; + default: // skip past embedded extra cu_masks return pkt->data + pkt->extra_cu_masks; diff --git a/src/runtime_src/core/include/xrt/experimental/xrt_ext.h b/src/runtime_src/core/include/xrt/experimental/xrt_ext.h index dc1b79d6f7..c9c1e7b8de 100644 --- a/src/runtime_src/core/include/xrt/experimental/xrt_ext.h +++ b/src/runtime_src/core/include/xrt/experimental/xrt_ext.h @@ -1,5 +1,5 @@ // SPDX-License-Identifier: Apache-2.0 -// Copyright (C) 2023 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (C) 2023-2025 Advanced Micro Devices, Inc. All rights reserved. #ifndef XRT_EXT_H_ #define XRT_EXT_H_ @@ -254,6 +254,23 @@ class kernel : public xrt::kernel */ XRT_API_EXPORT kernel(const xrt::hw_context& ctx, const xrt::module& mod, const std::string& name); + + /** + * kernel() - Constructor from kernel name + * + * @param ctx + * The hardware context that this kernel is created in + * @param name + * Name of kernel function to construct + * + * Constructs a kernel object by searching through all the ELF files + * that are registered with the provided context. The function looks + * for an ELF file that contains a kernel with the specified name. + * Once a matching ELF file is found, it is used to construct the + * kernel object. + */ + XRT_API_EXPORT + kernel(const xrt::hw_context& ctx, const std::string& name); }; } // xrt::ext diff --git a/src/runtime_src/core/include/xrt/experimental/xrt_module.h b/src/runtime_src/core/include/xrt/experimental/xrt_module.h index 892db4b45d..ca351f07b2 100644 --- a/src/runtime_src/core/include/xrt/experimental/xrt_module.h +++ b/src/runtime_src/core/include/xrt/experimental/xrt_module.h @@ -1,5 +1,5 @@ // SPDX-License-Identifier: Apache-2.0 -// Copyright (C) 2023 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (C) 2023-2025 Advanced Micro Devices, Inc. All rights reserved. 
#ifndef XRT_MODULE_H_ #define XRT_MODULE_H_ @@ -95,6 +95,26 @@ class module : public detail::pimpl XRT_API_EXPORT module(const xrt::module& parent, const xrt::hw_context& hwctx); + /** + * module() - Constructor associate module with hardware context + * + * @param parent + * Parent module with instruction buffer to move into hwctx + * @param hwctx + * Hardware context to associate with module + * @param ctrl_code_idx + * index of control code inside the parent module + * + * Copy content of existing module into an allocation associated + * with the specified hardware context. + * If module has multiple control codes, index is used to identify + * the control code that needs to be run. + * + * Throws if module is not compatible with hardware context + */ + XRT_API_EXPORT + module(const xrt::module& parent, const xrt::hw_context& hwctx, uint32_t ctrl_code_idx); + /** * get_cfg_uuid() - Get the uuid of the hardware configuration * diff --git a/src/runtime_src/core/include/xrt/xrt_hw_context.h b/src/runtime_src/core/include/xrt/xrt_hw_context.h index 0290c1b067..b879a32010 100644 --- a/src/runtime_src/core/include/xrt/xrt_hw_context.h +++ b/src/runtime_src/core/include/xrt/xrt_hw_context.h @@ -1,5 +1,5 @@ // SPDX-License-Identifier: Apache-2.0 -// Copyright (C) 2022-2023 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (C) 2022-2025 Advanced Micro Devices, Inc. All rights reserved. #ifndef XRT_HW_CONTEXT_H_ #define XRT_HW_CONTEXT_H_ @@ -9,6 +9,8 @@ #include "xrt/xrt_device.h" #include "xrt/xrt_uuid.h" +#include "experimental/xrt_elf.h" + #ifdef __cplusplus #include @@ -79,6 +81,60 @@ class hw_context : public detail::pimpl */ hw_context() = default; + /** + * hw_context() - Constructor with QoS control and access control + * + * @param device + * Device where context is created + * @param cfg_param + * Configuration Parameters (incl. 
Quality of Service) + * @param mode + * Access control for the context + * + * When application uses this constructor no hw resources are allocated + * It acts as placeholder and is used for setting QoS and access control + * Applications can later add configuration Elfs using add_config api. + * The QoS definition is subject to change, so this API is not guaranteed + * to be ABI compatible in future releases + */ + XRT_API_EXPORT + hw_context(const xrt::device& device, const cfg_param_type& cfg_param, access_mode mode); + + /** + * hw_context() - Constructor with Elf file + * + * @param device + * Device where context is created + * @param elf + * XRT Elf object created from config Elf file + * @param cfg_param + * Configuration Parameters (incl. Quality of Service) + * @param mode + * Access control for the context + * + * The QoS definition is subject to change, so this API is not guaranteed + * to be ABI compatible in future releases. When cfg_param and access_mode + * are not passed hw context with shared access mode is created. + */ + XRT_API_EXPORT + hw_context(const xrt::device& device, const xrt::elf& elf, + const cfg_param_type& cfg_param = cfg_param_type{}, + access_mode mode = access_mode::shared); + + /** + * add_config() - adds config Elf file to the context + * + * @param elf + * XRT Elf object created from config Elf file + * + * Adds config Elf to context if it is the first config added + * If config already exists, it will be added only when configuration matches + * with existing one else an exception is thrown + */ + XRT_API_EXPORT + void + add_config(const xrt::elf& elf); + /** * hw_context() - Constructor with QoS control * @@ -86,7 +142,7 @@ class hw_context : public detail::pimpl * Device where context is created * @param xclbin_id * UUID of xclbin that should be assigned to HW resources - * @cfg_param + * @param cfg_param * Configuration Parameters (incl. 
Quality of Service) * * The QoS definition is subject to change, so this API is not guaranteed @@ -172,6 +228,7 @@ class hw_context : public detail::pimpl /** * get_xclbin_uuid() - UUID of xclbin from which context was created + * Returns empty uuid if context was created without xclbin (created with Elf) */ XRT_API_EXPORT xrt::uuid @@ -179,6 +236,7 @@ class hw_context : public detail::pimpl /** * get_xclbin() - Retrieve underlying xclbin matching the UUID + * Returns empty xclbin if context was created without xclbin (created with Elf) */ XRT_API_EXPORT xrt::xclbin