From 25d77a7bf90027aa4e439f9b6ade2f0a9f1196e7 Mon Sep 17 00:00:00 2001 From: Bryan Wilder Field Lozano Date: Tue, 28 Jan 2025 23:42:58 -0800 Subject: [PATCH] Remove `get_completion_queue_reader_core()` API from Device (#17263) --- tt_metal/api/tt-metalium/device.hpp | 2 -- tt_metal/api/tt-metalium/device_impl.hpp | 4 +--- .../api/tt-metalium/hardware_command_queue.hpp | 4 +++- tt_metal/api/tt-metalium/mesh_device.hpp | 1 - tt_metal/distributed/mesh_device.cpp | 8 +++----- tt_metal/impl/device/device.cpp | 15 ++++++++++++--- tt_metal/impl/dispatch/hardware_command_queue.cpp | 8 +++++--- 7 files changed, 24 insertions(+), 18 deletions(-) diff --git a/tt_metal/api/tt-metalium/device.hpp b/tt_metal/api/tt-metalium/device.hpp index 8339787ef25..07389104658 100644 --- a/tt_metal/api/tt-metalium/device.hpp +++ b/tt_metal/api/tt-metalium/device.hpp @@ -255,8 +255,6 @@ class IDevice { virtual std::tuple create_sub_device_manager_with_fabric( tt::stl::Span sub_devices, DeviceAddr local_l1_size) = 0; - virtual uint32_t get_completion_queue_reader_core() const = 0; - virtual bool is_mmio_capable() const = 0; virtual std::vector> get_tunnels_from_mmio() const = 0; diff --git a/tt_metal/api/tt-metalium/device_impl.hpp b/tt_metal/api/tt-metalium/device_impl.hpp index 07bc6b6ee5e..2d32dab8b85 100644 --- a/tt_metal/api/tt-metalium/device_impl.hpp +++ b/tt_metal/api/tt-metalium/device_impl.hpp @@ -41,7 +41,7 @@ class Device : public IDevice { std::size_t trace_region_size, tt::stl::Span l1_bank_remap = {}, bool minimal = false, - uint32_t worker_core = 0, + uint32_t worker_thread_core = 0, uint32_t completion_queue_reader_core = 0); ~Device() override; @@ -244,8 +244,6 @@ class Device : public IDevice { std::tuple create_sub_device_manager_with_fabric( tt::stl::Span sub_devices, DeviceAddr local_l1_size) override; - uint32_t get_completion_queue_reader_core() const override { return completion_queue_reader_core_; } - bool is_mmio_capable() const override; std::vector> get_tunnels_from_mmio() const override { return tunnels_from_mmio_; } diff --git a/tt_metal/api/tt-metalium/hardware_command_queue.hpp b/tt_metal/api/tt-metalium/hardware_command_queue.hpp index a2562b7892b..7d1f9e99044 100644 --- a/tt_metal/api/tt-metalium/hardware_command_queue.hpp +++ b/tt_metal/api/tt-metalium/hardware_command_queue.hpp @@ -5,6 +5,7 @@ #pragma once #include +#include #include #include @@ -75,7 +76,7 @@ using CompletionReaderVariant = std::variant tid; std::shared_ptr trace_ctx; std::thread completion_queue_thread; diff --git a/tt_metal/api/tt-metalium/mesh_device.hpp b/tt_metal/api/tt-metalium/mesh_device.hpp index ee682f3b5f9..1661d49f58a 100644 --- a/tt_metal/api/tt-metalium/mesh_device.hpp +++ b/tt_metal/api/tt-metalium/mesh_device.hpp @@ -223,7 +223,6 @@ class MeshDevice : public IDevice, public std::enable_shared_from_this create_sub_device_manager_with_fabric( tt::stl::Span sub_devices, DeviceAddr local_l1_size) override; - uint32_t get_completion_queue_reader_core() const override; bool is_mmio_capable() const override; std::vector> get_tunnels_from_mmio() const override; MemoryBlockTable get_memory_block_table(const BufferType& buffer_type) const override; diff --git a/tt_metal/distributed/mesh_device.cpp b/tt_metal/distributed/mesh_device.cpp index c9c29b8b90a..8b5ce295b56 100644 --- a/tt_metal/distributed/mesh_device.cpp +++ b/tt_metal/distributed/mesh_device.cpp @@ -127,7 +127,8 @@ MeshDevice::MeshDevice( mesh_shape_(mesh_shape), type_(type), mesh_id_(generate_unique_mesh_id()), - parent_mesh_(std::move(parent_mesh)) { + parent_mesh_(std::move(parent_mesh)) +{ work_executor_ = std::make_unique(0 /* worker_core */, mesh_id_); work_executor_->initialize(); work_executor_->set_worker_mode(WorkExecutorMode::SYNCHRONOUS); @@ -787,10 +788,7 @@ void MeshDevice::reset_sub_device_stall_group() { uint32_t MeshDevice::num_sub_devices() const { return sub_device_manager_tracker_->get_active_sub_device_manager()->num_sub_devices(); } -uint32_t MeshDevice::get_completion_queue_reader_core() const { - TT_THROW("get_completion_queue_reader_core() is not supported on MeshDevice - use individual devices instead"); - return reference_device()->get_completion_queue_reader_core(); -} + bool MeshDevice::is_mmio_capable() const { TT_THROW("is_mmio_capable() is not supported on MeshDevice - use individual devices instead"); return reference_device()->is_mmio_capable(); diff --git a/tt_metal/impl/device/device.cpp b/tt_metal/impl/device/device.cpp index 7d73166acb6..d1890036374 100644 --- a/tt_metal/impl/device/device.cpp +++ b/tt_metal/impl/device/device.cpp @@ -38,8 +38,16 @@ namespace tt { namespace tt_metal { Device::Device( - chip_id_t device_id, const uint8_t num_hw_cqs, size_t l1_small_size, size_t trace_region_size, tt::stl::Span l1_bank_remap, bool minimal, uint32_t worker_core, uint32_t completion_queue_reader_core) : - id_(device_id), worker_thread_core_(worker_core), completion_queue_reader_core_(completion_queue_reader_core), work_executor_(worker_core, device_id) { + chip_id_t device_id, + const uint8_t num_hw_cqs, + size_t l1_small_size, + size_t trace_region_size, + tt::stl::Span l1_bank_remap, + bool minimal, + uint32_t worker_thread_core, + uint32_t completion_queue_reader_core) : + id_(device_id), worker_thread_core_(worker_thread_core), completion_queue_reader_core_(completion_queue_reader_core), work_executor_(worker_thread_core, device_id) +{ ZoneScoped; this->initialize(num_hw_cqs, l1_small_size, trace_region_size, l1_bank_remap, minimal); } @@ -963,7 +971,8 @@ void Device::init_command_queue_host() { hw_command_queues_.reserve(num_hw_cqs()); sw_command_queues_.reserve(num_hw_cqs()); for (size_t cq_id = 0; cq_id < num_hw_cqs(); cq_id++) { - hw_command_queues_.push_back(std::make_unique(this, cq_id, dispatch_downstream_noc)); + hw_command_queues_.push_back( + std::make_unique(this, cq_id, dispatch_downstream_noc, completion_queue_reader_core_)); sw_command_queues_.push_back(std::make_unique(this, cq_id)); } } diff --git a/tt_metal/impl/dispatch/hardware_command_queue.cpp b/tt_metal/impl/dispatch/hardware_command_queue.cpp index dbf66f25ca7..344d123198b 100644 --- a/tt_metal/impl/dispatch/hardware_command_queue.cpp +++ b/tt_metal/impl/dispatch/hardware_command_queue.cpp @@ -43,8 +43,10 @@ Buffer& get_buffer_object(const std::variant, std } // namespace -HWCommandQueue::HWCommandQueue(IDevice* device, uint32_t id, NOC noc_index) : - manager(device->sysmem_manager()), completion_queue_thread{} { +HWCommandQueue::HWCommandQueue(IDevice* device, uint32_t id, NOC noc_index, uint32_t completion_queue_reader_core) : + manager(device->sysmem_manager()), + completion_queue_thread{}, + completion_queue_reader_core(completion_queue_reader_core) { ZoneScopedN("CommandQueue_constructor"); this->device = device; this->id = id; @@ -86,7 +88,7 @@ HWCommandQueue::HWCommandQueue(IDevice* device, uint32_t id, NOC noc_index) : std::thread completion_queue_thread = std::thread(&HWCommandQueue::read_completion_queue, this); this->completion_queue_thread = std::move(completion_queue_thread); // Set the affinity of the completion queue reader. - set_device_thread_affinity(this->completion_queue_thread, device->get_completion_queue_reader_core()); + set_device_thread_affinity(this->completion_queue_thread, this->completion_queue_reader_core); for (uint32_t i = 0; i < dispatch_constants::DISPATCH_MESSAGE_ENTRIES; i++) { this->expected_num_workers_completed[i] = 0;