From a332c2dd80431a06ae2240c4bd1c3356c2394f40 Mon Sep 17 00:00:00 2001 From: Chris Freehill Date: Wed, 6 Nov 2024 22:15:04 -0600 Subject: [PATCH] rocr: Dynamically allocate static global memory To allow non-POD global variables to last until the last thread has exited, use "new" to allocate the memory instead of static allocation. Change-Id: Ica571b61ff8068a52e472c49cb1c44917e60c8c8 --- runtime/hsa-runtime/core/inc/amd_aql_queue.h | 14 +++++++++----- runtime/hsa-runtime/core/inc/host_queue.h | 6 ++++-- runtime/hsa-runtime/core/inc/runtime.h | 6 ++++-- runtime/hsa-runtime/image/image_runtime.h | 12 ++++++++---- runtime/hsa-runtime/pcs/pcs_runtime.h | 12 ++++++++---- 5 files changed, 33 insertions(+), 17 deletions(-) diff --git a/runtime/hsa-runtime/core/inc/amd_aql_queue.h b/runtime/hsa-runtime/core/inc/amd_aql_queue.h index c446c0d87..d14bd7440 100644 --- a/runtime/hsa-runtime/core/inc/amd_aql_queue.h +++ b/runtime/hsa-runtime/core/inc/amd_aql_queue.h @@ -337,14 +337,18 @@ class AqlQueue : public core::Queue, private core::LocalSignal, public core::Doo } // Queue count - used to ref count queue_event_ static __forceinline std::atomic& queue_count() { - static std::atomic queue_count_(0); - return queue_count_; + // This allocation is meant to last until the last thread has exited. + // It is intentionally not freed. + static std::atomic* queue_count_ = new std::atomic(0); + return *queue_count_; } // Mutex for queue_event_ manipulation - static __forceinline KernelMutex& queue_lock() { - static KernelMutex queue_lock_; - return queue_lock_; + static __forceinline KernelMutex& queue_lock() { + // This allocation is meant to last until the last thread has exited. + // It is intentionally not freed. 
+ static KernelMutex* queue_lock_ = new KernelMutex(); + return *queue_lock_; } // Async scratch single limit - may be modified after init size_t async_scratch_single_limit_; diff --git a/runtime/hsa-runtime/core/inc/host_queue.h b/runtime/hsa-runtime/core/inc/host_queue.h index bf052026c..8d2fae183 100644 --- a/runtime/hsa-runtime/core/inc/host_queue.h +++ b/runtime/hsa-runtime/core/inc/host_queue.h @@ -189,8 +189,10 @@ class HostQueue : public Queue { // Host queue id counter, starting from 0x80000000 to avoid overlaping // with aql queue id. static __forceinline std::atomic& queue_count() { - static std::atomic queue_count_; - return queue_count_; + // This allocation is meant to last until the last thread has exited. + // It is intentionally not freed. + static std::atomic* queue_count_ = new std::atomic(); + return *queue_count_; } DISALLOW_COPY_AND_ASSIGN(HostQueue); diff --git a/runtime/hsa-runtime/core/inc/runtime.h b/runtime/hsa-runtime/core/inc/runtime.h index d8541de49..360ac9f0b 100644 --- a/runtime/hsa-runtime/core/inc/runtime.h +++ b/runtime/hsa-runtime/core/inc/runtime.h @@ -584,8 +584,10 @@ class Runtime { // Will be created before any user could call hsa_init but also could be // destroyed before incorrectly written programs call hsa_shutdown. static __forceinline KernelMutex& bootstrap_lock() { - static KernelMutex bootstrap_lock_; - return bootstrap_lock_; + // This allocation is meant to last until the last thread has exited. + // It is intentionally not freed. + static KernelMutex* bootstrap_lock_ = new KernelMutex; + return *bootstrap_lock_; } Runtime(); diff --git a/runtime/hsa-runtime/image/image_runtime.h b/runtime/hsa-runtime/image/image_runtime.h index 520b6f80a..ebb2b5b79 100644 --- a/runtime/hsa-runtime/image/image_runtime.h +++ b/runtime/hsa-runtime/image/image_runtime.h @@ -164,13 +164,17 @@ class ImageRuntime { /// Pointer to singleton object. 
static __forceinline std::atomic& get_instance() { - static std::atomic instance_(NULL); - return instance_; + // This allocation is meant to last until the last thread has exited. + // It is intentionally not freed. + static std::atomic* instance_ = new std::atomic(); + return *instance_; } static __forceinline std::mutex& instance_mutex() { - static std::mutex instance_mutex_; - return instance_mutex_; + // This allocation is meant to last until the last thread has exited. + // It is intentionally not freed. + static std::mutex* instance_mutex_ = new std::mutex(); + return *instance_mutex_; } /// @brief Contains mapping of agent and its corresponding ::ImageManager diff --git a/runtime/hsa-runtime/pcs/pcs_runtime.h b/runtime/hsa-runtime/pcs/pcs_runtime.h index b860ae6a4..d78ea2959 100644 --- a/runtime/hsa-runtime/pcs/pcs_runtime.h +++ b/runtime/hsa-runtime/pcs/pcs_runtime.h @@ -153,12 +153,16 @@ class PcsRuntime { /// Pointer to singleton object. static __forceinline std::atomic& get_instance() { - static std::atomic instance_(nullptr); - return instance_; + // This allocation is meant to last until the last thread has exited. + // It is intentionally not freed. + static std::atomic* instance_ = new std::atomic(); + return *instance_; } static __forceinline std::mutex& instance_mutex() { - static std::mutex instance_mutex_; - return instance_mutex_; + // This allocation is meant to last until the last thread has exited. + // It is intentionally not freed. + static std::mutex* instance_mutex_ = new std::mutex(); + return *instance_mutex_; } // Map of pc sampling sessions indexed by hsa_ven_amd_pcs_t handle std::map pc_sampling_;