Skip to content

Commit

Permalink
rocr: Dynamically allocate static global memory
Browse files Browse the repository at this point in the history
To allow non-POD global variables to remain valid until the last thread
has exited, allocate them with "new" (intentionally never freed) instead
of giving them static storage duration, whose destructors may run while
other threads still reference them.

Change-Id: Ica571b61ff8068a52e472c49cb1c44917e60c8c8
  • Loading branch information
cfreeamd committed Nov 7, 2024
1 parent b410c40 commit a332c2d
Show file tree
Hide file tree
Showing 5 changed files with 33 additions and 17 deletions.
14 changes: 9 additions & 5 deletions runtime/hsa-runtime/core/inc/amd_aql_queue.h
Original file line number Diff line number Diff line change
Expand Up @@ -337,14 +337,18 @@ class AqlQueue : public core::Queue, private core::LocalSignal, public core::Doo
}
// Queue count - used to ref count queue_event_
static __forceinline std::atomic<uint32_t>& queue_count() {
static std::atomic<uint32_t> queue_count_(0);
return queue_count_;
// This allocation is meant to last until the last thread has exited.
// It is intentionally not freed.
static std::atomic<uint32_t>* queue_count_ = new std::atomic<uint32_t>(0);
return *queue_count_;
}

// Mutex for queue_event_ manipulation
static __forceinline KernelMutex& queue_lock() {
static KernelMutex queue_lock_;
return queue_lock_;
KernelMutex& queue_lock() {
// This allocation is meant to last until the last thread has exited.
// It is intentionally not freed.
static KernelMutex* queue_lock_ = new KernelMutex();
return *queue_lock_;
}
// Async scratch single limit - may be modified after init
size_t async_scratch_single_limit_;
Expand Down
6 changes: 4 additions & 2 deletions runtime/hsa-runtime/core/inc/host_queue.h
Original file line number Diff line number Diff line change
Expand Up @@ -189,8 +189,10 @@ class HostQueue : public Queue {
// Host queue id counter, starting from 0x80000000 to avoid overlaping
// with aql queue id.
static __forceinline std::atomic<uint32_t>& queue_count() {
static std::atomic<uint32_t> queue_count_;
return queue_count_;
// This allocation is meant to last until the last thread has exited.
// It is intentionally not freed.
static std::atomic<uint32_t>* queue_count_ = new std::atomic<uint32_t>();
return *queue_count_;
}

DISALLOW_COPY_AND_ASSIGN(HostQueue);
Expand Down
6 changes: 4 additions & 2 deletions runtime/hsa-runtime/core/inc/runtime.h
Original file line number Diff line number Diff line change
Expand Up @@ -584,8 +584,10 @@ class Runtime {
// Will be created before any user could call hsa_init but also could be
// destroyed before incorrectly written programs call hsa_shutdown.
static __forceinline KernelMutex& bootstrap_lock() {
static KernelMutex bootstrap_lock_;
return bootstrap_lock_;
// This allocation is meant to last until the last thread has exited.
// It is intentionally not freed.
static KernelMutex* bootstrap_lock_ = new KernelMutex;
return *bootstrap_lock_;
}
Runtime();

Expand Down
12 changes: 8 additions & 4 deletions runtime/hsa-runtime/image/image_runtime.h
Original file line number Diff line number Diff line change
Expand Up @@ -164,13 +164,17 @@ class ImageRuntime {

/// Pointer to singleton object.
static __forceinline std::atomic<ImageRuntime*>& get_instance() {
static std::atomic<ImageRuntime*> instance_(NULL);
return instance_;
// This allocation is meant to last until the last thread has exited.
// It is intentionally not freed.
static std::atomic<ImageRuntime*>* instance_ = new std::atomic<ImageRuntime*>();
return *instance_;
}

static __forceinline std::mutex& instance_mutex() {
static std::mutex instance_mutex_;
return instance_mutex_;
// This allocation is meant to last until the last thread has exited.
// It is intentionally not freed.
static std::mutex* instance_mutex_ = new std::mutex();
return *instance_mutex_;
}

/// @brief Contains mapping of agent and its corresponding ::ImageManager
Expand Down
12 changes: 8 additions & 4 deletions runtime/hsa-runtime/pcs/pcs_runtime.h
Original file line number Diff line number Diff line change
Expand Up @@ -153,12 +153,16 @@ class PcsRuntime {

/// Pointer to singleton object.
static __forceinline std::atomic<PcsRuntime*>& get_instance() {
static std::atomic<PcsRuntime*> instance_(nullptr);
return instance_;
// This allocation is meant to last until the last thread has exited.
// It is intentionally not freed.
static std::atomic<PcsRuntime*>* instance_ = new std::atomic<PcsRuntime*>();
return *instance_;
}
static __forceinline std::mutex& instance_mutex() {
static std::mutex instance_mutex_;
return instance_mutex_;
// This allocation is meant to last until the last thread has exited.
// It is intentionally not freed.
static std::mutex* instance_mutex_ = new std::mutex();
return *instance_mutex_;
}
// Map of pc sampling sessions indexed by hsa_ven_amd_pcs_t handle
std::map<uint64_t, PcSamplingSession> pc_sampling_;
Expand Down

0 comments on commit a332c2d

Please sign in to comment.