SC-SGS · G-071 · May 22, 2024 · May 13, 2024 · May 17, 2024
diff --git a/include/cppuddle/common/config.hpp b/include/cppuddle/common/config.hpp
@@ -53,9 +53,7 @@ constexpr size_t max_number_gpus = CPPUDDLE_HAVE_MAX_NUMBER_GPUS;
 #ifndef CPPUDDLE_HAVE_HPX
 static_assert(max_number_gpus == 1, "Non HPX builds do not support multigpu");
 #endif
-//static_assert(number_instances >= max_number_gpus);
 static_assert(max_number_gpus > 0);
-//constexpr size_t instances_per_gpu = number_instances / max_number_gpus;
 
 /// Uses HPX thread information to determine which GPU should be used
 inline size_t get_device_id(const size_t number_gpus) {

diff --git a/include/cppuddle/kernel_aggregation/detail/aggregation_executor_pools.hpp b/include/cppuddle/kernel_aggregation/detail/aggregation_executor_pools.hpp
@@ -50,7 +50,7 @@ class aggregation_pool {
   static decltype(auto) request_executor_slice(void) {
     if (!is_initialized) {
       throw std::runtime_error(
-          std::string("Trying to use cppuddle aggregation pool without first calling init") +
+          std::string("ERROR: Trying to use cppuddle aggregation pool without first calling init!\n") +
           " Agg poolname: " + std::string(kernelname));
     }
     const size_t gpu_id = cppuddle::get_device_id(number_devices);
@@ -127,6 +127,20 @@ class aggregation_pool {
   aggregation_pool &operator=(aggregation_pool &&other) = delete;
 };
 
+/// Calls aggregation_region_t::init with 128 aggregation executors, max_slices and
+/// cppuddle::max_number_gpus; the mode is STRICT iff max_slices == 1, otherwise EAGER.
+template <typename aggregation_region_t>
+void init_area_aggregation_pool(const size_t max_slices) {
+    constexpr size_t executor_count = 128;
+    constexpr size_t gpu_count = cppuddle::max_number_gpus;
+    const bool single_slice = (max_slices == 1);
+    aggregated_executor_modes chosen_mode = single_slice
+        ? aggregated_executor_modes::STRICT
+        : aggregated_executor_modes::EAGER;
+    aggregation_region_t::init(executor_count, max_slices, chosen_mode, gpu_count);
+}
+
+
 } // namespace detail
 } // namespace kernel_aggregation
 } // namespace cppuddle

diff --git a/include/cppuddle/kernel_aggregation/kernel_aggregation_interface.hpp b/include/cppuddle/kernel_aggregation/kernel_aggregation_interface.hpp
@@ -6,6 +6,7 @@
 #ifndef KERNEL_AGGREGATION_INTERFACE_HPP
 #define KERNEL_AGGREGATION_INTERFACE_HPP
 
+#include "cppuddle/executor_recycling/executor_pools_interface.hpp"
 #include "cppuddle/kernel_aggregation/detail/aggregation_executors_and_allocators.hpp"
 #include "cppuddle/kernel_aggregation/detail/aggregation_executor_pools.hpp"
 
@@ -43,6 +44,30 @@ using aggregation_pool =
     cppuddle::kernel_aggregation::detail::aggregation_pool<kernelname, Interface,
     Pool>;
 
+/// Start an aggregation region (passed via lambda): requests an executor slice from the
+/// round-robin aggregation pool of `region_name` and attaches `aggregation_area` as continuation,
+/// calling it with (slice id, number of slices, slice). The pool is initialized by the first call
+/// only -- its `team_size` becomes max_slices; later values are ignored by the call_once guard.
+/// NOTE(review): the once_flag is per <region_name, executor_t, return_type> instantiation while
+/// the pool type depends only on the first two -- confirm a region never mixes return types.
+template <const char* region_name, typename executor_t, typename return_type>
+hpx::future<return_type> aggregation_region(const size_t team_size,
+    std::function<return_type(size_t, size_t,
+        typename cppuddle::kernel_aggregation::detail::aggregated_executor<
+            executor_t>::executor_slice&)> &&aggregation_area) {
+    using aggregation_pool_t = cppuddle::kernel_aggregation::aggregation_pool<region_name,
+        executor_t, cppuddle::executor_recycling::round_robin_pool_impl<executor_t>>;
+    static hpx::once_flag pool_init;
+    hpx::call_once(pool_init, detail::init_area_aggregation_pool<aggregation_pool_t>, team_size);
+    auto executor_slice_fut = aggregation_pool_t::request_executor_slice();
+    return executor_slice_fut.value().then(hpx::annotated_function(
+        [aggregation_area](auto &&fut) {
+          auto agg_exec = fut.get(); // deduced, so the slice type is not spelled a second time
+          return aggregation_area(agg_exec.id, agg_exec.number_slices, agg_exec);
+        },
+        region_name));
+}
+
 } // namespace kernel_aggregation 
 } // namespace cppuddle