From 38176bd795efefdd49c476be75592e58a48d08e2 Mon Sep 17 00:00:00 2001 From: Luca Bertagna Date: Wed, 12 Oct 2022 17:34:12 -0600 Subject: [PATCH] Fix view_reduction in case the result var comes from shared mem --- src/ekat/kokkos/ekat_kokkos_utils.hpp | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/src/ekat/kokkos/ekat_kokkos_utils.hpp b/src/ekat/kokkos/ekat_kokkos_utils.hpp index c394a761..784c1f40 100644 --- a/src/ekat/kokkos/ekat_kokkos_utils.hpp +++ b/src/ekat/kokkos/ekat_kokkos_utils.hpp @@ -117,6 +117,17 @@ void view_reduction (const TeamMember& team, using PackType = typename std::remove_reference::type; constexpr int vector_size = PackType::n; + // We need to use a temporary, since we don't know whether result refers to a thread-local + // variable (e.g., automatic variable) or to shared memory (e.g., an entry of a view). + // Hence, perform calculations on a local var, then copy back into the output result. + ValueType temp = result; + + // Note: this team barrier is needed in some extreme cases. Without it, it *could* happen that, + // if result is a ref to shared mem (e.g., an entry of a view) rather than thread-local, + // one team member might reach the end of the function (hence, updating result) *before* + // another thread has had the chance to initialize temp. 
+ team.team_barrier(); + // Perform a packed reduction over scalar indices const bool has_garbage_begin = begin%vector_size != 0; const bool has_garbage_end = end%vector_size != 0; @@ -130,7 +141,7 @@ void view_reduction (const TeamMember& team, const int first_indx = begin%vector_size; Kokkos::single(Kokkos::PerThread(team),[&] { for (int j=first_indx; j(input(k),local_sum); - }, result); + }, temp); } else { PackType packed_result(0); impl::parallel_reduce(team, pack_loop_begin, pack_loop_end, @@ -152,7 +163,7 @@ void view_reduction (const TeamMember& team, local_packed_sum += input(k); }, packed_result); - result += ekat::reduce_sum(packed_result); + temp += ekat::reduce_sum(packed_result); } } @@ -165,10 +176,11 @@ void view_reduction (const TeamMember& team, ConstExceptGnu int last_indx = end%vector_size; Kokkos::single(Kokkos::PerThread(team),[&] { for (int j=0; j