From 9deb8f8ef2c292b611253a998da1ec64e2ce7872 Mon Sep 17 00:00:00 2001 From: Luca Bertagna Date: Wed, 12 Oct 2022 14:30:04 -0600 Subject: [PATCH 1/3] Fix CPP precision settings in kokkos stuff unit tests --- tests/kokkos/CMakeLists.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/kokkos/CMakeLists.txt b/tests/kokkos/CMakeLists.txt index f6641b82..013283a3 100644 --- a/tests/kokkos/CMakeLists.txt +++ b/tests/kokkos/CMakeLists.txt @@ -5,7 +5,7 @@ if (EKAT_TEST_DOUBLE_PRECISION) EkatCreateUnitTest(kokkos_utils${DP_POSTFIX} kokkos_utils_tests.cpp LIBS ekat PRINT_OMP_AFFINITY - COMPILER_DEFS EKAT_TEST_SINGLE_PRECISION + COMPILER_DEFS EKAT_TEST_DOUBLE_PRECISION THREADS 1 ${EKAT_TEST_MAX_THREADS} ${EKAT_TEST_THREAD_INC} ) @@ -31,7 +31,7 @@ if (EKAT_TEST_SINGLE_PRECISION) EkatCreateUnitTest(wsm${SP_POSTFIX} workspace_tests.cpp LIBS ekat PRINT_OMP_AFFINITY - COMPILER_DEFS EKAT_TEST_DOUBLE_PRECISION + COMPILER_DEFS EKAT_TEST_SINGLE_PRECISION THREADS 1 ${EKAT_TEST_MAX_THREADS} ${EKAT_TEST_THREAD_INC} ) endif () From 38176bd795efefdd49c476be75592e58a48d08e2 Mon Sep 17 00:00:00 2001 From: Luca Bertagna Date: Wed, 12 Oct 2022 17:34:12 -0600 Subject: [PATCH 2/3] Fix view_reduction in case the result var comes from shared mem --- src/ekat/kokkos/ekat_kokkos_utils.hpp | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/src/ekat/kokkos/ekat_kokkos_utils.hpp b/src/ekat/kokkos/ekat_kokkos_utils.hpp index c394a761..784c1f40 100644 --- a/src/ekat/kokkos/ekat_kokkos_utils.hpp +++ b/src/ekat/kokkos/ekat_kokkos_utils.hpp @@ -117,6 +117,17 @@ void view_reduction (const TeamMember& team, using PackType = typename std::remove_reference::type; constexpr int vector_size = PackType::n; + // We need to use a temporary, since we don't know whether result refers to a thread-local + // variable (e.g., automatic variable) or to shared-memory (e.g., an entry of a view). + // Hence, perform calculations on a local var, then copy back into the output result. + ValueType temp = result; + + // Note: this team barrier is needed in some extreme case. Without it, it *could* happen that, + // if result is a ref to shared mem (e.g., an entry of a view) rather than thread-local, + // one team member might reach the end of the fcn (hence, updating result) *before* + // another thread might have the chance to init temp. + team.team_barrier(); + // Perform a packed reduction over scalar indices const bool has_garbage_begin = begin%vector_size != 0; const bool has_garbage_end = end%vector_size != 0; @@ -130,7 +141,7 @@ void view_reduction (const TeamMember& team, const int first_indx = begin%vector_size; Kokkos::single(Kokkos::PerThread(team),[&] { for (int j=first_indx; j(input(k),local_sum); - }, result); + }, temp); } else { PackType packed_result(0); impl::parallel_reduce(team, pack_loop_begin, pack_loop_end, @@ -152,7 +163,7 @@ void view_reduction (const TeamMember& team, local_packed_sum += input(k); }, packed_result); - result += ekat::reduce_sum(packed_result); + temp += ekat::reduce_sum(packed_result); } } @@ -165,10 +176,11 @@ void view_reduction (const TeamMember& team, ConstExceptGnu int last_indx = end%vector_size; Kokkos::single(Kokkos::PerThread(team),[&] { for (int j=0; j Date: Wed, 12 Oct 2022 17:36:20 -0600 Subject: [PATCH 3/3] Use pack size to 2^n while testing view_reduction --- tests/kokkos/kokkos_utils_tests.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/kokkos/kokkos_utils_tests.cpp b/tests/kokkos/kokkos_utils_tests.cpp index 5b2061bf..14cb003c 100644 --- a/tests/kokkos/kokkos_utils_tests.cpp +++ b/tests/kokkos/kokkos_utils_tests.cpp @@ -364,10 +364,10 @@ TEST_CASE("view_reduction", "[kokkos_utils]") test_view_reduction (); // Sum subset of entries, non-zero starting value - test_view_reduction (1.0/3.0,2,11); - test_view_reduction (1.0/3.0,2,11); - test_view_reduction (1.0/3.0,2,11); - test_view_reduction (1.0/3.0,2,11); + test_view_reduction (1.0/3.0,2,11); + test_view_reduction (1.0/3.0,2,11); + test_view_reduction (1.0/3.0,2,11); + test_view_reduction (1.0/3.0,2,11); }