Skip to content

Commit

Permalink
Merge Pull Request #256 from E3SM-Project/EKAT/bartgol/fix-view-reduc…
Browse files Browse the repository at this point in the history
…tion

Automatically Merged using E3SM Pull Request AutoTester
PR Title: Fix view_reduction to work with output variable coming from either private or shared memory
PR Author: bartgol
  • Loading branch information
E3SM-Bot authored Oct 13, 2022
2 parents dbe3d1c + 4c8d649 commit 1052776
Show file tree
Hide file tree
Showing 3 changed files with 22 additions and 10 deletions.
20 changes: 16 additions & 4 deletions src/ekat/kokkos/ekat_kokkos_utils.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,17 @@ void view_reduction (const TeamMember& team,
using PackType = typename std::remove_reference<decltype(input(0))>::type;
constexpr int vector_size = PackType::n;

// We need to use a temporary, since we don't know whether result refers to a thread-local
// variable (e.g., automatic variable) or to shared-memory (e.g., an entry of a view).
// Hence, perform calculations on a local var, then copy back into the output result.
ValueType temp = result;

// Note: this team barrier is needed in some extreme case. Without it, it *could* happen that,
// if result is a ref to shared mem (e.g., an entry of a view) rather than thread-local,
// one team member might reach the end of the fcn (hence, updating result) *before*
// another thread might have the chance to init temp.
team.team_barrier();

// Perform a packed reduction over scalar indices
const bool has_garbage_begin = begin%vector_size != 0;
const bool has_garbage_end = end%vector_size != 0;
Expand All @@ -130,7 +141,7 @@ void view_reduction (const TeamMember& team,
const int first_indx = begin%vector_size;
Kokkos::single(Kokkos::PerThread(team),[&] {
for (int j=first_indx; j<vector_size; ++j) {
result += temp_input[j];
temp += temp_input[j];
}
});
}
Expand All @@ -143,7 +154,7 @@ void view_reduction (const TeamMember& team,
[&](const int k, ValueType& local_sum) {
// Sum over pack entries and add to local_sum
ekat::reduce_sum<Serialize>(input(k),local_sum);
}, result);
}, temp);
} else {
PackType packed_result(0);
impl::parallel_reduce<Serialize>(team, pack_loop_begin, pack_loop_end,
Expand All @@ -152,7 +163,7 @@ void view_reduction (const TeamMember& team,
local_packed_sum += input(k);
}, packed_result);

result += ekat::reduce_sum<Serialize>(packed_result);
temp += ekat::reduce_sum<Serialize>(packed_result);
}
}

Expand All @@ -165,10 +176,11 @@ void view_reduction (const TeamMember& team,
ConstExceptGnu int last_indx = end%vector_size;
Kokkos::single(Kokkos::PerThread(team),[&] {
for (int j=0; j<last_indx; ++j) {
result += temp_input[j];
temp += temp_input[j];
}
});
}
result = temp;
}
} //namespace impl

Expand Down
4 changes: 2 additions & 2 deletions tests/kokkos/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ if (EKAT_TEST_DOUBLE_PRECISION)
EkatCreateUnitTest(kokkos_utils${DP_POSTFIX} kokkos_utils_tests.cpp
LIBS ekat
PRINT_OMP_AFFINITY
COMPILER_DEFS EKAT_TEST_SINGLE_PRECISION
COMPILER_DEFS EKAT_TEST_DOUBLE_PRECISION
THREADS 1 ${EKAT_TEST_MAX_THREADS} ${EKAT_TEST_THREAD_INC}
)

Expand All @@ -31,7 +31,7 @@ if (EKAT_TEST_SINGLE_PRECISION)
EkatCreateUnitTest(wsm${SP_POSTFIX} workspace_tests.cpp
LIBS ekat
PRINT_OMP_AFFINITY
COMPILER_DEFS EKAT_TEST_DOUBLE_PRECISION
COMPILER_DEFS EKAT_TEST_SINGLE_PRECISION
THREADS 1 ${EKAT_TEST_MAX_THREADS} ${EKAT_TEST_THREAD_INC}
)
endif ()
Expand Down
8 changes: 4 additions & 4 deletions tests/kokkos/kokkos_utils_tests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -364,10 +364,10 @@ TEST_CASE("view_reduction", "[kokkos_utils]")
test_view_reduction<Real,false,false,3,4> ();

// Sum subset of entries, non-zero starting value
test_view_reduction<Real, true,true,16,3> (1.0/3.0,2,11);
test_view_reduction<Real,false,true,16,3> (1.0/3.0,2,11);
test_view_reduction<Real, true,false,16,3> (1.0/3.0,2,11);
test_view_reduction<Real,false,false,16,3> (1.0/3.0,2,11);
test_view_reduction<Real, true,true,18,4> (1.0/3.0,2,11);
test_view_reduction<Real,false,true,18,4> (1.0/3.0,2,11);
test_view_reduction<Real, true,false,18,4> (1.0/3.0,2,11);
test_view_reduction<Real,false,false,18,4> (1.0/3.0,2,11);

}

Expand Down

0 comments on commit 1052776

Please sign in to comment.