diff --git a/CarpetX/src/io_meta.cxx b/CarpetX/src/io_meta.cxx index e63f972af..40b9355e7 100644 --- a/CarpetX/src/io_meta.cxx +++ b/CarpetX/src/io_meta.cxx @@ -75,6 +75,9 @@ YAML::Emitter &operator<<(YAML::Emitter &yaml, const reduction_t &reduction) { case reduction_t::maximum_location: yaml << "maximum_location"; break; + case reduction_t::sum_location: + yaml << "sum_location"; + break; default: assert(0); } diff --git a/CarpetX/src/io_meta.hxx b/CarpetX/src/io_meta.hxx index de5c95260..8395ee7fb 100644 --- a/CarpetX/src/io_meta.hxx +++ b/CarpetX/src/io_meta.hxx @@ -32,6 +32,7 @@ enum class reduction_t { norm_inf, minimum_location, maximum_location, + sum_location, }; struct output_file_description_t { diff --git a/CarpetX/src/io_norm.cxx b/CarpetX/src/io_norm.cxx index 7f27380ef..3a9a7e319 100644 --- a/CarpetX/src/io_norm.cxx +++ b/CarpetX/src/io_norm.cxx @@ -79,6 +79,11 @@ void OutputNorms(const cGH *restrict cctkGH) { reductions.push_back(buf.str()); } } + for (int d = 0; d < dim; ++d) { + std::ostringstream buf; + buf << "sumloc[" << d << "]"; + reductions.push_back(buf.str()); + } if (is_root) { static std::once_flag create_directory; @@ -215,6 +220,8 @@ void OutputNorms(const cGH *restrict cctkGH) { for (int d = 0; d < dim; ++d) file << sep << red.maxloc[d]; } + for (int d = 0; d < dim; ++d) + file << sep << red.sumloc[d]; } } @@ -240,8 +247,9 @@ void OutputNorms(const cGH *restrict cctkGH) { ofd.reductions.push_back(reduction_t::minimum_location); ofd.reductions.push_back(reduction_t::maximum_location); } + ofd.reductions.push_back(reduction_t::sum_location); ofd.format_name = "CarpetX/norms/TSV"; - ofd.format_version = {1, 0, 0}; + ofd.format_version = {1, 1, 0}; OutputMeta_RegisterOutputFile(std::move(ofd)); } diff --git a/CarpetX/src/reduction.hxx b/CarpetX/src/reduction.hxx index cf0179a11..6668f3741 100644 --- a/CarpetX/src/reduction.hxx +++ b/CarpetX/src/reduction.hxx @@ -1,6 +1,7 @@ #ifndef CARPETX_CARPETX_REDUCTION_HXX #define CARPETX_CARPETX_REDUCTION_HXX +#include "defs.hxx" #include "vect.hxx" #include @@ -14,28 +15,16 @@ namespace CarpetX { using namespace std; using namespace Arith; -template constexpr T pow21(T x) noexcept { return x * x; } - -template constexpr T fmax1(T x, T y) noexcept { - if (isnan(x)) - return x; - if (isnan(y)) - return y; - return fmax(x, y); -} -template constexpr T fmin1(T x, T y) noexcept { - if (isnan(x)) - return x; - if (isnan(y)) - return y; - return fmin(x, y); -} - template struct reduction { // TODO: contains_inf, contains_nan? T min, max, sum, sum2; T vol, maxabs, sumabs, sum2abs; - vect minloc, maxloc; + vect minloc, maxloc, sumloc; + + // We currently omit minloc/maxloc/sumloc (TODO: fix this) + using tuple_type = amrex::GpuTuple; + constexpr reduction(tuple_type); + constexpr operator tuple_type() const; constexpr reduction(); constexpr reduction(const vect &p, const T &V, const T &x); @@ -49,9 +38,9 @@ template struct reduction { constexpr T avg() const noexcept { return sum / vol; } constexpr T sdv() const noexcept { - // Splitting pow21(sum/vol) improves floating-point accuracy - // return sqrt(fmax1(T(0), sum2 / vol - pow21(sum / vol))); - return sqrt(fmax1(T(0), sum2 / vol - pow21(sum) / pow21(vol))); + // Splitting pow2(sum/vol) improves floating-point accuracy + // return sqrt(max1(T(0), sum2 / vol - pow2(sum / vol))); + return sqrt(max1(T(0), sum2 / vol - pow2(sum) / pow2(vol))); } constexpr T norm0() const noexcept { return vol; } constexpr T norm1() const noexcept { return sumabs / vol; } @@ -62,27 +51,42 @@ template struct reduction { friend ostream &operator<<(ostream &os, const reduction &red); }; +template +constexpr reduction::reduction(tuple_type tuple) + : min(amrex::get<0>(tuple)), max(amrex::get<1>(tuple)), + sum(amrex::get<2>(tuple)), sum2(amrex::get<3>(tuple)), + vol(amrex::get<4>(tuple)), maxabs(amrex::get<5>(tuple)), + sumabs(amrex::get<6>(tuple)), sum2abs(amrex::get<7>(tuple)), minloc{}, + maxloc{}, sumloc{} {} + +template +constexpr reduction::operator reduction::tuple_type() const { + return tuple_type{min, max, sum, sum2, vol, maxabs, sumabs, sum2abs}; +} + template constexpr reduction::reduction() : min(1.0 / 0.0), max(-1.0 / 0.0), sum(0.0), sum2(0.0), vol(0.0), maxabs(0.0), sumabs(0.0), sum2abs(0.0), - minloc(vect::pure(0.0 / 0.0)), maxloc(vect::pure(0.0 / 0.0)) { -} + minloc(vect::pure(0.0 / 0.0)), maxloc(vect::pure(0.0 / 0.0)), + sumloc(vect::pure(0.0)) {} template constexpr reduction::reduction(const vect &p, const T &V, const T &x) - : min(x), max(x), sum(V * x), sum2(V * pow21(x)), vol(V), maxabs(fabs(x)), - sumabs(V * fabs(x)), sum2abs(V * pow21(fabs(x))), minloc(p), maxloc(p) {} + : min(x), max(x), sum(V * x), sum2(V * pow2(x)), vol(V), maxabs(fabs(x)), + sumabs(V * fabs(x)), sum2abs(V * pow2(fabs(x))), minloc(p), maxloc(p), + sumloc(x * p) {} template constexpr reduction::reduction(const reduction &x, const reduction &y) - : min(fmin1(x.min, y.min)), max(fmax1(x.max, y.max)), sum(x.sum + y.sum), + : min(min1(x.min, y.min)), max(max1(x.max, y.max)), sum(x.sum + y.sum), sum2(x.sum2 + y.sum2), vol(x.vol + y.vol), - maxabs(fmax1(x.maxabs, y.maxabs)), sumabs(x.sumabs + y.sumabs), + maxabs(max1(x.maxabs, y.maxabs)), sumabs(x.sumabs + y.sumabs), sum2abs(x.sum2abs + y.sum2abs), minloc(x.min <= y.min ? x.minloc : y.minloc), - maxloc(x.max >= y.max ? x.maxloc : y.maxloc) {} + maxloc(x.max >= y.max ? x.maxloc : y.maxloc), + sumloc(x.sumloc + y.sumloc) {} template ostream &operator<<(ostream &os, const reduction &red) { @@ -97,6 +101,7 @@ ostream &operator<<(ostream &os, const reduction &red) { << " sum2abs: " << red.sum2abs << "\n" << " minloc: " << red.minloc << "\n" << " maxloc: " << red.maxloc << "\n" + << " sumloc: " << red.sumloc << "\n" << " avg: " << red.avg() << "\n" << " sdv: " << red.sdv() << "\n" << " norm1: " << red.norm1() << "\n" @@ -105,6 +110,17 @@ ostream &operator<<(ostream &os, const reduction &red) { << "}\n"; } +template struct combine { + using value_type = typename reduction::tuple_type; + constexpr AMREX_GPU_DEVICE value_type init() const { + return (value_type)reduction(); + } + constexpr AMREX_GPU_DEVICE value_type operator()(const value_type &x, + const value_type &y) const { + return (value_type)reduction(x), reduction(y); + } +}; + typedef reduction reduction_CCTK_REAL; #pragma omp declare reduction(reduction:reduction_CCTK_REAL : omp_out += omp_in)