From da1712b9c107da337d4753bfe4503c39e0918a56 Mon Sep 17 00:00:00 2001 From: "Steven R. Brandt" Date: Thu, 25 May 2023 15:49:46 -0500 Subject: [PATCH 1/7] Unify derivatives between CarpetX and SpacetimeX. (1) Discard existing derivs.hxx and derivs.cxx (2) Merge z4c and weyl derivs.hxx (3) move epsdiss to Derivs --- CarpetX/src/io_openpmd.cxx | 1 + Derivs/param.ccl | 4 + Derivs/src/derivs.cxx | 150 ------ Derivs/src/derivs.hxx | 915 ++++++++++++++++++------------------- Derivs/src/make.code.defn | 2 +- Loop/src/loop.hxx | 83 ---- 6 files changed, 459 insertions(+), 696 deletions(-) delete mode 100644 Derivs/src/derivs.cxx diff --git a/CarpetX/src/io_openpmd.cxx b/CarpetX/src/io_openpmd.cxx index dd9fb8bc1..eeb4b2131 100644 --- a/CarpetX/src/io_openpmd.cxx +++ b/CarpetX/src/io_openpmd.cxx @@ -1028,6 +1028,7 @@ void carpetx_openpmd_t::OutputOpenPMD(const cGH *const cctkGH, series->setIterationEncoding(iterationEncoding); { + // Does not work in containers char const *const user = getenv("USER"); if (user) series->setAuthor(user); diff --git a/Derivs/param.ccl b/Derivs/param.ccl index ae7b8ac68..d10a5fc20 100644 --- a/Derivs/param.ccl +++ b/Derivs/param.ccl @@ -1 +1,5 @@ # Parameter definitions for thorn Derivs +CCTK_REAL epsdiss "Dissipation coefficient " STEERABLE=always +{ + 0.0:* :: "" +} 0.32 diff --git a/Derivs/src/derivs.cxx b/Derivs/src/derivs.cxx deleted file mode 100644 index 43dc63eef..000000000 --- a/Derivs/src/derivs.cxx +++ /dev/null @@ -1,150 +0,0 @@ -#include "derivs.hxx" - -namespace Derivs { -using namespace Arith; -using namespace Loop; - -//////////////////////////////////////////////////////////////////////////////// - -// Tile-based multi-dimensional derivative operators - -template -CCTK_ATTRIBUTE_NOINLINE void -calc_derivs(const vec, dim> &dgf, const GridDescBaseDevice &grid, - const GF3D5 &gf, const GF3D5layout layout, - const vect dx, const int deriv_order) { - using vreal = simd; - using vbool = simdl; - constexpr std::size_t vsize = std::tuple_size_v; - - switch (deriv_order) { - - case 2: - grid.loop_int_device( - grid.nghostzones, - [=] CCTK_DEVICE(const PointDesc &p) CCTK_ATTRIBUTE_ALWAYS_INLINE { - const vbool mask = mask_for_loop_tail(p.i, p.imax); - const GF3D5index index(layout, p.I); - const auto dval = calc_deriv<2>(gf, mask, layout, p.I, dx); - dgf.store(mask, index, dval); - }); - break; - - case 4: - grid.loop_int_device( - grid.nghostzones, - [=] CCTK_DEVICE(const PointDesc &p) CCTK_ATTRIBUTE_ALWAYS_INLINE { - const vbool mask = mask_for_loop_tail(p.i, p.imax); - const GF3D5index index(layout, p.I); - const auto dval = calc_deriv<4>(gf, mask, layout, p.I, dx); - dgf.store(mask, index, dval); - }); - break; - - default: - CCTK_VERROR("Unsupported derivative order %d", deriv_order); - } -} - -template -CCTK_ATTRIBUTE_NOINLINE void -calc_derivs2(const vec, dim> &dgf, const smat, dim> &ddgf, - const GridDescBaseDevice &grid, const GF3D5 &gf, - const GF3D5layout layout, const vect dx, - const int deriv_order) { - using vreal = simd; - using vbool = simdl; - constexpr std::size_t vsize = std::tuple_size_v; - - switch (deriv_order) { - - case 2: - grid.loop_int_device( - grid.nghostzones, - [=] CCTK_DEVICE(const PointDesc &p) CCTK_ATTRIBUTE_ALWAYS_INLINE { - const vbool mask = mask_for_loop_tail(p.i, p.imax); - const GF3D5index index(layout, p.I); - const auto dval = calc_deriv<2>(gf, mask, layout, p.I, dx); - const auto ddval = calc_deriv2<2>(gf, mask, layout, p.I, dx); - dgf.store(mask, index, dval); - ddgf.store(mask, index, ddval); - }); - break; - - case 4: - grid.loop_int_device( - grid.nghostzones, - [=] CCTK_DEVICE(const PointDesc &p) CCTK_ATTRIBUTE_ALWAYS_INLINE { - const vbool mask = mask_for_loop_tail(p.i, p.imax); - const GF3D5index index(layout, p.I); - const auto dval = calc_deriv<4>(gf, mask, layout, p.I, dx); - const auto ddval = calc_deriv2<4>(gf, mask, layout, p.I, dx); - dgf.store(mask, index, dval); - ddgf.store(mask, index, ddval); - }); - break; - - default: - CCTK_VERROR("Unsupported derivative order %d", deriv_order); - } -} - -//////////////////////////////////////////////////////////////////////////////// - -// Template instantiations - -using T = CCTK_REAL; - -template CCTK_DEVICE CCTK_HOST Arith::vec, Loop::dim> -calc_deriv<2>(const Loop::GF3D5 &gf, const Arith::simdl &mask, - const Loop::GF3D5layout &layout, - const Arith::vect &I, - const Arith::vect &dx); -template CCTK_DEVICE CCTK_HOST Arith::vec, Loop::dim> -calc_deriv<4>(const Loop::GF3D5 &gf, const Arith::simdl &mask, - const Loop::GF3D5layout &layout, - const Arith::vect &I, - const Arith::vect &dx); - -template CCTK_DEVICE CCTK_HOST Arith::vec -calc_deriv<2>(const Loop::GF3D5 &gf, const Loop::GF3D5layout &layout, - const Arith::vect &I, - const Arith::vect &dx); -template CCTK_DEVICE CCTK_HOST Arith::vec -calc_deriv<4>(const Loop::GF3D5 &gf, const Loop::GF3D5layout &layout, - const Arith::vect &I, - const Arith::vect &dx); - -template CCTK_DEVICE CCTK_HOST Arith::smat, Loop::dim> -calc_deriv2<2>(const Loop::GF3D5 &gf, const Arith::simdl &mask, - const Loop::GF3D5layout &layout, - const Arith::vect &I, - const Arith::vect &dx); -template CCTK_DEVICE CCTK_HOST Arith::smat, Loop::dim> -calc_deriv2<4>(const Loop::GF3D5 &gf, const Arith::simdl &mask, - const Loop::GF3D5layout &layout, - const Arith::vect &I, - const Arith::vect &dx); - -template CCTK_DEVICE CCTK_HOST Arith::smat -calc_deriv2<2>(const Loop::GF3D5 &gf, const Loop::GF3D5layout &layout, - const Arith::vect &I, - const Arith::vect &dx); -template CCTK_DEVICE CCTK_HOST Arith::smat -calc_deriv2<4>(const Loop::GF3D5 &gf, const Loop::GF3D5layout &layout, - const Arith::vect &I, - const Arith::vect &dx); - -template void calc_derivs<0, 0, 0>(const vec, dim> &dgf, - const GridDescBaseDevice &grid, - const GF3D5 &gf, - const GF3D5layout layout, - const vect dx, - const int deriv_order); - -template void calc_derivs2<0, 0, 0>( - const vec, dim> &dgf, const smat, dim> &ddgf, - const GridDescBaseDevice &grid, const GF3D5 &gf, - const GF3D5layout layout, const vect dx, const int deriv_order); - -} // namespace Derivs diff --git a/Derivs/src/derivs.hxx b/Derivs/src/derivs.hxx index 8634ec59c..64735bc2d 100644 --- a/Derivs/src/derivs.hxx +++ b/Derivs/src/derivs.hxx @@ -1,519 +1,510 @@ -#ifndef DERIVS_HXX -#define DERIVS_HXX - -#include -#include +#ifndef Derivs_derivx_hxx +#define Derivs_derivx_hxx #include #include #include #include -#include -#include -#include -#include -#include +#include +#include +#include + #include +// Unified derivative header -namespace Derivs { +// TODO: Make this a runtime parameter +constexpr int deriv_order = 4; //////////////////////////////////////////////////////////////////////////////// -namespace stencils { -using namespace Arith; -using namespace Loop; +using Arith::pow2; -// Stencil coefficients +template +inline ARITH_INLINE ARITH_DEVICE ARITH_HOST T pow3(const T &x) { + return pow2(x) * x; +} -enum symmetry { none, symmetric, antisymmetric }; +//////////////////////////////////////////////////////////////////////////////// -template struct stencil { - static constexpr std::ptrdiff_t N = I1 - I0 + 1; - static_assert(N >= 0, ""); - static_assert(S == none || S == symmetric || S == antisymmetric, ""); +using Arith::simd; +using Arith::simdl; - int divisor; - std::array coeffs; +template +inline ARITH_INLINE ARITH_DEVICE ARITH_HOST simd +deriv1d(const simdl &mask, const T *restrict const var, const ptrdiff_t di, + const T dx) { + const auto load = [&](const int n) { + return maskz_loadu(mask, &var[n * di]) - maskz_loadu(mask, &var[-n * di]); + }; + if constexpr (deriv_order == 2) + return 1 / T(2) * load(1) / dx; + if constexpr (deriv_order == 4) + return (-1 / T(12) * load(2) + 2 / T(3) * load(1)) / dx; + if constexpr (deriv_order == 6) + return (1 / T(60) * load(3) - 3 / T(20) * load(2) + 3 / T(4) * load(1)) / + dx; +} - template - inline CCTK_ATTRIBUTE_ALWAYS_INLINE - CCTK_DEVICE CCTK_HOST std::result_of_t - apply(const Array &arr) const { - using R = std::result_of_t; - if constexpr (S == symmetric) { - R r{0}; - for (std::ptrdiff_t n = 0; n < N / 2; ++n) { - const std::ptrdiff_t n1 = N - 1 - n; - r += coeffs[n] * (arr(n + I0) + arr(n1 + I0)); - } - if (N % 2 != 0) { - const std::ptrdiff_t n = N / 2 + 1; - r += coeffs[n] * arr(n + I0); +using Loop::GF3D2; +using Arith::vect; +using Loop::dim; +using Arith::vec; + +template +inline ARITH_INLINE ARITH_DEVICE ARITH_HOST simd +deriv(const simdl &mask, const GF3D2 &gf_, const vect &I, + const vec &dx) { + static_assert(dir >= 0 && dir < D, ""); + const auto &DI = vect::unit; + const ptrdiff_t di = gf_.delta(DI(dir)); + return deriv1d(mask, &gf_(I), di, dx(dir)); +} +template +inline ARITH_INLINE ARITH_DEVICE ARITH_HOST vec, dim> +deriv(const simdl &mask, const GF3D2 &gf_, const vect &I, + const vec &dx) { + return {deriv<0>(mask, gf_, I, dx), deriv<1>(mask, gf_, I, dx), + deriv<2>(mask, gf_, I, dx)}; +} + +template +inline ARITH_INLINE ARITH_DEVICE ARITH_HOST simd +deriv2_1d(const simdl &mask, const T *restrict const var, const ptrdiff_t di, + const T dx) { + const auto load = [&](const int n) { + return maskz_loadu(mask, &var[n * di]) + maskz_loadu(mask, &var[-n * di]); + }; + const auto load0 = [&]() { return maskz_loadu(mask, &var[0]); }; + if constexpr (deriv_order == 2) + return (load(1) - 2 * load0()) / pow2(dx); + if constexpr (deriv_order == 4) + return (1 / T(12) * load(2) - 4 / T(3) * load(1) + 5 / T(2) * load0()) / + pow2(dx); + if constexpr (deriv_order == 6) + return (1 / T(90) * load(3) - 3 / T(20) * load(2) + 3 / T(2) * load(1) - + 49 / T(18) * load0()) / + pow2(dx); +} + +using std::tuple_size_v; +using Arith::min; +using std::array; +using Arith::div_floor; +using Arith::div_ceil; +using Arith::mask_for_loop_tail; + +template +inline ARITH_INLINE ARITH_DEVICE ARITH_HOST simd +deriv2_2d(const int vavail, const simdl &mask, const T *restrict const var, + const ptrdiff_t di, const ptrdiff_t dj, const T dx, const T dy) { + constexpr size_t vsize = tuple_size_v >; + if (di == 1) { + assert(vavail > 0); + constexpr int maxnpoints = deriv_order + 1 + vsize - 1; + const int npoints = deriv_order + 1 + min(int(vsize), vavail) - 1; + array, div_ceil(maxnpoints, int(vsize))> arrx; + for (int i = 0; i < maxnpoints; i += vsize) { + if (i < npoints) { + const simdl mask1 = Arith::mask_for_loop_tail >(i, npoints); + arrx[div_floor(i, int(vsize))] = + deriv1d(mask1, &var[i - deriv_order / 2], dj, dy); } - r /= divisor; - return std::move(r); } - if constexpr (antisymmetric) { - R r{0}; - for (std::ptrdiff_t n = 0; n < N / 2; ++n) { - const std::ptrdiff_t n1 = N - 1 - n; - r += coeffs[n] * (arr(n + I0) - arr(n1 + I0)); +#ifdef CCTK_DEBUG + for (int i = npoints; i < align_ceil(maxnpoints, int(vsize)); ++i) + ((T *)&arrx[0])[i] = Arith::nan()(); // unused +#endif + const T *const varx = (T *)&arrx[0] + deriv_order / 2; + return deriv1d(mask, varx, 1, dx); + } else { + assert(dj != 1); + array, deriv_order + 1> arrx; + for (int j = -deriv_order / 2; j <= deriv_order / 2; ++j) + if (j == 0) { +#ifdef CCTK_DEBUG + arrx[deriv_order / 2 + j] = Arith::nan >()(); // unused +#endif + } else { + arrx[deriv_order / 2 + j] = deriv1d(mask, &var[j * dj], di, dx); } - r /= divisor; - return std::move(r); - } - R r{0}; - for (std::ptrdiff_t n = 0; n < N; ++n) - r += coeffs[n] * arr(n + I0); - return std::move(r); + const T *const varx = (T *)(&arrx[deriv_order / 2]); + return deriv1d(mask, varx, vsize, dy); } -}; - -// Interpolate at i = 0 -constexpr stencil<0, 0, symmetric> interp{1, {1}}; - -// Derivative at i = 0 -constexpr stencil<-1, +1, antisymmetric> deriv1_o2{2, {-1, 0, +1}}; - -constexpr stencil<-2, +2, antisymmetric> deriv1_o4{12, {-1, +8, 0, -8, +1}}; - -constexpr stencil<-1, +1, symmetric> deriv2_o2{1, {-2, +1, -2}}; - -constexpr stencil<-2, +2, symmetric> deriv2_o4{12, {-1, +16, -30, +16, -1}}; - -// Interpolate at i = 1/2 -constexpr stencil<-0, +1, symmetric> interp_c_o1{2, {1, 1}}; - -constexpr stencil<-1, +2, symmetric> interp_c_o3{16, {-1, +9, +9, -1}}; - -constexpr stencil<-2, +3, symmetric> interp_c_o5{ - 256, {+3, -25, +150, +150, -25, +3}}; - -constexpr stencil<-3, +4, symmetric> interp_c_o7{ - 2048, {-5, +49, -245, +1225, +1225, -245, +49, -5}}; - -// Derivative at i = 1/2 -constexpr stencil<-0, +1, antisymmetric> deriv1_c_o2{1, {-1, +1}}; - -constexpr stencil<-1, +2, antisymmetric> deriv1_c_o4{12, {-1, +15, -15, +1}}; - -} // namespace stencils +} -namespace detail { -using namespace Arith; -using namespace Loop; +using Loop::GF3D5; +using Loop::GF3D5layout; +using Loop::GF3D5index; +using Arith::smat; +using Loop::PointDesc; -// Pointwise one-dimensional operators -template -inline CCTK_ATTRIBUTE_ALWAYS_INLINE CCTK_DEVICE CCTK_HOST simd -interp1d(const simdl &mask, const T *restrict const var) { - return maskz_loadu(mask, var); +template +CCTK_ATTRIBUTE_NOINLINE void +calc_derivs(const cGH *restrict const cctkGH, const GF3D2 &gf1, + const GF3D5 &gf0, const vec, dim> &dgf0, + const GF3D5layout &layout0) { + DECLARE_CCTK_ARGUMENTS; + + typedef simd vreal; + typedef simdl vbool; + constexpr size_t vsize = tuple_size_v; + + const vec dx([&](int a) { return CCTK_DELTA_SPACE(a); }); + + const Loop::GridDescBaseDevice grid(cctkGH); + grid.loop_int_device<0, 0, 0, vsize>( + grid.nghostzones, [=] ARITH_DEVICE(const PointDesc &p) ARITH_INLINE { + const vbool mask = Arith::mask_for_loop_tail(p.i, p.imax); + const GF3D5index index0(layout0, p.I); + const auto val = gf1(mask, p.I); + gf0.store(mask, index0, val); + const auto dval = deriv(mask, gf1, p.I, dx); + dgf0.store(mask, index0, dval); + }); } -template > -inline CCTK_ATTRIBUTE_ALWAYS_INLINE - CCTK_DEVICE CCTK_HOST std::enable_if_t - deriv1d(const TS var, const T dx) { - const T c1 = 1 / (2 * dx); - return c1 * (var(1) - var(-1)); +using std::enable_if_t; + +template +inline ARITH_INLINE + ARITH_DEVICE ARITH_HOST enable_if_t<(dir1 == dir2), simd > + deriv2(const int vavail, const simdl &mask, const GF3D2 &gf_, + const vect &I, const vec &dx) { + static_assert(dir1 >= 0 && dir1 < D, ""); + static_assert(dir2 >= 0 && dir2 < D, ""); + const auto &DI = vect::unit; + const ptrdiff_t di = gf_.delta(DI(dir1)); + return deriv2_1d(mask, &gf_(I), di, dx(dir1)); } -template > -inline CCTK_ATTRIBUTE_ALWAYS_INLINE - CCTK_DEVICE CCTK_HOST std::enable_if_t - deriv1d(const TS var, const T dx) { - const T c1 = 2 / (3 * dx); - const T c2 = -1 / (12 * dx); - return c2 * (var(2) - var(-2)) + c1 * (var(1) - var(-1)); +template +inline ARITH_INLINE + ARITH_DEVICE ARITH_HOST enable_if_t<(dir1 != dir2), simd > + deriv2(const int vavail, const simdl &mask, const GF3D2 &gf_, + const vect &I, const vec &dx) { + static_assert(dir1 >= 0 && dir1 < D, ""); + static_assert(dir2 >= 0 && dir2 < D, ""); + const auto &DI = vect::unit; + const ptrdiff_t di = gf_.delta(DI(dir1)); + const ptrdiff_t dj = gf_.delta(DI(dir2)); + return deriv2_2d(vavail, mask, &gf_(I), di, dj, dx(dir1), dx(dir2)); } -template -inline CCTK_ATTRIBUTE_ALWAYS_INLINE - CCTK_DEVICE CCTK_HOST std::enable_if_t > - deriv1d_upwind(const simdl &mask, const T *restrict const var, - const std::ptrdiff_t di, const simd &vel, const T dx) { - // arXiv:1111.2177 [gr-qc], (71) - - // if (sign) - // // + [ 0 -1 +1 0 0] - // // + 1/2 [+1 -2 +1 0 0] - // // [+1/2 -2 +3/2 0 0] - // return (1 / T(2) * var[-2 * di] // - // - 2 * var[-di] // - // + 3 / T(2) * var[0]) / - // dx; - // else - // // + [ 0 0 -1 +1 0 ] - // // - 1/2 [ 0 0 +1 -2 +1 ] - // // [ 0 0 -3/2 +2 -1/2] - // return (-3 / T(2) * var[0] // - // + 2 * var[+di] // - // - 1 / T(2) * var[+2 * di]) / - // dx; - - // + 1/2 [+1/2 -2 +3/2 0 0 ] - // + 1/2 [ 0 0 -3/2 +2 -1/2] - // [+1/4 -1 0 +1 -1/4] - constexpr T c1s = 1; - constexpr T c2s = -1 / T(4); - const simd symm = - c2s * (maskz_loadu(mask, &var[2 * di]) - - maskz_loadu(mask, &var[-2 * di])) // - + c1s(maskz_loadu(mask, &var[di]) - maskz_loadu(mask, &var[-di])); - // + 1/2 [+1/2 -2 +3/2 0 0 ] - // - 1/2 [ 0 0 -3/2 +2 -1/2] - // [+1/4 -1 +3/2 -1 +1/4] - constexpr T c0a = 3 / T(2); - constexpr T c1a = -1; - constexpr T c2a = 1 / T(4); - const simd anti = - c2a * (maskz_loadu(mask, &var[2 * di]) + - maskz_loadu(mask, &var[-2 * di])) // - + c1a(maskz_loadu(mask, &var[di]) + maskz_loadu(mask, &var[-di])) // - + c0a * maskz_loadu(mask, &var[0]); - using std::fabs; - return (vel * symm - fabs(vel) * anti) / dx; +template +inline ARITH_INLINE ARITH_DEVICE ARITH_HOST smat, dim> +deriv2(const int vavail, const simdl &mask, const GF3D2 &gf_, + const vect &I, const vec &dx) { + return {deriv2<0, 0>(vavail, mask, gf_, I, dx), + deriv2<0, 1>(vavail, mask, gf_, I, dx), + deriv2<0, 2>(vavail, mask, gf_, I, dx), + deriv2<1, 1>(vavail, mask, gf_, I, dx), + deriv2<1, 2>(vavail, mask, gf_, I, dx), + deriv2<2, 2>(vavail, mask, gf_, I, dx)}; } -template -inline CCTK_ATTRIBUTE_ALWAYS_INLINE - CCTK_DEVICE CCTK_HOST std::enable_if_t > - deriv1d_upwind(const simdl &mask, const T *restrict const var, - const std::ptrdiff_t di, const simd &vel, const T dx) { - // arXiv:1111.2177 [gr-qc], (71) - - // A fourth order stencil for a first derivative, shifted by one grid point - - // if (sign) - // return (-1 / T(12) * var[-3 * di] // - // + 1 / T(2) * var[-2 * di] // - // - 3 / T(2) * var[-di] // - // + 5 / T(6) * var[0] // - // + 1 / T(4) * var[+di]) / - // dx; - // else - // return (-1 / T(4) * var[-di] // - // - 5 / T(6) * var[0] // - // + 3 / T(2) * var[+di] // - // - 1 / T(2) * var[+2 * di] // - // + 1 / T(12) * var[+3 * di]) / - // dx; - - constexpr T c1s = 7 / T(8); - constexpr T c2s = -1 / T(4); - constexpr T c3s = 1 / T(24); - const simd symm = - c3s * (maskz_loadu(mask, &var[3 * di]) - - maskz_loadu(mask, &var[-3 * di])) // - + c2s * (maskz_loadu(mask, &var[2 * di]) - - maskz_loadu(mask, &var[-2 * di])) // - + c1s * (maskz_loadu(mask, &var[di]) - maskz_loadu(mask, &var[-di])); - constexpr T c0a = 5 / T(6); - constexpr T c1a = -5 / T(8); - constexpr T c2a = 1 / T(4); - constexpr T c3a = -1 / T(24); - const simd anti = - c3a * (maskz_loadu(mask, &var[3 * di]) + - maskz_loadu(mask, &var[-3 * di])) // - + c2a * (maskz_loadu(mask, &var[2 * di]) + - maskz_loadu(mask, &var[-2 * di])) // - + c1a * (maskz_loadu(mask, &var[di]) + maskz_loadu(mask, &var[-di])) // - + c0a * maskz_loadu(mask, &var[0]); - using std::fabs; - return (vel * symm - fabs(vel) * anti) / dx; +template +CCTK_ATTRIBUTE_NOINLINE void +calc_derivs2(const cGH *restrict const cctkGH, const GF3D2 &gf1, + const GF3D5 &gf0, const vec, dim> &dgf0, + const smat, dim> &ddgf0, const GF3D5layout &layout0) { + DECLARE_CCTK_ARGUMENTS; + + typedef simd vreal; + typedef simdl vbool; + constexpr size_t vsize = tuple_size_v; + + const vec dx([&](int a) { return CCTK_DELTA_SPACE(a); }); + + const Loop::GridDescBaseDevice grid(cctkGH); + grid.loop_int_device<0, 0, 0, vsize>( + grid.nghostzones, [=] ARITH_DEVICE(const PointDesc &p) ARITH_INLINE { + const vbool mask = Arith::mask_for_loop_tail(p.i, p.imax); + const int vavail = p.imax - p.i; + const GF3D5index index0(layout0, p.I); + const auto val = gf1(mask, p.I); + gf0.store(mask, index0, val); + const auto dval = deriv(mask, gf1, p.I, dx); + dgf0.store(mask, index0, dval); + const auto ddval = deriv2(vavail, mask, gf1, p.I, dx); + ddgf0.store(mask, index0, ddval); + }); } -template > -inline CCTK_ATTRIBUTE_ALWAYS_INLINE - CCTK_DEVICE CCTK_HOST std::enable_if_t - deriv2_1d(const TS var, const T dx) { - // constexpr T c0 = -2 / pow2(dx); - // constexpr T c1 = 1 / pow2(dx); - // return c1 * (var(-1) + var(1)) + c0 * var(0); - const T c0 = 1 / pow2(dx); - return c0 * ((var(1) - var(0)) - (var(0) - var(-1))); +template +void CCTK_ATTRIBUTE_NOINLINE calc_derivs( + const cGH *restrict const cctkGH, const vec, dim> &gf0_, + const vec, dim> &gf_, const vec, dim>, dim> &dgf_, + const GF3D5layout &layout) { + for (int a = 0; a < 3; ++a) + calc_derivs(cctkGH, gf0_(a), gf_(a), dgf_(a), layout); } -template > -inline CCTK_ATTRIBUTE_ALWAYS_INLINE - CCTK_DEVICE CCTK_HOST std::enable_if_t - deriv2_1d(const TS var, const T dx) { - // constexpr T c0 = -5 / T(2); - // constexpr T c1 = 4 / T(3); - // constexpr T c2 = -1 / T(12); - // return (c2 * (var(-2) + var(2)) + c1 * (var(-1) + var(1)) + c0 * var(0)) / - // pow2(dx); - const T c0 = 15 / (12 * pow2(dx)); - const T c1 = -1 / (12 * pow2(dx)); - return c1 * ((var(4) - var(1)) - (var(3) - var(0))) + - c0 * ((var(3) - var(2)) - (var(2) - var(1))); +template +void CCTK_ATTRIBUTE_NOINLINE calc_derivs2( + const cGH *restrict const cctkGH, const vec, dim> &gf0_, + const vec, dim> &gf_, const vec, dim>, dim> &dgf_, + const vec, dim>, dim> &ddgf_, const GF3D5layout &layout) { + for (int a = 0; a < 3; ++a) + calc_derivs2(cctkGH, gf0_(a), gf_(a), dgf_(a), ddgf_(a), layout); } -template > -inline CCTK_ATTRIBUTE_ALWAYS_INLINE CCTK_DEVICE CCTK_HOST R -deriv2_2d(const TS var, const T dx, const T dy) { - // We assume that the x-direction might be special since it might - // be SIMD-vectorized. We assume that the y-direction is not - // SIMD-vectorized. - - // Calculate y-derivative first. - // (If we wanted to calculate the x-derivative first, then we would - // be difficult to determine the extent of the `n`-loop, since it - // needs to include enough room for the SIMD y-derivative later.) - static_assert(sizeof(R) % sizeof(T) == 0, ""); - constexpr std::ptrdiff_t vsize = sizeof(R) / sizeof(T); - constexpr std::ptrdiff_t ndyvars = - align_ceil(std::ptrdiff_t(2 * deriv_order + 1), vsize); - std::array dyvar; -#ifdef CCTK_DEBUG - for (std::ptrdiff_t n = 0; n < ndyvars; ++n) - dyvar[n] = Arith::nan()(); -#endif - for (std::ptrdiff_t n = 0; n < ndyvars; ++n) { - std::ptrdiff_t di = vsize * n - deriv_order; - if (vsize == 1 && di == 0) - continue; - dyvar[n] = deriv1d( - [&](int dj) CCTK_ATTRIBUTE_ALWAYS_INLINE { return var(di, dj); }, dy); - } - - // Calculate x-derivative next - return deriv1d( - [&](int di) - CCTK_ATTRIBUTE_ALWAYS_INLINE { return dyvar[di + deriv_order]; }, - dx); +template +CCTK_ATTRIBUTE_NOINLINE void +calc_copy(const cGH *restrict const cctkGH, const GF3D2 &gf1, + const GF3D5 &gf0, const GF3D5layout &layout0) { + DECLARE_CCTK_ARGUMENTS; + + typedef simd vreal; + typedef simdl vbool; + constexpr size_t vsize = tuple_size_v; + + const vec dx([&](int a) { return CCTK_DELTA_SPACE(a); }); + + const Loop::GridDescBaseDevice grid(cctkGH); + grid.loop_int_device<0, 0, 0, vsize>( + grid.nghostzones, + [=] ARITH_DEVICE ARITH_HOST(const PointDesc &p) ARITH_INLINE { + const vbool mask = Arith::mask_for_loop_tail(p.i, p.imax); + const GF3D5index index0(layout0, p.I); + const auto val = gf1(mask, p.I); + gf0.store(mask, index0, val); + }); } -template -inline CCTK_ATTRIBUTE_ALWAYS_INLINE - CCTK_DEVICE CCTK_HOST std::enable_if_t > - diss1d(const simdl &mask, const T *restrict const var, - const std::ptrdiff_t di, const T dx) { - constexpr T c0 = 6; - constexpr T c1 = -4; - constexpr T c2 = 1; - return (c2 * (maskz_loadu(mask, &var[2 * di]) + - maskz_loadu(mask, &var[-2 * di])) // - + c1 * (maskz_loadu(mask, &var[di]) + maskz_loadu(mask, &var[-di])) // - + c0 * maskz_loadu(mask, &var[0])) / - dx; +template +void CCTK_ATTRIBUTE_NOINLINE calc_copy(const cGH *restrict const cctkGH, + const vec, dim> &gf0_, + const vec, dim> &gf_, + const GF3D5layout &layout) { + for (int a = 0; a < 3; ++a) + calc_copy(cctkGH, gf0_(a), gf_(a), layout); } -template -inline CCTK_ATTRIBUTE_ALWAYS_INLINE - CCTK_DEVICE CCTK_HOST std::enable_if_t > - diss1d(const simdl &mask, const T *restrict const var, - const std::ptrdiff_t di, const T dx) { - constexpr T c0 = -20; - constexpr T c1 = 15; - constexpr T c2 = -6; - constexpr T c3 = 1; - return (c3 * (maskz_loadu(mask, &var[3 * di]) + - maskz_loadu(mask, &var[-3 * di])) // - + c2 * *(maskz_loadu(mask, &var[2 * di]) + - maskz_loadu(mask, &var[-2 * di])) // - + c1 * (maskz_loadu(mask, &var[di]) + maskz_loadu(mask, &var[-di])) // - + c0 * maskz_loadu(mask, &var[0])) / - dx; +template +CCTK_ATTRIBUTE_NOINLINE void calc_derivs( + const cGH *restrict const cctkGH, const smat, dim> &gf0_, + const smat, dim> &gf_, const smat, dim>, dim> &dgf_, + const GF3D5layout &layout) { + for (int a = 0; a < 3; ++a) + for (int b = a; b < 3; ++b) + calc_derivs(cctkGH, gf0_(a, b), gf_(a, b), dgf_(a, b), layout); } -} // namespace detail - -//////////////////////////////////////////////////////////////////////////////// +template +CCTK_ATTRIBUTE_NOINLINE void calc_derivs2( + const cGH *restrict const cctkGH, const smat, dim> &gf0_, + const smat, dim> &gf_, const smat, dim>, dim> &dgf_, + const smat, dim>, dim> &ddgf_, const GF3D5layout &layout) { + for (int a = 0; a < 3; ++a) + for (int b = a; b < 3; ++b) + calc_derivs2(cctkGH, gf0_(a, b), gf_(a, b), dgf_(a, b), ddgf_(a, b), + layout); +} -// Pointwise multi-dimensional derivative operators - -template -inline CCTK_ATTRIBUTE_ALWAYS_INLINE - CCTK_DEVICE CCTK_HOST Arith::vec, Loop::dim> - calc_deriv(const Loop::GF3D5 &gf, const Arith::simdl &mask, - const Loop::GF3D5layout &layout, - const Arith::vect &I, - const Arith::vect &dx) { - using namespace Arith; - using namespace Loop; - // We use explicit index calculations to avoid unnecessary integer - // multiplications - const T *restrict const ptr = &gf(layout, I); - const std::array offsets{ - layout.delta(1, 0, 0), - layout.delta(0, 1, 0), - layout.delta(0, 0, 1), - }; - return { - detail::deriv1d( - [&](int di) CCTK_ATTRIBUTE_ALWAYS_INLINE { - return maskz_loadu(mask, &ptr[di * offsets[0]]); - }, - dx[0]), - detail::deriv1d( - [&](int di) CCTK_ATTRIBUTE_ALWAYS_INLINE { - return maskz_loadu(mask, &ptr[di * offsets[1]]); - }, - dx[1]), - detail::deriv1d( - [&](int di) CCTK_ATTRIBUTE_ALWAYS_INLINE { - return maskz_loadu(mask, &ptr[di * offsets[2]]); - }, - dx[2]), - }; +template +inline ARITH_INLINE ARITH_DEVICE ARITH_HOST simd +deriv1d_upwind(const simdl &mask, const T *restrict const var, + const ptrdiff_t di, const simd &vel, const T dx) { + // arXiv:1111.2177 [gr-qc], (71) + if constexpr (deriv_order == 2) { + // if (sign) + // // + [ 0 -1 +1 0 0] + // // + 1/2 [+1 -2 +1 0 0] + // // [+1/2 -2 +3/2 0 0] + // return (1 / T(2) * var[-2 * di] // + // - 2 * var[-di] // + // + 3 / T(2) * var[0]) / + // dx; + // else + // // + [ 0 0 -1 +1 0 ] + // // - 1/2 [ 0 0 +1 -2 +1 ] + // // [ 0 0 -3/2 +2 -1/2] + // return (-3 / T(2) * var[0] // + // + 2 * var[+di] // + // - 1 / T(2) * var[+2 * di]) / + // dx; + + // + 1/2 [+1/2 -2 +3/2 0 0 ] + // + 1/2 [ 0 0 -3/2 +2 -1/2] + // [+1/4 -1 0 +1 -1/4] + const simd symm = + 1 / T(4) * + (maskz_loadu(mask, &var[-2 * di]) - + maskz_loadu(mask, &var[2 * di])) // + - (maskz_loadu(mask, &var[-di]) - maskz_loadu(mask, &var[di])); + // + 1/2 [+1/2 -2 +3/2 0 0 ] + // - 1/2 [ 0 0 -3/2 +2 -1/2] + // [+1/4 -1 +3/2 -1 +1/4] + const simd anti = + 1 / T(4) * + (maskz_loadu(mask, &var[-2 * di]) + + maskz_loadu(mask, &var[2 * di])) // + - (maskz_loadu(mask, &var[-di]) + maskz_loadu(mask, &var[di])) // + + 3 / T(2) * maskz_loadu(mask, &var[0]); + return (vel * symm - fabs(vel) * anti) / dx; + } + if constexpr (deriv_order == 4) { + // A fourth order stencil for a first derivative, shifted by one grid point + + // if (sign) + // return (-1 / T(12) * var[-3 * di] // + // + 1 / T(2) * var[-2 * di] // + // - 3 / T(2) * var[-di] // + // + 5 / T(6) * var[0] // + // + 1 / T(4) * var[+di]) / + // dx; + // else + // return (-1 / T(4) * var[-di] // + // - 5 / T(6) * var[0] // + // + 3 / T(2) * var[+di] // + // - 1 / T(2) * var[+2 * di] // + // + 1 / T(12) * var[+3 * di]) / + // dx; + + const simd symm = + -1 / T(24) * + (maskz_loadu(mask, &var[-3 * di]) - + maskz_loadu(mask, &var[3 * di])) // + + 1 / T(4) * + (maskz_loadu(mask, &var[-2 * di]) - + maskz_loadu(mask, &var[2 * di])) // + - + 7 / T(8) * (maskz_loadu(mask, &var[-di]) - maskz_loadu(mask, &var[di])); + const simd anti = + -1 / T(24) * + (maskz_loadu(mask, &var[-3 * di]) + + maskz_loadu(mask, &var[3 * di])) // + + 1 / T(4) * + (maskz_loadu(mask, &var[-2 * di]) + + maskz_loadu(mask, &var[2 * di])) // + - 5 / T(8) * + (maskz_loadu(mask, &var[-di]) + maskz_loadu(mask, &var[di])) // + + 5 / T(6) * maskz_loadu(mask, &var[0]); + return (vel * symm - fabs(vel) * anti) / dx; + } } -template -inline CCTK_ATTRIBUTE_ALWAYS_INLINE - CCTK_DEVICE CCTK_HOST Arith::vec - calc_deriv(const Loop::GF3D5 &gf, const Loop::GF3D5layout &layout, - const Arith::vect &I, - const Arith::vect &dx) { - using namespace Arith; - using namespace Loop; - // We use explicit index calculations to avoid unnecessary integer - // multiplications - const T *restrict const ptr = &gf(layout, I); - const std::array offsets{ - layout.delta(1, 0, 0), - layout.delta(0, 1, 0), - layout.delta(0, 0, 1), - }; - return { - detail::deriv1d( - [&](int di) - CCTK_ATTRIBUTE_ALWAYS_INLINE { return ptr[di * offsets[0]]; }, - dx[0]), - detail::deriv1d( - [&](int di) - CCTK_ATTRIBUTE_ALWAYS_INLINE { return ptr[di * offsets[1]]; }, - dx[1]), - detail::deriv1d( - [&](int di) - CCTK_ATTRIBUTE_ALWAYS_INLINE { return ptr[di * offsets[2]]; }, - dx[2]), - }; +template +inline ARITH_INLINE ARITH_DEVICE ARITH_HOST simd +deriv_upwind(const simdl &mask, const GF3D2 &gf_, + const vect &I, const vec, D> &vel, + const vec &dx) { + static_assert(dir >= 0 && dir < D, ""); + const auto &DI = vect::unit; + const ptrdiff_t di = gf_.delta(DI(dir)); + return deriv1d_upwind(mask, &gf_(I), di, vel(dir), dx(dir)); } -template -inline CCTK_ATTRIBUTE_ALWAYS_INLINE - CCTK_DEVICE CCTK_HOST Arith::smat, Loop::dim> - calc_deriv2(const Loop::GF3D5 &gf, const Arith::simdl &mask, - const Loop::GF3D5layout &layout, - const Arith::vect &I, - const Arith::vect &dx) { - using namespace Arith; - using namespace Loop; - // We use explicit index calculations to avoid unnecessary integer - // multiplications - const T *restrict const ptr = &gf(layout, I); - const std::array offsets{ - layout.delta(1, 0, 0), - layout.delta(0, 1, 0), - layout.delta(0, 0, 1), - }; - return { - detail::deriv2_1d( - [&](int di) CCTK_ATTRIBUTE_ALWAYS_INLINE { - return maskz_loadu(mask, &ptr[di * offsets[0]]); - }, - dx[0]), - detail::deriv2_2d( - [&](int di, int dj) CCTK_ATTRIBUTE_ALWAYS_INLINE { - return maskz_loadu(mask, &ptr[di * offsets[0] + dj * offsets[1]]); - }, - dx[0], dx[1]), - detail::deriv2_2d( - [&](int di, int dj) CCTK_ATTRIBUTE_ALWAYS_INLINE { - return maskz_loadu(mask, &ptr[di * offsets[0] + dj * offsets[2]]); - }, - dx[0], dx[2]), - detail::deriv2_1d( - [&](int di) CCTK_ATTRIBUTE_ALWAYS_INLINE { - return maskz_loadu(mask, &ptr[di * offsets[1]]); - }, - dx[1]), - detail::deriv2_2d( - [&](int di, int dj) CCTK_ATTRIBUTE_ALWAYS_INLINE { - return maskz_loadu(mask, &ptr[di * offsets[1] + dj * offsets[2]]); - }, - dx[1], dx[2]), - detail::deriv2_1d( - [&](int di) CCTK_ATTRIBUTE_ALWAYS_INLINE { - return maskz_loadu(mask, &ptr[di * offsets[2]]); - }, - dx[2]), - }; +template +inline ARITH_INLINE ARITH_DEVICE ARITH_HOST simd +deriv_upwind(const simdl &mask, const GF3D2 &gf_, + const vect &I, const vec, dim> &vel, + const vec &dx) { + return deriv_upwind<0>(mask, gf_, I, vel, dx) + + deriv_upwind<1>(mask, gf_, I, vel, dx) + + deriv_upwind<2>(mask, gf_, I, vel, dx); } -template -inline CCTK_ATTRIBUTE_ALWAYS_INLINE - CCTK_DEVICE CCTK_HOST Arith::smat - calc_deriv2(const Loop::GF3D5 &gf, const Loop::GF3D5layout &layout, - const Arith::vect &I, - const Arith::vect &dx) { - using namespace Arith; - using namespace Loop; - // We use explicit index calculations to avoid unnecessary integer - // multiplications - const T *restrict const ptr = &gf(layout, I); - const std::array offsets{ - layout.delta(1, 0, 0), - layout.delta(0, 1, 0), - layout.delta(0, 0, 1), - }; - return { - detail::deriv2_1d( - [&](int di) - CCTK_ATTRIBUTE_ALWAYS_INLINE { return ptr[di * offsets[0]]; }, - dx[0]), - detail::deriv2_2d( - [&](int di, int dj) CCTK_ATTRIBUTE_ALWAYS_INLINE { - return ptr[di * offsets[0] + dj * offsets[1]]; - }, - dx[0], dx[1]), - detail::deriv2_2d( - [&](int di, int dj) CCTK_ATTRIBUTE_ALWAYS_INLINE { - return ptr[di * offsets[0] + dj * offsets[2]]; - }, - dx[0], dx[2]), - detail::deriv2_1d( - [&](int di) - CCTK_ATTRIBUTE_ALWAYS_INLINE { return ptr[di * offsets[1]]; }, - dx[1]), - detail::deriv2_2d( - [&](int di, int dj) CCTK_ATTRIBUTE_ALWAYS_INLINE { - return ptr[di * offsets[1] + dj * offsets[2]]; - }, - dx[1], dx[2]), - detail::deriv2_1d( - [&](int di) - CCTK_ATTRIBUTE_ALWAYS_INLINE { return ptr[di * offsets[2]]; }, - dx[2]), - }; +template +inline ARITH_INLINE ARITH_DEVICE ARITH_HOST simd +deriv1d_diss(const simdl &mask, const T *restrict const var, + const ptrdiff_t di, const T dx) { + if constexpr (deriv_order == 2) + return ((maskz_loadu(mask, &var[-2 * di]) + + maskz_loadu(mask, &var[+2 * di])) // + - + 4 * (maskz_loadu(mask, &var[-di]) + maskz_loadu(mask, &var[+di])) // + + 6 * maskz_loadu(mask, &var[0])) / + dx; + if constexpr (deriv_order == 4) + return ((maskz_loadu(mask, &var[-3 * di]) + + maskz_loadu(mask, &var[+3 * di])) // + - 6 * (maskz_loadu(mask, &var[-2 * di]) + + maskz_loadu(mask, &var[+2 * di])) // + + 15 * (maskz_loadu(mask, &var[-di]) + + maskz_loadu(mask, &var[+di])) // + - 20 * maskz_loadu(mask, &var[0])) / + dx; } -//////////////////////////////////////////////////////////////////////////////// +using Arith::pown; -// Tile-based multi-dimensional operators +template +inline ARITH_INLINE ARITH_DEVICE ARITH_HOST simd +deriv_diss(const simdl &mask, const GF3D2 &gf_, + const vect &I, const vec &dx) { + static_assert(dir >= 0 && dir < D, ""); + const auto &DI = vect::unit; + const ptrdiff_t di = gf_.delta(DI(dir)); + return deriv1d_diss(mask, &gf_(I), di, dx(dir)); +} -template -CCTK_ATTRIBUTE_NOINLINE void -calc_derivs(const Arith::vec, Loop::dim> &dgf, - const Loop::GridDescBaseDevice &grid, - const Loop::GF3D5 &gf, const Loop::GF3D5layout layout, - const Arith::vect dx, const int deriv_order); +template +inline ARITH_INLINE ARITH_DEVICE ARITH_HOST simd +diss(const simdl &mask, const GF3D2 &gf_, const vect &I, + const vec &dx) { + // arXiv:gr-qc/0610128, (63), with r=2 + constexpr int diss_order = deriv_order + 2; + constexpr int sign = diss_order % 4 == 0 ? -1 : +1; + return sign / T(pown(2, deriv_order + 2)) * + (deriv_diss<0>(mask, gf_, I, dx) // + + deriv_diss<1>(mask, gf_, I, dx) // + + deriv_diss<2>(mask, gf_, I, dx)); +} -template +template CCTK_ATTRIBUTE_NOINLINE void -calc_derivs2(const Arith::vec, Loop::dim> &dgf, - const Arith::smat, Loop::dim> &ddgf, - const Loop::GridDescBaseDevice &grid, - const Loop::GF3D5 &gf, const Loop::GF3D5layout layout, - const Arith::vect dx, const int deriv_order); +apply_upwind_diss(const cGH *restrict const cctkGH, const GF3D2 &gf_, + const vec, dim> &gf_betaG_, + const GF3D2 &gf_rhs_) { + DECLARE_CCTK_ARGUMENTS; + //DECLARE_CCTK_PARAMETERS; + const CCTK_REAL& epsdiss = *(CCTK_REAL*)CCTK_ParameterGet("epsdiss", "Derivs", nullptr); + + const vec dx([&](int a) { return CCTK_DELTA_SPACE(a); }); + + typedef simd vreal; + typedef simdl vbool; + constexpr size_t vsize = tuple_size_v; + + if (epsdiss == 0) { + + const Loop::GridDescBaseDevice grid(cctkGH); + grid.loop_int_device<0, 0, 0, vsize>( + grid.nghostzones, [=] ARITH_DEVICE(const PointDesc &p) ARITH_INLINE { + const vbool mask = mask_for_loop_tail(p.i, p.imax); + const vec betaG = gf_betaG_(mask, p.I); + const vreal rhs_old = gf_rhs_(mask, p.I); + const vreal rhs_new = + rhs_old + deriv_upwind(mask, gf_, p.I, betaG, dx); + gf_rhs_.store(mask, p.I, rhs_new); + }); + + } else { + + const Loop::GridDescBaseDevice grid(cctkGH); + grid.loop_int_device<0, 0, 0, vsize>( + grid.nghostzones, [=] ARITH_DEVICE(const PointDesc &p) ARITH_INLINE { + const vbool mask = mask_for_loop_tail(p.i, p.imax); + const vec betaG = gf_betaG_(mask, p.I); + const vreal rhs_old = gf_rhs_(mask, p.I); + const vreal rhs_new = rhs_old + + deriv_upwind(mask, gf_, p.I, betaG, dx) + + epsdiss * diss(mask, gf_, p.I, dx); + gf_rhs_.store(mask, p.I, rhs_new); + }); + } +} -} // namespace Derivs +template +void CCTK_ATTRIBUTE_NOINLINE calc_copy(const cGH *restrict const cctkGH, + const smat, dim> &gf0_, + const smat, dim> &gf_, + const GF3D5layout &layout) { + for (int a = 0; a < 3; ++a) + for (int b = a; b < 3; ++b) + calc_copy(cctkGH, gf0_(a, b), gf_(a, b), layout); +} -#endif // #ifndef DERIVS_HXX +#endif diff --git a/Derivs/src/make.code.defn b/Derivs/src/make.code.defn index bf1bff1c3..77206c7e9 100644 --- a/Derivs/src/make.code.defn +++ b/Derivs/src/make.code.defn @@ -1,7 +1,7 @@ # Main make.code.defn file for thorn Derivs # Source files in this directory -SRCS = derivs.cxx +SRCS = # Subdirectories containing source files SUBDIRS = diff --git a/Loop/src/loop.hxx b/Loop/src/loop.hxx index 8792233e9..c41f10ebb 100644 --- a/Loop/src/loop.hxx +++ b/Loop/src/loop.hxx @@ -1001,89 +1001,6 @@ template struct GF3D2 { //////////////////////////////////////////////////////////////////////////////// -template struct GF3D3layout { - static_assert(NI >= 0); - static_assert(NJ >= 0); - static_assert(NK >= 0); - - static constexpr int ni = NI; - static constexpr int nj = NJ; - static constexpr int nk = NK; - - static constexpr int off = OFF; - - static constexpr int di = 1; - static constexpr int dj = NI * di; - static constexpr int dk = NJ * dj; - static constexpr int np = NK * dk; - - constexpr int linear(int i, int j, int k) const { - return i * di + j * dj + k * dk - OFF; - } - constexpr int linear(const vect &I) const { - return (*this)(I[0], I[1], I[2]); - } -}; - -template -struct makeGF3D3layout { -private: - static constexpr int ni = imax0 - imin0; - static constexpr int nj = imax1 - imin1; - static constexpr int nk = imax2 - imin2; - static constexpr int di = GF3D3layout::di; - static constexpr int dj = GF3D3layout::dj; - static constexpr int dk = GF3D3layout::dk; - static constexpr int off = imin0 * di + imin1 * dj + imin2 * dk; - -public: - typedef GF3D3layout type; -}; -template -using makeGF3D3layout_t = - typename makeGF3D3layout::type; - -template -struct GF3D3 : GF3D3layout { - using GF3D3layout::np; - using GF3D3layout::linear; - - vect arr; - - constexpr T &restrict operator()(int i, int j, int k) { - return arr[linear(i, j, k)]; - } - constexpr const T &restrict operator()(int i, int j, int k) const { - return arr[linear(i, j, k)]; - } - constexpr T &restrict operator()(const vect &I) { - return (*this)(I[0], I[1], I[2]); - } - constexpr const T &restrict operator()(const vect &I) const { - return (*this)(I[0], I[1], I[2]); - } -}; - -template -struct GF3D3ptr : GF3D3layout { - using GF3D3layout::np; - using GF3D3layout::linear; - - T *restrict ptr; - - GF3D3ptr() = delete; - GF3D3ptr(T *restrict ptr) : ptr(ptr) {} - - constexpr T &restrict operator()(int i, int j, int k) const { - return ptr[linear(i, j, k)]; - } - constexpr T &restrict operator()(const vect &I) const { - return (*this)(I[0], I[1], I[2]); - } -}; - -//////////////////////////////////////////////////////////////////////////////// - struct GF3D5layout { #ifdef CCTK_DEBUG vect imin, imax; From cf6c9caa1afcba928f871f302b8a34e301b758db Mon Sep 17 00:00:00 2001 From: "Steven R. Brandt" Date: Fri, 26 May 2023 11:54:41 -0500 Subject: [PATCH 2/7] Add `using Arith::align_ceil;` --- Derivs/src/derivs.hxx | 1 + 1 file changed, 1 insertion(+) diff --git a/Derivs/src/derivs.hxx b/Derivs/src/derivs.hxx index 47c8b85c0..6a51e6e5a 100644 --- a/Derivs/src/derivs.hxx +++ b/Derivs/src/derivs.hxx @@ -92,6 +92,7 @@ using Arith::min; using std::array; using Arith::div_floor; using Arith::div_ceil; +using Arith::align_ceil; using Arith::mask_for_loop_tail; template From c94a6a222afdf97d49e86d2cb43f6b7716d6fd23 Mon Sep 17 00:00:00 2001 From: "Steven R. Brandt" Date: Mon, 24 Jul 2023 13:23:11 -0500 Subject: [PATCH 3/7] Preserve Derivs/src/derivs.{hxx,cxx} --- Derivs/src/derivs-spacetimex.hxx | 512 +++++++++++++++++ Derivs/src/derivs.hxx | 909 ++++++++++++++++--------------- Derivs/src/make.code.defn | 2 +- 3 files changed, 971 insertions(+), 452 deletions(-) create mode 100644 Derivs/src/derivs-spacetimex.hxx diff --git a/Derivs/src/derivs-spacetimex.hxx b/Derivs/src/derivs-spacetimex.hxx new file mode 100644 index 000000000..6a51e6e5a --- /dev/null +++ b/Derivs/src/derivs-spacetimex.hxx @@ -0,0 +1,512 @@ +#ifndef CARPETX_DERIVS_DERIVS_HXX +#define CARPETX_DERIVS_DERIVS_HXX + +#include +#include +#include +#include + +#include +#include +#include + +#include +// Unified derivative header + +// TODO: Make this a runtime parameter +constexpr int deriv_order = 4; + +//////////////////////////////////////////////////////////////////////////////// + +using Arith::pow2; + +template +inline ARITH_INLINE ARITH_DEVICE ARITH_HOST T pow3(const T &x) { + return pow2(x) * x; +} + +//////////////////////////////////////////////////////////////////////////////// + +using Arith::simd; +using Arith::simdl; + +template +inline ARITH_INLINE ARITH_DEVICE ARITH_HOST simd +deriv1d(const simdl &mask, const T *restrict const var, const ptrdiff_t di, + const T dx) { + const auto load = [&](const int n) { + return maskz_loadu(mask, &var[n * di]) - maskz_loadu(mask, &var[-n * di]); + }; + if constexpr (deriv_order == 2) + return 1 / T(2) * load(1) / dx; + if constexpr (deriv_order == 4) + return (-1 / T(12) * load(2) + 2 / T(3) * load(1)) / dx; + if constexpr (deriv_order == 6) + return (1 / T(60) * load(3) - 3 / T(20) * load(2) + 3 / T(4) * load(1)) / + dx; +} + +using Loop::GF3D2; +using Arith::vect; +using Loop::dim; +using Arith::vec; + +template +inline ARITH_INLINE ARITH_DEVICE ARITH_HOST simd +deriv(const simdl &mask, const GF3D2 &gf_, const vect &I, + const vec &dx) { + static_assert(dir >= 0 && dir < D, ""); + const auto &DI = vect::unit; + const ptrdiff_t di = gf_.delta(DI(dir)); + return deriv1d(mask, &gf_(I), di, dx(dir)); +} +template +inline ARITH_INLINE ARITH_DEVICE ARITH_HOST vec, dim> +deriv(const simdl &mask, const GF3D2 &gf_, const vect &I, + const vec &dx) { + return {deriv<0>(mask, gf_, I, dx), deriv<1>(mask, gf_, I, dx), + deriv<2>(mask, gf_, I, dx)}; +} + +template +inline ARITH_INLINE ARITH_DEVICE ARITH_HOST simd +deriv2_1d(const simdl &mask, const T *restrict const var, const ptrdiff_t di, + const T dx) { + const auto load = [&](const int n) { + return maskz_loadu(mask, &var[n * di]) + maskz_loadu(mask, &var[-n * di]); + }; + const auto load0 = [&]() { return maskz_loadu(mask, &var[0]); }; + if constexpr (deriv_order == 2) + return (load(1) - 2 * load0()) / pow2(dx); + if constexpr (deriv_order == 4) + return (1 / T(12) * load(2) - 4 / T(3) * load(1) + 5 / T(2) * load0()) / + pow2(dx); + if constexpr (deriv_order == 6) + return (1 / T(90) * load(3) - 3 / T(20) * load(2) + 3 / T(2) * load(1) - + 49 / T(18) * load0()) / + pow2(dx); +} + +using std::tuple_size_v; +using Arith::min; +using std::array; +using Arith::div_floor; +using Arith::div_ceil; +using Arith::align_ceil; +using Arith::mask_for_loop_tail; + +template +inline ARITH_INLINE ARITH_DEVICE ARITH_HOST simd +deriv2_2d(const int vavail, const simdl &mask, const T *restrict const var, + const ptrdiff_t di, const ptrdiff_t dj, const T dx, const T dy) { + constexpr size_t vsize = tuple_size_v >; + if (di == 1) { + assert(vavail > 0); + constexpr int maxnpoints = deriv_order + 1 + vsize - 1; + const int npoints = deriv_order + 1 + min(int(vsize), vavail) - 1; + array, div_ceil(maxnpoints, int(vsize))> arrx; + for (int i = 0; i < maxnpoints; i += vsize) { + if (i < npoints) { + const simdl mask1 = Arith::mask_for_loop_tail >(i, npoints); + arrx[div_floor(i, int(vsize))] = + deriv1d(mask1, &var[i - deriv_order / 2], dj, dy); + } + } +#ifdef CCTK_DEBUG + for (int i = npoints; i < align_ceil(maxnpoints, int(vsize)); ++i) + ((T *)&arrx[0])[i] = Arith::nan()(); // unused +#endif + const T *const varx = (T *)&arrx[0] + deriv_order / 2; + return deriv1d(mask, varx, 1, dx); + } else { + assert(dj != 1); + array, deriv_order + 1> arrx; + for (int j = -deriv_order / 2; j <= deriv_order / 2; ++j) + if (j == 0) { +#ifdef CCTK_DEBUG + arrx[deriv_order / 2 + j] = Arith::nan >()(); // unused +#endif + } else { + arrx[deriv_order / 2 + j] = deriv1d(mask, &var[j * dj], di, dx); + } + const T *const varx = (T *)(&arrx[deriv_order / 2]); + return deriv1d(mask, varx, vsize, dy); + } +} + +using Loop::GF3D5; +using Loop::GF3D5layout; +using Loop::GF3D5index; +using Arith::smat; +using Loop::PointDesc; + + +template +CCTK_ATTRIBUTE_NOINLINE void +calc_derivs(const cGH *restrict const cctkGH, const GF3D2 &gf1, + const GF3D5 &gf0, const vec, dim> &dgf0, + const GF3D5layout &layout0) { + DECLARE_CCTK_ARGUMENTS; + + typedef simd vreal; + typedef simdl vbool; + constexpr size_t vsize = tuple_size_v; + + const vec dx([&](int a) { return CCTK_DELTA_SPACE(a); }); + + const Loop::GridDescBaseDevice grid(cctkGH); + grid.loop_int_device<0, 0, 0, vsize>( + grid.nghostzones, [=] ARITH_DEVICE(const PointDesc &p) ARITH_INLINE { + const vbool mask = Arith::mask_for_loop_tail(p.i, p.imax); + const GF3D5index index0(layout0, p.I); + const auto val = gf1(mask, p.I); + gf0.store(mask, index0, val); + const auto dval = deriv(mask, gf1, p.I, dx); + dgf0.store(mask, index0, dval); + }); +} + +using std::enable_if_t; + +template +inline ARITH_INLINE + ARITH_DEVICE ARITH_HOST enable_if_t<(dir1 == dir2), simd > + deriv2(const int vavail, const simdl &mask, const GF3D2 &gf_, + const vect &I, const vec &dx) { + static_assert(dir1 >= 0 && dir1 < D, ""); + static_assert(dir2 >= 0 && dir2 < D, ""); + const auto &DI = vect::unit; + const ptrdiff_t di = gf_.delta(DI(dir1)); + return deriv2_1d(mask, &gf_(I), di, dx(dir1)); +} + +template +inline ARITH_INLINE + ARITH_DEVICE ARITH_HOST enable_if_t<(dir1 != dir2), simd > + deriv2(const int vavail, const simdl &mask, const GF3D2 &gf_, + const vect &I, const vec &dx) { + static_assert(dir1 >= 0 && dir1 < D, ""); + static_assert(dir2 >= 0 && dir2 < D, ""); + const auto &DI = vect::unit; + const ptrdiff_t di = gf_.delta(DI(dir1)); + const ptrdiff_t dj = gf_.delta(DI(dir2)); + return deriv2_2d(vavail, mask, &gf_(I), di, dj, dx(dir1), dx(dir2)); +} + +template +inline ARITH_INLINE ARITH_DEVICE ARITH_HOST smat, dim> +deriv2(const int vavail, const simdl &mask, const GF3D2 &gf_, + const vect &I, const vec &dx) { + return {deriv2<0, 0>(vavail, mask, gf_, I, dx), + deriv2<0, 1>(vavail, mask, gf_, I, dx), + deriv2<0, 2>(vavail, mask, gf_, I, dx), + deriv2<1, 1>(vavail, mask, gf_, I, dx), + deriv2<1, 2>(vavail, mask, gf_, I, dx), + deriv2<2, 2>(vavail, mask, gf_, I, dx)}; +} + +template +CCTK_ATTRIBUTE_NOINLINE void +calc_derivs2(const cGH *restrict const cctkGH, const GF3D2 &gf1, + const GF3D5 &gf0, const vec, dim> &dgf0, + const smat, dim> &ddgf0, const GF3D5layout &layout0) { + DECLARE_CCTK_ARGUMENTS; + + typedef simd vreal; + typedef simdl vbool; + constexpr size_t vsize = tuple_size_v; + + const vec dx([&](int a) { return CCTK_DELTA_SPACE(a); }); + + const Loop::GridDescBaseDevice grid(cctkGH); + grid.loop_int_device<0, 0, 0, vsize>( + grid.nghostzones, [=] ARITH_DEVICE(const PointDesc &p) ARITH_INLINE { + const vbool mask = Arith::mask_for_loop_tail(p.i, p.imax); + const int vavail = p.imax - p.i; + const GF3D5index index0(layout0, p.I); + const auto val = gf1(mask, p.I); + gf0.store(mask, index0, val); + const auto dval = deriv(mask, gf1, p.I, dx); + dgf0.store(mask, index0, dval); + const auto ddval = deriv2(vavail, mask, gf1, p.I, dx); + ddgf0.store(mask, index0, ddval); + }); +} + +template +void CCTK_ATTRIBUTE_NOINLINE calc_derivs( + const cGH *restrict const cctkGH, const vec, dim> &gf0_, + const vec, dim> &gf_, const vec, dim>, dim> &dgf_, + const GF3D5layout &layout) { + for (int a = 0; a < 3; ++a) + calc_derivs(cctkGH, gf0_(a), gf_(a), dgf_(a), layout); +} + +template +void CCTK_ATTRIBUTE_NOINLINE calc_derivs2( + const cGH *restrict const cctkGH, const vec, dim> &gf0_, + const vec, dim> &gf_, const vec, dim>, dim> &dgf_, + const vec, dim>, dim> &ddgf_, const GF3D5layout &layout) { + for (int a = 0; a < 3; ++a) + calc_derivs2(cctkGH, gf0_(a), gf_(a), dgf_(a), ddgf_(a), layout); +} + +template +CCTK_ATTRIBUTE_NOINLINE void +calc_copy(const cGH *restrict const cctkGH, const GF3D2 &gf1, + const GF3D5 &gf0, const GF3D5layout &layout0) { + DECLARE_CCTK_ARGUMENTS; + + typedef simd vreal; + typedef simdl vbool; + constexpr size_t vsize = tuple_size_v; + + const vec dx([&](int a) { return CCTK_DELTA_SPACE(a); }); + + const Loop::GridDescBaseDevice grid(cctkGH); + grid.loop_int_device<0, 0, 0, vsize>( + grid.nghostzones, + [=] ARITH_DEVICE ARITH_HOST(const PointDesc &p) ARITH_INLINE { + const vbool mask = Arith::mask_for_loop_tail(p.i, p.imax); + const GF3D5index index0(layout0, p.I); + const auto val = gf1(mask, p.I); + gf0.store(mask, index0, val); + }); +} + +template +void CCTK_ATTRIBUTE_NOINLINE calc_copy(const cGH *restrict const cctkGH, + const vec, dim> &gf0_, + const vec, dim> &gf_, + const GF3D5layout &layout) { + for (int a = 0; a < 3; ++a) + calc_copy(cctkGH, gf0_(a), gf_(a), layout); +} + +template +CCTK_ATTRIBUTE_NOINLINE void calc_derivs( + const cGH *restrict const cctkGH, const smat, dim> &gf0_, + const smat, dim> &gf_, const smat, dim>, dim> &dgf_, + const GF3D5layout &layout) { + for (int a = 0; a < 3; ++a) + for (int b = a; b < 3; ++b) + calc_derivs(cctkGH, gf0_(a, b), gf_(a, b), dgf_(a, b), layout); +} + +template +CCTK_ATTRIBUTE_NOINLINE void calc_derivs2( + const cGH *restrict const cctkGH, const smat, dim> &gf0_, + const smat, dim> &gf_, const smat, dim>, dim> &dgf_, + const smat, dim>, dim> &ddgf_, const GF3D5layout &layout) { + for (int a = 0; a < 3; ++a) + for (int b = a; b < 3; ++b) + calc_derivs2(cctkGH, gf0_(a, b), gf_(a, b), dgf_(a, b), ddgf_(a, b), + layout); +} + +template +inline ARITH_INLINE ARITH_DEVICE ARITH_HOST simd +deriv1d_upwind(const simdl &mask, const T *restrict const var, + const ptrdiff_t di, const simd &vel, const T dx) { + // arXiv:1111.2177 [gr-qc], (71) + if constexpr (deriv_order == 2) { + // if (sign) + // // + [ 0 -1 +1 0 0] + // // + 1/2 [+1 -2 +1 0 0] + // // [+1/2 -2 +3/2 0 0] + // return (1 / T(2) * var[-2 * di] // + // - 2 * var[-di] // + // + 3 / T(2) * var[0]) / + // dx; + // else + // // + [ 0 0 -1 +1 0 ] + // // - 1/2 [ 0 0 +1 -2 +1 ] + // // [ 0 0 -3/2 +2 -1/2] + // return (-3 / T(2) * var[0] // + // + 2 * var[+di] // + // - 1 / T(2) * var[+2 * di]) / + // dx; + + // + 1/2 [+1/2 -2 +3/2 0 0 ] + // + 1/2 [ 0 0 -3/2 +2 -1/2] + // [+1/4 -1 0 +1 -1/4] + const simd symm = + 1 / T(4) * + (maskz_loadu(mask, &var[-2 * di]) - + maskz_loadu(mask, &var[2 * di])) // + - (maskz_loadu(mask, &var[-di]) - maskz_loadu(mask, &var[di])); + // + 1/2 [+1/2 -2 +3/2 0 0 ] + // - 1/2 [ 0 0 -3/2 +2 -1/2] + // [+1/4 -1 +3/2 -1 +1/4] + const simd anti = + 1 / T(4) * + (maskz_loadu(mask, &var[-2 * di]) + + maskz_loadu(mask, &var[2 * di])) // + - (maskz_loadu(mask, &var[-di]) + maskz_loadu(mask, &var[di])) // + + 3 / T(2) * maskz_loadu(mask, &var[0]); + return (vel * symm - fabs(vel) * anti) / dx; + } + if constexpr (deriv_order == 4) { + // A fourth order stencil for a first derivative, shifted by one grid point + + // if (sign) + // return (-1 / T(12) * var[-3 * di] // + // + 1 / T(2) * var[-2 * di] // + // - 3 / T(2) * var[-di] // + // + 5 / T(6) * var[0] // + // + 1 / T(4) * var[+di]) / + // dx; + // else + // return (-1 / T(4) * var[-di] // + // - 5 / T(6) * var[0] // + // + 3 / T(2) * var[+di] // + // - 1 / T(2) * var[+2 * di] // + // + 1 / T(12) * var[+3 * di]) / + // dx; + + const simd symm = + -1 / T(24) * + (maskz_loadu(mask, &var[-3 * di]) - + maskz_loadu(mask, &var[3 * di])) // + + 1 / T(4) * + (maskz_loadu(mask, &var[-2 * di]) - + maskz_loadu(mask, &var[2 * di])) // + - + 7 / T(8) * (maskz_loadu(mask, &var[-di]) - maskz_loadu(mask, &var[di])); + const simd anti = + -1 / T(24) * + (maskz_loadu(mask, &var[-3 * di]) + + maskz_loadu(mask, &var[3 * di])) // + + 1 / T(4) * + (maskz_loadu(mask, &var[-2 * di]) + + maskz_loadu(mask, &var[2 * di])) // + - 5 / T(8) * + (maskz_loadu(mask, &var[-di]) + maskz_loadu(mask, &var[di])) // + + 5 / T(6) * maskz_loadu(mask, &var[0]); + return (vel * symm - fabs(vel) * anti) / dx; + } +} + +template +inline ARITH_INLINE ARITH_DEVICE ARITH_HOST simd +deriv_upwind(const simdl &mask, const GF3D2 &gf_, + const vect &I, const vec, D> &vel, + const vec &dx) { + static_assert(dir >= 0 && dir < D, ""); + const auto &DI = vect::unit; + const ptrdiff_t di = gf_.delta(DI(dir)); + return deriv1d_upwind(mask, &gf_(I), di, vel(dir), dx(dir)); +} + +template +inline ARITH_INLINE ARITH_DEVICE ARITH_HOST simd +deriv_upwind(const simdl &mask, const GF3D2 &gf_, + const vect &I, const vec, dim> &vel, + const vec &dx) { + return deriv_upwind<0>(mask, gf_, I, vel, dx) + + deriv_upwind<1>(mask, gf_, I, vel, dx) + + deriv_upwind<2>(mask, gf_, I, vel, dx); +} + +template +inline ARITH_INLINE ARITH_DEVICE ARITH_HOST simd +deriv1d_diss(const simdl &mask, const T *restrict const var, + const ptrdiff_t di, const T dx) { + if constexpr (deriv_order == 2) + return ((maskz_loadu(mask, &var[-2 * di]) + + maskz_loadu(mask, &var[+2 * di])) // + - + 4 * (maskz_loadu(mask, &var[-di]) + maskz_loadu(mask, &var[+di])) // + + 6 * maskz_loadu(mask, &var[0])) / + dx; + if constexpr (deriv_order == 4) + return ((maskz_loadu(mask, &var[-3 * di]) + + maskz_loadu(mask, &var[+3 * di])) // + - 6 * (maskz_loadu(mask, &var[-2 * di]) + + maskz_loadu(mask, &var[+2 * di])) // + + 15 * (maskz_loadu(mask, &var[-di]) + + maskz_loadu(mask, &var[+di])) // + - 20 * maskz_loadu(mask, &var[0])) / + dx; +} + +using Arith::pown; + +template +inline ARITH_INLINE ARITH_DEVICE ARITH_HOST simd +deriv_diss(const simdl &mask, const GF3D2 &gf_, + const vect &I, const vec &dx) { + static_assert(dir >= 0 && dir < D, ""); + const auto &DI = vect::unit; + const ptrdiff_t di = gf_.delta(DI(dir)); + return deriv1d_diss(mask, &gf_(I), di, dx(dir)); +} + +template +inline ARITH_INLINE ARITH_DEVICE ARITH_HOST simd +diss(const simdl &mask, const GF3D2 &gf_, const vect &I, + const vec &dx) { + // arXiv:gr-qc/0610128, (63), with r=2 + constexpr int diss_order = deriv_order + 2; + constexpr int sign = diss_order % 4 == 0 ? -1 : +1; + return sign / T(pown(2, deriv_order + 2)) * + (deriv_diss<0>(mask, gf_, I, dx) // + + deriv_diss<1>(mask, gf_, I, dx) // + + deriv_diss<2>(mask, gf_, I, dx)); +} + +template +CCTK_ATTRIBUTE_NOINLINE void +apply_upwind_diss(const cGH *restrict const cctkGH, const GF3D2 &gf_, + const vec, dim> &gf_betaG_, + const GF3D2 &gf_rhs_) { + DECLARE_CCTK_ARGUMENTS; + //DECLARE_CCTK_PARAMETERS; + const CCTK_REAL& epsdiss = *(CCTK_REAL*)CCTK_ParameterGet("epsdiss", "Derivs", nullptr); + + const vec dx([&](int a) { return CCTK_DELTA_SPACE(a); }); + + typedef simd vreal; + typedef simdl vbool; + constexpr size_t vsize = tuple_size_v; + + if (epsdiss == 0) { + + const Loop::GridDescBaseDevice grid(cctkGH); + grid.loop_int_device<0, 0, 0, vsize>( + grid.nghostzones, [=] ARITH_DEVICE(const PointDesc &p) ARITH_INLINE { + const vbool mask = mask_for_loop_tail(p.i, p.imax); + const vec betaG = gf_betaG_(mask, p.I); + const vreal rhs_old = gf_rhs_(mask, p.I); + const vreal rhs_new = + rhs_old + deriv_upwind(mask, gf_, p.I, betaG, dx); + gf_rhs_.store(mask, p.I, rhs_new); + }); + + } else { + + const Loop::GridDescBaseDevice grid(cctkGH); + grid.loop_int_device<0, 0, 0, vsize>( + grid.nghostzones, [=] ARITH_DEVICE(const PointDesc &p) ARITH_INLINE { + const vbool mask = mask_for_loop_tail(p.i, p.imax); + const vec betaG = gf_betaG_(mask, p.I); + const vreal rhs_old = gf_rhs_(mask, p.I); + const vreal rhs_new = rhs_old + + deriv_upwind(mask, gf_, p.I, betaG, dx) + + epsdiss * diss(mask, gf_, p.I, dx); + gf_rhs_.store(mask, p.I, rhs_new); + }); + } +} + +template +void CCTK_ATTRIBUTE_NOINLINE calc_copy(const cGH *restrict const cctkGH, + const smat, dim> &gf0_, + const smat, dim> &gf_, + const GF3D5layout &layout) { + for (int a = 0; a < 3; ++a) + for (int b = a; b < 3; ++b) + calc_copy(cctkGH, gf0_(a, b), gf_(a, b), layout); +} + +#endif // #ifndef CARPETX_DERIVS_DERIVS_HXX diff --git a/Derivs/src/derivs.hxx b/Derivs/src/derivs.hxx index 6a51e6e5a..d7a54c26f 100644 --- a/Derivs/src/derivs.hxx +++ b/Derivs/src/derivs.hxx @@ -1,512 +1,519 @@ #ifndef CARPETX_DERIVS_DERIVS_HXX #define CARPETX_DERIVS_DERIVS_HXX +#include +#include #include #include #include #include +#include -#include -#include -#include - +#include +#include +#include +#include #include -// Unified derivative header -// TODO: Make this a runtime parameter -constexpr int deriv_order = 4; +namespace Derivs { //////////////////////////////////////////////////////////////////////////////// -using Arith::pow2; - -template -inline ARITH_INLINE ARITH_DEVICE ARITH_HOST T pow3(const T &x) { - return pow2(x) * x; -} - -//////////////////////////////////////////////////////////////////////////////// +namespace stencils { +using namespace Arith; +using namespace Loop; -using Arith::simd; -using Arith::simdl; +// Stencil coefficients -template -inline ARITH_INLINE ARITH_DEVICE ARITH_HOST simd -deriv1d(const simdl &mask, const T *restrict const var, const ptrdiff_t di, - const T dx) { - const auto load = [&](const int n) { - return maskz_loadu(mask, &var[n * di]) - maskz_loadu(mask, &var[-n * di]); - }; - if constexpr (deriv_order == 2) - return 1 / T(2) * load(1) / dx; - if constexpr (deriv_order == 4) - return (-1 / T(12) * load(2) + 2 / T(3) * load(1)) / dx; - if constexpr (deriv_order == 6) - return (1 / T(60) * load(3) - 3 / T(20) * load(2) + 3 / T(4) * load(1)) / - dx; -} +enum symmetry { none, symmetric, antisymmetric }; -using Loop::GF3D2; -using Arith::vect; -using Loop::dim; -using Arith::vec; - -template -inline ARITH_INLINE ARITH_DEVICE ARITH_HOST simd -deriv(const simdl &mask, const GF3D2 &gf_, const vect &I, - const vec &dx) { - static_assert(dir >= 0 && dir < D, ""); - const auto &DI = vect::unit; - const ptrdiff_t di = gf_.delta(DI(dir)); - return deriv1d(mask, &gf_(I), di, dx(dir)); -} -template -inline ARITH_INLINE ARITH_DEVICE ARITH_HOST vec, dim> -deriv(const simdl &mask, const GF3D2 &gf_, const vect &I, - const vec &dx) { - return {deriv<0>(mask, gf_, I, dx), deriv<1>(mask, gf_, I, dx), - deriv<2>(mask, gf_, I, dx)}; -} +template struct stencil { + static constexpr std::ptrdiff_t N = I1 - I0 + 1; + static_assert(N >= 0, ""); + static_assert(S == none || S == symmetric || S == antisymmetric, ""); -template -inline ARITH_INLINE ARITH_DEVICE ARITH_HOST simd -deriv2_1d(const simdl &mask, const T *restrict const var, const ptrdiff_t di, - const T dx) { - const auto load = [&](const int n) { - return maskz_loadu(mask, &var[n * di]) + maskz_loadu(mask, &var[-n * di]); - }; - const auto load0 = [&]() { return maskz_loadu(mask, &var[0]); }; - if constexpr (deriv_order == 2) - return (load(1) - 2 * load0()) / pow2(dx); - if constexpr (deriv_order == 4) - return (1 / T(12) * load(2) - 4 / T(3) * load(1) + 5 / T(2) * load0()) / - pow2(dx); - if constexpr (deriv_order == 6) - return (1 / T(90) * load(3) - 3 / T(20) * load(2) + 3 / T(2) * load(1) - - 49 / T(18) * load0()) / - pow2(dx); -} + int divisor; + std::array coeffs; -using std::tuple_size_v; -using Arith::min; -using std::array; -using Arith::div_floor; -using Arith::div_ceil; -using Arith::align_ceil; -using Arith::mask_for_loop_tail; - -template -inline ARITH_INLINE ARITH_DEVICE ARITH_HOST simd -deriv2_2d(const int vavail, const simdl &mask, const T *restrict const var, - const ptrdiff_t di, const ptrdiff_t dj, const T dx, const T dy) { - constexpr size_t vsize = tuple_size_v >; - if (di == 1) { - assert(vavail > 0); - constexpr int maxnpoints = deriv_order + 1 + vsize - 1; - const int npoints = deriv_order + 1 + min(int(vsize), vavail) - 1; - array, div_ceil(maxnpoints, int(vsize))> arrx; - for (int i = 0; i < maxnpoints; i += vsize) { - if (i < npoints) { - const simdl mask1 = Arith::mask_for_loop_tail >(i, npoints); - arrx[div_floor(i, int(vsize))] = - deriv1d(mask1, &var[i - deriv_order / 2], dj, dy); + template + inline CCTK_ATTRIBUTE_ALWAYS_INLINE + CCTK_DEVICE CCTK_HOST std::result_of_t + apply(const Array &arr) const { + using R = std::result_of_t; + if constexpr (S == symmetric) { + R r{0}; + for (std::ptrdiff_t n = 0; n < N / 2; ++n) { + const std::ptrdiff_t n1 = N - 1 - n; + r += coeffs[n] * (arr(n + I0) + arr(n1 + I0)); + } + if (N % 2 != 0) { + const std::ptrdiff_t n = N / 2 + 1; + r += coeffs[n] * arr(n + I0); } + r /= divisor; + return std::move(r); } -#ifdef CCTK_DEBUG - for (int i = npoints; i < align_ceil(maxnpoints, int(vsize)); ++i) - ((T *)&arrx[0])[i] = Arith::nan()(); // unused -#endif - const T *const varx = (T *)&arrx[0] + deriv_order / 2; - return deriv1d(mask, varx, 1, dx); - } else { - assert(dj != 1); - array, deriv_order + 1> arrx; - for (int j = -deriv_order / 2; j <= deriv_order / 2; ++j) - if (j == 0) { -#ifdef CCTK_DEBUG - arrx[deriv_order / 2 + j] = Arith::nan >()(); // unused -#endif - } else { - arrx[deriv_order / 2 + j] = deriv1d(mask, &var[j * dj], di, dx); + if constexpr (antisymmetric) { + R r{0}; + for (std::ptrdiff_t n = 0; n < N / 2; ++n) { + const std::ptrdiff_t n1 = N - 1 - n; + r += coeffs[n] * (arr(n + I0) - arr(n1 + I0)); } - const T *const varx = (T *)(&arrx[deriv_order / 2]); - return deriv1d(mask, varx, vsize, dy); + r /= divisor; + return std::move(r); + } + R r{0}; + for (std::ptrdiff_t n = 0; n < N; ++n) + r += coeffs[n] * arr(n + I0); + return std::move(r); } -} +}; -using Loop::GF3D5; -using Loop::GF3D5layout; -using Loop::GF3D5index; -using Arith::smat; -using Loop::PointDesc; +// Interpolate at i = 0 +constexpr stencil<0, 0, symmetric> interp{1, {1}}; +// Derivative at i = 0 +constexpr stencil<-1, +1, antisymmetric> deriv1_o2{2, {-1, 0, +1}}; -template -CCTK_ATTRIBUTE_NOINLINE void -calc_derivs(const cGH *restrict const cctkGH, const GF3D2 &gf1, - const GF3D5 &gf0, const vec, dim> &dgf0, - const GF3D5layout &layout0) { - DECLARE_CCTK_ARGUMENTS; - - typedef simd vreal; - typedef simdl vbool; - constexpr size_t vsize = tuple_size_v; - - const vec dx([&](int a) { return CCTK_DELTA_SPACE(a); }); - - const Loop::GridDescBaseDevice grid(cctkGH); - grid.loop_int_device<0, 0, 0, vsize>( - grid.nghostzones, [=] ARITH_DEVICE(const PointDesc &p) ARITH_INLINE { - const vbool mask = Arith::mask_for_loop_tail(p.i, p.imax); - const GF3D5index index0(layout0, p.I); - const auto val = gf1(mask, p.I); - gf0.store(mask, index0, val); - const auto dval = deriv(mask, gf1, p.I, dx); - dgf0.store(mask, index0, dval); - }); -} +constexpr stencil<-2, +2, antisymmetric> deriv1_o4{12, {-1, +8, 0, -8, +1}}; -using std::enable_if_t; - -template -inline ARITH_INLINE - ARITH_DEVICE ARITH_HOST enable_if_t<(dir1 == dir2), simd > - deriv2(const int vavail, const simdl &mask, const GF3D2 &gf_, - const vect &I, const vec &dx) { - static_assert(dir1 >= 0 && dir1 < D, ""); - static_assert(dir2 >= 0 && dir2 < D, ""); - const auto &DI = vect::unit; - const ptrdiff_t di = gf_.delta(DI(dir1)); - return deriv2_1d(mask, &gf_(I), di, dx(dir1)); -} +constexpr stencil<-1, +1, symmetric> deriv2_o2{1, {-2, +1, -2}}; -template -inline ARITH_INLINE - ARITH_DEVICE ARITH_HOST enable_if_t<(dir1 != dir2), simd > - deriv2(const int vavail, const simdl &mask, const GF3D2 &gf_, - const vect &I, const vec &dx) { - static_assert(dir1 >= 0 && dir1 < D, ""); - static_assert(dir2 >= 0 && dir2 < D, ""); - const auto &DI = vect::unit; - const ptrdiff_t di = gf_.delta(DI(dir1)); - const ptrdiff_t dj = gf_.delta(DI(dir2)); - return deriv2_2d(vavail, mask, &gf_(I), di, dj, dx(dir1), dx(dir2)); -} +constexpr stencil<-2, +2, symmetric> deriv2_o4{12, {-1, +16, -30, +16, -1}}; -template -inline ARITH_INLINE ARITH_DEVICE ARITH_HOST smat, dim> -deriv2(const int vavail, const simdl &mask, const GF3D2 &gf_, - const vect &I, const vec &dx) { - return {deriv2<0, 0>(vavail, mask, gf_, I, dx), - deriv2<0, 1>(vavail, mask, gf_, I, dx), - deriv2<0, 2>(vavail, mask, gf_, I, dx), - deriv2<1, 1>(vavail, mask, gf_, I, dx), - deriv2<1, 2>(vavail, mask, gf_, I, dx), - deriv2<2, 2>(vavail, mask, gf_, I, dx)}; -} +// Interpolate at i = 1/2 +constexpr stencil<-0, +1, symmetric> interp_c_o1{2, {1, 1}}; -template -CCTK_ATTRIBUTE_NOINLINE void -calc_derivs2(const cGH *restrict const cctkGH, const GF3D2 &gf1, - const GF3D5 &gf0, const vec, dim> &dgf0, - const smat, dim> &ddgf0, const GF3D5layout &layout0) { - DECLARE_CCTK_ARGUMENTS; - - typedef simd vreal; - typedef simdl vbool; - constexpr size_t vsize = tuple_size_v; - - const vec dx([&](int a) { return CCTK_DELTA_SPACE(a); }); - - const Loop::GridDescBaseDevice grid(cctkGH); - grid.loop_int_device<0, 0, 0, vsize>( - grid.nghostzones, [=] ARITH_DEVICE(const PointDesc &p) ARITH_INLINE { - const vbool mask = Arith::mask_for_loop_tail(p.i, p.imax); - const int vavail = p.imax - p.i; - const GF3D5index index0(layout0, p.I); - const auto val = gf1(mask, p.I); - gf0.store(mask, index0, val); - const auto dval = deriv(mask, gf1, p.I, dx); - dgf0.store(mask, index0, dval); - const auto ddval = deriv2(vavail, mask, gf1, p.I, dx); - ddgf0.store(mask, index0, ddval); - }); +constexpr stencil<-1, +2, symmetric> interp_c_o3{16, {-1, +9, +9, -1}}; + +constexpr stencil<-2, +3, symmetric> interp_c_o5{ + 256, {+3, -25, +150, +150, -25, +3}}; + +constexpr stencil<-3, +4, symmetric> interp_c_o7{ + 2048, {-5, +49, -245, +1225, +1225, -245, +49, -5}}; + +// Derivative at i = 1/2 +constexpr stencil<-0, +1, antisymmetric> deriv1_c_o2{1, {-1, +1}}; + +constexpr stencil<-1, +2, antisymmetric> deriv1_c_o4{12, {-1, +15, -15, +1}}; + +} // namespace stencils + +namespace detail { +using namespace Arith; +using namespace Loop; + +// Pointwise one-dimensional operators + +template +inline CCTK_ATTRIBUTE_ALWAYS_INLINE CCTK_DEVICE CCTK_HOST simd +interp1d(const simdl &mask, const T *restrict const var) { + return maskz_loadu(mask, var); } -template -void CCTK_ATTRIBUTE_NOINLINE calc_derivs( - const cGH *restrict const cctkGH, const vec, dim> &gf0_, - const vec, dim> &gf_, const vec, dim>, dim> &dgf_, - const GF3D5layout &layout) { - for (int a = 0; a < 3; ++a) - calc_derivs(cctkGH, gf0_(a), gf_(a), dgf_(a), layout); +template > +inline CCTK_ATTRIBUTE_ALWAYS_INLINE + CCTK_DEVICE CCTK_HOST std::enable_if_t + deriv1d(const TS var, const T dx) { + const T c1 = 1 / (2 * dx); + return c1 * (var(1) - var(-1)); } -template -void CCTK_ATTRIBUTE_NOINLINE calc_derivs2( - const cGH *restrict const cctkGH, const vec, dim> &gf0_, - const vec, dim> &gf_, const vec, dim>, dim> &dgf_, - const vec, dim>, dim> &ddgf_, const GF3D5layout &layout) { - for (int a = 0; a < 3; ++a) - calc_derivs2(cctkGH, gf0_(a), gf_(a), dgf_(a), ddgf_(a), layout); +template > +inline CCTK_ATTRIBUTE_ALWAYS_INLINE + CCTK_DEVICE CCTK_HOST std::enable_if_t + deriv1d(const TS var, const T dx) { + const T c1 = 2 / (3 * dx); + const T c2 = -1 / (12 * dx); + return c2 * (var(2) - var(-2)) + c1 * (var(1) - var(-1)); } -template -CCTK_ATTRIBUTE_NOINLINE void -calc_copy(const cGH *restrict const cctkGH, const GF3D2 &gf1, - const GF3D5 &gf0, const GF3D5layout &layout0) { - DECLARE_CCTK_ARGUMENTS; - - typedef simd vreal; - typedef simdl vbool; - constexpr size_t vsize = tuple_size_v; - - const vec dx([&](int a) { return CCTK_DELTA_SPACE(a); }); - - const Loop::GridDescBaseDevice grid(cctkGH); - grid.loop_int_device<0, 0, 0, vsize>( - grid.nghostzones, - [=] ARITH_DEVICE ARITH_HOST(const PointDesc &p) ARITH_INLINE { - const vbool mask = Arith::mask_for_loop_tail(p.i, p.imax); - const GF3D5index index0(layout0, p.I); - const auto val = gf1(mask, p.I); - gf0.store(mask, index0, val); - }); +template +inline CCTK_ATTRIBUTE_ALWAYS_INLINE + CCTK_DEVICE CCTK_HOST std::enable_if_t > + deriv1d_upwind(const simdl &mask, const T *restrict const var, + const std::ptrdiff_t di, const simd &vel, const T dx) { + // arXiv:1111.2177 [gr-qc], (71) + + // if (sign) + // // + [ 0 -1 +1 0 0] + // // + 1/2 [+1 -2 +1 0 0] + // // [+1/2 -2 +3/2 0 0] + // return (1 / T(2) * var[-2 * di] // + // - 2 * var[-di] // + // + 3 / T(2) * var[0]) / + // dx; + // else + // // + [ 0 0 -1 +1 0 ] + // // - 1/2 [ 0 0 +1 -2 +1 ] + // // [ 0 0 -3/2 +2 -1/2] + // return (-3 / T(2) * var[0] // + // + 2 * var[+di] // + // - 1 / T(2) * var[+2 * di]) / + // dx; + + // + 1/2 [+1/2 -2 +3/2 0 0 ] + // + 1/2 [ 0 0 -3/2 +2 -1/2] + // [+1/4 -1 0 +1 -1/4] + constexpr T c1s = 1; + constexpr T c2s = -1 / T(4); + const simd symm = + c2s * (maskz_loadu(mask, &var[2 * di]) - + maskz_loadu(mask, &var[-2 * di])) // + + c1s(maskz_loadu(mask, &var[di]) - maskz_loadu(mask, &var[-di])); + // + 1/2 [+1/2 -2 +3/2 0 0 ] + // - 1/2 [ 0 0 -3/2 +2 -1/2] + // [+1/4 -1 +3/2 -1 +1/4] + constexpr T c0a = 3 / T(2); + constexpr T c1a = -1; + constexpr T c2a = 1 / T(4); + const simd anti = + c2a * (maskz_loadu(mask, &var[2 * di]) + + maskz_loadu(mask, &var[-2 * di])) // + + c1a(maskz_loadu(mask, &var[di]) + maskz_loadu(mask, &var[-di])) // + + c0a * maskz_loadu(mask, &var[0]); + using std::fabs; + return (vel * symm - fabs(vel) * anti) / dx; } -template -void CCTK_ATTRIBUTE_NOINLINE calc_copy(const cGH *restrict const cctkGH, - const vec, dim> &gf0_, - const vec, dim> &gf_, - const GF3D5layout &layout) { - for (int a = 0; a < 3; ++a) - calc_copy(cctkGH, gf0_(a), gf_(a), layout); +template +inline CCTK_ATTRIBUTE_ALWAYS_INLINE + CCTK_DEVICE CCTK_HOST std::enable_if_t > + deriv1d_upwind(const simdl &mask, const T *restrict const var, + const std::ptrdiff_t di, const simd &vel, const T dx) { + // arXiv:1111.2177 [gr-qc], (71) + + // A fourth order stencil for a first derivative, shifted by one grid point + + // if (sign) + // return (-1 / T(12) * var[-3 * di] // + // + 1 / T(2) * var[-2 * di] // + // - 3 / T(2) * var[-di] // + // + 5 / T(6) * var[0] // + // + 1 / T(4) * var[+di]) / + // dx; + // else + // return (-1 / T(4) * var[-di] // + // - 5 / T(6) * var[0] // + // + 3 / T(2) * var[+di] // + // - 1 / T(2) * var[+2 * di] // + // + 1 / T(12) * var[+3 * di]) / + // dx; + + constexpr T c1s = 7 / T(8); + constexpr T c2s = -1 / T(4); + constexpr T c3s = 1 / T(24); + const simd symm = + c3s * (maskz_loadu(mask, &var[3 * di]) - + maskz_loadu(mask, &var[-3 * di])) // + + c2s * (maskz_loadu(mask, &var[2 * di]) - + maskz_loadu(mask, &var[-2 * di])) // + + c1s * (maskz_loadu(mask, &var[di]) - maskz_loadu(mask, &var[-di])); + constexpr T c0a = 5 / T(6); + constexpr T c1a = -5 / T(8); + constexpr T c2a = 1 / T(4); + constexpr T c3a = -1 / T(24); + const simd anti = + c3a * (maskz_loadu(mask, &var[3 * di]) + + maskz_loadu(mask, &var[-3 * di])) // + + c2a * (maskz_loadu(mask, &var[2 * di]) + + maskz_loadu(mask, &var[-2 * di])) // + + c1a * (maskz_loadu(mask, &var[di]) + maskz_loadu(mask, &var[-di])) // + + c0a * maskz_loadu(mask, &var[0]); + using std::fabs; + return (vel * symm - fabs(vel) * anti) / dx; } -template -CCTK_ATTRIBUTE_NOINLINE void calc_derivs( - const cGH *restrict const cctkGH, const smat, dim> &gf0_, - const smat, dim> &gf_, const smat, dim>, dim> &dgf_, - const GF3D5layout &layout) { - for (int a = 0; a < 3; ++a) - for (int b = a; b < 3; ++b) - calc_derivs(cctkGH, gf0_(a, b), gf_(a, b), dgf_(a, b), layout); +template > +inline CCTK_ATTRIBUTE_ALWAYS_INLINE + CCTK_DEVICE CCTK_HOST std::enable_if_t + deriv2_1d(const TS var, const T dx) { + // constexpr T c0 = -2 / pow2(dx); + // constexpr T c1 = 1 / pow2(dx); + // return c1 * (var(-1) + var(1)) + c0 * var(0); + const T c0 = 1 / pow2(dx); + return c0 * ((var(1) - var(0)) - (var(0) - var(-1))); } -template -CCTK_ATTRIBUTE_NOINLINE void calc_derivs2( - const cGH *restrict const cctkGH, const smat, dim> &gf0_, - const smat, dim> &gf_, const smat, dim>, dim> &dgf_, - const smat, dim>, dim> &ddgf_, const GF3D5layout &layout) { - for (int a = 0; a < 3; ++a) - for (int b = a; b < 3; ++b) - calc_derivs2(cctkGH, gf0_(a, b), gf_(a, b), dgf_(a, b), ddgf_(a, b), - layout); +template > +inline CCTK_ATTRIBUTE_ALWAYS_INLINE + CCTK_DEVICE CCTK_HOST std::enable_if_t + deriv2_1d(const TS var, const T dx) { + // constexpr T c0 = -5 / T(2); + // constexpr T c1 = 4 / T(3); + // constexpr T c2 = -1 / T(12); + // return (c2 * (var(-2) + var(2)) + c1 * (var(-1) + var(1)) + c0 * var(0)) / + // pow2(dx); + const T c0 = 15 / (12 * pow2(dx)); + const T c1 = -1 / (12 * pow2(dx)); + return c1 * ((var(4) - var(1)) - (var(3) - var(0))) + + c0 * ((var(3) - var(2)) - (var(2) - var(1))); } -template -inline ARITH_INLINE ARITH_DEVICE ARITH_HOST simd -deriv1d_upwind(const simdl &mask, const T *restrict const var, - const ptrdiff_t di, const simd &vel, const T dx) { - // arXiv:1111.2177 [gr-qc], (71) - if constexpr (deriv_order == 2) { - // if (sign) - // // + [ 0 -1 +1 0 0] - // // + 1/2 [+1 -2 +1 0 0] - // // [+1/2 -2 +3/2 0 0] - // return (1 / T(2) * var[-2 * di] // - // - 2 * var[-di] // - // + 3 / T(2) * var[0]) / - // dx; - // else - // // + [ 0 0 -1 +1 0 ] - // // - 1/2 [ 0 0 +1 -2 +1 ] - // // [ 0 0 -3/2 +2 -1/2] - // return (-3 / T(2) * var[0] // - // + 2 * var[+di] // - // - 1 / T(2) * var[+2 * di]) / - // dx; - - // + 1/2 [+1/2 -2 +3/2 0 0 ] - // + 1/2 [ 0 0 -3/2 +2 -1/2] - // [+1/4 -1 0 +1 -1/4] - const simd symm = - 1 / T(4) * - (maskz_loadu(mask, &var[-2 * di]) - - maskz_loadu(mask, &var[2 * di])) // - - (maskz_loadu(mask, &var[-di]) - maskz_loadu(mask, &var[di])); - // + 1/2 [+1/2 -2 +3/2 0 0 ] - // - 1/2 [ 0 0 -3/2 +2 -1/2] - // [+1/4 -1 +3/2 -1 +1/4] - const simd anti = - 1 / T(4) * - (maskz_loadu(mask, &var[-2 * di]) + - maskz_loadu(mask, &var[2 * di])) // - - (maskz_loadu(mask, &var[-di]) + maskz_loadu(mask, &var[di])) // - + 3 / T(2) * maskz_loadu(mask, &var[0]); - return (vel * symm - fabs(vel) * anti) / dx; - } - if constexpr (deriv_order == 4) { - // A fourth order stencil for a first derivative, shifted by one grid point - - // if (sign) - // return (-1 / T(12) * var[-3 * di] // - // + 1 / T(2) * var[-2 * di] // - // - 3 / T(2) * var[-di] // - // + 5 / T(6) * var[0] // - // + 1 / T(4) * var[+di]) / - // dx; - // else - // return (-1 / T(4) * var[-di] // - // - 5 / T(6) * var[0] // - // + 3 / T(2) * var[+di] // - // - 1 / T(2) * var[+2 * di] // - // + 1 / T(12) * var[+3 * di]) / - // dx; - - const simd symm = - -1 / T(24) * - (maskz_loadu(mask, &var[-3 * di]) - - maskz_loadu(mask, &var[3 * di])) // - + 1 / T(4) * - (maskz_loadu(mask, &var[-2 * di]) - - maskz_loadu(mask, &var[2 * di])) // - - - 7 / T(8) * (maskz_loadu(mask, &var[-di]) - maskz_loadu(mask, &var[di])); - const simd anti = - -1 / T(24) * - (maskz_loadu(mask, &var[-3 * di]) + - maskz_loadu(mask, &var[3 * di])) // - + 1 / T(4) * - (maskz_loadu(mask, &var[-2 * di]) + - maskz_loadu(mask, &var[2 * di])) // - - 5 / T(8) * - (maskz_loadu(mask, &var[-di]) + maskz_loadu(mask, &var[di])) // - + 5 / T(6) * maskz_loadu(mask, &var[0]); - return (vel * symm - fabs(vel) * anti) / dx; +template > +inline CCTK_ATTRIBUTE_ALWAYS_INLINE CCTK_DEVICE CCTK_HOST R +deriv2_2d(const TS var, const T dx, const T dy) { + // We assume that the x-direction might be special since it might + // be SIMD-vectorized. We assume that the y-direction is not + // SIMD-vectorized. + + // Calculate y-derivative first. + // (If we wanted to calculate the x-derivative first, then we would + // be difficult to determine the extent of the `n`-loop, since it + // needs to include enough room for the SIMD y-derivative later.) + static_assert(sizeof(R) % sizeof(T) == 0, ""); + constexpr std::ptrdiff_t vsize = sizeof(R) / sizeof(T); + constexpr std::ptrdiff_t ndyvars = + align_ceil(std::ptrdiff_t(2 * deriv_order + 1), vsize); + std::array dyvar; +#ifdef CCTK_DEBUG + for (std::ptrdiff_t n = 0; n < ndyvars; ++n) + dyvar[n] = Arith::nan()(); +#endif + for (std::ptrdiff_t n = 0; n < ndyvars; ++n) { + std::ptrdiff_t di = vsize * n - deriv_order; + if (vsize == 1 && di == 0) + continue; + dyvar[n] = deriv1d( + [&](int dj) CCTK_ATTRIBUTE_ALWAYS_INLINE { return var(di, dj); }, dy); } -} -template -inline ARITH_INLINE ARITH_DEVICE ARITH_HOST simd -deriv_upwind(const simdl &mask, const GF3D2 &gf_, - const vect &I, const vec, D> &vel, - const vec &dx) { - static_assert(dir >= 0 && dir < D, ""); - const auto &DI = vect::unit; - const ptrdiff_t di = gf_.delta(DI(dir)); - return deriv1d_upwind(mask, &gf_(I), di, vel(dir), dx(dir)); + // Calculate x-derivative next + return deriv1d( + [&](int di) + CCTK_ATTRIBUTE_ALWAYS_INLINE { return dyvar[di + deriv_order]; }, + dx); } -template -inline ARITH_INLINE ARITH_DEVICE ARITH_HOST simd -deriv_upwind(const simdl &mask, const GF3D2 &gf_, - const vect &I, const vec, dim> &vel, - const vec &dx) { - return deriv_upwind<0>(mask, gf_, I, vel, dx) + - deriv_upwind<1>(mask, gf_, I, vel, dx) + - deriv_upwind<2>(mask, gf_, I, vel, dx); +template +inline CCTK_ATTRIBUTE_ALWAYS_INLINE + CCTK_DEVICE CCTK_HOST std::enable_if_t > + diss1d(const simdl &mask, const T *restrict const var, + const std::ptrdiff_t di, const T dx) { + constexpr T c0 = 6; + constexpr T c1 = -4; + constexpr T c2 = 1; + return (c2 * (maskz_loadu(mask, &var[2 * di]) + + maskz_loadu(mask, &var[-2 * di])) // + + c1 * (maskz_loadu(mask, &var[di]) + maskz_loadu(mask, &var[-di])) // + + c0 * maskz_loadu(mask, &var[0])) / + dx; } -template -inline ARITH_INLINE ARITH_DEVICE ARITH_HOST simd -deriv1d_diss(const simdl &mask, const T *restrict const var, - const ptrdiff_t di, const T dx) { - if constexpr (deriv_order == 2) - return ((maskz_loadu(mask, &var[-2 * di]) + - maskz_loadu(mask, &var[+2 * di])) // - - - 4 * (maskz_loadu(mask, &var[-di]) + maskz_loadu(mask, &var[+di])) // - + 6 * maskz_loadu(mask, &var[0])) / - dx; - if constexpr (deriv_order == 4) - return ((maskz_loadu(mask, &var[-3 * di]) + - maskz_loadu(mask, &var[+3 * di])) // - - 6 * (maskz_loadu(mask, &var[-2 * di]) + - maskz_loadu(mask, &var[+2 * di])) // - + 15 * (maskz_loadu(mask, &var[-di]) + - maskz_loadu(mask, &var[+di])) // - - 20 * maskz_loadu(mask, &var[0])) / - dx; +template +inline CCTK_ATTRIBUTE_ALWAYS_INLINE + CCTK_DEVICE CCTK_HOST std::enable_if_t > + diss1d(const simdl &mask, const T *restrict const var, + const std::ptrdiff_t di, const T dx) { + constexpr T c0 = -20; + constexpr T c1 = 15; + constexpr T c2 = -6; + constexpr T c3 = 1; + return (c3 * (maskz_loadu(mask, &var[3 * di]) + + maskz_loadu(mask, &var[-3 * di])) // + + c2 * *(maskz_loadu(mask, &var[2 * di]) + + maskz_loadu(mask, &var[-2 * di])) // + + c1 * (maskz_loadu(mask, &var[di]) + maskz_loadu(mask, &var[-di])) // + + c0 * maskz_loadu(mask, &var[0])) / + dx; } -using Arith::pown; +} // namespace detail + +//////////////////////////////////////////////////////////////////////////////// -template -inline ARITH_INLINE ARITH_DEVICE ARITH_HOST simd -deriv_diss(const simdl &mask, const GF3D2 &gf_, - const vect &I, const vec &dx) { - static_assert(dir >= 0 && dir < D, ""); - const auto &DI = vect::unit; - const ptrdiff_t di = gf_.delta(DI(dir)); - return deriv1d_diss(mask, &gf_(I), di, dx(dir)); +// Pointwise multi-dimensional derivative operators + +template +inline CCTK_ATTRIBUTE_ALWAYS_INLINE + CCTK_DEVICE CCTK_HOST Arith::vec, Loop::dim> + calc_deriv(const Loop::GF3D5 &gf, const Arith::simdl &mask, + const Loop::GF3D5layout &layout, + const Arith::vect &I, + const Arith::vect &dx) { + using namespace Arith; + using namespace Loop; + // We use explicit index calculations to avoid unnecessary integer + // multiplications + const T *restrict const ptr = &gf(layout, I); + const std::array offsets{ + layout.delta(1, 0, 0), + layout.delta(0, 1, 0), + layout.delta(0, 0, 1), + }; + return { + detail::deriv1d( + [&](int di) CCTK_ATTRIBUTE_ALWAYS_INLINE { + return maskz_loadu(mask, &ptr[di * offsets[0]]); + }, + dx[0]), + detail::deriv1d( + [&](int di) CCTK_ATTRIBUTE_ALWAYS_INLINE { + return maskz_loadu(mask, &ptr[di * offsets[1]]); + }, + dx[1]), + detail::deriv1d( + [&](int di) CCTK_ATTRIBUTE_ALWAYS_INLINE { + return maskz_loadu(mask, &ptr[di * offsets[2]]); + }, + dx[2]), + }; } -template -inline ARITH_INLINE ARITH_DEVICE ARITH_HOST simd -diss(const simdl &mask, const GF3D2 &gf_, const vect &I, - const vec &dx) { - // arXiv:gr-qc/0610128, (63), with r=2 - constexpr int diss_order = deriv_order + 2; - constexpr int sign = diss_order % 4 == 0 ? -1 : +1; - return sign / T(pown(2, deriv_order + 2)) * - (deriv_diss<0>(mask, gf_, I, dx) // - + deriv_diss<1>(mask, gf_, I, dx) // - + deriv_diss<2>(mask, gf_, I, dx)); +template +inline CCTK_ATTRIBUTE_ALWAYS_INLINE + CCTK_DEVICE CCTK_HOST Arith::vec + calc_deriv(const Loop::GF3D5 &gf, const Loop::GF3D5layout &layout, + const Arith::vect &I, + const Arith::vect &dx) { + using namespace Arith; + using namespace Loop; + // We use explicit index calculations to avoid unnecessary integer + // multiplications + const T *restrict const ptr = &gf(layout, I); + const std::array offsets{ + layout.delta(1, 0, 0), + layout.delta(0, 1, 0), + layout.delta(0, 0, 1), + }; + return { + detail::deriv1d( + [&](int di) + CCTK_ATTRIBUTE_ALWAYS_INLINE { return ptr[di * offsets[0]]; }, + dx[0]), + detail::deriv1d( + [&](int di) + CCTK_ATTRIBUTE_ALWAYS_INLINE { return ptr[di * offsets[1]]; }, + dx[1]), + detail::deriv1d( + [&](int di) + CCTK_ATTRIBUTE_ALWAYS_INLINE { return ptr[di * offsets[2]]; }, + dx[2]), + }; } -template -CCTK_ATTRIBUTE_NOINLINE void -apply_upwind_diss(const cGH *restrict const cctkGH, const GF3D2 &gf_, - const vec, dim> &gf_betaG_, - const GF3D2 &gf_rhs_) { - DECLARE_CCTK_ARGUMENTS; - //DECLARE_CCTK_PARAMETERS; - const CCTK_REAL& epsdiss = *(CCTK_REAL*)CCTK_ParameterGet("epsdiss", "Derivs", nullptr); - - const vec dx([&](int a) { return CCTK_DELTA_SPACE(a); }); - - typedef simd vreal; - typedef simdl vbool; - constexpr size_t vsize = tuple_size_v; - - if (epsdiss == 0) { - - const Loop::GridDescBaseDevice grid(cctkGH); - grid.loop_int_device<0, 0, 0, vsize>( - grid.nghostzones, [=] ARITH_DEVICE(const PointDesc &p) ARITH_INLINE { - const vbool mask = mask_for_loop_tail(p.i, p.imax); - const vec betaG = gf_betaG_(mask, p.I); - const vreal rhs_old = gf_rhs_(mask, p.I); - const vreal rhs_new = - rhs_old + deriv_upwind(mask, gf_, p.I, betaG, dx); - gf_rhs_.store(mask, p.I, rhs_new); - }); - - } else { - - const Loop::GridDescBaseDevice grid(cctkGH); - grid.loop_int_device<0, 0, 0, vsize>( - grid.nghostzones, [=] ARITH_DEVICE(const PointDesc &p) ARITH_INLINE { - const vbool mask = mask_for_loop_tail(p.i, p.imax); - const vec betaG = gf_betaG_(mask, p.I); - const vreal rhs_old = gf_rhs_(mask, p.I); - const vreal rhs_new = rhs_old + - deriv_upwind(mask, gf_, p.I, betaG, dx) + - epsdiss * diss(mask, gf_, p.I, dx); - gf_rhs_.store(mask, p.I, rhs_new); - }); - } +template +inline CCTK_ATTRIBUTE_ALWAYS_INLINE + CCTK_DEVICE CCTK_HOST Arith::smat, Loop::dim> + calc_deriv2(const Loop::GF3D5 &gf, const Arith::simdl &mask, + const Loop::GF3D5layout &layout, + const Arith::vect &I, + const Arith::vect &dx) { + using namespace Arith; + using namespace Loop; + // We use explicit index calculations to avoid unnecessary integer + // multiplications + const T *restrict const ptr = &gf(layout, I); + const std::array offsets{ + layout.delta(1, 0, 0), + layout.delta(0, 1, 0), + layout.delta(0, 0, 1), + }; + return { + detail::deriv2_1d( + [&](int di) CCTK_ATTRIBUTE_ALWAYS_INLINE { + return maskz_loadu(mask, &ptr[di * offsets[0]]); + }, + dx[0]), + detail::deriv2_2d( + [&](int di, int dj) CCTK_ATTRIBUTE_ALWAYS_INLINE { + return maskz_loadu(mask, &ptr[di * offsets[0] + dj * offsets[1]]); + }, + dx[0], dx[1]), + detail::deriv2_2d( + [&](int di, int dj) CCTK_ATTRIBUTE_ALWAYS_INLINE { + return maskz_loadu(mask, &ptr[di * offsets[0] + dj * offsets[2]]); + }, + dx[0], dx[2]), + detail::deriv2_1d( + [&](int di) CCTK_ATTRIBUTE_ALWAYS_INLINE { + return maskz_loadu(mask, &ptr[di * offsets[1]]); + }, + dx[1]), + detail::deriv2_2d( + [&](int di, int dj) CCTK_ATTRIBUTE_ALWAYS_INLINE { + return maskz_loadu(mask, &ptr[di * offsets[1] + dj * offsets[2]]); + }, + dx[1], dx[2]), + detail::deriv2_1d( + [&](int di) CCTK_ATTRIBUTE_ALWAYS_INLINE { + return maskz_loadu(mask, &ptr[di * offsets[2]]); + }, + dx[2]), + }; } -template -void CCTK_ATTRIBUTE_NOINLINE calc_copy(const cGH *restrict const cctkGH, - const smat, dim> &gf0_, - const smat, dim> &gf_, - const GF3D5layout &layout) { - for (int a = 0; a < 3; ++a) - for (int b = a; b < 3; ++b) - calc_copy(cctkGH, gf0_(a, b), gf_(a, b), layout); +template +inline CCTK_ATTRIBUTE_ALWAYS_INLINE + CCTK_DEVICE CCTK_HOST Arith::smat + calc_deriv2(const Loop::GF3D5 &gf, const Loop::GF3D5layout &layout, + const Arith::vect &I, + const Arith::vect &dx) { + using namespace Arith; + using namespace Loop; + // We use explicit index calculations to avoid unnecessary integer + // multiplications + const T *restrict const ptr = &gf(layout, I); + const std::array offsets{ + layout.delta(1, 0, 0), + layout.delta(0, 1, 0), + layout.delta(0, 0, 1), + }; + return { + detail::deriv2_1d( + [&](int di) + CCTK_ATTRIBUTE_ALWAYS_INLINE { return ptr[di * offsets[0]]; }, + dx[0]), + detail::deriv2_2d( + [&](int di, int dj) CCTK_ATTRIBUTE_ALWAYS_INLINE { + return ptr[di * offsets[0] + dj * offsets[1]]; + }, + dx[0], dx[1]), + detail::deriv2_2d( + [&](int di, int dj) CCTK_ATTRIBUTE_ALWAYS_INLINE { + return ptr[di * offsets[0] + dj * offsets[2]]; + }, + dx[0], dx[2]), + detail::deriv2_1d( + [&](int di) + CCTK_ATTRIBUTE_ALWAYS_INLINE { return ptr[di * offsets[1]]; }, + dx[1]), + detail::deriv2_2d( + [&](int di, int dj) CCTK_ATTRIBUTE_ALWAYS_INLINE { + return ptr[di * offsets[1] + dj * offsets[2]]; + }, + dx[1], dx[2]), + detail::deriv2_1d( + [&](int di) + CCTK_ATTRIBUTE_ALWAYS_INLINE { return ptr[di * offsets[2]]; }, + dx[2]), + }; } +//////////////////////////////////////////////////////////////////////////////// + +// Tile-based multi-dimensional operators + +template +CCTK_ATTRIBUTE_NOINLINE void +calc_derivs(const Arith::vec, Loop::dim> &dgf, + const Loop::GridDescBaseDevice &grid, + const Loop::GF3D5 &gf, const Loop::GF3D5layout layout, + const Arith::vect dx, const int deriv_order); + +template +CCTK_ATTRIBUTE_NOINLINE void +calc_derivs2(const Arith::vec, Loop::dim> &dgf, + const Arith::smat, Loop::dim> &ddgf, + const Loop::GridDescBaseDevice &grid, + const Loop::GF3D5 &gf, const Loop::GF3D5layout layout, + const Arith::vect dx, const int deriv_order); + +} // namespace Derivs + #endif // #ifndef CARPETX_DERIVS_DERIVS_HXX diff --git a/Derivs/src/make.code.defn b/Derivs/src/make.code.defn index 77206c7e9..bf1bff1c3 100644 --- a/Derivs/src/make.code.defn +++ b/Derivs/src/make.code.defn @@ -1,7 +1,7 @@ # Main make.code.defn file for thorn Derivs # Source files in this directory -SRCS = +SRCS = derivs.cxx # Subdirectories containing source files SUBDIRS = From d0a0b80cb0d604fa657c2e0bee2ead68585772f9 Mon Sep 17 00:00:00 2001 From: "Steven R. Brandt" Date: Mon, 24 Jul 2023 15:37:15 -0500 Subject: [PATCH 4/7] Rename Derivs/src/derivs-spacetimex.hxx -> Derivs/src/derivs_spacetimex.hxx --- Derivs/interface.ccl | 1 + Derivs/src/{derivs-spacetimex.hxx => derivs_spacetimex.hxx} | 0 2 files changed, 1 insertion(+) rename Derivs/src/{derivs-spacetimex.hxx => derivs_spacetimex.hxx} (100%) diff --git a/Derivs/interface.ccl b/Derivs/interface.ccl index 716cd4167..de6a87cd5 100644 --- a/Derivs/interface.ccl +++ b/Derivs/interface.ccl @@ -3,6 +3,7 @@ IMPLEMENTS: Derivs INCLUDES HEADER: derivs.hxx IN derivs.hxx +INCLUDES HEADER: derivs_spacetimex.hxx IN derivs_spacetimex.hxx USES INCLUDE HEADER: defs.hxx USES INCLUDE HEADER: div.hxx diff --git a/Derivs/src/derivs-spacetimex.hxx b/Derivs/src/derivs_spacetimex.hxx similarity index 100% rename from Derivs/src/derivs-spacetimex.hxx rename to Derivs/src/derivs_spacetimex.hxx From 6adcf640056114c9e52b0b18b8e3739157ebc221 Mon Sep 17 00:00:00 2001 From: "Steven R. Brandt" Date: Mon, 24 Jul 2023 15:37:49 -0500 Subject: [PATCH 5/7] Restore derivs.cxx --- Derivs/src/derivs.cxx | 150 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 150 insertions(+) create mode 100644 Derivs/src/derivs.cxx diff --git a/Derivs/src/derivs.cxx b/Derivs/src/derivs.cxx new file mode 100644 index 000000000..43dc63eef --- /dev/null +++ b/Derivs/src/derivs.cxx @@ -0,0 +1,150 @@ +#include "derivs.hxx" + +namespace Derivs { +using namespace Arith; +using namespace Loop; + +//////////////////////////////////////////////////////////////////////////////// + +// Tile-based multi-dimensional derivative operators + +template +CCTK_ATTRIBUTE_NOINLINE void +calc_derivs(const vec, dim> &dgf, const GridDescBaseDevice &grid, + const GF3D5 &gf, const GF3D5layout layout, + const vect dx, const int deriv_order) { + using vreal = simd; + using vbool = simdl; + constexpr std::size_t vsize = std::tuple_size_v; + + switch (deriv_order) { + + case 2: + grid.loop_int_device( + grid.nghostzones, + [=] CCTK_DEVICE(const PointDesc &p) CCTK_ATTRIBUTE_ALWAYS_INLINE { + const vbool mask = mask_for_loop_tail(p.i, p.imax); + const GF3D5index index(layout, p.I); + const auto dval = calc_deriv<2>(gf, mask, layout, p.I, dx); + dgf.store(mask, index, dval); + }); + break; + + case 4: + grid.loop_int_device( + grid.nghostzones, + [=] CCTK_DEVICE(const PointDesc &p) CCTK_ATTRIBUTE_ALWAYS_INLINE { + const vbool mask = mask_for_loop_tail(p.i, p.imax); + const GF3D5index index(layout, p.I); + const auto dval = calc_deriv<4>(gf, mask, layout, p.I, dx); + dgf.store(mask, index, dval); + }); + break; + + default: + CCTK_VERROR("Unsupported derivative order %d", deriv_order); + } +} + +template +CCTK_ATTRIBUTE_NOINLINE void +calc_derivs2(const vec, dim> &dgf, const smat, dim> &ddgf, + const GridDescBaseDevice &grid, const GF3D5 &gf, + const GF3D5layout layout, const vect dx, + const int deriv_order) { + using vreal = simd; + using vbool = simdl; + constexpr std::size_t vsize = std::tuple_size_v; + + switch (deriv_order) { + + case 2: + grid.loop_int_device( + grid.nghostzones, + [=] CCTK_DEVICE(const PointDesc &p) CCTK_ATTRIBUTE_ALWAYS_INLINE { + const vbool mask = mask_for_loop_tail(p.i, p.imax); + const GF3D5index index(layout, p.I); + const auto dval = calc_deriv<2>(gf, mask, layout, p.I, dx); + const auto ddval = calc_deriv2<2>(gf, mask, layout, p.I, dx); + dgf.store(mask, index, dval); + ddgf.store(mask, index, ddval); + }); + break; + + case 4: + grid.loop_int_device( + grid.nghostzones, + [=] CCTK_DEVICE(const PointDesc &p) CCTK_ATTRIBUTE_ALWAYS_INLINE { + const vbool mask = mask_for_loop_tail(p.i, p.imax); + const GF3D5index index(layout, p.I); + const auto dval = calc_deriv<4>(gf, mask, layout, p.I, dx); + const auto ddval = calc_deriv2<4>(gf, mask, layout, p.I, dx); + dgf.store(mask, index, dval); + ddgf.store(mask, index, ddval); + }); + break; + + default: + CCTK_VERROR("Unsupported derivative order %d", deriv_order); + } +} + +//////////////////////////////////////////////////////////////////////////////// + +// Template instantiations + +using T = CCTK_REAL; + +template CCTK_DEVICE CCTK_HOST Arith::vec, Loop::dim> +calc_deriv<2>(const Loop::GF3D5 &gf, const Arith::simdl &mask, + const Loop::GF3D5layout &layout, + const Arith::vect &I, + const Arith::vect &dx); +template CCTK_DEVICE CCTK_HOST Arith::vec, Loop::dim> +calc_deriv<4>(const Loop::GF3D5 &gf, const Arith::simdl &mask, + const Loop::GF3D5layout &layout, + const Arith::vect &I, + const Arith::vect &dx); + +template CCTK_DEVICE CCTK_HOST Arith::vec +calc_deriv<2>(const Loop::GF3D5 &gf, const Loop::GF3D5layout &layout, + const Arith::vect &I, + const Arith::vect &dx); +template CCTK_DEVICE CCTK_HOST Arith::vec +calc_deriv<4>(const Loop::GF3D5 &gf, const Loop::GF3D5layout &layout, + const Arith::vect &I, + const Arith::vect &dx); + +template CCTK_DEVICE CCTK_HOST Arith::smat, Loop::dim> +calc_deriv2<2>(const Loop::GF3D5 &gf, const Arith::simdl &mask, + const Loop::GF3D5layout &layout, + const Arith::vect &I, + const Arith::vect &dx); +template CCTK_DEVICE CCTK_HOST Arith::smat, Loop::dim> +calc_deriv2<4>(const Loop::GF3D5 &gf, const Arith::simdl &mask, + const Loop::GF3D5layout &layout, + const Arith::vect &I, + const Arith::vect &dx); + +template CCTK_DEVICE CCTK_HOST Arith::smat +calc_deriv2<2>(const Loop::GF3D5 &gf, const Loop::GF3D5layout &layout, + const Arith::vect &I, + const Arith::vect &dx); +template CCTK_DEVICE CCTK_HOST Arith::smat +calc_deriv2<4>(const Loop::GF3D5 &gf, const Loop::GF3D5layout &layout, + const Arith::vect &I, + const Arith::vect &dx); + +template void calc_derivs<0, 0, 0>(const vec, dim> &dgf, + const GridDescBaseDevice &grid, + const GF3D5 &gf, + const GF3D5layout layout, + const vect dx, + const int deriv_order); + +template void calc_derivs2<0, 0, 0>( + const vec, dim> &dgf, const smat, dim> &ddgf, + const GridDescBaseDevice &grid, const GF3D5 &gf, + const GF3D5layout layout, const vect dx, const int deriv_order); + +} // namespace Derivs From 28c1cafb2f0cd28ff53b03d5672f31220ba13c6b Mon Sep 17 00:00:00 2001 From: "Steven R. Brandt" Date: Tue, 25 Jul 2023 12:54:53 -0500 Subject: [PATCH 6/7] Add namespace --- Derivs/src/derivs_spacetimex.hxx | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/Derivs/src/derivs_spacetimex.hxx b/Derivs/src/derivs_spacetimex.hxx index 6a51e6e5a..9823cf219 100644 --- a/Derivs/src/derivs_spacetimex.hxx +++ b/Derivs/src/derivs_spacetimex.hxx @@ -1,5 +1,5 @@ -#ifndef CARPETX_DERIVS_DERIVS_HXX -#define CARPETX_DERIVS_DERIVS_HXX +#ifndef CARPETX_DERIVS_SPACETIMEX_DERIVS_HXX +#define CARPETX_DERIVS_SPACETIMEX_DERIVS_HXX #include #include @@ -13,6 +13,8 @@ #include // Unified derivative header +namespace derivs_spacetimex { + // TODO: Make this a runtime parameter constexpr int deriv_order = 4; @@ -508,5 +510,7 @@ void CCTK_ATTRIBUTE_NOINLINE calc_copy(const cGH *restrict const cctkGH, for (int b = a; b < 3; ++b) calc_copy(cctkGH, gf0_(a, b), gf_(a, b), layout); } + +} #endif // #ifndef CARPETX_DERIVS_DERIVS_HXX From d83e60e9ffe51c9fcd9adc64d908dba308cf7dd2 Mon Sep 17 00:00:00 2001 From: "Steven R. Brandt" Date: Wed, 26 Jul 2023 13:01:30 -0500 Subject: [PATCH 7/7] A single comment line was removed --- CarpetX/src/io_openpmd.cxx | 1 - 1 file changed, 1 deletion(-) diff --git a/CarpetX/src/io_openpmd.cxx b/CarpetX/src/io_openpmd.cxx index 0b719045c..c51694368 100644 --- a/CarpetX/src/io_openpmd.cxx +++ b/CarpetX/src/io_openpmd.cxx @@ -1254,7 +1254,6 @@ void carpetx_openpmd_t::OutputOpenPMD(const cGH *const cctkGH, series->setIterationEncoding(iterationEncoding); { - // Does not work in containers char const *const user = getenv("USER"); if (user) series->setAuthor(user);