diff --git a/Z4co/src/diss.hxx b/Z4co/src/diss.hxx deleted file mode 100644 index 409b0fae..00000000 --- a/Z4co/src/diss.hxx +++ /dev/null @@ -1,94 +0,0 @@ -#ifndef Z4CO_DERIVS_HXX -#define Z4CO_DERIVS_HXX - -#include -#include - -#include -#include -#include - -namespace Z4co { -using namespace Arith; -using namespace Loop; - -template -CCTK_ATTRIBUTE_NOINLINE void -apply_upwind_diss(const cGH *restrict const cctkGH, const GF3D2 &gf_, - const vec, dim> &gf_betaG_, - const GF3D2 &gf_rhs_) { - DECLARE_CCTK_ARGUMENTS; - DECLARE_CCTK_PARAMETERS; - - const vect dx{ - CCTK_DELTA_SPACE(0), - CCTK_DELTA_SPACE(1), - CCTK_DELTA_SPACE(2), - }; - - simd (*calc_deriv_upwind)( - const GF3D2 &, const simdl &, - const vect &, const vect &, - const vec, dim> &); - - simd (*calc_diss)(const GF3D2 &, - const simdl &, const vect &, - const vect &); - - switch (deriv_order) { - case 2: { - calc_deriv_upwind = &Derivs::calc_deriv_upwind<2>; - calc_diss = &Derivs::calc_diss<2>; - break; - } - case 4: - case 6: { - calc_deriv_upwind = &Derivs::calc_deriv_upwind<4>; - calc_diss = &Derivs::calc_diss<4>; - break; - } - // case 6: { - // calc_deriv_upwind = &Derivs::calc_deriv_upwind<6>; - // calc_diss = &Derivs::calc_diss<6>; - // break; - // } - default: - assert(0); - } - - typedef simd vreal; - typedef simdl vbool; - constexpr size_t vsize = tuple_size_v; - - if (epsdiss == 0) { - - const Loop::GridDescBaseDevice grid(cctkGH); - grid.loop_int_device<0, 0, 0, vsize>( - grid.nghostzones, [=] ARITH_DEVICE(const PointDesc &p) ARITH_INLINE { - const vbool mask = mask_for_loop_tail(p.i, p.imax); - const vec betaG = gf_betaG_(mask, p.I); - const vreal rhs_old = gf_rhs_(mask, p.I); - const vreal rhs_new = - rhs_old + calc_deriv_upwind(gf_, mask, p.I, dx, betaG); - gf_rhs_.store(mask, p.I, rhs_new); - }); - - } else { - - const Loop::GridDescBaseDevice grid(cctkGH); - grid.loop_int_device<0, 0, 0, vsize>( - grid.nghostzones, [=] ARITH_DEVICE(const PointDesc &p) ARITH_INLINE { - const vbool mask = mask_for_loop_tail(p.i, p.imax); - const vec betaG = gf_betaG_(mask, p.I); - const vreal rhs_old = gf_rhs_(mask, p.I); - const vreal rhs_new = rhs_old + - calc_deriv_upwind(gf_, mask, p.I, dx, betaG) + - epsdiss * calc_diss(gf_, mask, p.I, dx); - gf_rhs_.store(mask, p.I, rhs_new); - }); - } -} - -} // namespace Z4co - -#endif // #ifndef Z4CO_DERIVS_HXX diff --git a/Z4co/src/rhs.cxx b/Z4co/src/rhs.cxx index d015d85f..99c5ab6b 100644 --- a/Z4co/src/rhs.cxx +++ b/Z4co/src/rhs.cxx @@ -8,10 +8,6 @@ #endif #endif -// #define Power(x, y) (Arith::pown((x), (y))) - -#include "diss.hxx" - #include #include #include @@ -249,29 +245,68 @@ extern "C" void Z4co_RHS(CCTK_ARGUMENTS) { // Upwind and dissipation terms // TODO: Consider fusing the loops to reduce memory bandwidth - - apply_upwind_diss(cctkGH, gf_chi, gf_beta, gf_dtchi); + vreal (*calc_deriv_upwind)( + const GF3D2 &, const vbool &, const vect &, + const vect &, const vec &); + + vreal (*calc_diss)(const GF3D2 &, const vbool &, + const vect &, const vect &); + + switch (deriv_order) { + case 2: { + calc_deriv_upwind = &Derivs::calc_deriv_upwind<2>; + calc_diss = &Derivs::calc_diss<2>; + break; + } + case 4: + case 6: { + calc_deriv_upwind = &Derivs::calc_deriv_upwind<4>; + calc_diss = &Derivs::calc_diss<4>; + break; + } + default: + assert(0); + } + + const auto apply_upwind_diss = + [&](const GF3D2 &gf_, + const vec, dim> &gf_betaG_, + const GF3D2 &gf_rhs_) { + grid.loop_int_device<0, 0, 0, vsize>( + grid.nghostzones, + [=] CCTK_DEVICE(const PointDesc &p) CCTK_ATTRIBUTE_ALWAYS_INLINE { + const vbool mask = mask_for_loop_tail(p.i, p.imax); + const vec betaG = gf_betaG_(mask, p.I); + const vreal rhs_old = gf_rhs_(mask, p.I); + const vreal rhs_new = + rhs_old + calc_deriv_upwind(gf_, mask, p.I, dx, betaG) + + epsdiss * calc_diss(gf_, mask, p.I, dx); + gf_rhs_.store(mask, p.I, rhs_new); + }); + }; + + apply_upwind_diss(gf_chi, gf_beta, gf_dtchi); for (int a = 0; a < 3; ++a) for (int b = a; b < 3; ++b) - apply_upwind_diss(cctkGH, gf_gamt(a, b), gf_beta, gf_dtgamt(a, b)); + apply_upwind_diss(gf_gamt(a, b), gf_beta, gf_dtgamt(a, b)); - apply_upwind_diss(cctkGH, gf_exKh, gf_beta, gf_dtexKh); + apply_upwind_diss(gf_exKh, gf_beta, gf_dtexKh); for (int a = 0; a < 3; ++a) for (int b = a; b < 3; ++b) - apply_upwind_diss(cctkGH, gf_exAt(a, b), gf_beta, gf_dtexAt(a, b)); + apply_upwind_diss(gf_exAt(a, b), gf_beta, gf_dtexAt(a, b)); for (int a = 0; a < 3; ++a) - apply_upwind_diss(cctkGH, gf_trGt(a), gf_beta, gf_dttrGt(a)); + apply_upwind_diss(gf_trGt(a), gf_beta, gf_dttrGt(a)); if (!set_Theta_zero) - apply_upwind_diss(cctkGH, gf_Theta, gf_beta, gf_dtTheta); + apply_upwind_diss(gf_Theta, gf_beta, gf_dtTheta); - apply_upwind_diss(cctkGH, gf_alpha, gf_beta, gf_dtalpha); + apply_upwind_diss(gf_alpha, gf_beta, gf_dtalpha); for (int a = 0; a < 3; ++a) - apply_upwind_diss(cctkGH, gf_beta(a), gf_beta, gf_dtbeta(a)); + apply_upwind_diss(gf_beta(a), gf_beta, gf_dtbeta(a)); } } // namespace Z4co