From 1e0d0c11462b309dcb8f0ac759e49106f783890d Mon Sep 17 00:00:00 2001 From: Liwei Ji Date: Wed, 11 Sep 2024 13:11:45 -0400 Subject: [PATCH] Z4cow: add constraint.cxx --- Z4cow/src/constraint.cxx | 196 +++++++++++++++++++++++++++++++++++++++ Z4cow/src/rhs.cxx | 7 +- 2 files changed, 198 insertions(+), 5 deletions(-) create mode 100644 Z4cow/src/constraint.cxx diff --git a/Z4cow/src/constraint.cxx b/Z4cow/src/constraint.cxx new file mode 100644 index 00000000..98f61166 --- /dev/null +++ b/Z4cow/src/constraint.cxx @@ -0,0 +1,196 @@ +#include +#include +#include + +#ifdef __CUDACC__ +// Disable CCTK_DEBUG since the debug information takes too much +// parameter space to launch the kernels +#ifdef CCTK_DEBUG +#undef CCTK_DEBUG +#endif +#endif + +#include +#include +#include + +#ifdef __CUDACC__ +#include +#endif + +#include + +namespace Z4cow { +using namespace Arith; +using namespace Loop; +using namespace std; + +template +CCTK_DEVICE CCTK_HOST CCTK_ATTRIBUTE_ALWAYS_INLINE inline T Power(T x, int y) { + return (y == 2) ? Arith::pow2(x) : Arith::pown(x, y); +} + +extern "C" void Z4cow_Constraints(CCTK_ARGUMENTS) { + DECLARE_CCTK_ARGUMENTS_Z4cow_Constraints; + DECLARE_CCTK_PARAMETERS; + + for (int d = 0; d < 3; ++d) + if (cctk_nghostzones[d] < deriv_order / 2) + CCTK_VERROR("Need at least %d ghost zones", deriv_order / 2); + + const array indextype = {0, 0, 0}; + const array nghostzones = {cctk_nghostzones[0], cctk_nghostzones[1], + cctk_nghostzones[2]}; + vect imin, imax; + GridDescBase(cctkGH).box_int<0, 0, 0>(nghostzones, imin, imax); + // suffix 2: with ghost zones, suffix 5: without ghost zones + const GF3D2layout layout2(cctkGH, indextype); + const GF3D5layout layout5(imin, imax); + + // Input grid functions + const GF3D2 gf_W(layout2, W); + const smat, 3> gf_gamt{ + GF3D2(layout2, gammatxx), + GF3D2(layout2, gammatxy), + GF3D2(layout2, gammatxz), + GF3D2(layout2, gammatyy), + GF3D2(layout2, gammatyz), + GF3D2(layout2, gammatzz)}; + const GF3D2 gf_exKh(layout2, Kh); + const smat, 3> gf_exAt{ + GF3D2(layout2, Atxx), + GF3D2(layout2, Atxy), + GF3D2(layout2, Atxz), + GF3D2(layout2, Atyy), + GF3D2(layout2, Atyz), + GF3D2(layout2, Atzz)}; + const vec, 3> gf_trGt{ + GF3D2(layout2, Gamtx), + GF3D2(layout2, Gamty), + GF3D2(layout2, Gamtz)}; + const GF3D2 gf_Theta(layout2, Theta); + const GF3D2 gf_alpha(layout2, alphaG); + const vec, 3> gf_beta{ + GF3D2(layout2, betaGx), + GF3D2(layout2, betaGy), + GF3D2(layout2, betaGz)}; + + // Define derivs lambdas + const Loop::GridDescBaseDevice grid(cctkGH); + const vect dx(std::array{ + CCTK_DELTA_SPACE(0), + CCTK_DELTA_SPACE(1), + CCTK_DELTA_SPACE(2), + }); + + const auto calccopy = [&](const auto &gf, const auto &gf0) { + Derivs::calc_copy<0, 0, 0>(gf, layout5, grid, gf0); + }; + const auto calcderivs = [&](const auto &gf, const auto &dgf, + const auto &gf0) { + Derivs::calc_derivs<0, 0, 0>(gf, dgf, layout5, grid, gf0, dx, deriv_order); + }; + const auto calcderivs2 = [&](const auto &gf, const auto &dgf, + const auto &ddgf, const auto &gf0) { + Derivs::calc_derivs2<0, 0, 0>(gf, dgf, ddgf, layout5, grid, gf0, dx, + deriv_order); + }; + + // Tile variables for derivatives and so on + const int ntmps = 118; + GF3D5vector tmps(layout5, ntmps); + int itmp = 0; + + const auto make_gf = [&]() { return GF3D5(tmps(itmp++)); }; + const auto make_vec = [&](const auto &f) { + return vec, 3>([&](int) { return f(); }); + }; + const auto make_mat = [&](const auto &f) { + return smat, 3>([&](int, int) { return f(); }); + }; + const auto make_vec_gf = [&]() { return make_vec(make_gf); }; + const auto make_mat_gf = [&]() { return make_mat(make_gf); }; + const auto make_vec_vec_gf = [&]() { return make_vec(make_vec_gf); }; + const auto make_mat_vec_gf = [&]() { return make_mat(make_vec_gf); }; + const auto make_mat_mat_gf = [&]() { return make_mat(make_mat_gf); }; + + const GF3D5 tl_W(make_gf()); + const vec, 3> tl_dW(make_vec_gf()); + const smat, 3> tl_ddW(make_mat_gf()); + calcderivs2(tl_W, tl_dW, tl_ddW, gf_W); + + const smat, 3> tl_gamt(make_mat_gf()); + const smat, 3>, 3> tl_dgamt(make_mat_vec_gf()); + const smat, 3>, 3> tl_ddgamt(make_mat_mat_gf()); + calcderivs2(tl_gamt, tl_dgamt, tl_ddgamt, gf_gamt); + + const GF3D5 tl_exKh(make_gf()); + const vec, 3> tl_dexKh(make_vec_gf()); + calcderivs(tl_exKh, tl_dexKh, gf_exKh); + + const smat, 3> tl_exAt(make_mat_gf()); + const smat, 3>, 3> tl_dexAt(make_mat_vec_gf()); + calcderivs(tl_exAt, tl_dexAt, gf_exAt); + + const vec, 3> tl_trGt(make_vec_gf()); + const vec, 3>, 3> tl_dtrGt(make_vec_vec_gf()); + calcderivs(tl_trGt, tl_dtrGt, gf_trGt); + + const GF3D5 tl_Theta(make_gf()); + const vec, 3> tl_dTheta(make_vec_gf()); + calcderivs(tl_Theta, tl_dTheta, gf_Theta); + + const GF3D5 tl_alpha(make_gf()); + calccopy(tl_alpha, gf_alpha); + + const vec, 3> tl_beta(make_vec_gf()); + calccopy(tl_beta, gf_beta); + + if (itmp != ntmps) + CCTK_VERROR("Wrong number of temporary variables: ntmps=%d itmp=%d", ntmps, + itmp); + itmp = -1; + + // More input grid functions + const GF3D2 gf_eTtt(layout2, eTtt); + const vec, 3> gf_eTt{ + GF3D2(layout2, eTtx), + GF3D2(layout2, eTty), + GF3D2(layout2, eTtz)}; + const smat, 3> gf_eT{ + GF3D2(layout2, eTxx), + GF3D2(layout2, eTxy), + GF3D2(layout2, eTxz), + GF3D2(layout2, eTyy), + GF3D2(layout2, eTyz), + GF3D2(layout2, eTzz)}; + + // Output grid functions + const vec, 3> gf_ZtC{GF3D2(layout2, ZtCx), + GF3D2(layout2, ZtCy), + GF3D2(layout2, ZtCz)}; + const GF3D2 gf_HC(layout2, HC); + const vec, 3> gf_MtC{GF3D2(layout2, MtCx), + GF3D2(layout2, MtCy), + GF3D2(layout2, MtCz)}; + + // simd types + typedef simd vreal; + typedef simdl vbool; + constexpr size_t vsize = tuple_size_v; + + // parameters + const CCTK_REAL cpi = M_PI; + +#ifdef __CUDACC__ + const nvtxRangeId_t range = nvtxRangeStartA("Z4cow_Constraints::constraints"); +#endif + +#include "../wolfram/Z4cow_set_constraint.hxx" + +#ifdef __CUDACC__ + nvtxRangeEnd(range); +#endif +} + +} // namespace Z4cow diff --git a/Z4cow/src/rhs.cxx b/Z4cow/src/rhs.cxx index 69390dcd..6766c732 100644 --- a/Z4cow/src/rhs.cxx +++ b/Z4cow/src/rhs.cxx @@ -1,4 +1,6 @@ #include +#include +#include #ifdef __CUDACC__ // Disable CCTK_DEBUG since the debug information takes too much @@ -14,10 +16,6 @@ #include #include -#include -#include -#include - #ifdef __CUDACC__ #include #endif @@ -47,7 +45,6 @@ extern "C" void Z4cow_RHS(CCTK_ARGUMENTS) { cctk_nghostzones[2]}; vect imin, imax; GridDescBase(cctkGH).box_int<0, 0, 0>(nghostzones, imin, imax); - // suffix 2: with ghost zones, suffix 5: without ghost zones const GF3D2layout layout2(cctkGH, indextype); const GF3D5layout layout5(imin, imax);