Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Updates from main #329

Merged
merged 11 commits into from
Jan 7, 2025
2 changes: 1 addition & 1 deletion Arith/src/rten.hxx
Original file line number Diff line number Diff line change
Expand Up @@ -177,7 +177,7 @@ public:
template <typename F, typename... Args>
friend constexpr ARITH_INLINE ARITH_DEVICE ARITH_HOST void
fmap_(const F &f, const rten &x, const rten<Args, D> &...args) {
fmap_(f, x.args, args.elts...);
fmap_(f, x.elts, args.elts...);
}

template <
Expand Down
222 changes: 148 additions & 74 deletions Derivs/src/derivs.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,7 @@ using namespace Loop;
template <int CI, int CJ, int CK, typename T>
CCTK_ATTRIBUTE_NOINLINE void
calc_copy(const GF3D5<T> &gf, const GF3D5layout layout,
const GridDescBaseDevice &grid, const GF3D2<const T> &gf0,
const vect<T, dim> dx) {
const GridDescBaseDevice &grid, const GF3D2<const T> &gf0) {
using vreal = simd<T>;
using vbool = simdl<T>;
constexpr std::size_t vsize = std::tuple_size_v<vreal>;
Expand All @@ -27,6 +26,24 @@ calc_copy(const GF3D5<T> &gf, const GF3D5layout layout,
});
}

template <int CI, int CJ, int CK, typename T>
CCTK_ATTRIBUTE_NOINLINE void
calc_copy(const vec<GF3D5<T>, dim> &gf, const GF3D5layout layout,
const GridDescBaseDevice &grid, const vec<GF3D2<const T>, dim> &gf0) {
for (int a = 0; a < dim; ++a)
calc_copy<CI, CJ, CK>(gf(a), layout, grid, gf0(a));
};

template <int CI, int CJ, int CK, typename T>
CCTK_ATTRIBUTE_NOINLINE void calc_copy(const smat<GF3D5<T>, dim> &gf,
const GF3D5layout layout,
const GridDescBaseDevice &grid,
const smat<GF3D2<const T>, dim> &gf0) {
for (int a = 0; a < dim; ++a)
for (int b = a; b < dim; ++b)
calc_copy<CI, CJ, CK>(gf(a, b), layout, grid, gf0(a, b));
};

template <int CI, int CJ, int CK, typename T>
CCTK_ATTRIBUTE_NOINLINE void
calc_derivs(const GF3D5<T> &gf, const vec<GF3D5<T>, dim> &dgf,
Expand All @@ -44,9 +61,8 @@ calc_derivs(const GF3D5<T> &gf, const vec<GF3D5<T>, dim> &dgf,
grid.nghostzones,
[=] CCTK_DEVICE(const PointDesc &p) CCTK_ATTRIBUTE_ALWAYS_INLINE {
const vbool mask = mask_for_loop_tail<vbool>(p.i, p.imax);
// Take account of ghost points
const vbool mask1 =
mask_for_loop_tail<vbool>(p.i, p.imax + deriv_order / 2);
// Take ghost points into account
const vbool mask1 = mask_for_loop_tail<vbool>(p.i, p.imax + 2 / 2);
const GF3D5index index(layout, p.I);
const auto val = gf0(mask, p.I);
const auto dval = calc_deriv<2>(gf0, mask1, p.I, dx);
Expand All @@ -60,9 +76,8 @@ calc_derivs(const GF3D5<T> &gf, const vec<GF3D5<T>, dim> &dgf,
grid.nghostzones,
[=] CCTK_DEVICE(const PointDesc &p) CCTK_ATTRIBUTE_ALWAYS_INLINE {
const vbool mask = mask_for_loop_tail<vbool>(p.i, p.imax);
// Take account of ghost points
const vbool mask1 =
mask_for_loop_tail<vbool>(p.i, p.imax + deriv_order / 2);
// Take ghost points into account
const vbool mask1 = mask_for_loop_tail<vbool>(p.i, p.imax + 4 / 2);
const GF3D5index index(layout, p.I);
const auto val = gf0(mask, p.I);
const auto dval = calc_deriv<4>(gf0, mask1, p.I, dx);
Expand All @@ -76,9 +91,8 @@ calc_derivs(const GF3D5<T> &gf, const vec<GF3D5<T>, dim> &dgf,
grid.nghostzones,
[=] CCTK_DEVICE(const PointDesc &p) CCTK_ATTRIBUTE_ALWAYS_INLINE {
const vbool mask = mask_for_loop_tail<vbool>(p.i, p.imax);
// Take account of ghost points
const vbool mask1 =
mask_for_loop_tail<vbool>(p.i, p.imax + deriv_order / 2);
// Take ghost points into account
const vbool mask1 = mask_for_loop_tail<vbool>(p.i, p.imax + 6 / 2);
const GF3D5index index(layout, p.I);
const auto val = gf0(mask, p.I);
const auto dval = calc_deriv<6>(gf0, mask1, p.I, dx);
Expand All @@ -87,11 +101,50 @@ calc_derivs(const GF3D5<T> &gf, const vec<GF3D5<T>, dim> &dgf,
});
break;

case 8:
grid.loop_int_device<CI, CJ, CK, vsize>(
grid.nghostzones,
[=] CCTK_DEVICE(const PointDesc &p) CCTK_ATTRIBUTE_ALWAYS_INLINE {
const vbool mask = mask_for_loop_tail<vbool>(p.i, p.imax);
// Take ghost points into account
const vbool mask1 = mask_for_loop_tail<vbool>(p.i, p.imax + 8 / 2);
const GF3D5index index(layout, p.I);
const auto val = gf0(mask, p.I);
const auto dval = calc_deriv<8>(gf0, mask1, p.I, dx);
gf.store(mask, index, val);
dgf.store(mask, index, dval);
});
break;

default:
CCTK_VERROR("Unsupported derivative order %d", deriv_order);
}
}

template <int CI, int CJ, int CK, typename T>
CCTK_ATTRIBUTE_NOINLINE void
calc_derivs(const vec<GF3D5<T>, dim> &gf,
const vec<vec<GF3D5<T>, dim>, dim> &dgf, const GF3D5layout layout,
const GridDescBaseDevice &grid, const vec<GF3D2<const T>, dim> &gf0,
const vect<T, dim> dx, const int deriv_order) {
for (int a = 0; a < dim; ++a)
calc_derivs<CI, CJ, CK>(gf(a), dgf(a), layout, grid, gf0(a), dx,
deriv_order);
}

template <int CI, int CJ, int CK, typename T>
CCTK_ATTRIBUTE_NOINLINE void
calc_derivs(const smat<GF3D5<T>, dim> &gf,
const smat<vec<GF3D5<T>, dim>, dim> &dgf, const GF3D5layout layout,
const GridDescBaseDevice &grid,
const smat<GF3D2<const T>, dim> &gf0, const vect<T, dim> dx,
const int deriv_order) {
for (int a = 0; a < dim; ++a)
for (int b = a; b < dim; ++b)
calc_derivs<CI, CJ, CK>(gf(a, b), dgf(a, b), layout, grid, gf0(a, b), dx,
deriv_order);
}

template <int CI, int CJ, int CK, typename T>
CCTK_ATTRIBUTE_NOINLINE void
calc_derivs2(const GF3D5<T> &gf, const vec<GF3D5<T>, dim> &dgf,
Expand All @@ -109,9 +162,8 @@ calc_derivs2(const GF3D5<T> &gf, const vec<GF3D5<T>, dim> &dgf,
grid.nghostzones,
[=] CCTK_DEVICE(const PointDesc &p) CCTK_ATTRIBUTE_ALWAYS_INLINE {
const vbool mask = mask_for_loop_tail<vbool>(p.i, p.imax);
// Take account of ghost points
const vbool mask1 =
mask_for_loop_tail<vbool>(p.i, p.imax + deriv_order / 2);
// Take ghost points into account
const vbool mask1 = mask_for_loop_tail<vbool>(p.i, p.imax + 2 / 2);
const GF3D5index index(layout, p.I);
const auto val = gf0(mask, p.I);
const auto dval = calc_deriv<2>(gf0, mask1, p.I, dx);
Expand All @@ -127,9 +179,8 @@ calc_derivs2(const GF3D5<T> &gf, const vec<GF3D5<T>, dim> &dgf,
grid.nghostzones,
[=] CCTK_DEVICE(const PointDesc &p) CCTK_ATTRIBUTE_ALWAYS_INLINE {
const vbool mask = mask_for_loop_tail<vbool>(p.i, p.imax);
// Take account of ghost points
const vbool mask1 =
mask_for_loop_tail<vbool>(p.i, p.imax + deriv_order / 2);
// Take ghost points into account
const vbool mask1 = mask_for_loop_tail<vbool>(p.i, p.imax + 4 / 2);
const GF3D5index index(layout, p.I);
const auto val = gf0(mask, p.I);
const auto dval = calc_deriv<4>(gf0, mask1, p.I, dx);
Expand All @@ -145,9 +196,8 @@ calc_derivs2(const GF3D5<T> &gf, const vec<GF3D5<T>, dim> &dgf,
grid.nghostzones,
[=] CCTK_DEVICE(const PointDesc &p) CCTK_ATTRIBUTE_ALWAYS_INLINE {
const vbool mask = mask_for_loop_tail<vbool>(p.i, p.imax);
// Take account of ghost points
const vbool mask1 =
mask_for_loop_tail<vbool>(p.i, p.imax + deriv_order / 2);
// Take ghost points into account
const vbool mask1 = mask_for_loop_tail<vbool>(p.i, p.imax + 6 / 2);
const GF3D5index index(layout, p.I);
const auto val = gf0(mask, p.I);
const auto dval = calc_deriv<6>(gf0, mask1, p.I, dx);
Expand All @@ -158,84 +208,108 @@ calc_derivs2(const GF3D5<T> &gf, const vec<GF3D5<T>, dim> &dgf,
});
break;

case 8:
grid.loop_int_device<CI, CJ, CK, vsize>(
grid.nghostzones,
[=] CCTK_DEVICE(const PointDesc &p) CCTK_ATTRIBUTE_ALWAYS_INLINE {
const vbool mask = mask_for_loop_tail<vbool>(p.i, p.imax);
// Take ghost points into account
const vbool mask1 = mask_for_loop_tail<vbool>(p.i, p.imax + 8 / 2);
const GF3D5index index(layout, p.I);
const auto val = gf0(mask, p.I);
const auto dval = calc_deriv<8>(gf0, mask1, p.I, dx);
const auto ddval = calc_deriv2<8>(gf0, mask1, p.I, dx);
gf.store(mask, index, val);
dgf.store(mask, index, dval);
ddgf.store(mask, index, ddval);
});
break;

default:
CCTK_VERROR("Unsupported derivative order %d", deriv_order);
}
}

template <int CI, int CJ, int CK, typename T>
CCTK_ATTRIBUTE_NOINLINE void calc_derivs2(
const vec<GF3D5<T>, dim> &gf, const vec<vec<GF3D5<T>, dim>, dim> &dgf,
const vec<smat<GF3D5<T>, dim>, dim> &ddgf, const GF3D5layout layout,
const GridDescBaseDevice &grid, const vec<GF3D2<const T>, dim> &gf0,
const vect<T, dim> dx, const int deriv_order) {
for (int a = 0; a < dim; ++a)
calc_derivs2<CI, CJ, CK>(gf(a), dgf(a), ddgf(a), layout, grid, gf0(a), dx,
deriv_order);
}

template <int CI, int CJ, int CK, typename T>
CCTK_ATTRIBUTE_NOINLINE void calc_derivs2(
const smat<GF3D5<T>, dim> &gf, const smat<vec<GF3D5<T>, dim>, dim> &dgf,
const smat<smat<GF3D5<T>, dim>, dim> &ddgf, const GF3D5layout layout,
const GridDescBaseDevice &grid, const smat<GF3D2<const T>, dim> &gf0,
const vect<T, dim> dx, const int deriv_order) {
for (int a = 0; a < dim; ++a)
for (int b = a; b < dim; ++b)
calc_derivs2<CI, CJ, CK>(gf(a, b), dgf(a, b), ddgf(a, b), layout, grid,
gf0(a, b), dx, deriv_order);
}

////////////////////////////////////////////////////////////////////////////////

// Template instantiations

using T = CCTK_REAL;

template CCTK_DEVICE CCTK_HOST Arith::vec<Arith::simd<T>, Loop::dim>
calc_deriv<2>(const Loop::GF3D2<const T> &gf, const Arith::simdl<T> &mask,
const Arith::vect<int, Loop::dim> &I,
const Arith::vect<T, Loop::dim> &dx);
template CCTK_DEVICE CCTK_HOST Arith::vec<Arith::simd<T>, Loop::dim>
calc_deriv<4>(const Loop::GF3D2<const T> &gf, const Arith::simdl<T> &mask,
const Arith::vect<int, Loop::dim> &I,
const Arith::vect<T, Loop::dim> &dx);
template CCTK_DEVICE CCTK_HOST Arith::vec<Arith::simd<T>, Loop::dim>
calc_deriv<6>(const Loop::GF3D2<const T> &gf, const Arith::simdl<T> &mask,
const Arith::vect<int, Loop::dim> &I,
const Arith::vect<T, Loop::dim> &dx);

template CCTK_DEVICE CCTK_HOST Arith::vec<T, Loop::dim>
calc_deriv<2>(const Loop::GF3D2<const T> &gf,
const Arith::vect<int, Loop::dim> &I,
const Arith::vect<T, Loop::dim> &dx);
template CCTK_DEVICE CCTK_HOST Arith::vec<T, Loop::dim>
calc_deriv<4>(const Loop::GF3D2<const T> &gf,
const Arith::vect<int, Loop::dim> &I,
const Arith::vect<T, Loop::dim> &dx);
template CCTK_DEVICE CCTK_HOST Arith::vec<T, Loop::dim>
calc_deriv<6>(const Loop::GF3D2<const T> &gf,
const Arith::vect<int, Loop::dim> &I,
const Arith::vect<T, Loop::dim> &dx);

template CCTK_DEVICE CCTK_HOST Arith::smat<Arith::simd<T>, Loop::dim>
calc_deriv2<2>(const Loop::GF3D2<const T> &gf, const Arith::simdl<T> &mask,
const Arith::vect<int, Loop::dim> &I,
const Arith::vect<T, Loop::dim> &dx);
template CCTK_DEVICE CCTK_HOST Arith::smat<Arith::simd<T>, Loop::dim>
calc_deriv2<4>(const Loop::GF3D2<const T> &gf, const Arith::simdl<T> &mask,
const Arith::vect<int, Loop::dim> &I,
const Arith::vect<T, Loop::dim> &dx);
template CCTK_DEVICE CCTK_HOST Arith::smat<Arith::simd<T>, Loop::dim>
calc_deriv2<6>(const Loop::GF3D2<const T> &gf, const Arith::simdl<T> &mask,
const Arith::vect<int, Loop::dim> &I,
const Arith::vect<T, Loop::dim> &dx);

template CCTK_DEVICE CCTK_HOST Arith::smat<T, Loop::dim>
calc_deriv2<2>(const Loop::GF3D2<const T> &gf,
const Arith::vect<int, Loop::dim> &I,
const Arith::vect<T, Loop::dim> &dx);
template CCTK_DEVICE CCTK_HOST Arith::smat<T, Loop::dim>
calc_deriv2<4>(const Loop::GF3D2<const T> &gf,
const Arith::vect<int, Loop::dim> &I,
const Arith::vect<T, Loop::dim> &dx);
template CCTK_DEVICE CCTK_HOST Arith::smat<T, Loop::dim>
calc_deriv2<6>(const Loop::GF3D2<const T> &gf,
const Arith::vect<int, Loop::dim> &I,
const Arith::vect<T, Loop::dim> &dx);

template void calc_copy<0, 0, 0>(const GF3D5<T> &gf, const GF3D5layout layout,
const GridDescBaseDevice &grid,
const GF3D2<const T> &gf0,
const vect<T, dim> dx);
const GF3D2<const T> &gf0);

template void calc_copy<0, 0, 0>(const vec<GF3D5<T>, dim> &gf,
const GF3D5layout layout,
const GridDescBaseDevice &grid,
const vec<GF3D2<const T>, dim> &gf0);

template void calc_copy<0, 0, 0>(const smat<GF3D5<T>, dim> &gf,
const GF3D5layout layout,
const GridDescBaseDevice &grid,
const smat<GF3D2<const T>, dim> &gf0);
template void
calc_derivs<0, 0, 0>(const GF3D5<T> &gf, const vec<GF3D5<T>, dim> &dgf,
const GF3D5layout layout, const GridDescBaseDevice &grid,
const GF3D2<const T> &gf0, const vect<T, dim> dx,
const int deriv_order);

template void calc_derivs<0, 0, 0>(const vec<GF3D5<T>, dim> &gf,
const vec<vec<GF3D5<T>, dim>, dim> &dgf,
const GF3D5layout layout,
const GridDescBaseDevice &grid,
const vec<GF3D2<const T>, dim> &gf0,
const vect<T, dim> dx,
const int deriv_order);

template void calc_derivs<0, 0, 0>(const smat<GF3D5<T>, dim> &gf,
const smat<vec<GF3D5<T>, dim>, dim> &dgf,
const GF3D5layout layout,
const GridDescBaseDevice &grid,
const smat<GF3D2<const T>, dim> &gf0,
const vect<T, dim> dx,
const int deriv_order);

template void
calc_derivs2<0, 0, 0>(const GF3D5<T> &gf, const vec<GF3D5<T>, dim> &dgf,
const smat<GF3D5<T>, dim> &ddgf, const GF3D5layout layout,
const GridDescBaseDevice &grid, const GF3D2<const T> &gf0,
const vect<T, dim> dx, const int deriv_order);

template void calc_derivs2<0, 0, 0>(
const vec<GF3D5<T>, dim> &gf, const vec<vec<GF3D5<T>, dim>, dim> &dgf,
const vec<smat<GF3D5<T>, dim>, dim> &ddgf, const GF3D5layout layout,
const GridDescBaseDevice &grid, const vec<GF3D2<const T>, dim> &gf0,
const vect<T, dim> dx, const int deriv_order);

template void calc_derivs2<0, 0, 0>(
const smat<GF3D5<T>, dim> &gf, const smat<vec<GF3D5<T>, dim>, dim> &dgf,
const smat<smat<GF3D5<T>, dim>, dim> &ddgf, const GF3D5layout layout,
const GridDescBaseDevice &grid, const smat<GF3D2<const T>, dim> &gf0,
const vect<T, dim> dx, const int deriv_order);

} // namespace Derivs
Loading
Loading