From a6c3a9c7ebd3fe65b4bbc0dba230aacc19059fef Mon Sep 17 00:00:00 2001 From: Roland Haas Date: Mon, 10 Jul 2023 14:23:40 -0400 Subject: [PATCH] Loop: add "offset" argument for 4D indexing NOT to be used for production, since it is a horrible hack --- Loop/src/loop.hxx | 134 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 134 insertions(+) diff --git a/Loop/src/loop.hxx b/Loop/src/loop.hxx index e1a600bbe..87add514d 100644 --- a/Loop/src/loop.hxx +++ b/Loop/src/loop.hxx @@ -1073,6 +1073,18 @@ template struct GF3D2 { CCTK_DEVICE CCTK_HOST T &restrict operator()(int i, int j, int k) const { return ptr[linear(i, j, k)]; } + CCTK_DEVICE CCTK_HOST T &restrict operator()(const GF3D2index &index, const int var) const { +#ifdef CCTK_DEBUG + assert(index.layout == this->layout); +#endif + return ptr[var + index.linear()]; + } + CCTK_DEVICE CCTK_HOST T &restrict operator()(const vect &I, const int var) const { + return ptr[var + linear(I)]; + } + CCTK_DEVICE CCTK_HOST T &restrict operator()(int i, int j, int k, const int var) const { + return ptr[var + linear(i, j, k)]; + } CCTK_DEVICE CCTK_HOST void store(const GF3D2index &index, const T &value) const { ptr[index.linear()] = value; @@ -1081,6 +1093,14 @@ template struct GF3D2 { const T &value) const { ptr[linear(I)] = value; } + CCTK_DEVICE CCTK_HOST void store(const GF3D2index &index, const int var, + const T &value) const { + ptr[var + index.linear()] = value; + } + CCTK_DEVICE CCTK_HOST void store(const vect &I, const int var, + const T &value) const { + ptr[var + linear(I)] = value; + } #if 0 struct simd_reference { using element_type = std::remove_cv_t; @@ -1191,6 +1211,42 @@ template struct GF3D2 { const vect &I, const U &other) const { return Arith::masko_loadu(mask, &(*this)(I), other); } + CCTK_DEVICE CCTK_HOST Arith::simd > + operator()(const Arith::simdl > &mask, + const GF3D2index &index, const int var) const { + return Arith::maskz_loadu(mask, &(*this)(index, var)); + } + CCTK_DEVICE CCTK_HOST 
Arith::simd > + operator()(const Arith::simdl > &mask, + const GF3D2index &index, const int var, + const Arith::simd > &other) const { + return Arith::masko_loadu(mask, &(*this)(index, var), other); + } + template > * = nullptr> + CCTK_DEVICE CCTK_HOST Arith::simd > + operator()(const Arith::simdl > &mask, + const GF3D2index &index, const int var, const U &other) const { + return Arith::masko_loadu(mask, &(*this)(index, var), other); + } + CCTK_DEVICE CCTK_HOST Arith::simd > + operator()(const Arith::simdl > &mask, + const vect &I, const int var) const { + return Arith::maskz_loadu(mask, &(*this)(I, var)); + } + CCTK_DEVICE CCTK_HOST Arith::simd > + operator()(const Arith::simdl > &mask, + const vect &I, const int var, + const Arith::simd > &other) const { + return Arith::masko_loadu(mask, &(*this)(I, var), other); + } + template > * = nullptr> + CCTK_DEVICE CCTK_HOST Arith::simd > + operator()(const Arith::simdl > &mask, + const vect &I, const int var, const U &other) const { + return Arith::masko_loadu(mask, &(*this)(I, var), other); + } CCTK_DEVICE CCTK_HOST void store(const Arith::simdl &mask, const vect &I, const Arith::simd &value) const { @@ -1201,6 +1257,16 @@ template struct GF3D2 { const Arith::simd &value) const { mask_storeu(mask, &ptr[index.linear()], value); } + CCTK_DEVICE CCTK_HOST void store(const Arith::simdl &mask, + const vect &I, const int var, + const Arith::simd &value) const { + mask_storeu(mask, &ptr[var + linear(I)], value); /* var is added to the linear index, as in the GF3D2index overload of this store */ + } + CCTK_DEVICE CCTK_HOST void store(const Arith::simdl &mask, + const GF3D2index &index, const int var, + const Arith::simd &value) const { + mask_storeu(mask, &ptr[var + index.linear()], value); + } }; //////////////////////////////////////////////////////////////////////////////// @@ -1429,6 +1495,13 @@ template struct GF3D5 { return ptr[index.linear()]; } CCTK_DEVICE CCTK_HOST constexpr T &restrict + operator()(const GF3D5index &index, const int var) const { +#ifdef CCTK_DEBUG + assert(index.layout == this->layout); 
+#endif + return ptr[var + index.linear()]; + } + CCTK_DEVICE CCTK_HOST constexpr T &restrict operator()(const GF3D5layout &layout, const vect &I) const { return (*this)(GF3D5index(layout, I)); } @@ -1445,6 +1518,15 @@ template struct GF3D5 { const T &value) const { operator()(layout, I) = value; } + CCTK_DEVICE CCTK_HOST void store(const GF3D5index &index, const int var, + const T &value) const { + operator()(index, var) = value; + } + CCTK_DEVICE CCTK_HOST void store(const GF3D5layout &layout, + const vect &I, const int var, + const T &value) const { + operator()(GF3D5index(layout, I), var) = value; /* the scalar var accessor takes a GF3D5index, so build one from (layout, I) */ + } CCTK_DEVICE CCTK_HOST Arith::simd > operator()(const Arith::simdl > &mask, const GF3D5index &index) const { @@ -1527,6 +1609,46 @@ template struct GF3D5 { const U &other) const { return (*this)(mask, GF3D5index(layout, I), other); } + + // 4D array + CCTK_DEVICE CCTK_HOST simd_reference + operator()(const Arith::simdl > &mask, + const GF3D5index &index, const int var) const { + return simd_reference(&(*this)(index, var), mask); + } + CCTK_DEVICE CCTK_HOST simd_reference + operator()(const Arith::simdl > &mask, + const GF3D5layout &layout, const vect &I, const int var) const { + return (*this)(mask, GF3D5index(layout, I), var); + } + CCTK_DEVICE CCTK_HOST Arith::simd > + operator()(const Arith::simdl > &mask, + const GF3D5index &index, const int var, + const Arith::simd > &other) const { + return Arith::masko_loadu(mask, &(*this)(index, var), other); + } + template > * = nullptr> + CCTK_DEVICE CCTK_HOST Arith::simd > + operator()(const Arith::simdl > &mask, + const GF3D5index &index, const int var, const U &other) const { + return Arith::masko_loadu(mask, &(*this)(index, var), other); + } + CCTK_DEVICE CCTK_HOST Arith::simd > + operator()(const Arith::simdl > &mask, + const GF3D5layout &layout, const vect &I, const int var, + const Arith::simd > &other) const { + return (*this)(mask, GF3D5index(layout, I), var, other); + } + template > * = nullptr> + CCTK_DEVICE CCTK_HOST 
Arith::simd > + operator()(const Arith::simdl > &mask, + const GF3D5layout &layout, const vect &I, const int var, + const U &other) const { + return (*this)(mask, GF3D5index(layout, I), var, other); /* builds a GF3D5index from (layout, I) and forwards to the index-based overload */ + } + CCTK_DEVICE CCTK_HOST void store(const Arith::simdl &mask, const GF3D5index &index, const Arith::simd &value) const { @@ -1538,6 +1660,18 @@ template struct GF3D5 { const Arith::simd &value) const { store(mask, GF3D5index(layout, I), value); } + + CCTK_DEVICE CCTK_HOST void store(const Arith::simdl &mask, + const GF3D5index &index, const int var, + const Arith::simd &value) const { + mask_storeu(mask, &(*this)(index, var), value); /* destination is the address of element (index, var) */ + } + CCTK_DEVICE CCTK_HOST void store(const Arith::simdl &mask, + const GF3D5layout &layout, + const vect &I, const int var, + const Arith::simd &value) const { + store(mask, GF3D5index(layout, I), var, value); /* builds a GF3D5index from (layout, I) and forwards to the index-based store */ + } }; template struct is_GF3D5 : std::false_type {};