From 45e7ade2286fe43688253282f0bc91b3fafa8378 Mon Sep 17 00:00:00 2001 From: Roland Haas Date: Fri, 10 Jan 2025 10:27:54 -0600 Subject: [PATCH 1/2] CarpetX: manually implement custom OpenMP reductions for nvc++ --- CarpetX/src/reduction.cxx | 70 +++++++++++++++++++++++---------------- CarpetX/src/reduction.hxx | 2 ++ 2 files changed, 43 insertions(+), 29 deletions(-) diff --git a/CarpetX/src/reduction.cxx b/CarpetX/src/reduction.cxx index f75684b35..59f91a7e5 100644 --- a/CarpetX/src/reduction.cxx +++ b/CarpetX/src/reduction.cxx @@ -201,38 +201,50 @@ reduction reduce(int gi, int vi, int tl) { // TODO: check that multi-threading actually helps (and we are // not dominated by memory latency) // TODO: document required version of OpenMP to use custom reductions +#ifdef __NVCOMPILER +#pragma omp parallel + { + auto &outer = red; + reduction red; +#else #pragma omp parallel reduction(reduction : red) - for (amrex::MFIter mfi(mfab, mfitinfo); mfi.isValid(); ++mfi) { - const amrex::Box &bx = mfi.tilebox(); // current tile (without ghosts) - const vect tmin{bx.smallEnd(0), bx.smallEnd(1), - bx.smallEnd(2)}; - const vect tmax{bx.bigEnd(0) + 1, bx.bigEnd(1) + 1, - bx.bigEnd(2) + 1}; - const amrex::Box &vbx = - mfi.validbox(); // interior region (without ghosts) - const vect imin{vbx.smallEnd(0), vbx.smallEnd(1), - vbx.smallEnd(2)}; - const vect imax{vbx.bigEnd(0) + 1, vbx.bigEnd(1) + 1, - vbx.bigEnd(2) + 1}; - - const amrex::Array4 &vars = mfab.array(mfi); - - std::unique_ptr > finemask; - if (finemask_imfab) { - finemask = make_unique >( - finemask_imfab->array(mfi)); - // Ensure the mask has the correct size - assert(finemask->begin.x == vars.begin.x); - assert(finemask->begin.y == vars.begin.y); - assert(finemask->begin.z == vars.begin.z); - assert(finemask->end.x == vars.end.x); - assert(finemask->end.y == vars.end.y); - assert(finemask->end.z == vars.end.z); +#endif + for (amrex::MFIter mfi(mfab, mfitinfo); mfi.isValid(); ++mfi) { + const amrex::Box &bx = mfi.tilebox(); 
// current tile (without ghosts) + const vect tmin{bx.smallEnd(0), bx.smallEnd(1), + bx.smallEnd(2)}; + const vect tmax{bx.bigEnd(0) + 1, bx.bigEnd(1) + 1, + bx.bigEnd(2) + 1}; + const amrex::Box &vbx = + mfi.validbox(); // interior region (without ghosts) + const vect imin{vbx.smallEnd(0), vbx.smallEnd(1), + vbx.smallEnd(2)}; + const vect imax{vbx.bigEnd(0) + 1, vbx.bigEnd(1) + 1, + vbx.bigEnd(2) + 1}; + + const amrex::Array4 &vars = mfab.array(mfi); + + std::unique_ptr > finemask; + if (finemask_imfab) { + finemask = make_unique >( + finemask_imfab->array(mfi)); + // Ensure the mask has the correct size + assert(finemask->begin.x == vars.begin.x); + assert(finemask->begin.y == vars.begin.y); + assert(finemask->begin.z == vars.begin.z); + assert(finemask->end.x == vars.end.x); + assert(finemask->end.y == vars.end.y); + assert(finemask->end.z == vars.end.z); + } + + red += reduce_array(vars, vi, tmin, tmax, indextype, imin, imax, + finemask.get(), x0, dx); } - - red += reduce_array(vars, vi, tmin, tmax, indextype, imin, imax, - finemask.get(), x0, dx); +#ifdef __NVCOMPILER +#pragma omp critical + outer += red; } +#endif } } diff --git a/CarpetX/src/reduction.hxx b/CarpetX/src/reduction.hxx index e1b03121c..b6180afb1 100644 --- a/CarpetX/src/reduction.hxx +++ b/CarpetX/src/reduction.hxx @@ -121,7 +121,9 @@ template struct combine { }; typedef reduction reduction_CCTK_REAL; +#ifndef __NVCOMPILER #pragma omp declare reduction(reduction:reduction_CCTK_REAL : omp_out += omp_in) +#endif MPI_Datatype reduction_mpi_datatype_CCTK_REAL(); MPI_Op reduction_mpi_op(); From 017629a9ef4a59109b921d72a674c9cc40a3523d Mon Sep 17 00:00:00 2001 From: Roland Haas Date: Fri, 10 Jan 2025 10:27:59 -0600 Subject: [PATCH 2/2] TestArrayGroup: avoid use of non-standard RANK function --- TestArrayGroup/src/TestDynamicData.F90 | 22 +++++++++------------- 1 file changed, 9 insertions(+), 13 deletions(-) diff --git a/TestArrayGroup/src/TestDynamicData.F90 b/TestArrayGroup/src/TestDynamicData.F90 
index bb589222d..631dc9c4a 100644 --- a/TestArrayGroup/src/TestDynamicData.F90 +++ b/TestArrayGroup/src/TestDynamicData.F90 @@ -6,25 +6,21 @@ subroutine TestArrayGroup_DynamicDataF(CCTK_ARGUMENTS) DECLARE_CCTK_PARAMETERS DECLARE_CCTK_ARGUMENTS - ! Validate grid array dynamic data - if(RANK(test1) /= 3) then ! note rank 3 b/c of vector of rank=2 arrays - call CCTK_ERROR("incorrect dimension in test1 array dynamic data") - endif - if(SIZE(test1, 1) /= 5 .or. SIZE(test1, 2) /= 6 .or. SIZE(test1, 3) /= 4) then + integer, dimension(3) :: sizes1, sizes2, sizes3 + + ! check that grid variable is of rank 3. This fails to compile otherwise. + sizes1 = SHAPE(test1) + if(sizes1(1) /= 5 .or. sizes1(2) /= 6 .or. sizes1(3) /= 4) then call CCTK_ERROR("incorrect size in test1 array dynamic data") endif - if(RANK(test2) /= 3) then ! note rank 3 b/c of vector of rank=2 arrays - call CCTK_ERROR("incorrect dimension in test2 array dynamic data") - endif - if(SIZE(test2, 1) /= 5 .or. SIZE(test2, 2) /= 6 .or. SIZE(test2, 3) /= 4) then + sizes2 = SHAPE(test2) + if(sizes2(1) /= 5 .or. sizes2(2) /= 6 .or. sizes2(3) /= 4) then call CCTK_ERROR("incorrect size in test2 array dynamic data") endif - if(RANK(test3) /= 3) then ! note rank 3 b/c of vector of rank=2 arrays - call CCTK_ERROR("incorrect dimension in test3 array dynamic data") - endif - if(SIZE(test3, 1) /= 5 .or. SIZE(test3, 2) /= 6 .or. SIZE(test3, 3) /= 4) then + sizes3 = SHAPE(test3) + if(sizes3(1) /= 5 .or. sizes3(2) /= 6 .or. sizes3(3) /= 4) then call CCTK_ERROR("incorrect size in test3 array dynamic data") endif