diff --git a/examples/combi_example/TaskExample.hpp b/examples/combi_example/TaskExample.hpp index 379817634..6d83aebd3 100644 --- a/examples/combi_example/TaskExample.hpp +++ b/examples/combi_example/TaskExample.hpp @@ -20,7 +20,7 @@ class TaskExample : public Task { */ TaskExample(const LevelVector& l, const std::vector& boundary, real coeff, LoadModel* loadModel, real dt, size_t nsteps, - std::vector p = std::vector(0), + const std::vector& p = std::vector(0), FaultCriterion* faultCrit = (new StaticFaults({0, IndexVector(0), IndexVector(0)}))) : Task(l, boundary, coeff, loadModel, faultCrit), dt_(dt), @@ -31,7 +31,7 @@ class TaskExample : public Task { dfg_(NULL) {} void init(CommunicatorType lcomm, - std::vector decomposition = std::vector()) { + const std::vector& decomposition = std::vector()) { assert(!initialized_); assert(dfg_ == NULL); @@ -108,8 +108,6 @@ class TaskExample : public Task { void run(CommunicatorType lcomm) { assert(initialized_); - int lrank = theMPISystem()->getLocalRank(); - auto elements = dfg_->getData(); // TODO if your Example uses another data structure, you need to copy // the data from elements to that data structure @@ -152,9 +150,9 @@ class TaskExample : public Task { dfg_->gatherFullGrid(fg, r); } - DistributedFullGrid& getDistributedFullGrid(int n = 0) override { return *dfg_; } + DistributedFullGrid& getDistributedFullGrid(size_t n = 0) override { return *dfg_; } - const DistributedFullGrid& getDistributedFullGrid(int n = 0) const override { return *dfg_; } + const DistributedFullGrid& getDistributedFullGrid(size_t n = 0) const override { return *dfg_; } static real myfunction(std::vector& coords, real t) { real u = std::cos(M_PI * t); @@ -183,7 +181,7 @@ class TaskExample : public Task { // new variables that are set by manager. need to be added to serialize real dt_; - size_t nsteps_; + size_t nsteps_ = 0; std::vector p_; // pure local variables that exist only on the worker processes diff --git a/examples/combi_example_faults/TaskExample.hpp b/examples/combi_example_faults/TaskExample.hpp index a98ed3ce5..852f4edf0 100644 --- a/examples/combi_example_faults/TaskExample.hpp +++ b/examples/combi_example_faults/TaskExample.hpp @@ -22,7 +22,7 @@ class TaskExample: public Task { */ TaskExample(DimType dim, const LevelVector& l, const std::vector& boundary, real coeff, LoadModel* loadModel, real dt, size_t nsteps, - std::vector p = std::vector(0), + const std::vector& p = std::vector(0), FaultCriterion* faultCrit = (new StaticFaults({0, IndexVector(0), IndexVector(0)}))) : Task(l, boundary, coeff, loadModel, faultCrit), dt_(dt), @@ -33,7 +33,7 @@ class TaskExample: public Task { combiStep_(0), dfg_(NULL) {} - void init(CommunicatorType lcomm, std::vector decomposition = std::vector()) { + void init(CommunicatorType lcomm, const std::vector& decomposition = std::vector()) { assert(!initialized_); assert(dfg_ == NULL); @@ -110,9 +110,6 @@ class TaskExample: public Task { void run(CommunicatorType lcomm) { assert(initialized_); - int globalRank = theMPISystem()->getGlobalRank(); - int lrank = theMPISystem()->getLocalRank(); - /* pseudo timestepping to demonstrate the behaviour of your typical * time-dependent simulation problem. 
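Note on the default-argument changes in these hunks: binding a default-constructed temporary to a `const&` parameter avoids copying a caller-supplied vector, and the temporary lives until the end of the full call expression. A minimal self-contained sketch of the pattern; `IndexVector` is an illustrative alias here, not the library's definition:

```cpp
#include <iostream>
#include <vector>

using IndexVector = std::vector<long long>;  // illustrative alias only

// The default argument creates an empty temporary per call; it is bound to the
// const reference and is valid for the duration of the call.
void init(const std::vector<IndexVector>& decomposition = std::vector<IndexVector>()) {
  if (decomposition.empty()) {
    std::cout << "using default decomposition\n";
  } else {
    std::cout << "using " << decomposition.size() << " given dimensions\n";
  }
}

int main() {
  init();                               // uses the default temporary
  std::vector<IndexVector> given{{0, 4}, {0, 8}};
  init(given);                          // no copy of 'given' is made
  return 0;
}
```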
*/ auto elements = dfg_->getData(); @@ -147,11 +144,12 @@ class TaskExample: public Task { dfg_->gatherFullGrid(fg, r); } - DistributedFullGrid& getDistributedFullGrid(int n) override { + DistributedFullGrid& getDistributedFullGrid(size_t n) override { return *dfg_; } - const DistributedFullGrid& getDistributedFullGrid(int n) const override{ + const DistributedFullGrid& getDistributedFullGrid(size_t n) const override{ + assert(n == 0); return *dfg_; } @@ -200,7 +198,7 @@ class TaskExample: public Task { // new variables that are set by manager. need to be added to serialize real dt_; - size_t nsteps_; + size_t nsteps_ = 0; size_t stepsTotal_; std::vector p_; diff --git a/examples/combi_workers_only/combi_example_worker_only.cpp b/examples/combi_workers_only/combi_example_worker_only.cpp index af4ce36f4..bdfc4e991 100644 --- a/examples/combi_workers_only/combi_example_worker_only.cpp +++ b/examples/combi_workers_only/combi_example_worker_only.cpp @@ -152,7 +152,7 @@ int main(int argc, char** argv) { interpolationCoords = broadcastParameters::getCoordinatesFromRankZero( interpolationCoordsFile, theMPISystem()->getWorldComm()); - if (interpolationCoords.size() != 1e5) { + if (interpolationCoords.size() != static_cast(1e5)) { sleep(1); throw std::runtime_error("not enough interpolation coordinates"); } diff --git a/examples/distributed_advection/TaskAdvection.hpp b/examples/distributed_advection/TaskAdvection.hpp index 5f955b5bd..3bd2dad31 100644 --- a/examples/distributed_advection/TaskAdvection.hpp +++ b/examples/distributed_advection/TaskAdvection.hpp @@ -33,7 +33,7 @@ class TaskAdvection : public Task { */ TaskAdvection(const LevelVector& l, const std::vector& boundary, real coeff, LoadModel* loadModel, real dt, size_t nsteps, - std::vector p = std::vector(0), + const std::vector& p = std::vector(0), FaultCriterion* faultCrit = (new StaticFaults({0, IndexVector(0), IndexVector(0)}))) : Task(l, boundary, coeff, loadModel, faultCrit), dt_(dt), @@ -42,17 +42,16 @@ class TaskAdvection : public Task { initialized_(false), stepsTotal_(0), dfg_(nullptr) { - for (const auto& b : boundary) { + for ([[maybe_unused]] const auto& b : boundary) { assert(b == 1); } } void init(CommunicatorType lcomm, - std::vector decomposition = std::vector()) override { + const std::vector& decomposition = std::vector()) override { assert(!initialized_); assert(dfg_ == NULL); - auto lrank = theMPISystem()->getLocalRank(); DimType dim = this->getDim(); const LevelVector& l = this->getLevelVector(); @@ -105,7 +104,7 @@ class TaskAdvection : public Task { // swap at the end of each time step auto& u_dot_dphi = *phi_; auto const ElementVector = dfg_->getData(); - for (unsigned int d = 0; d < this->getDim(); ++d) { + for (DimType d = 0; d < this->getDim(); ++d) { static std::vector subarrayExtents; std::vector phi_ghost{}; dfg_->exchangeGhostLayerUpward(d, subarrayExtents, phi_ghost); @@ -183,7 +182,7 @@ class TaskAdvection : public Task { dfg_->gatherFullGrid(fg, r); } - DistributedFullGrid& getDistributedFullGrid(int n = 0) override { return *dfg_; } + DistributedFullGrid& getDistributedFullGrid(size_t n = 0) override { return *dfg_; } void setZero() override { dfg_->setZero(); diff --git a/examples/distributed_third_level/TaskAdvection.hpp b/examples/distributed_third_level/TaskAdvection.hpp index fac0c1ae0..82a4c43c7 100644 --- a/examples/distributed_third_level/TaskAdvection.hpp +++ b/examples/distributed_third_level/TaskAdvection.hpp @@ -33,7 +33,7 @@ class TaskAdvection : public Task { */ TaskAdvection(const 
LevelVector& l, const std::vector& boundary, real coeff, LoadModel* loadModel, real dt, size_t nsteps, - std::vector p = std::vector(0), + const std::vector& p = std::vector(0), FaultCriterion* faultCrit = (new StaticFaults({0, IndexVector(0), IndexVector(0)}))) : Task(l, boundary, coeff, loadModel, faultCrit), dt_(dt), @@ -42,13 +42,13 @@ class TaskAdvection : public Task { initialized_(false), stepsTotal_(0), dfg_(nullptr) { - for (const auto& b : boundary) { + for ([[maybe_unused]] const auto& b : boundary) { assert(b == 1); } } void init(CommunicatorType lcomm, - std::vector decomposition = std::vector()) override { + const std::vector& decomposition = std::vector()) override { assert(!initialized_); assert(dfg_ == NULL); @@ -157,7 +157,7 @@ class TaskAdvection : public Task { } // iterate the lowest layer and update the values, compensating for the wrong update // before - assert(numLocalElements / dfg_->getLocalSizes()[d] == phi_ghost.size()); + assert(static_cast(numLocalElements) / dfg_->getLocalSizes()[d] == phi_ghost.size()); const auto& stride = fullOffsetsInThisDimension; const IndexType jump = stride * dfg_->getLocalSizes()[d]; const IndexType numberOfPolesHigherDimensions = numLocalElements / jump; @@ -235,7 +235,7 @@ class TaskAdvection : public Task { dfg_->gatherFullGrid(fg, r); } - DistributedFullGrid& getDistributedFullGrid(int n = 0) override { return *dfg_; } + DistributedFullGrid& getDistributedFullGrid(size_t n = 0) override { return *dfg_; } void setZero() override { dfg_->setZero(); diff --git a/examples/distributed_third_level/combi_example_worker_only.cpp b/examples/distributed_third_level/combi_example_worker_only.cpp index 91e6ba45b..deceae7cc 100644 --- a/examples/distributed_third_level/combi_example_worker_only.cpp +++ b/examples/distributed_third_level/combi_example_worker_only.cpp @@ -70,7 +70,7 @@ int main(int argc, char** argv) { bool evalMCError = cfg.get("application.mcerror", false); uint16_t numberOfFileParts = cfg.get("io.numberParts", 1); - theMPISystem()->initOuputGroupComm(numberOfFileParts); + theMPISystem()->initOutputGroupComm(numberOfFileParts); // read in third level parameters if available std::string thirdLevelHost, thirdLevelSSHCommand = ""; @@ -166,7 +166,7 @@ int main(int argc, char** argv) { interpolationCoords = broadcastParameters::getCoordinatesFromRankZero( interpolationCoordsFile, theMPISystem()->getWorldComm()); - if (interpolationCoords.size() != 1e5) { + if (interpolationCoords.size() != static_cast(1e5)) { sleep(1); throw std::runtime_error("not enough interpolation coordinates"); } diff --git a/examples/distributed_third_level/tools/worker_only_io_test.cpp b/examples/distributed_third_level/tools/worker_only_io_test.cpp index 303f0ab0b..1da1fdc8c 100644 --- a/examples/distributed_third_level/tools/worker_only_io_test.cpp +++ b/examples/distributed_third_level/tools/worker_only_io_test.cpp @@ -69,7 +69,7 @@ int main(int argc, char** argv) { nsteps = cfg.get("application.nsteps"); bool evalMCError = cfg.get("application.mcerror", false); uint16_t numberOfFileParts = cfg.get("io.numberParts", 1); - theMPISystem()->initOuputGroupComm(numberOfFileParts); + theMPISystem()->initOutputGroupComm(numberOfFileParts); // read in third level parameters if available std::string thirdLevelHost, thirdLevelSSHCommand = ""; @@ -86,8 +86,7 @@ int main(int argc, char** argv) { MIDDLE_PROCESS_EXCLUSIVE_SECTION std::cout << "running in file-based third level mode" << std::endl; } else { - MIDDLE_PROCESS_EXCLUSIVE_SECTION std::cout << "running in 
file-based local mode" - << std::endl; + MIDDLE_PROCESS_EXCLUSIVE_SECTION std::cout << "running in file-based local mode" << std::endl; } // periodic boundary conditions @@ -239,13 +238,12 @@ int main(int argc, char** argv) { readSparseGridFile = "dsg_" + std::to_string((systemNumber + 1) % 2) + "_step" + std::to_string(i); std::string readSparseGridFileToken = readSparseGridFile + "_token.txt"; - OUTPUT_GROUP_EXCLUSIVE_SECTION { - } + OUTPUT_GROUP_EXCLUSIVE_SECTION {} } else { readSparseGridFile = writeSparseGridFile; - OUTPUT_GROUP_EXCLUSIVE_SECTION { - worker.waitForTokenFile(writeSparseGridFileToken); + OUTPUT_GROUP_EXCLUSIVE_SECTION { + worker.waitForTokenFile(writeSparseGridFileToken); Stats::startEvent("read SG"); int numRead = worker.readReduce(readSparseGridFile, true); Stats::stopEvent("read SG"); diff --git a/examples/gene_distributed/src/GeneTask.cpp b/examples/gene_distributed/src/GeneTask.cpp index b3088dd42..d86943213 100644 --- a/examples/gene_distributed/src/GeneTask.cpp +++ b/examples/gene_distributed/src/GeneTask.cpp @@ -163,7 +163,7 @@ void GeneTask::decideToKill(){ //toDo check if combiStep should be included in t /** * This routine initializes GeneTask; currently it only sets a bool value. */ -void GeneTask::init(CommunicatorType lcomm, std::vector decomposition){ +void GeneTask::init(CommunicatorType lcomm, const std::vector& decomposition){ // if( dfg_ == NULL ){ // dfg_ = new OwningDistributedFullGrid( dim_, l_, lcomm, // this->getBoundary(), p_, false); @@ -247,7 +247,7 @@ void GeneTask::getFullGrid( FullGrid& fg, RankType lroot, /** * This routine returns the local part of the fullgrid */ -DistributedFullGrid& GeneTask::getDistributedFullGrid(int specie){ +DistributedFullGrid& GeneTask::getDistributedFullGrid(size_t specie) override { return *dfgVector_[specie]; } diff --git a/examples/gene_distributed/src/GeneTask.hpp b/examples/gene_distributed/src/GeneTask.hpp index 9d7651b43..0a33e749c 100644 --- a/examples/gene_distributed/src/GeneTask.hpp +++ b/examples/gene_distributed/src/GeneTask.hpp @@ -31,7 +31,7 @@ class GeneTask : public combigrid::Task { public: GeneTask(DimType dim, const LevelVector& l, const std::vector& boundary, real coeff, LoadModel* loadModel, std::string& path, real dt, real combitime, size_t nsteps, - real shat, real lx, int ky0_ind, std::vector p = std::vector(0), + real shat, real lx, int ky0_ind, const std::vector& p = std::vector(0), FaultCriterion* faultCrit = (new StaticFaults({0, IndexVector(0), IndexVector(0)})), IndexType numSpecies = 1, bool GENE_Global = false, bool GENE_Linear = true, size_t checkpointFrequency = 1, size_t offsetForDiagnostics = 0); @@ -57,20 +57,21 @@ class GeneTask : public combigrid::Task { */ void changeDir(CommunicatorType lcomm); - //void init(CommunicatorType lcomm); + // void init(CommunicatorType lcomm); /** * This method initializes the task * lcomm is the local communicator of the process group. 
* decomposition is the spatial decomposition of the component grid */ - void init(CommunicatorType lcomm, std::vector decomposition = std::vector()); + void init(CommunicatorType lcomm, + const std::vector& decomposition = std::vector()); /** * This method returns the decomposition of the grid of the specified species */ - std::vector getDecomposition(int species){ - return dfgVector_[species]->getDecomposition(); + std::vector getDecomposition(int species) { + return dfgVector_[species]->getDecomposition(); } /** @@ -108,7 +109,7 @@ class GeneTask : public combigrid::Task { /** * Returns the distributed full grid of the specified specie */ - DistributedFullGrid& getDistributedFullGrid(int specie); + DistributedFullGrid& getDistributedFullGrid(size_t specie); /* * Convert fg to GeneGrid and scatter over processes of pgroup. The fullgrid diff --git a/examples/gene_distributed/tools/testBoundaryZ.cpp b/examples/gene_distributed/tools/testBoundaryZ.cpp index c5554a9d4..4facbb928 100644 --- a/examples/gene_distributed/tools/testBoundaryZ.cpp +++ b/examples/gene_distributed/tools/testBoundaryZ.cpp @@ -19,7 +19,7 @@ using namespace combigrid; -void testBoundaryZ(FullGrid& fg, std::string filePrefix ){ +void testBoundaryZ(FullGrid& fg, const std::string& filePrefix ){ const double tol = 1e-15; MultiArrayRef6 fgData = createMultiArrayRef( fg ); diff --git a/examples/gene_distributed_linear/src/GeneTask.cpp b/examples/gene_distributed_linear/src/GeneTask.cpp index e055b0f06..ba83155f8 100644 --- a/examples/gene_distributed_linear/src/GeneTask.cpp +++ b/examples/gene_distributed_linear/src/GeneTask.cpp @@ -184,7 +184,7 @@ void GeneTask::decideToKill(){ //toDo check if combiStep should be included in t /** * This routine initializes GeneTask; currently it only sets a bool value. */ -void GeneTask::init(CommunicatorType lcomm, std::vector decomposition){ +void GeneTask::init(CommunicatorType lcomm, const std::vector& decomposition){ // if( dfg_ == NULL ){ // dfg_ = new DistributedFullGrid( dim_, l_, lcomm, // this->getBoundary(), p_, false); @@ -279,7 +279,7 @@ void GeneTask::getFullGrid( FullGrid& fg, RankType lroot, /** * This routine returns the local part of the fullgrid */ -DistributedFullGrid& GeneTask::getDistributedFullGrid(int specie){ +DistributedFullGrid& GeneTask::getDistributedFullGrid(size_t specie) override { return *dfgVector_[specie]; } diff --git a/examples/gene_distributed_linear/src/GeneTask.hpp b/examples/gene_distributed_linear/src/GeneTask.hpp index 54f60a770..103f01bd6 100644 --- a/examples/gene_distributed_linear/src/GeneTask.hpp +++ b/examples/gene_distributed_linear/src/GeneTask.hpp @@ -32,7 +32,7 @@ class GeneTask : public combigrid::Task { public: GeneTask(DimType dim, const LevelVector& l, const std::vector& boundary, real coeff, LoadModel* loadModel, std::string& path, real dt, real combitime, size_t nsteps, - real shat, real lx, int ky0_ind, std::vector p = std::vector(0), + real shat, real lx, int ky0_ind, const std::vector& p = std::vector(0), FaultCriterion* faultCrit = (new StaticFaults({0, IndexVector(0), IndexVector(0)})), IndexType numSpecies = 1, bool GENE_Global = false, bool GENE_Linear = true); @@ -51,20 +51,21 @@ class GeneTask : public combigrid::Task { */ void changeDir(CommunicatorType lcomm); - //void init(CommunicatorType lcomm); + // void init(CommunicatorType lcomm); /** * This method initializes the task * lcomm is the local communicator of the process group. 
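On the `getDistributedFullGrid` signatures touched here: the `override` specifier is only valid on the in-class declaration; the out-of-class definition in the `.cpp` file repeats the signature without it. A minimal sketch with simplified types (`GridBase`/`GridTask` are illustrative, not library classes):

```cpp
#include <cstddef>

struct GridBase {
  virtual double& grid(std::size_t n) = 0;
  virtual ~GridBase() = default;
};

struct GridTask : GridBase {
  // 'override' belongs here, on the in-class declaration ...
  double& grid(std::size_t n) override;
  double storage[4] = {};
};

// ... but must not be repeated on the out-of-class definition.
double& GridTask::grid(std::size_t n) { return storage[n]; }

int main() {
  GridTask t;
  t.grid(0) = 1.0;
  return 0;
}
```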
* decomposition is the spatial decomposition of the component grid */ - void init(CommunicatorType lcomm, std::vector decomposition = std::vector()); + void init(CommunicatorType lcomm, + const std::vector& decomposition = std::vector()); /** * This method returns the decomposition of the grid of the specified species */ - std::vector getDecomposition(int species){ - return dfgVector_[species]->getDecomposition(); + std::vector getDecomposition(int species) { + return dfgVector_[species]->getDecomposition(); } /** @@ -102,7 +103,7 @@ class GeneTask : public combigrid::Task { /** * Returns the distributed full grid of the specified specie */ - DistributedFullGrid& getDistributedFullGrid(int specie); + DistributedFullGrid& getDistributedFullGrid(size_t specie); /* * Convert fg to GeneGrid and scatter over processes of pgroup. The fullgrid diff --git a/examples/selalib_distributed/src/SelalibTask.hpp b/examples/selalib_distributed/src/SelalibTask.hpp index e04c0542c..d6e1bdb41 100644 --- a/examples/selalib_distributed/src/SelalibTask.hpp +++ b/examples/selalib_distributed/src/SelalibTask.hpp @@ -97,7 +97,7 @@ class SelalibTask : public combigrid::Task { // MASTER_EXCLUSIVE_SECTION{ // std::cout << "run " << *this << std::endl; // } - bool haveResolution = coeff_ == std::numeric_limits::max(); + // bool haveResolution = coeff_ == std::numeric_limits::max(); Stats::startEvent("BSL run"); if (selalibSimPointer_ == nullptr) { throw std::runtime_error("selalibSimPointer_ is null"); @@ -105,7 +105,8 @@ class SelalibTask : public combigrid::Task { sim_bsl_vp_3d3v_cart_dd_slim_run(simPtrPtr_); Stats::stopEvent("BSL run"); - int32_t* iPtr = ¤tNumTimeStepsRun_; + int32_t currentNumTimeStepsAsInt = static_cast(currentNumTimeStepsRun_); + int32_t* iPtr = ¤tNumTimeStepsAsInt; sim_bsl_vp_3d3v_cart_dd_slim_write_diagnostics(simPtrPtr_, iPtr); changeDir(lcomm, true); } @@ -134,7 +135,7 @@ class SelalibTask : public combigrid::Task { * decomposition is the spatial decomposition of the component grid */ void init(CommunicatorType lcomm, - std::vector decomposition = std::vector()) { + const std::vector& decomposition = std::vector()) { static_assert(!reverseOrderingDFGPartitions, "BSL needs this flag to be false, " "change only if the partitioning does not match between dfg and distribution"); @@ -203,7 +204,7 @@ class SelalibTask : public combigrid::Task { /** * Returns the distributed full grid of the specified specie */ - DistributedFullGrid& getDistributedFullGrid(int specie) { return *dfg_; } + DistributedFullGrid& getDistributedFullGrid(size_t specie) { return *dfg_; } /** * @return double* the pointer to the distribution (in Fortran allocated memory) @@ -225,7 +226,7 @@ class SelalibTask : public combigrid::Task { */ real getCurrentTime() const override { return currentNumTimeStepsRun_ * dt_; } - void setCurrentNumTimeStepsRun(IndexType currentNumTimeStepsRun) { + void setCurrentNumTimeStepsRun(size_t currentNumTimeStepsRun) { currentNumTimeStepsRun_ = currentNumTimeStepsRun; } @@ -237,7 +238,8 @@ class SelalibTask : public combigrid::Task { changeDir(dfg_->getCommunicator(), false); // set selalib distribution from dfg assert(diagnosticsInitialized_); - int32_t* iPtr = ¤tNumTimeStepsRun_; + int32_t currentNumTimeStepsAsInt = static_cast(currentNumTimeStepsRun_); + int32_t* iPtr = ¤tNumTimeStepsAsInt; sim_bsl_vp_3d3v_cart_dd_slim_write_diagnostics(simPtrPtr_, iPtr); changeDir(dfg_->getCommunicator(), true); } @@ -266,7 +268,7 @@ class SelalibTask : public combigrid::Task { /* * simulation time specific 
parameters */ - int32_t currentNumTimeStepsRun_; // current number of time steps already run in the simulation + size_t currentNumTimeStepsRun_; // current number of time steps already run in the simulation real dt_; /**number of time-steps in between two combinations diff --git a/examples/selalib_distributed/src/selalib_distributed.cpp b/examples/selalib_distributed/src/selalib_distributed.cpp index bb2f53be9..ffd1823ef 100644 --- a/examples/selalib_distributed/src/selalib_distributed.cpp +++ b/examples/selalib_distributed/src/selalib_distributed.cpp @@ -165,7 +165,7 @@ int main(int argc, char** argv) { std::cout << "reduceCombinationDimsLmax = " << reduceCombinationDimsLmax << std::endl; std::cout << "boundary = " << boundary << std::endl; std::cout << "hierarchization_dims = " << hierarchizationDims << std::endl; - auto numDOF = printCombiDegreesOfFreedom(combischeme.getCombiSpaces(), boundary); + [[maybe_unused]] auto numDOF = printCombiDegreesOfFreedom(combischeme.getCombiSpaces(), boundary); std::cout << "CombiScheme: " << std::endl; for (size_t i = 0; i < levels.size(); ++i) { std::cout << "\t" << levels[i] << " " << coeffs[i] << std::endl; @@ -177,11 +177,10 @@ int main(int argc, char** argv) { setCheckpointRestart(basename, levels); } else { // using a very high diagnostics interval -> write no diagnostics in the component grids - size_t veryHighNumber = 2147483647; // =2^31-1 - size_t sometimes = 100; - size_t always = 1; + [[maybe_unused]] size_t veryHighNumber = 2147483647; // =2^31-1 + [[maybe_unused]] size_t sometimes = 100; + [[maybe_unused]] size_t always = 1; // create necessary folders and files to run each task in a separate folder - std::vector taskNumbers(levels.size()); // only necessary in case of static task assignment std::iota(taskNumbers.begin(), taskNumbers.end(), 0); createTaskFolders(basename, levels, taskNumbers, p, nsteps, dt, always, nameDiagnostics); diff --git a/examples/selalib_distributed/src/selalib_distributed_workers_only.cpp b/examples/selalib_distributed/src/selalib_distributed_workers_only.cpp index 20e5d1c9d..e0847bd83 100644 --- a/examples/selalib_distributed/src/selalib_distributed_workers_only.cpp +++ b/examples/selalib_distributed/src/selalib_distributed_workers_only.cpp @@ -166,9 +166,9 @@ int main(int argc, char** argv) { setCheckpointRestart(basename, levels); } else { // using a very high diagnostics interval -> write no diagnostics in the component grids - size_t veryHighNumber = 2147483647; // =2^31-1 - size_t sometimes = 100; - size_t always = 1; + [[maybe_unused]] size_t veryHighNumber = 2147483647; // =2^31-1 + [[maybe_unused]] size_t sometimes = 100; + [[maybe_unused]] size_t always = 1; // create necessary folders and files to run each task in a separate folder createTaskFolders(basename, levels, taskNumbers, p, nsteps, dt, veryHighNumber, nameDiagnostics); diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 030936704..ffafb516d 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -158,9 +158,8 @@ target_link_libraries(discotec PUBLIC MPI::MPI_CXX) include(FetchContent) -find_package(Boost REQUIRED COMPONENTS serialization headers program_options unit_test_framework system filesystem date_time) +find_package(Boost REQUIRED COMPONENTS serialization program_options unit_test_framework system filesystem date_time) -target_include_directories(discotec PRIVATE Boost::headers) target_link_libraries(discotec PRIVATE Boost::serialization Boost::program_options Boost::system Boost::date_time) find_package(GLPK REQUIRED) diff 
--git a/src/combicom/CombiCom.hpp b/src/combicom/CombiCom.hpp index d1b02cc3f..20b475749 100644 --- a/src/combicom/CombiCom.hpp +++ b/src/combicom/CombiCom.hpp @@ -50,14 +50,14 @@ void FGAllreduce(FullGrid& fg, MPI_Comm comm) { } template -int sumAndCheckSubspaceSizes(const SparseGridType& dsg, - const std::vector& subspaceSizes) { - int bsize = 0; +size_t sumAndCheckSubspaceSizes(const SparseGridType& dsg, + const std::vector& subspaceSizes) { + size_t bsize = 0; for (size_t i = 0; i < subspaceSizes.size(); ++i) { // check for implementation errors, the reduced subspace size should not be // different from the size of already initialized subspaces bool check = (subspaceSizes[i] == 0 || dsg.getDataSize(i) == 0 || - subspaceSizes[i] == int(dsg.getDataSize(i))); + subspaceSizes[i] == dsg.getDataSize(i)); if (!check) { int rank; @@ -215,7 +215,6 @@ std::vector>& get static thread_local std::set chunkSubspaces; chunkSubspaces.clear(); - const auto subspaceItBefore = subspaceIt; SubspaceSizeType chunkDataSize = 0; // select chunk of not more than chunkSize, but at least one index auto nextAddedDataSize = dsg.getDataSize(*subspaceIt); @@ -225,7 +224,7 @@ std::vector>& get ++subspaceIt; } while (subspaceIt != siContainer.cend() && (nextAddedDataSize = dsg.getDataSize(*subspaceIt)) && - (chunkDataSize + nextAddedDataSize) < maxChunkSize); + (chunkDataSize + nextAddedDataSize) < static_cast(maxChunkSize)); subspaceIndicesChunks.push_back(std::move(chunkSubspaces)); } return subspaceIndicesChunks; @@ -255,7 +254,7 @@ getReductionDatatypes(const DistributedSparseGridUniform& dsg, arrayOfDisplacements.reserve(subspacesChunk.size()); for (const auto& ss : subspacesChunk) { arrayOfBlocklengths.push_back(dsg.getDataSize(ss)); - arrayOfDisplacements.push_back(dsg.getData(ss) - rawDataStartFirst); + arrayOfDisplacements.push_back(static_cast(dsg.getData(ss) - rawDataStartFirst)); } MPI_Datatype myIndexedDatatype; @@ -298,13 +297,14 @@ void distributedGlobalSubspaceReduce(SparseGridType& dsg, uint32_t maxMiBToSendP datatypesByStartIndex = getReductionDatatypes(dsg, commAndItsSubspaces.second, maxMiBToSendPerThread); } - + /* // // this would be best for outgroup reduce, but leads to MPI truncation // // errors if not ordered (desynchronization between MPI ranks on the same communicators // // probably) // #pragma omp parallel if (dsg.getSubspacesByCommunicator().size() == 1) default(none) \ // shared(dsg, indexedAdd, datatypesByStartIndex, commAndItsSubspaces) // #pragma omp for ordered schedule(static) + */ for (size_t datatypeIndex = 0; datatypeIndex < datatypesByStartIndex.size(); ++datatypeIndex) { // reduce for each datatype auto& subspaceStartIndex = datatypesByStartIndex[datatypeIndex].first; @@ -312,7 +312,7 @@ void distributedGlobalSubspaceReduce(SparseGridType& dsg, uint32_t maxMiBToSendP auto& datatype = datatypesByStartIndex[datatypeIndex].second; if (globalReduceRankThatCollects == MPI_PROC_NULL) { // #pragma omp ordered - auto success = MPI_Allreduce(MPI_IN_PLACE, dsg.getData(subspaceStartIndex), 1, datatype, + [[maybe_unused]] auto success = MPI_Allreduce(MPI_IN_PLACE, dsg.getData(subspaceStartIndex), 1, datatype, indexedAdd, comm); assert(success == MPI_SUCCESS); @@ -394,7 +394,7 @@ static void asyncBcastDsgData(SparseGridType& dsg, RankType root, CommunicatorTy MPI_Datatype dataType = getMPIDatatype(abstraction::getabstractionDataType()); - auto success = MPI_Ibcast(data, dataSize, dataType, root, comm, request); + [[maybe_unused]] auto success = MPI_Ibcast(data, dataSize, dataType, root, 
comm, request); assert(success == MPI_SUCCESS); } @@ -421,7 +421,7 @@ static void asyncBcastOutgroupDsgData(SparseGridType& dsg, RankType root, Commun auto& subspaceStartIndex = datatypesByStartIndex[0].first; auto& datatype = datatypesByStartIndex[0].second; - auto success = MPI_Ibcast(dsg.getData(subspaceStartIndex), 1, datatype, root, comm, request); + [[maybe_unused]] auto success = MPI_Ibcast(dsg.getData(subspaceStartIndex), 1, datatype, root, comm, request); assert(success == MPI_SUCCESS); } } @@ -451,7 +451,8 @@ void reduceSubspaceSizes(SparseGridType& dsg, CommunicatorType comm) { MPI_Datatype dtype = getMPIDatatype(abstraction::getabstractionDataType()); // perform allreduce - assert(dsg.getNumSubspaces() < static_cast(std::numeric_limits::max())); + assert(dsg.getNumSubspaces() < + static_cast(std::numeric_limits::max())); MPI_Allreduce(MPI_IN_PLACE, dsg.getSubspaceDataSizes().data(), static_cast(dsg.getNumSubspaces()), dtype, MPI_MAX, comm); // assume that the sizes changed, the buffer might be the wrong size now @@ -464,7 +465,8 @@ void broadcastSubspaceSizes(SparseGridType& dsg, CommunicatorType comm, RankType MPI_Datatype dtype = getMPIDatatype(abstraction::getabstractionDataType()); // perform broadcast - assert(dsg.getNumSubspaces() < static_cast(std::numeric_limits::max())); + assert(dsg.getNumSubspaces() < + static_cast(std::numeric_limits::max())); MPI_Bcast(dsg.getSubspaceDataSizes().data(), static_cast(dsg.getNumSubspaces()), dtype, sendingRank, comm); // assume that the sizes changed, the buffer might be the wrong size now @@ -480,7 +482,8 @@ void localMaxReduceSubspaceSizes(SparseGridType& dsg, CommunicatorType comm, Ran std::vector subspaceSizes = dsg.getSubspaceDataSizes(); // perform broadcast - assert(dsg.getNumSubspaces() < static_cast(std::numeric_limits::max())); + assert(dsg.getNumSubspaces() < + static_cast(std::numeric_limits::max())); MPI_Bcast(subspaceSizes.data(), static_cast(dsg.getNumSubspaces()), dtype, sendingRank, comm); assert(sumAndCheckSubspaceSizes(dsg, subspaceSizes)); @@ -531,7 +534,7 @@ void receiveSubspaceSizesWithScatter(SparseGridType& dsg, CommunicatorType comm, RankType collectorRank) { auto numSubspaces = static_cast(dsg.getNumSubspaces()); assert(numSubspaces > 0); - assert(numSubspaces == dsg.getSubspaceDataSizes().size()); + assert(static_cast(numSubspaces) == dsg.getSubspaceDataSizes().size()); MPI_Datatype dtype = getMPIDatatype(abstraction::getabstractionDataType()); // receive updated sizes from manager diff --git a/src/combischeme/CombiMinMaxScheme.hpp b/src/combischeme/CombiMinMaxScheme.hpp index 713d766df..8166cef68 100644 --- a/src/combischeme/CombiMinMaxScheme.hpp +++ b/src/combischeme/CombiMinMaxScheme.hpp @@ -163,7 +163,7 @@ class CombiMinMaxSchemeFromFile : public CombiMinMaxScheme { assert(lmin <= lvl); combiSpaces_.push_back(lvl); } else if (c.first == "group_no") { - processGroupNumbers_.push_back(c.second.get_value()); + processGroupNumbers_.push_back(c.second.get_value()); } else { assert(false); } @@ -176,10 +176,10 @@ class CombiMinMaxSchemeFromFile : public CombiMinMaxScheme { assert(coefficients_.size() == combiSpaces_.size()); } - inline const std::vector& getProcessGroupNumbers() const { return processGroupNumbers_; } + inline const std::vector& getProcessGroupNumbers() const { return processGroupNumbers_; } private: - std::vector processGroupNumbers_; + std::vector processGroupNumbers_; }; inline long long int getCombiDegreesOfFreedom(const LevelVector& level, @@ -300,7 +300,7 @@ inline std::vector 
getPartitionedNumDOFSG( IndexType multiplier = 1; for (const auto& d : decomposition) { decompositionOffsets.push_back(multiplier); - multiplier *= d.size(); + multiplier *= static_cast(d.size()); } auto numProcsPerGroup = multiplier; std::vector numDOF(numProcsPerGroup, 0); @@ -319,8 +319,8 @@ inline std::vector getPartitionedNumDOFSG( level_d, decomposition[d][k + 1], referenceLevel[d]); if (highestIndexOfSubspaceInThisDimensionInThisProcess != -1) { subspaceExtentsPerProcessPerDimension[d][k] = - highestIndexOfSubspaceInThisDimensionInThisProcess - - numIndicesProcessedOnPoleMinusOne; + static_cast(highestIndexOfSubspaceInThisDimensionInThisProcess - + numIndicesProcessedOnPoleMinusOne); numIndicesProcessedOnPoleMinusOne = highestIndexOfSubspaceInThisDimensionInThisProcess; } else { subspaceExtentsPerProcessPerDimension[d][k] = 0; @@ -329,10 +329,10 @@ inline std::vector getPartitionedNumDOFSG( // the rest belongs to the last partition subspaceExtentsPerProcessPerDimension[d][subspaceExtentsPerProcessPerDimension[d].size() - 1] = - powerOfTwo[level_d - 1] - numIndicesProcessedOnPoleMinusOne - 1; + powerOfTwo[level_d - 1] - static_cast(numIndicesProcessedOnPoleMinusOne) - 1; if (level_d == 1) { subspaceExtentsPerProcessPerDimension[d][subspaceExtentsPerProcessPerDimension[d].size() - - 1] = 3 - numIndicesProcessedOnPoleMinusOne - 1; + 1] = static_cast(3 - numIndicesProcessedOnPoleMinusOne - 1); assert(std::accumulate(subspaceExtentsPerProcessPerDimension[d].begin(), subspaceExtentsPerProcessPerDimension[d].end(), 0) == 3); } else { @@ -350,8 +350,8 @@ inline std::vector getPartitionedNumDOFSG( size_t numDOFtoAdd = 1; // iterate the vector index entries belonging to linear index i auto tmp = i; - for (int d = dim - 1; d >= 0; --d) { - assert(d < subspaceExtentsPerProcessPerDimension.size()); + for (IndexType d = dim - 1; d >= 0; --d) { + assert(static_cast(d) < subspaceExtentsPerProcessPerDimension.size()); auto decompositionIndexInDimD = tmp / decompositionOffsets[d]; assert(decompositionIndexInDimD < subspaceExtentsPerProcessPerDimension[d].size()); numDOFtoAdd *= subspaceExtentsPerProcessPerDimension[d][decompositionIndexInDimD]; @@ -468,7 +468,7 @@ inline size_t getAssignedLevels(const CombiMinMaxSchemeFromFile& combischeme, throw std::runtime_error( "CombiSchemeFromFile::getAssignedLevels : inconsistent scheme from file"); } - const auto [itMin, itMax] = std::minmax_element(pgNumbers.begin(), pgNumbers.end()); + [[maybe_unused]] const auto [itMin, itMax] = std::minmax_element(pgNumbers.begin(), pgNumbers.end()); assert(*itMin == 0); // make sure it starts with 0 // filter out only those tasks that belong to "our" process group for (size_t taskNo = 0; taskNo < pgNumbers.size(); ++taskNo) { @@ -534,7 +534,7 @@ inline size_t getLoadBalancedLevels(const CombiMinMaxScheme& combischeme, // find the group with the lowest load RankType minGroup = 0; long long minLoad = std::numeric_limits::max(); - for (RankType group = 0; group < totalNumGroups; ++group) { + for (RankType group = 0; group < static_cast(totalNumGroups); ++group) { if (loadAssignedToGroup[group] < minLoad) { minGroup = group; minLoad = loadAssignedToGroup[group]; diff --git a/src/combischeme/CombiThirdLevelScheme.cpp b/src/combischeme/CombiThirdLevelScheme.cpp index 9e66d86dc..724a21776 100644 --- a/src/combischeme/CombiThirdLevelScheme.cpp +++ b/src/combischeme/CombiThirdLevelScheme.cpp @@ -34,7 +34,7 @@ void CombiThirdLevelScheme::decomposeScheme(const std::vector& full std::vector>& decomposedCoeffs, size_t numSystems, 
const std::vector& fractionsOfScheme) { - auto fracSum = + [[maybe_unused]] auto fracSum = std::accumulate(fractionsOfScheme.begin(), fractionsOfScheme.end(), 0., std::plus()); assert(std::abs(fracSum - 1.) < 1e-3); assert(fractionsOfScheme.size() == numSystems); @@ -56,7 +56,7 @@ void CombiThirdLevelScheme::decomposeScheme(const std::vector& full } // assert that all are assigned - assert(std::accumulate(decomposedScheme.begin(), decomposedScheme.end(), 0, + assert(std::accumulate(decomposedScheme.begin(), decomposedScheme.end(), static_cast(0), [](size_t a, const std::vector& l) { return a + l.size(); }) == fullScheme.size()); diff --git a/src/fault_tolerance/FTUtils.cpp b/src/fault_tolerance/FTUtils.cpp index 0f65d940d..32226242f 100644 --- a/src/fault_tolerance/FTUtils.cpp +++ b/src/fault_tolerance/FTUtils.cpp @@ -116,7 +116,7 @@ matrix get_inv_M(const CombigridDict& aux_downset, const int& dim) { return M_inv; } -CombigridDict set_entire_downset_dict(const LevelVectorList levels, +CombigridDict set_entire_downset_dict(const LevelVectorList& levels, const CombigridDict& received_dict, const int& dim) { LevelVector level_min = levels.front(); LevelVector level_max = levels.back(); diff --git a/src/fault_tolerance/FTUtils.hpp b/src/fault_tolerance/FTUtils.hpp index 7d73129e9..fd6c5be86 100644 --- a/src/fault_tolerance/FTUtils.hpp +++ b/src/fault_tolerance/FTUtils.hpp @@ -29,7 +29,7 @@ CombigridDict get_python_data(const std::string& script_run, const int& dim); matrix get_inv_M(const CombigridDict& aux_downset, const int& dim); /* used to create the inverse of M matrix for the interpolation based problem */ -CombigridDict set_entire_downset_dict(const LevelVectorList levels, +CombigridDict set_entire_downset_dict(const LevelVectorList& levels, const CombigridDict& received_dict, const int& dim); /* used to get a vector of the entire downset indices */ diff --git a/src/fault_tolerance/StaticFaults.cpp b/src/fault_tolerance/StaticFaults.cpp index f5f0db20b..61b775479 100644 --- a/src/fault_tolerance/StaticFaults.cpp +++ b/src/fault_tolerance/StaticFaults.cpp @@ -14,7 +14,7 @@ bool StaticFaults::failNow(int ncombi, real t_iter, int globalRank) { std::vector::iterator it; it = std::find(iF.begin(), iF.end(), ncombi); - IndexType idx = std::distance(iF.begin(), it); + auto idx = std::distance(iF.begin(), it); // std::cout << "faultInfo" << iF[0] << " " << rF[0] << "\n"; // Check if current iteration is in iterationFaults_ while (it != iF.end()) { diff --git a/src/fullgrid/DistributedFullGrid.hpp b/src/fullgrid/DistributedFullGrid.hpp index 590a556e5..978f04130 100644 --- a/src/fullgrid/DistributedFullGrid.hpp +++ b/src/fullgrid/DistributedFullGrid.hpp @@ -8,6 +8,7 @@ #include #include #include +#include #include "fullgrid/FullGrid.hpp" #include "fullgrid/Tensor.hpp" @@ -16,6 +17,7 @@ #include "mpi/MPITags.hpp" #include "sparsegrid/AnyDistributedSparseGrid.hpp" #include "sparsegrid/DistributedSparseGridUniform.hpp" +#include "utils/DecompositionUtils.hpp" #include "utils/IndexVector.hpp" #include "utils/LevelSetUtils.hpp" #include "utils/LevelVector.hpp" @@ -25,45 +27,24 @@ namespace combigrid { -/* a regular (equidistant) domain decompositioning for an even number of processes - * leads to grid points on the (geometrical) process boundaries. - * with the forwardDecomposition flag it can be decided if the grid points on - * the process boundaries belong to the process on the right-hand side (true) - * of the process boundary, or to the one on the left-hand side (false). 
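The `std::accumulate` init value in the hunk above determines the accumulator's type, so summing container sizes with a plain `0` would carry the sum in an `int` and convert back from `std::size_t` on every step. A short, self-contained illustration with made-up data:

```cpp
#include <cstddef>
#include <iostream>
#include <numeric>
#include <vector>

int main() {
  std::vector<std::vector<int>> scheme{{1, 2}, {3}, {4, 5, 6}};

  // The init argument fixes the accumulator type: casting it to std::size_t
  // keeps the whole sum unsigned and avoids narrowing on each step.
  auto total = std::accumulate(
      scheme.begin(), scheme.end(), static_cast<std::size_t>(0),
      [](std::size_t a, const std::vector<int>& l) { return a + l.size(); });

  std::cout << "total levels: " << total << '\n';  // prints 6
  return total == 6 ? 0 : 1;
}
```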
- */ -static inline std::vector getDefaultDecomposition( - const IndexVector& globalNumPointsPerDimension, - const std::vector& cartesianProcsPerDimension, bool forwardDecomposition) { - auto dim = static_cast(globalNumPointsPerDimension.size()); - assert(cartesianProcsPerDimension.size() == dim); - - // create decomposition vectors - std::vector decomposition(dim); - for (DimType i = 0; i < dim; ++i) { - if (cartesianProcsPerDimension[i] > globalNumPointsPerDimension[i]) { - throw std::invalid_argument( - "change to coarser parallelization! currently not all processes can have points."); - } - IndexVector& llbnd = decomposition[i]; - llbnd.resize(cartesianProcsPerDimension[i]); - - for (int j = 0; j < cartesianProcsPerDimension[i]; ++j) { - double tmp = static_cast(globalNumPointsPerDimension[i]) * static_cast(j) / - static_cast(cartesianProcsPerDimension[i]); - - if (forwardDecomposition) - llbnd[j] = static_cast(std::ceil(tmp)); - else - llbnd[j] = static_cast(std::floor(tmp)); - } - } - return decomposition; -} - /** * @brief DistributedFullGrid : a (non-owning) indexed full grid data structure - * - * @tparam FG_ELEMENT + * + * @tparam FG_ELEMENT the data type to be stored on the grid + * @param dim the dimensionality of the full grid (may become a template parameter in future + * versions) + * @param levels the level vector describing the full grid + * @param comm the Cartesian communicator to be used for the distributed full grid (will not be + * duplicated; ownership is not transferred) + * @param hasBdrPoints a vector of flags to show if the dimension has boundary points (0 for no + * points, 1 for one-sided boundary, 2 for both sides) + * @param dataPointer a pointer to the beginning of the data array + * @param procs a vector of the number of processes in each dimension (must match the decomposition + * in comm) + * @param forwardDecomposition a flag to decide if the middle grid points on the process boundaries + * belong to the process on the right-hand side (true) of the process boundary, or to the one on the + * left-hand side (false). 
+ * @param decomposition a vector of the lower bounds of the grid points in each dimension */ template class DistributedFullGrid { @@ -72,374 +53,83 @@ class DistributedFullGrid { DistributedFullGrid(DimType dim, const LevelVector& levels, CommunicatorType const& comm, const std::vector& hasBdrPoints, FG_ELEMENT* dataPointer, const std::vector& procs, bool forwardDecomposition = true, - const std::vector& decomposition = std::vector()) - : dim_(dim), levels_(levels), hasBoundaryPoints_(hasBdrPoints) { - assert(levels_.size() == dim); - assert(hasBoundaryPoints_.size() == dim); - assert(procs.size() == dim); - - InitMPI(comm, procs); - - IndexVector nrPoints(dim_); - gridSpacing_.resize(dim_); - - // set global num of elements and offsets - IndexType nrElements = 1; - for (DimType j = 0; j < dim_; j++) { - nrPoints[j] = combigrid::getNumDofNodal(levels_[j], hasBoundaryPoints_[j]); - nrElements = nrElements * nrPoints[j]; - if (hasBoundaryPoints_[j] == 1) { - assert(!decomposition.empty() || !forwardDecomposition); - } - assert(levels_[j] < 30); - gridSpacing_[j] = oneOverPowOfTwo[levels_[j]]; - } - - if (decomposition.size() == 0) { - setDecomposition(getDefaultDecomposition( - nrPoints, this->getCartesianUtils().getCartesianDimensions(), forwardDecomposition)); - } else { - setDecomposition(decomposition); - } - globalIndexer_ = TensorIndexer(std::move(nrPoints)); - myPartitionsLowerBounds_ = getLowerBounds(this->getRank()); - myPartitionsFirstGlobalIndex_ = globalIndexer_.sequentialIndex(myPartitionsLowerBounds_); - myPartitionsUpperBounds_ = getUpperBounds(this->getRank()); - - // set local elements and local offsets - auto nrLocalPoints = getUpperBounds() - getLowerBounds(); - localTensor_ = Tensor(dataPointer, std::move(nrLocalPoints)); - } + const std::vector& decomposition = std::vector()); DistributedFullGrid(const DistributedFullGrid& other) = delete; DistributedFullGrid& operator=(const DistributedFullGrid&) = delete; DistributedFullGrid(DistributedFullGrid&& other) = default; DistributedFullGrid& operator=(DistributedFullGrid&& other) = default; - virtual ~DistributedFullGrid() { - for (size_t i = 0; i < upwardSubarrays_.size(); ++i) { - MPI_Type_free(&upwardSubarrays_[i]); - } - for (size_t i = 0; i < downwardSubarrays_.size(); ++i) { - MPI_Type_free(&downwardSubarrays_[i]); - } - } - -inline double getPointDistanceToCoordinate(IndexType oneDimensionalLocalIndex, double coord, - DimType d) const { - const auto& h = getGridSpacing(); - auto coordDistance = - static_cast(this->getLowerBounds()[d] + (hasBoundaryPoints_[d] > 0 ? 
0 : 1) + - oneDimensionalLocalIndex) * - h[d] - - coord; -#ifndef NDEBUG - if (std::abs(coordDistance) > h[d]) { - std::cout << "assert bounds " << coordDistance << coord << h << static_cast(d) - << oneDimensionalLocalIndex << std::endl; - assert(false && - "should only be called for coordinates within the support of this point's basis " - "function"); - } -#endif // ndef NDEBUG - return std::abs(coordDistance); -} + virtual ~DistributedFullGrid(); -inline FG_ELEMENT evalIndexAndAllUpperNeighbors(const IndexVector& localIndex, - const std::vector& coords) const { - FG_ELEMENT result = 0.; - auto localLinearIndex = this->getLocalLinearIndex(localIndex); - for (size_t localIndexIterate = 0; - localIndexIterate < combigrid::powerOfTwoByBitshift(this->getDimension()); - ++localIndexIterate) { -#ifndef NDEBUG - auto neighborVectorIndex = localIndex; -#endif - auto neighborIndex = localLinearIndex; - real phi_c = 1.; // value of product of iterate's basis function on coords - for (DimType d = 0; d < dim_; ++d) { - const auto lastIndexInDim = this->getLocalSizes()[d] - 1; - bool isUpperInDim = std::bitset(localIndexIterate).test(d); - auto iterateIndexInThisDimension = localIndex[d] + isUpperInDim; - if (iterateIndexInThisDimension < 0) { - phi_c = 0.; - break; - } else if (isUpperInDim && this->hasBoundaryPoints_[d] == 1 && - // this->getCartesianUtils().isOnLowerBoundaryInDimension(d) && - iterateIndexInThisDimension == this->getGlobalSizes()[d]) { - // if we have a wrap-around AND are on the lowest cartesian process in this dimension - // we need to set the index in this dim to 0 and shift the coordinate by 1. - neighborIndex -= localIndex[d] * this->getLocalOffsets()[d]; - iterateIndexInThisDimension = 0; - auto coordDistance = - getPointDistanceToCoordinate(iterateIndexInThisDimension, coords[d] - 1.0, d); - auto oneOverHinD = powerOfTwo[levels_[d]]; - phi_c *= 1. - coordDistance * oneOverHinD; - } else if (iterateIndexInThisDimension > lastIndexInDim) { - phi_c = 0.; - break; - } else { - auto coordDistance = - getPointDistanceToCoordinate(iterateIndexInThisDimension, coords[d], d); - auto oneOverHinD = powerOfTwo[levels_[d]]; - phi_c *= 1. - coordDistance * oneOverHinD; - neighborIndex += isUpperInDim * this->getLocalOffsets()[d]; - } -#ifndef NDEBUG - neighborVectorIndex[d] = iterateIndexInThisDimension; -#endif - } - if (phi_c > 0.) 
{ -#ifndef NDEBUG - auto unlinearizedNeighborIndex = neighborVectorIndex; - this->getLocalVectorIndex(neighborIndex, unlinearizedNeighborIndex); - if (unlinearizedNeighborIndex != neighborVectorIndex) { - std::cerr << "expected " << unlinearizedNeighborIndex << " got " << neighborVectorIndex - << std::endl; - } - if (neighborIndex < 0) { - std::cerr << "expected " << unlinearizedNeighborIndex << " got " << neighborVectorIndex - << " or " << neighborIndex << std::endl; - } -#endif - assert(neighborIndex > -1); - assert(neighborIndex < this->getNrLocalElements()); - result += phi_c * this->getData()[neighborIndex]; - } - } - return result; -} + inline double getPointDistanceToCoordinate(IndexType oneDimensionalLocalIndex, double coord, + DimType d) const; -/** evaluates the full grid on the specified coordinates - * @param coords ND coordinates on the unit square [0,1]^D*/ -FG_ELEMENT evalLocal(const std::vector& coords) const { - FG_ELEMENT value; - evalLocal(coords, value); - return value; -} + inline FG_ELEMENT evalIndexAndAllUpperNeighbors(const IndexVector& localIndex, + const std::vector& coords) const; -void evalLocal(const std::vector& coords, FG_ELEMENT& value) const { - assert(coords.size() == this->getDimension()); - // get the lowest-index point of the points - // whose basis functions contribute to the interpolated value - const auto& h = getGridSpacing(); - static thread_local IndexVector localIndexLowerNonzeroNeighborPoint; - localIndexLowerNonzeroNeighborPoint.resize(dim_); - for (DimType d = 0; d < dim_; ++d) { -#ifndef NDEBUG - if (coords[d] < 0. || coords[d] > 1.) { - std::cout << "coords " << coords << " out of bounds" << std::endl; - } - assert(coords[d] >= 0. && coords[d] <= 1.); -#endif // ndef NDEBUG - // this is the local index of the point that is lower than the coordinate - // may also be negative if the coordinate is lower than this processes' coordinates - IndexType localIndexLowerNonzeroNeighborIndexInThisDimension = static_cast( - std::floor((coords[d] - this->getLowerBoundsCoord(d)) * this->getInverseGridSpacingIn(d))); + /** evaluates the full grid on the specified coordinates + * @param coords ND coordinates on the unit square [0,1]^D*/ + FG_ELEMENT evalLocal(const std::vector& coords) const; - // check if we even need to evaluate on this process - if (localIndexLowerNonzeroNeighborIndexInThisDimension < -1) { - // index too small - value = 0.; - return; - } else if ((coords[d] >= 1.0 - h[d]) && this->hasBoundaryPoints_[d] == 1 && - this->getCartesianUtils().isOnLowerBoundaryInDimension(d)) { - // if we have periodic boundary and this process is at the lower end of the dimension d - // we need the periodic coordinate => don't return 0 - } else if (localIndexLowerNonzeroNeighborIndexInThisDimension > this->getLocalSizes()[d] - 1) { - // index too high - value = 0.; - return; - } - localIndexLowerNonzeroNeighborPoint[d] = localIndexLowerNonzeroNeighborIndexInThisDimension; - } - // evaluate at those points and sum up according to the basis function - value = evalIndexAndAllUpperNeighbors(localIndexLowerNonzeroNeighborPoint, coords); -} + void evalLocal(const std::vector& coords, FG_ELEMENT& value) const; -/** evaluates the full grid on the specified coordinates - * @param interpolationCoords vector of ND coordinates on the unit square [0,1]^D*/ -std::vector getInterpolatedValues( - const std::vector>& interpolationCoords) const { - auto numValues = interpolationCoords.size(); - std::vector values; - values.resize(numValues); -#pragma omp parallel for 
default(none) firstprivate(numValues) shared(values, interpolationCoords) \ - schedule(static) - for (size_t i = 0; i < numValues; ++i) { - this->evalLocal(interpolationCoords[i], values[i]); - } - MPI_Allreduce(MPI_IN_PLACE, values.data(), static_cast(numValues), this->getMPIDatatype(), - MPI_SUM, this->getCommunicator()); - return values; -} + /** evaluates the full grid on the specified coordinates + * @param interpolationCoords vector of ND coordinates on the unit square [0,1]^D*/ + std::vector getInterpolatedValues( + const std::vector>& interpolationCoords) const; /** return the coordinates on the unit square corresponding to global idx * @param globalIndex [IN] global linear index of the element i * @param coords [OUT] the vector must be resized already */ - inline void getCoordsGlobal(IndexType globalLinearIndex, std::vector& coords) const { - IndexType ind = 0; - IndexType tmp_add = 0; - - coords.resize(dim_); - - for (DimType j = 0; j < dim_; j++) { - ind = globalLinearIndex % this->getGlobalSizes()[j]; - globalLinearIndex = globalLinearIndex / this->getGlobalSizes()[j]; - // set the coordinate based on if we have boundary points - tmp_add = (hasBoundaryPoints_[j] > 0) ? (0) : (1); - coords[j] = static_cast(ind + tmp_add) * getGridSpacing()[j]; - } - } + inline void getCoordsGlobal(IndexType globalLinearIndex, std::vector& coords) const; /** return coordinates on the unit square corresponding to local idx * @param globalIndex [IN] local linear index of the element i * @param coords [OUT] the vector must be resized already */ - inline void getCoordsLocal(IndexType localLinearIndex, std::vector& coords) const { - // todo: probably very inefficient implementation, if crucial for - // performance implement more direct way of computing the coordinates - assert(localLinearIndex < getNrLocalElements()); - - IndexType globalLinearIndex = getGlobalLinearIndex(localLinearIndex); - - getCoordsGlobal(globalLinearIndex, coords); - } + inline void getCoordsLocal(IndexType localLinearIndex, std::vector& coords) const; /** returns the LI (level,index) notation for a given element in the full grid * @param elementIndex [IN] the linear index of the element * @param levels [OUT] the levels of the point in the LI notation * @param indexes [OUT] the indexes of the point in the LI notation */ - inline void getGlobalLI(IndexType elementIndex, LevelVector& levels, IndexVector& indexes) const { - IndexType startindex, tmp_val; - - assert(elementIndex < this->getNrElements()); - levels.resize(dim_); - indexes.resize(dim_); - - tmp_val = elementIndex; - - // first calculate intermediary indexes - for (DimType k = 0; k < dim_; k++) { - startindex = (hasBoundaryPoints_[k] > 0) ? 0 : 1; - indexes[k] = tmp_val % this->getGlobalSizes()[k] + startindex; - tmp_val = tmp_val / this->getGlobalSizes()[k]; - } - - // The level and index of the element in the hashgridstorage are computed dividing by two the - // index and level in the fullgrid - // until we obtain an impair number for the index, thus obtaining the level and index in the - // hierarchical basis (Aliz Nagy) - // ... 
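The `getGlobalLI` routine above maps a nodal index to hierarchical (level, index) notation by halving even indices and decrementing the level. A standalone one-dimensional sketch of that rule (boundary handling omitted, names illustrative):

```cpp
#include <iostream>
#include <utility>

// Nodal index i on level l -> hierarchical (level, index): strip factors of 2.
// Index 0 (the boundary point) is assigned level 0 by convention.
std::pair<int, long long> toHierarchical(int level, long long nodalIndex) {
  if (nodalIndex == 0) return {0, 0};
  while (nodalIndex % 2 == 0) {
    nodalIndex /= 2;
    --level;
  }
  return {level, nodalIndex};
}

int main() {
  // On level 3 (nodal indices 0..8): index 4 is the level-1 midpoint,
  // index 6 is the level-2 point with hierarchical index 3.
  auto [l4, i4] = toHierarchical(3, 4);
  auto [l6, i6] = toHierarchical(3, 6);
  std::cout << l4 << ' ' << i4 << '\n';  // 1 1
  std::cout << l6 << ' ' << i6 << '\n';  // 2 3
  return 0;
}
```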
- for (DimType k = 0; k < dim_; k++) { - tmp_val = levels_[k]; - - if (indexes[k] != 0) { - // todo: these operations can be optimized - while (indexes[k] % 2 == 0) { - indexes[k] = indexes[k] / 2; - tmp_val--; - } - } else { - tmp_val = 0; - } - - levels[k] = tmp_val; - } - } + inline void getGlobalLI(IndexType elementIndex, LevelVector& levels, IndexVector& indexes) const; /** the global vector index corresponding to a global linear index * @param linIndex [IN] the linear index * @param axisIndex [OUT] the returned vector index */ - inline void getGlobalVectorIndex(IndexType globLinIndex, IndexVector& globAxisIndex) const { - assert(globLinIndex < this->getNrElements()); - assert(globAxisIndex.size() == dim_); - - globAxisIndex = this->globalIndexer_.getVectorIndex(globLinIndex); - } + inline void getGlobalVectorIndex(IndexType globLinIndex, IndexVector& globAxisIndex) const; /** the global vector index corresponding to a local vector index */ inline void getGlobalVectorIndex(const IndexVector& locAxisIndex, - IndexVector& globAxisIndex) const { - assert(locAxisIndex.size() == dim_); - - globAxisIndex = this->getLowerBounds() + locAxisIndex; - } + IndexVector& globAxisIndex) const; /** the local vector index corresponding to a local linear index */ - inline void getLocalVectorIndex(IndexType locLinIndex, IndexVector& locAxisIndex) const { - locAxisIndex = this->localTensor_.getVectorIndex(locLinIndex); - } + inline void getLocalVectorIndex(IndexType locLinIndex, IndexVector& locAxisIndex) const; /** the local vector index corresponding to a global vector index if global index vector not contained in local domain false will be returned */ inline bool getLocalVectorIndex(const IndexVector& globAxisIndex, - IndexVector& locAxisIndex) const { - assert(globAxisIndex.size() == dim_); - - if (this->isGlobalIndexHere(globAxisIndex)) { - locAxisIndex.assign(globAxisIndex.begin(), globAxisIndex.end()); - std::transform(locAxisIndex.begin(), locAxisIndex.end(), this->getLowerBounds().begin(), - locAxisIndex.begin(), std::minus()); - return true; - } else { - return false; - } - } + IndexVector& locAxisIndex) const; /** returns the global linear index corresponding to the global index vector * @param axisIndex [IN] the vector index */ - inline IndexType getGlobalLinearIndex(const IndexVector& globAxisIndex) const { - return globalIndexer_.sequentialIndex(globAxisIndex); - } + inline IndexType getGlobalLinearIndex(const IndexVector& globAxisIndex) const; // the global linear index corresponding to the local linear index - inline IndexType getGlobalLinearIndex(IndexType locLinIndex) const { - assert(locLinIndex < this->getNrLocalElements()); - - // convert to local vector index - static thread_local IndexVector locAxisIndex(dim_); - locAxisIndex.resize(dim_); - getLocalVectorIndex(locLinIndex, locAxisIndex); - - // convert to global linear index - IndexType globLinIndex = - this->myPartitionsFirstGlobalIndex_ + this->getGlobalLinearIndex(locAxisIndex); - assert(globLinIndex < this->getNrElements()); - return globLinIndex; - } + inline IndexType getGlobalLinearIndex(IndexType locLinIndex) const; /** the local linear index corresponding to the local index vector */ - inline IndexType getLocalLinearIndex(const IndexVector& locAxisIndex) const { - return localTensor_.sequentialIndex(locAxisIndex); - } + inline IndexType getLocalLinearIndex(const IndexVector& locAxisIndex) const; /** the global linear index corresponding to the local linear index returns negative value if element not inside local partition 
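The index-conversion helpers above all reduce to row-major linearisation plus the partition's lower-bound offset (the class additionally caches the first global index of the partition). A simplified sketch of that bookkeeping, not the library's exact implementation:

```cpp
#include <cstddef>
#include <iostream>
#include <vector>

using Index = long long;

// Linearisation with dimension 0 as the fastest-running index.
Index linearize(const std::vector<Index>& sizes, const std::vector<Index>& idx) {
  Index lin = 0;
  Index stride = 1;
  for (std::size_t d = 0; d < sizes.size(); ++d) {
    lin += idx[d] * stride;
    stride *= sizes[d];
  }
  return lin;
}

int main() {
  // A 9x9 global grid; this partition starts at global index (0, 4).
  std::vector<Index> globalSizes{9, 9};
  std::vector<Index> lowerBounds{0, 4};

  std::vector<Index> localIdx{2, 1};  // local vector index on this partition
  std::vector<Index> globalIdx{lowerBounds[0] + localIdx[0],
                               lowerBounds[1] + localIdx[1]};

  std::cout << "global linear index: " << linearize(globalSizes, globalIdx) << '\n';
  return 0;
}
```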
*/ - inline IndexType getLocalLinearIndex(IndexType globLinIndex) const { - assert(globLinIndex < this->getNrElements()); + inline IndexType getLocalLinearIndex(IndexType globLinIndex) const; - // convert to global vector index - static thread_local IndexVector globAxisIndex(dim_); - globAxisIndex.resize(dim_); - getGlobalVectorIndex(globLinIndex, globAxisIndex); + inline bool isGlobalIndexHere(IndexVector globalVectorIndex) const; - if (this->isGlobalIndexHere(globAxisIndex)) { - // convert to local linear index - return getLocalLinearIndex(globAxisIndex) - this->myPartitionsFirstGlobalIndex_; - } else { - return -1; - } - } - - inline bool isGlobalIndexHere(IndexVector globalVectorIndex) const { - return (globalVectorIndex >= this->getLowerBounds()) && - (globalVectorIndex < this->getUpperBounds()); - } - - inline bool isGlobalIndexHere(IndexType globLinIndex) const { - return isGlobalIndexHere(this->globalIndexer_.getVectorIndex(globLinIndex)); - } + inline bool isGlobalIndexHere(IndexType globLinIndex) const; /** returns the dimension of the full grid */ inline DimType getDimension() const { return dim_; } @@ -451,21 +141,12 @@ std::vector getInterpolatedValues( /** return the offset in the full grid vector of the dimension */ inline IndexType getOffset(DimType i) const { return this->getOffsets()[i]; } - inline const IndexVector& getOffsets() const { - return globalIndexer_.getOffsetsVector(); - } - - inline const IndexVector& getLocalOffsets() const { - return localTensor_.getOffsetsVector(); - } + inline const IndexVector& getOffsets() const { return globalIndexer_.getOffsetsVector(); } - inline std::tuple,std::vector,std::vector> getSizesSubsizesStartsOfSubtensor() const { - std::vector csizes(this->getGlobalSizes().begin(), this->getGlobalSizes().end()); - std::vector csubsizes(this->getLocalSizes().begin(), this->getLocalSizes().end()); - std::vector cstarts(this->getLowerBounds().begin(), this->getLowerBounds().end()); + inline const IndexVector& getLocalOffsets() const { return localTensor_.getOffsetsVector(); } - return std::make_tuple(csizes, csubsizes, cstarts); - } + inline std::tuple, std::vector, std::vector> + getSizesSubsizesStartsOfSubtensor() const; /** return the level vector */ inline const LevelVector& getLevels() const { return levels_; } @@ -473,54 +154,25 @@ std::vector getInterpolatedValues( /** returns the grid spacing (sometimes called h) */ inline const std::vector& getGridSpacing() const { return gridSpacing_; } - inline double getInverseGridSpacingIn(DimType inDimension) const { - return powerOfTwo[levels_[inDimension]]; - } + inline double getInverseGridSpacingIn(DimType inDimension) const; - inline std::vector getInverseGridSpacing() const { - std::vector oneOverH; - oneOverH.resize(gridSpacing_.size()); - // should be the same as the number of intervals (N) - for (DimType j = 0; j < dim_; j++) { - oneOverH[j] = powerOfTwo[levels_[j]]; - } -#ifndef NDEBUG - std::vector oneOverHByDivision; - oneOverHByDivision.resize(gridSpacing_.size()); - std::transform(gridSpacing_.begin(), gridSpacing_.end(), oneOverHByDivision.begin(), - std::bind(std::divides(), 1, std::placeholders::_1)); - for (DimType j = 0; j < dim_; j++) { - assert(std::abs(oneOverHByDivision[j] - oneOverH[j]) < 1e-10); - assert(oneOverHByDivision[j] == oneOverH[j]); - } -#endif - return oneOverH; - } + inline std::vector getInverseGridSpacing() const; - double getInnerNodalBasisFunctionIntegral() const { - const auto& h = this->getGridSpacing(); - return std::accumulate(h.begin(), h.end(), 1., 
std::multiplies()); - } + double getInnerNodalBasisFunctionIntegral() const; /** returns the number of elements in the entire, global full grid */ - inline IndexType getNrElements() const { - return globalIndexer_.size(); - } + inline IndexType getNrElements() const { return static_cast(globalIndexer_.size()); } /** number of elements in the local partition */ - inline IndexType getNrLocalElements() const { - return localTensor_.size(); - } + inline IndexType getNrLocalElements() const { return static_cast(localTensor_.size()); } /** number of points per dimension i */ - inline IndexType length(DimType i) const { return this->getGlobalSizes()[i]; } + inline IndexType globalNumPointsInDimension(DimType i) const { return this->getGlobalSizes()[i]; } /** vector of flags to show if the dimension has boundary points*/ inline const std::vector& returnBoundaryFlags() const { return hasBoundaryPoints_; } - inline void setZero() { - std::memset(this->getData(), 0, this->getNrLocalElements() * sizeof(FG_ELEMENT)); - } + inline void setZero(); inline FG_ELEMENT* getData() { return localTensor_.getData(); } @@ -534,109 +186,34 @@ std::vector getInterpolatedValues( inline int getCommunicatorSize() const { return this->getCartesianUtils().getCommunicatorSize(); } /** lower Bounds of this process */ - inline const IndexVector& getLowerBounds() const { - // return getLowerBounds(this->getRank()); - return myPartitionsLowerBounds_; - } + inline const IndexVector& getLowerBounds() const; /** lower bounds of rank r */ - inline IndexVector getLowerBounds(RankType r) const { - assert(r >= 0 && r < this->getCommunicatorSize()); - // get coords of r in cart comm - IndexVector lowerBounds(dim_); - const auto& coords = cartesianUtils_.getPartitionCoordsOfRank(r); - - for (DimType i = 0; i < dim_; ++i) { - lowerBounds[i] = getDecomposition()[i][coords[i]]; - } - return lowerBounds; - } + inline IndexVector getLowerBounds(RankType r) const; /** coordinates of this process' lower bounds in some dimension*/ - inline real getLowerBoundsCoord(DimType inDimension) const { - return static_cast(this->getLowerBounds()[inDimension] + - (hasBoundaryPoints_[inDimension] > 0 ? 
0 : 1)) * - this->getGridSpacing()[inDimension]; - } + inline real getLowerBoundsCoord(DimType inDimension) const; /** upper Bounds of this process */ - inline const IndexVector& getUpperBounds() const { - // return getUpperBounds(this->getRank()); - return myPartitionsUpperBounds_; - } + inline const IndexVector& getUpperBounds() const; /** upper bounds of rank r */ - inline IndexVector getUpperBounds(RankType r) const { - assert(r >= 0 && r < this->getCommunicatorSize()); - IndexVector upperBounds(dim_); - const auto& coords = cartesianUtils_.getPartitionCoordsOfRank(r); - - for (DimType i = 0; i < dim_; ++i) { - RankType n; - std::vector nc(coords); - - if (nc[i] < this->getCartesianUtils().getCartesianDimensions()[i] - 1) { - // get rank of next neighbor in dim i - nc[i] += 1; - n = this->getCartesianUtils().getRankFromPartitionCoords(nc); - upperBounds[i] = getLowerBounds(n)[i]; - } else { - // no neighbor in dim i -> end of domain - upperBounds[i] = this->getGlobalSizes()[i]; - } - } - return upperBounds; - } + inline IndexVector getUpperBounds(RankType r) const; /** coordinates of this process' upper bounds */ - inline std::vector getUpperBoundsCoords() const { - return getUpperBoundsCoords(this->getRank()); - } + inline std::vector getUpperBoundsCoords() const; /** coordinates of rank r' upper bounds */ - inline std::vector getUpperBoundsCoords(RankType r) const { - assert(r >= 0 && r < this->getCommunicatorSize()); - - /* the upper bound index vector can correspond to coordinates outside of - * the domain, thus we cannot use the getGlobalLinearIndex method here. - */ - const IndexVector& ubvi = getUpperBounds(r); - std::vector coords(dim_); - IndexType tmp_add = 0; - - for (DimType j = 0; j < dim_; ++j) { - tmp_add = (hasBoundaryPoints_[j] > 0) ? 
(0) : (1); - coords[j] = static_cast(ubvi[j] + tmp_add) * getGridSpacing()[j]; - } - return coords; - } + inline std::vector getUpperBoundsCoords(RankType r) const; /* returns the neighboring process (in the sense that the neighbor has the same * partion coordinates in all other dimensions than d) in dimension d which * contains the point with the one-dimensional index idx1d */ - RankType getNeighbor1dFromAxisIndex(DimType dim, IndexType idx1d) const { - // if global index is outside of domain return negative value - { - if (idx1d < 0) return -1; - if (idx1d > this->getGlobalSizes()[dim] - 1) return -1; - } - const auto& decomp1d = this->getDecomposition()[dim]; - auto lower = std::lower_bound(decomp1d.begin(), decomp1d.end(), idx1d + 1); - int partitionIdx1d = static_cast(std::distance(decomp1d.begin(), lower)) - 1; - RankType r = this->getCartesianUtils().getNeighbor1dFromPartitionIndex(dim, partitionIdx1d); - - // check if global index vector is actually contained in the domain of rank r - assert(idx1d >= this->getLowerBounds(r)[dim]); - assert(idx1d < this->getUpperBounds(r)[dim]); - assert(r < this->getCommunicatorSize()); - return r; - } + RankType getNeighbor1dFromAxisIndex(DimType dim, IndexType idx1d) const; /** Number of Grids in every dimension*/ - inline const std::vector& getParallelization() const { - return this->getCartesianUtils().getCartesianDimensions(); - } + inline const std::vector& getParallelization() const; /** returns the 1d global index of the first point in the local domain * @@ -649,183 +226,36 @@ std::vector getInterpolatedValues( inline IndexType getLastGlobal1dIndex(DimType d) const { return getUpperBounds()[d] - 1; } // returns level of a global 1d index - inline LevelType getLevel(DimType d, IndexType idx1d) const { - // IndexVector givec(dim_, 0); - // givec[d] = idx1d; - // IndexType idx = getGlobalLinearIndex(givec); - // LevelVector levels(dim_); - // IndexVector tmp(dim_); - // getGlobalLI(idx, levels, tmp); - // return levels[d]; - - if (this->hasBoundaryPoints_[d] == 0) { - ++idx1d; - } - // get level of idx1d by rightmost set bit - // (e.g., all points on the finest level already have a 1 at the rightmost bit) - if (idx1d == 0) { - return 0; - } - const LevelType lmax = this->getLevels()[d]; - // auto set_bit = idx1d & ~(idx1d-1); - // LevelType l = lmax - log2(set_bit); - // builtin is fast and should work with gcc and clang - // if it is not available, use the one above (at a slight performance hit) - // or c++20's std::countr_zero - LevelType l; - if constexpr(sizeof(IndexType) == sizeof(long)) { - l = static_cast(lmax - __builtin_ctzl(static_cast(idx1d))); - } else if constexpr(sizeof(IndexType) == sizeof(long long)){ - l = static_cast(lmax - __builtin_ctzll(static_cast(idx1d))); - } else { - l = static_cast(lmax - __builtin_ctz(static_cast(idx1d))); - } - return l; - } + inline LevelType getLevel(DimType d, IndexType idx1d) const; // get 1d index of LeftPredecessor of a point // returns negative number if point has no left predecessor - inline IndexType getLeftPredecessor(DimType d, IndexType idx1d) const { - LevelType l = getLevel(d, idx1d); - - // boundary points - if (l == 0) return -1; - - LevelType ldiff = static_cast(levels_[d] - l); - IndexType lpidx = idx1d - combigrid::powerOfTwoByBitshift(ldiff); - - return lpidx; - } - - inline IndexType getRightPredecessor(DimType d, IndexType idx1d) const { - LevelType l = getLevel(d, idx1d); - - // boundary points - if (l == 0) return -1; - - LevelType ldiff = static_cast(levels_[d] - l); - 
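
getLevel reads the hierarchical level of a 1-D index off the position of its lowest set bit: on a grid of maximum level lmax (boundary included), an index divisible by 2^k but not by 2^(k+1) lies on level lmax - k, and boundary points come out as level 0. A minimal standalone sketch of that arithmetic and of the predecessor offset 2^(lmax - l), using C++20 std::countr_zero in place of the compiler builtins and hypothetical helper names:

#include <bit>
#include <cassert>
#include <cstdint>

// level of a 1-D index on a grid with maximum level lmax (boundary included);
// index 0 and the last index (2^lmax) are boundary points and get level 0
int levelOf1dIndex(std::int64_t idx, int lmax) {
  if (idx == 0) return 0;
  return lmax - static_cast<int>(std::countr_zero(static_cast<std::uint64_t>(idx)));
}

// the left hierarchical predecessor lies 2^(lmax - l) indices away;
// boundary points (level 0) have no predecessor
std::int64_t leftPredecessor(std::int64_t idx, int lmax) {
  const int l = levelOf1dIndex(idx, lmax);
  if (l == 0) return -1;
  return idx - (std::int64_t{1} << (lmax - l));
}

int main() {
  // level-3 grid with boundary: indices 0..8
  assert(levelOf1dIndex(4, 3) == 1);   // midpoint
  assert(levelOf1dIndex(2, 3) == 2);
  assert(levelOf1dIndex(5, 3) == 3);   // odd indices are on the finest level
  assert(leftPredecessor(5, 3) == 4);  // distance 2^(3-3) = 1
  assert(leftPredecessor(4, 3) == 0);  // distance 2^(3-1) = 4
  return 0;
}
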
IndexType rpidx = idx1d + combigrid::powerOfTwoByBitshift(ldiff); + inline IndexType getLeftPredecessor(DimType d, IndexType idx1d) const; - // check if outside of domain - IndexType numElementsD = this->getGlobalSizes()[d]; - - // in case of periodic, "virtual" domain is larger by one - bool oneSidedBoundary = this->returnBoundaryFlags()[d] == 1; - if (oneSidedBoundary && rpidx == numElementsD) return numElementsD; - - if (rpidx > numElementsD - 1) rpidx = -1; - - return rpidx; - } + inline IndexType getRightPredecessor(DimType d, IndexType idx1d) const; // get coordinates of the partition which contains the point specified // by the global index vector - inline void getPartitionCoords(IndexVector& globalAxisIndex, std::vector& partitionCoords) const { - partitionCoords.resize(dim_); - - for (DimType d = 0; d < dim_; ++d) { - partitionCoords[d] = -1; - for (int i = 0; i < this->getCartesianUtils().getCartesianDimensions()[d]; ++i) { - if (globalAxisIndex[d] >= getDecomposition()[d][i]) partitionCoords[d] = i; - } - - // check whether the partition coordinates are valid - assert(partitionCoords[d] > -1 && - partitionCoords[d] < this->getCartesianUtils().getCartesianDimensions()[d]); - } - } + inline void getPartitionCoords(IndexVector& globalAxisIndex, + std::vector& partitionCoords) const; - inline void print() const { - std::visit([&](auto&& arg) { print(arg); }, localTensor_); - } + inline void print() const; // return extents of local grid - inline const IndexVector& getLocalSizes() const { - return localTensor_.getExtentsVector(); - } + inline const IndexVector& getLocalSizes() const { return localTensor_.getExtentsVector(); } // return extents of global grid - inline const IndexVector& getGlobalSizes() const { - return globalIndexer_.getExtentsVector(); - } + inline const IndexVector& getGlobalSizes() const { return globalIndexer_.getExtentsVector(); } // return MPI DataType - inline MPI_Datatype getMPIDatatype() const { - return abstraction::getMPIDatatype(abstraction::getabstractionDataType()); - } + inline MPI_Datatype getMPIDatatype() const; // gather fullgrid on rank r - void gatherFullGrid(FullGrid& fg, RankType root) { - int size = this->getCommunicatorSize(); - int rank = this->getRank(); - CommunicatorType comm = this->getCommunicator(); - - // each rank: send dfg to root - int dest = root; - MPI_Request sendRequest; - MPI_Isend(this->getData(), static_cast(this->getNrLocalElements()), this->getMPIDatatype(), - dest, 0, comm, &sendRequest); - - std::vector requests; - std::vector subarrayTypes; - - // rank r: for each rank create subarray view on fg - if (rank == root) { - if (!fg.isGridCreated()) fg.createFullGrid(); - - for (int r = 0; r < size; ++r) { - IndexVector sizes(fg.getSizes().begin(), fg.getSizes().end()); - IndexVector subsizes = this->getUpperBounds(r) - this->getLowerBounds(r); - IndexVector starts = this->getLowerBounds(r); - - std::vector csizes(sizes.begin(), sizes.end()); - std::vector csubsizes(subsizes.begin(), subsizes.end()); - std::vector cstarts(starts.begin(), starts.end()); - - // create subarray view on data - MPI_Datatype mysubarray; - MPI_Type_create_subarray(static_cast(this->getDimension()), &csizes[0], &csubsizes[0], - &cstarts[0], MPI_ORDER_FORTRAN, this->getMPIDatatype(), - &mysubarray); - MPI_Type_commit(&mysubarray); - subarrayTypes.push_back(mysubarray); - - int src = r; - MPI_Request req; - MPI_Irecv(fg.getData(), 1, mysubarray, src, 0, this->getCommunicator(), &req); - requests.push_back(req); - } - } - - // all - 
MPI_Wait(&sendRequest, MPI_STATUS_IGNORE); - - if (rank == root) { - MPI_Waitall(static_cast(requests.size()), &requests[0], MPI_STATUSES_IGNORE); - } - - // free subarrays - for (size_t i = 0; i < subarrayTypes.size(); ++i) MPI_Type_free(&subarrayTypes[i]); - } + void gatherFullGrid(FullGrid& fg, RankType root); inline void getFGPointsOfSubspaceRecursive(DimType d, IndexType localLinearIndexSum, std::vector& oneDIndices, - std::vector& subspaceIndices) const { - assert(d < dim_); - assert(oneDIndices.size() == dim_); - assert(!oneDIndices.empty()); - - for (const auto idx : oneDIndices[d]) { - auto updatedLocalIndexSum = localLinearIndexSum; - updatedLocalIndexSum += this->getLocalOffsets()[d] * idx; - if (d > 0) { - getFGPointsOfSubspaceRecursive(static_cast(d - 1), updatedLocalIndexSum, - oneDIndices, subspaceIndices); - } else { - subspaceIndices.emplace_back(updatedLocalIndexSum); - } - } - } + std::vector& subspaceIndices) const; /** * @brief Get the indices of the points of the subspace on this partition @@ -833,28 +263,7 @@ std::vector getInterpolatedValues( * @param l level of hierarchical subspace * @return the indices of points on this partition */ - inline std::vector getFGPointsOfSubspace(const LevelVector& l) const { - IndexVector subspaceIndices; - IndexType numPointsOfSubspace = 1; - static thread_local std::vector oneDIndices; - oneDIndices.resize(dim_); - for (DimType d = 0; d < dim_; ++d) { - if (l[d] > levels_[d]) { - return subspaceIndices; - } - get1dIndicesLocal(d, l[d], oneDIndices[d]); - numPointsOfSubspace *= oneDIndices[d].size(); - } - if (numPointsOfSubspace > 0) { - subspaceIndices.reserve(numPointsOfSubspace); - - IndexType localLinearIndexSum = 0; - getFGPointsOfSubspaceRecursive(static_cast(dim_ - 1), localLinearIndexSum, - oneDIndices, subspaceIndices); - } - assert(static_cast(subspaceIndices.size()) == numPointsOfSubspace); - return subspaceIndices; - } + inline std::vector getFGPointsOfSubspace(const LevelVector& l) const; /** * @brief extracts the (hopefully) hierarchical coefficients from dsg @@ -863,99 +272,17 @@ std::vector getInterpolatedValues( * @param dsg the DSG to extract from */ template - size_t extractFromUniformSG(const DistributedSparseGridUniform& dsg) { - assert(dsg.isSubspaceDataCreated()); + size_t extractFromUniformSG(const DistributedSparseGridUniform& dsg); - // all the hierarchical subspaces contained in this full grid - const auto downwardClosedSet = combigrid::getDownSet(levels_); - - // loop over all subspaces (-> somewhat linear access in the sg) - size_t numCopied = 0; - static thread_local IndexVector subspaceIndices; - typename AnyDistributedSparseGrid::SubspaceIndexType sIndex = 0; -#pragma omp parallel for shared(dsg, downwardClosedSet) default(none) schedule(guided) \ - firstprivate(sIndex) reduction(+ : numCopied) - for (const auto& level : downwardClosedSet) { - sIndex = dsg.getIndexInRange(level, sIndex); - bool shouldBeCopied = sIndex > -1 && dsg.getDataSize(sIndex) > 0; - if constexpr (!sparseGridFullyAllocated) { - shouldBeCopied = shouldBeCopied && dsg.isSubspaceCurrentlyAllocated(sIndex); - } - if (shouldBeCopied) { - auto sPointer = dsg.getData(sIndex); - subspaceIndices = std::move(this->getFGPointsOfSubspace(level)); - // #pragma omp simd linear(sPointer : 1) // no simd benefit - for (size_t fIndex = 0; fIndex < subspaceIndices.size(); ++fIndex) { - this->getData()[subspaceIndices[fIndex]] = *sPointer; - ++sPointer; - } - assert(dsg.getDataSize(sIndex) == subspaceIndices.size()); - numCopied += 
subspaceIndices.size(); - } - } - return numCopied; - } - - inline IndexType getStrideForThisLevel(LevelType l, DimType d) const { - assert(d < this->getDimension()); - // special treatment for level 1 suspaces with boundary - return (l == 1 && hasBoundaryPoints_[d] > 0) - ? combigrid::powerOfTwoByBitshift(static_cast(levels_[d] - 1)) - : combigrid::powerOfTwoByBitshift(static_cast(levels_[d] - l + 1)); - } + inline IndexType getStrideForThisLevel(LevelType l, DimType d) const; inline IndexType getLocalStartForThisLevel(LevelType l, DimType d, - IndexType strideForThisLevel) const { - const auto firstGlobal1dIdx = getFirstGlobal1dIndex(d); - - // get global offset to find indices of this level - // this is the first global index that has level l in dimension d - IndexType offsetForThisLevel; - if (hasBoundaryPoints_[d] > 0) { - if (l == 1) { - offsetForThisLevel = 0; - } else { - // offsetForThisLevel = strideForThisLevel / 2; - offsetForThisLevel = - combigrid::powerOfTwoByBitshift(static_cast(levels_[d] - l)); - } - } else { - // offsetForThisLevel = strideForThisLevel / 2 - 1; - offsetForThisLevel = - combigrid::powerOfTwoByBitshift(static_cast(levels_[d] - l)) - 1; - } - assert(offsetForThisLevel > -1); - assert(strideForThisLevel >= offsetForThisLevel); - - // first level-index that is on our partition - const IndexType firstGlobalIndexOnThisPartition = - (firstGlobal1dIdx < offsetForThisLevel) - ? 0 - : (firstGlobal1dIdx - offsetForThisLevel + strideForThisLevel - 1) / strideForThisLevel; - // get global index of first local index which has level l - const IndexType globalStart = - (firstGlobalIndexOnThisPartition)*strideForThisLevel + offsetForThisLevel; - assert(getLevel(d, globalStart) == l || - (getLevel(d, globalStart) == 0 && hasBoundaryPoints_[d] > 0 && l == 1)); - assert(globalStart >= firstGlobal1dIdx); - - return globalStart - firstGlobal1dIdx; - } + IndexType strideForThisLevel) const; inline IndexType getNumPointsOnThisPartition(DimType d, IndexType localStart, - IndexType strideForThisLevel) const { - assert(d < this->getDimension()); - return (this->getLocalSizes()[d] - 1 < localStart) - ? 
0 - : (this->getLocalSizes()[d] - 1 - localStart) / strideForThisLevel + 1; - } + IndexType strideForThisLevel) const; - inline IndexType getNumPointsOnThisPartition(LevelType l, DimType d) const { - assert(!(l > levels_[d])); - const auto strideForThisLevel = getStrideForThisLevel(l, d); - return getNumPointsOnThisPartition(d, getLocalStartForThisLevel(l, d, strideForThisLevel), - strideForThisLevel); - } + inline IndexType getNumPointsOnThisPartition(LevelType l, DimType d) const; /** * @brief get the local 1d indices that correspond to a given level @@ -964,26 +291,7 @@ std::vector getInterpolatedValues( * @param l the level * @param oneDIndices a list of the local vector indices */ - inline void get1dIndicesLocal(DimType d, LevelType l, IndexVector& oneDIndices) const { - assert(l > 0); - assert(d < this->getDimension()); - if (l > levels_[d]) { - oneDIndices.clear(); - return; - } - - const IndexType strideForThisLevel = getStrideForThisLevel(l, d); - IndexType localStart = getLocalStartForThisLevel(l, d, strideForThisLevel); - const IndexType numPointsOnThisPartition = - getNumPointsOnThisPartition(d, localStart, strideForThisLevel); - oneDIndices.resize(numPointsOnThisPartition); - - std::generate(oneDIndices.begin(), oneDIndices.end(), [&localStart, &strideForThisLevel]() { - auto localStartBefore = localStart; - localStart += strideForThisLevel; - return localStartBefore; - }); - } + inline void get1dIndicesLocal(DimType d, LevelType l, IndexVector& oneDIndices) const; /** * @brief Get the Lp Norm of the data on the dfg: @@ -992,459 +300,63 @@ std::vector getInterpolatedValues( * @param p : the (polynomial) parameter, 0 is interpreted as maximum norm * @return real : the norm */ - real getLpNorm(int p) const { - assert(p >= 0); - // special case maximum norm - MPI_Datatype dtype = abstraction::getMPIDatatype(abstraction::getabstractionDataType()); - if (p == 0) { - real max = 0.0; - auto data = this->getData(); -#pragma omp parallel for reduction(max : max) default(none) shared(data) schedule(static) - for (size_t i = 0; i < this->getNrLocalElements(); ++i) { - max = std::max(max, std::abs(data[i])); - } - real globalMax(-1); - MPI_Allreduce(&max, &globalMax, 1, dtype, MPI_MAX, getCommunicator()); - return globalMax; - } else { - real p_f = static_cast(p); - auto data = this->getData(); - real res = 0.0; - -#pragma omp parallel for reduction(+ : res) default(none) shared(data, oneOverPowOfTwo) \ - firstprivate(p_f) schedule(static) - for (size_t i = 0; i < this->getNrLocalElements(); ++i) { - auto isOnBoundary = this->isLocalLinearIndexOnBoundary(i); - auto countBoundary = std::count(isOnBoundary.begin(), isOnBoundary.end(), true); - double hatFcnIntegral = oneOverPowOfTwo[countBoundary]; - // std::cout << hatFcnIntegral << std::endl; - // sumIntegral += hatFcnIntegral; - real abs = std::abs(data[i]); - res += std::pow(abs, p_f) * hatFcnIntegral; - } - // std::cout << " sum " << sumIntegral << std::endl; - real globalRes(0.); - MPI_Allreduce(&res, &globalRes, 1, dtype, MPI_SUM, getCommunicator()); - // TODO this is only correct for 1 norm - // other norms have mixed terms and need additional boundary communication - return std::pow(globalRes * std::pow(this->getInnerNodalBasisFunctionIntegral(), p_f), - 1.0 / p_f); - } - } + real getLpNorm(int p) const; // write data to file using MPI-IO - void writePlotFile(const char* filename) const { - auto dim = getDimension(); - - // create subarray data type - auto [csizes, csubsizes, cstarts] = this->getSizesSubsizesStartsOfSubtensor(); - 
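
getLpNorm above follows a two-stage pattern: an OpenMP reduction over the local data, then an MPI_Allreduce over the process grid (MPI_MAX for the maximum norm, MPI_SUM for p >= 1, where each value is additionally weighted by the nodal basis integral, halved once per boundary direction). A minimal sketch of the maximum-norm case, with hypothetical names and a plain double vector standing in for the dfg data:

#include <mpi.h>
#include <algorithm>
#include <cmath>
#include <cstddef>
#include <vector>

// maximum norm of data distributed over the ranks of comm:
// thread-local OpenMP reduction followed by a global MPI_MAX reduction
double distributedMaxNorm(const std::vector<double>& localData, MPI_Comm comm) {
  double localMax = 0.0;
#pragma omp parallel for reduction(max : localMax) schedule(static)
  for (std::size_t i = 0; i < localData.size(); ++i) {
    localMax = std::max(localMax, std::abs(localData[i]));
  }
  double globalMax = 0.0;
  MPI_Allreduce(&localMax, &globalMax, 1, MPI_DOUBLE, MPI_MAX, comm);
  return globalMax;
}

int main(int argc, char** argv) {
  MPI_Init(&argc, &argv);
  std::vector<double> local{-3.0, 2.5};  // this rank's share of the data
  const double norm = distributedMaxNorm(local, MPI_COMM_WORLD);
  MPI_Finalize();
  return norm > 0.0 ? 0 : 1;
}
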
- // create subarray view on data - MPI_Datatype mysubarray; - MPI_Type_create_subarray(static_cast(getDimension()), &csizes[0], &csubsizes[0], - &cstarts[0], MPI_ORDER_FORTRAN, getMPIDatatype(), &mysubarray); - MPI_Type_commit(&mysubarray); - - // open file - MPI_File fh; - MPI_File_open(getCommunicator(), filename, MPI_MODE_WRONLY | MPI_MODE_CREATE, MPI_INFO_NULL, - &fh); - - MPI_Offset offset = 0; - - // .raw files can be read by paraview, in that case write the header separately - std::string fn = filename; - if (fn.find(".raw") != std::string::npos) { - // rank 0 write human-readable header - if (this->getRank() == 0) { - auto headername = fn + "_header"; - std::ofstream ofs(headername); - - // first line: dimension - ofs << "dimensionality " << getDimension() << std::endl; - - // grid points per dimension in the order - // x_0, x_1, ... , x_d - ofs << "Extents "; - for (auto s : csizes) { - ofs << s << " "; - } - ofs << std::endl; - - // data type - ofs << "Data type size " << sizeof(FG_ELEMENT) << std::endl; - // TODO (pollinta) write endianness and data spacing - } - } else { - // rank 0 write dim and resolution (and data format?) - if (this->getRank() == 0) { - MPI_File_write(fh, &dim, 1, MPI_UNSIGNED_CHAR, MPI_STATUS_IGNORE); - - std::vector res(csizes.begin(), csizes.end()); - MPI_File_write(fh, &res[0], 6, MPI_INT, MPI_STATUS_IGNORE); - } - - // set file view to right offset (in bytes) - offset = (1 + dim) * sizeof(int); - } - - MPI_File_set_view(fh, offset, getMPIDatatype(), mysubarray, "native", MPI_INFO_NULL); - - // write subarray - MPI_File_write_all(fh, getData(), static_cast(getNrLocalElements()), getMPIDatatype(), MPI_STATUS_IGNORE); - // close file - MPI_File_close(&fh); - MPI_Type_free(&mysubarray); - } + void writePlotFile(const char* filename) const; // write data to legacy-type VTK file using MPI-IO - void writePlotFileVTK(const char* filename) const { - auto dim = getDimension(); - assert(dim < 4); // vtk supports only up to 3D - - // create subarray data type - auto [csizes, csubsizes, cstarts] = this->getSizesSubsizesStartsOfSubtensor(); - - // create subarray view on data - MPI_Datatype mysubarray; - MPI_Type_create_subarray(static_cast(getDimension()), &csizes[0], &csubsizes[0], - &cstarts[0], MPI_ORDER_FORTRAN, getMPIDatatype(), &mysubarray); - MPI_Type_commit(&mysubarray); - - // open file - MPI_File fh; - MPI_File_open(getCommunicator(), filename, MPI_MODE_WRONLY + MPI_MODE_CREATE, MPI_INFO_NULL, - &fh); - - std::stringstream vtk_header; - // cf https://lorensen.github.io/VTKExamples/site/VTKFileFormats/ => structured points - vtk_header << "# vtk DataFile Version 2.0.\n" - << "This file contains the combination solution evaluated on a full grid\n" - << "BINARY\n" - << "DATASET STRUCTURED_POINTS\n"; - if (dim == 3) { // TODO change for non-boundary grids using getGridSpacing - vtk_header << "DIMENSIONS " << csizes[0] << " " << csizes[1] << " " << csizes[2] << "\n" - << "ORIGIN 0 0 0\n" - << "SPACING " << 1. / static_cast(csizes[0] - 1) << " " - << 1. / static_cast(csizes[1] - 1) << " " - << 1. / static_cast(csizes[2] - 1) << "\n"; - } else if (dim == 2) { - vtk_header << "DIMENSIONS " << csizes[0] << " " << csizes[1] << " 1\n" - << "ORIGIN 0 0 0\n" - << "SPACING " << 1. / static_cast(csizes[0] - 1) << " " - << 1. / static_cast(csizes[1] - 1) << " 1\n"; - } else if (dim == 1) { - vtk_header << "DIMENSIONS " << csizes[0] << " 1 1\n" - << "ORIGIN 0 0 0\n" - << "SPACING " << 1. 
/ static_cast(csizes[0] - 1) << " 1 1\n"; - } else { - assert(false); - } - vtk_header << "POINT_DATA " - << std::accumulate(csizes.begin(), csizes.end(), 1, std::multiplies()) - << "\n" - << "SCALARS quantity double 1\n" - << "LOOKUP_TABLE default\n"; - // TODO set the right data type from combidatatype, for now double by default - bool rightDataType = std::is_same::value; - assert(rightDataType); - auto header_string = vtk_header.str(); - auto header_size = header_string.size(); - - // rank 0 write header - if (this->getRank() == 0) { - MPI_File_write(fh, header_string.data(), static_cast(header_size), MPI_CHAR, - MPI_STATUS_IGNORE); - } - - // set file view to right offset (in bytes) - MPI_Offset offset = header_size * sizeof(char); - // external32 not supported in OpenMPI < 5. -> writes "native" endianness - // might work with MPICH - MPI_File_set_view(fh, offset, getMPIDatatype(), mysubarray, "external32", MPI_INFO_NULL); - - // write subarray - MPI_File_write_all(fh, getData(), static_cast(getNrLocalElements()), getMPIDatatype(), MPI_STATUS_IGNORE); - - // close file - MPI_File_close(&fh); - MPI_Type_free(&mysubarray); - } + void writePlotFileVTK(const char* filename) const; const std::vector& getDecomposition() const { return decomposition_; } - MPI_Datatype getUpwardSubarray(DimType d) { - // do index calculations - // set lower bounds of subarray - auto subarrayLowerBounds = this->getLowerBounds(); - auto subarrayUpperBounds = this->getUpperBounds(); - subarrayLowerBounds[d] += this->getLocalSizes()[d] - 1; - - auto subarrayExtents = subarrayUpperBounds - subarrayLowerBounds; - assert(subarrayExtents[d] == 1); - auto subarrayStarts = subarrayLowerBounds - this->getLowerBounds(); - - // create MPI datatype - std::vector sizes(this->getLocalSizes().begin(), this->getLocalSizes().end()); - std::vector subsizes(subarrayExtents.begin(), subarrayExtents.end()); - // the starts are local indices - std::vector starts(subarrayStarts.begin(), subarrayStarts.end()); - - // create subarray view on data - MPI_Datatype mysubarray; - MPI_Type_create_subarray(static_cast(this->getDimension()), sizes.data(), subsizes.data(), - starts.data(), MPI_ORDER_FORTRAN, this->getMPIDatatype(), &mysubarray); - MPI_Type_commit(&mysubarray); - return mysubarray; - } + MPI_Datatype getUpwardSubarray(DimType d); - std::vector getUpwardSubarrays() { - // initialize upwardSubarrays_ only once - if (upwardSubarrays_.size() == 0) { - upwardSubarrays_.resize(this->getDimension()); - for (DimType d = 0; d < this->getDimension(); ++d) { - upwardSubarrays_[d] = getUpwardSubarray(d); - } - } - return upwardSubarrays_; - } + std::vector getUpwardSubarrays(); - MPI_Datatype getDownwardSubarray(DimType d) { - // do index calculations - // set upper bounds of subarray - IndexVector subarrayLowerBounds = this->getLowerBounds(); - IndexVector subarrayUpperBounds = this->getUpperBounds(); - subarrayUpperBounds[d] -= this->getLocalSizes()[d] - 1; - - auto subarrayExtents = subarrayUpperBounds - subarrayLowerBounds; - assert(subarrayExtents[d] == 1); - auto subarrayStarts = subarrayLowerBounds - this->getLowerBounds(); - - // create MPI datatype - // also, the data dimensions are reversed - std::vector sizes(this->getLocalSizes().begin(), this->getLocalSizes().end()); - std::vector subsizes(subarrayExtents.begin(), subarrayExtents.end()); - // the starts are local indices - std::vector starts(subarrayStarts.begin(), subarrayStarts.end()); - - // create subarray view on data - MPI_Datatype mysubarray; - 
MPI_Type_create_subarray(static_cast(this->getDimension()), sizes.data(), subsizes.data(), - starts.data(), MPI_ORDER_FORTRAN, this->getMPIDatatype(), &mysubarray); - MPI_Type_commit(&mysubarray); - return mysubarray; - } + MPI_Datatype getDownwardSubarray(DimType d); + + std::vector getDownwardSubarrays(); - std::vector getDownwardSubarrays() { - // initialize downwardSubarrays_ only once - if (downwardSubarrays_.size() == 0) { - downwardSubarrays_.resize(this->getDimension()); - for (DimType d = 0; d < this->getDimension(); ++d) { - downwardSubarrays_[d] = getDownwardSubarray(d); - } - } - return downwardSubarrays_; - } /** * @brief get the ranks of the highest and lowest "neighbor" rank in dimension d * only sets highest and lowest if they actually are my neighbors */ - void getHighestAndLowestNeighbor(DimType d, int& highest, int& lowest) { - //TODO this is not going to work with periodic cartesian communicator - lowest = MPI_PROC_NULL; - highest = MPI_PROC_NULL; - - auto d_reverse = this->getDimension() - d - 1; - if (!reverseOrderingDFGPartitions) { - d_reverse = d; - } - - MPI_Cart_shift(this->getCommunicator(), static_cast(d_reverse), - static_cast(getParallelization()[d] - 1), &lowest, &highest); - - // assert only boundaries have those neighbors (remove in case of periodicity) - // this assumes no periodicity! - if (!this->getLowerBounds()[d] == 0) { - assert(highest < 0); - } - if (!this->getUpperBounds()[d] == this->getGlobalSizes()[d]) { - assert(lowest < 0); - } - } - - void writeLowerBoundaryToUpperBoundary(DimType d) { - assert(hasBoundaryPoints_[d] == 2); - - // create MPI datatypes - auto downSubarray = getDownwardSubarray(d); - auto upSubarray = getUpwardSubarray(d); + void getHighestAndLowestNeighbor(DimType d, int& highest, int& lowest) const; - // if I have the highest neighbor (i. e. I am the lowest rank), I need to send my lowest layer in d to them, - // if I have the lowest neighbor (i. e. I am the highest rank), I can receive it - int lower, higher; - getHighestAndLowestNeighbor(d, higher, lower); - - // TODO asynchronous over d?? - auto success = MPI_Sendrecv(this->getData(), 1, downSubarray, higher, TRANSFER_GHOST_LAYER_TAG, - this->getData(), 1, upSubarray, lower, TRANSFER_GHOST_LAYER_TAG, - this->getCommunicator(), MPI_STATUS_IGNORE); - assert(success == MPI_SUCCESS); - MPI_Type_free(&downSubarray); - MPI_Type_free(&upSubarray); - } + void writeLowerBoundaryToUpperBoundary(DimType d); // non-RVO dependent version of ghost layer exchange void exchangeGhostLayerUpward(DimType d, std::vector& subarrayExtents, std::vector& recvbuffer, - MPI_Request* recvRequest = nullptr) { - subarrayExtents.resize(this->getDimension()); - subarrayExtents.assign(this->getLocalSizes().begin(), this->getLocalSizes().end()); - subarrayExtents[d] = 1; - - // create MPI datatype - auto subarray = getUpwardSubarray(d); - - // if I have a higher neighbor, I need to send my highest layer in d to them, - // if I have a lower neighbor, I can receive it - int lower = MPI_PROC_NULL; - int higher = MPI_PROC_NULL; - - // somehow the cartesian directions in the communicator are reversed - // cf InitMPI(...) 
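
The ghost-layer routines here combine MPI_Cart_shift (with the dimension index reversed when reverseOrderingDFGPartitions is set) with a send/receive of a one-layer-thick subarray; MPI_PROC_NULL neighbors turn the boundary ranks' halves of the exchange into no-ops. A stripped-down sketch of that pattern with hypothetical names, using a contiguous buffer instead of the subarray datatype of the real code:

#include <mpi.h>
#include <vector>

// send my highest layer "up" in cartesian dimension d and receive my lower
// neighbor's highest layer as a ghost layer; cartComm is assumed to be cartesian
void exchangeGhostLayerUp(MPI_Comm cartComm, int d, const std::vector<double>& sendLayer,
                          std::vector<double>& ghostLayer) {
  int lower = MPI_PROC_NULL;
  int higher = MPI_PROC_NULL;
  // rank one step "below" me (source) and one step "above" me (destination) in dimension d
  MPI_Cart_shift(cartComm, d, 1, &lower, &higher);

  ghostLayer.resize(sendLayer.size());
  // MPI_PROC_NULL neighbors make the respective half of the exchange a no-op,
  // so non-periodic domain boundaries need no special casing here
  MPI_Sendrecv(sendLayer.data(), static_cast<int>(sendLayer.size()), MPI_DOUBLE, higher, 0,
               ghostLayer.data(), static_cast<int>(ghostLayer.size()), MPI_DOUBLE, lower, 0,
               cartComm, MPI_STATUS_IGNORE);
}

int main(int argc, char** argv) {
  MPI_Init(&argc, &argv);
  int nprocs = 1;
  MPI_Comm_size(MPI_COMM_WORLD, &nprocs);
  int dims[1] = {nprocs};
  int periods[1] = {0};  // non-periodic, like a grid with two-sided boundary
  MPI_Comm cartComm;
  MPI_Cart_create(MPI_COMM_WORLD, 1, dims, periods, 0, &cartComm);
  std::vector<double> myTopLayer{1.0, 2.0};
  std::vector<double> ghost;
  exchangeGhostLayerUp(cartComm, 0, myTopLayer, ghost);
  MPI_Comm_free(&cartComm);
  MPI_Finalize();
  return 0;
}
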
- auto d_reverse = this->getDimension() - d - 1; - if (!reverseOrderingDFGPartitions) { - d_reverse = d; - } - MPI_Cart_shift(this->getCommunicator(), static_cast(d_reverse), 1, &lower, &higher); - - if (this->returnBoundaryFlags()[d] == 1) { - // if one-sided boundary, assert that everyone has neighbors - assert(lower >= 0); - assert(higher >= 0); - } else if (this->returnBoundaryFlags()[d] == 2) { - // assert that boundaries have no neighbors - if (this->getLowerBounds()[d] == 0) { - assert(lower < 0); - } - if (this->getUpperBounds()[d] == this->getGlobalSizes()[d]) { - assert(higher < 0); - } - } - - // create recvbuffer - auto numElements = std::accumulate(subarrayExtents.begin(), subarrayExtents.end(), 1, - std::multiplies()); - if (lower < 0) { - numElements = 0; - std::memset(subarrayExtents.data(), 0, subarrayExtents.size() * sizeof(int)); - } - recvbuffer.resize(numElements); - std::memset(recvbuffer.data(), 0, recvbuffer.size() * sizeof(FG_ELEMENT)); - - if (recvRequest != nullptr) { - auto success = MPI_Irecv(recvbuffer.data(), numElements, this->getMPIDatatype(), lower, - TRANSFER_GHOST_LAYER_TAG, this->getCommunicator(), recvRequest); - assert(success == MPI_SUCCESS); - success = MPI_Send(this->getData(), 1, subarray, higher, TRANSFER_GHOST_LAYER_TAG, - this->getCommunicator()); - assert(success == MPI_SUCCESS); - } else { - auto success = - MPI_Sendrecv(this->getData(), 1, subarray, higher, TRANSFER_GHOST_LAYER_TAG, - recvbuffer.data(), numElements, this->getMPIDatatype(), lower, - TRANSFER_GHOST_LAYER_TAG, this->getCommunicator(), MPI_STATUS_IGNORE); - assert(success == MPI_SUCCESS); - } - MPI_Type_free(&subarray); - } + MPI_Request* recvRequest = nullptr); - std::vector exchangeGhostLayerUpward(DimType d, std::vector& subarrayExtents) { - std::vector recvbuffer{}; - exchangeGhostLayerUpward(d, subarrayExtents, recvbuffer); - return recvbuffer; - } + std::vector exchangeGhostLayerUpward(DimType d, std::vector& subarrayExtents); /** * @brief check if given globalLinearIndex is on the boundary of this DistributedFullGrid */ - std::vector isGlobalLinearIndexOnBoundary(IndexType globalLinearIndex) const { - // this could likely be done way more efficiently, but it's - // currently not in any performance critical spot - std::vector isOnBoundary(this->getDimension(), false); - - // convert to global vector index - IndexVector globalAxisIndex(dim_); - getGlobalVectorIndex(globalLinearIndex, globalAxisIndex); - for (DimType d = 0; d < this->getDimension(); ++d) { - if (this->returnBoundaryFlags()[d] == 2) { - if (globalAxisIndex[d] == 0 || globalAxisIndex[d] == this->length(d) - 1) { - isOnBoundary[d] = true; - } - } - } - return isOnBoundary; - } + std::vector isGlobalLinearIndexOnBoundary(IndexType globalLinearIndex) const; /** * @brief check if given localLinearIndex is on the boundary of this DistributedFullGrid */ - std::vector isLocalLinearIndexOnBoundary(IndexType localLinearIndex) const { - return isGlobalLinearIndexOnBoundary(getGlobalLinearIndex(localLinearIndex)); - } + std::vector isLocalLinearIndexOnBoundary(IndexType localLinearIndex) const; /** * @brief recursive helper function for getCornersGlobal*Indices - * (otherwise, it can't be dimension-independent) * */ std::vector getCornersGlobalVectorIndicesRecursive( - std::vector indicesSoFar, DimType dim) const { - if (dim < this->getDimension()) { - std::vector newIndicesSoFar{}; - for (const auto& indexVec : indicesSoFar) { - newIndicesSoFar.push_back(indexVec); - newIndicesSoFar.back().push_back(0); - 
newIndicesSoFar.push_back(indexVec); - newIndicesSoFar.back().push_back(this->length(dim) - 1); - } - assert(newIndicesSoFar.size() == 2 * indicesSoFar.size()); - return getCornersGlobalVectorIndicesRecursive(newIndicesSoFar, static_cast(dim + 1)); - } else { - return indicesSoFar; - } - } + std::vector indicesSoFar, DimType dim) const; /** * @brief get a vector containing the global vector indices of the 2^d corners of this dfg * */ - std::vector getCornersGlobalVectorIndices() const { - auto emptyVectorInVector = std::vector(1); - assert(emptyVectorInVector.size() == 1); - assert(emptyVectorInVector[0].size() == 0); - std::vector cornersVectors = - getCornersGlobalVectorIndicesRecursive(emptyVectorInVector, 0); - assert(cornersVectors.size() == static_cast(powerOfTwo[this->getDimension()])); - return cornersVectors; - } + std::vector getCornersGlobalVectorIndices() const; - std::vector getCornersValues() const { - std::vector values(powerOfTwo[this->getDimension()]); - auto corners = getCornersGlobalVectorIndices(); - for (size_t cornerNo = 0; cornerNo < corners.size(); ++cornerNo) { - if (this->isGlobalIndexHere(corners[cornerNo])) { - // convert to local vector index, then to linear index - IndexVector locAxisIndex(this->getDimension()); - bool present = getLocalVectorIndex(corners[cornerNo], locAxisIndex); - assert(present); - auto index = getLocalLinearIndex(locAxisIndex); - values[cornerNo] = this->getData()[index]; - } - } - MPI_Allreduce(MPI_IN_PLACE, values.data(), static_cast(values.size()), - this->getMPIDatatype(), MPI_SUM, this->getCommunicator()); - return values; - } + std::vector getCornersValues() const; const MPICartesianUtils& getCartesianUtils() const { return cartesianUtils_; } @@ -1452,6 +364,16 @@ std::vector getInterpolatedValues( inline void setData(FG_ELEMENT* newData) { return localTensor_.setData(newData); } private: + /** + * @brief sets the MPI-related member cartesianUtils_ + * + * @param comm the communicator to use (assumed to be cartesian) + * @param procs the desired partition (for sanity checking) + */ + void InitMPI(MPI_Comm comm, const std::vector& procs); + + void setDecomposition(const std::vector& decomposition); + /** dimension of the full grid */ const DimType dim_; @@ -1461,13 +383,13 @@ std::vector getInterpolatedValues( /** the grid spacing h for each dimension */ std::vector gridSpacing_; - //TODO: make these normal templates, and SomeDistributedFullGrid a std::variant ? + // TODO: make these normal templates, and SomeDistributedFullGrid a std::variant ? 
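
getCornersValues above uses an owner-writes / sum-reduce idiom: every rank fills only the corner entries it holds locally, leaves the rest at zero, and an in-place MPI_Allreduce with MPI_SUM gives all ranks the complete vector. The same idiom in isolation, with hypothetical names:

#include <mpi.h>
#include <cstddef>
#include <vector>

// every rank writes only the entries it owns into a zero-initialized vector;
// the in-place MPI_SUM reduction then gives all ranks the complete result
// (assumes each entry has exactly one owner)
std::vector<double> combineOwnedValues(std::size_t numEntries,
                                       const std::vector<std::size_t>& myIndices,
                                       const std::vector<double>& myValues, MPI_Comm comm) {
  std::vector<double> values(numEntries, 0.0);
  for (std::size_t i = 0; i < myIndices.size(); ++i) {
    values[myIndices[i]] = myValues[i];
  }
  MPI_Allreduce(MPI_IN_PLACE, values.data(), static_cast<int>(values.size()), MPI_DOUBLE, MPI_SUM,
                comm);
  return values;
}

int main(int argc, char** argv) {
  MPI_Init(&argc, &argv);
  int rank = 0, nprocs = 1;
  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
  MPI_Comm_size(MPI_COMM_WORLD, &nprocs);
  // rank r owns entry r; after the reduction every rank holds all entries
  auto values = combineOwnedValues(static_cast<std::size_t>(nprocs),
                                   {static_cast<std::size_t>(rank)},
                                   {static_cast<double>(rank) + 1.0}, MPI_COMM_WORLD);
  MPI_Finalize();
  return values.empty() ? 1 : 0;
}
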
/** TensorIndexer , only populated for the used dimensionality**/ /** number of points per axis, boundary included*/ - TensorIndexer globalIndexer_ {}; + TensorIndexer globalIndexer_{}; // /** Tensor -- only populated for the used dimensionality**/ - Tensor localTensor_ {}; + Tensor localTensor_{}; /** flag to show if the dimension has boundary points*/ const std::vector hasBoundaryPoints_; @@ -1481,11 +403,11 @@ std::vector getInterpolatedValues( /** my partition's upper 1D bounds */ IndexVector myPartitionsUpperBounds_; - /** - * the decomposition of the full grid over processors - * contains (for every dimension) the grid point indices at - * which a process boundary is assumed - */ + /** + * the decomposition of the full grid over processors + * contains (for every dimension) the grid point indices at + * which a process boundary is assumed + */ std::vector decomposition_; /** utility to get info about cartesian communicator */ @@ -1494,100 +416,13 @@ std::vector getInterpolatedValues( // the MPI Datatypes representing the boundary layers of the MPI processes' subgrid std::vector downwardSubarrays_; std::vector upwardSubarrays_; - - /** - * @brief sets the MPI-related member cartesianUtils_ - * - * @param comm the communicator to use (assumed to be cartesian) - * @param procs the desired partition (for sanity checking) - */ - void InitMPI(MPI_Comm comm, const std::vector& procs) { - // check if communicator is already cartesian - int status; - MPI_Topo_test(comm, &status); - - if (status == MPI_CART) { - if (comm != cartesianUtils_.getComm()) { - assert(uniformDecomposition); - cartesianUtils_ = MPICartesianUtils(comm); - } - if (procs != cartesianUtils_.getCartesianDimensions()) { - std::cerr << "unpart" << std::endl; - throw std::runtime_error("The given communicator is not partitioned as desired"); - } - } else { - // MPI_Comm_dup(comm, &communicator_); - // cf. 
https://www.researchgate.net/publication/220439585_MPI_on_millions_of_cores - // "Figure 3 shows the memory consumption in all these cases after 32 calls to MPI Comm dup" - std::cerr << "undup" << std::endl; - throw std::runtime_error( - "Currently testing to not duplicate communicator (to save memory), \ - if you do want to use this code please take care that \ - MPI_Comm_free(&communicator_) will be called at some point"); - } - } - - void setDecomposition(const std::vector& decomposition) { -#ifndef NDEBUG - assert(decomposition.size() == dim_); - for (DimType i = 0; i < dim_; ++i) - assert(decomposition[i].size() == - static_cast(this->getCartesianUtils().getCartesianDimensions()[i])); - - // check if 1d bounds given in ascending order - // if not, this might indicate there's something wrong - for (DimType i = 0; i < dim_; ++i) { - IndexVector tmp(decomposition[i]); - std::sort(tmp.begin(), tmp.end()); - assert(tmp == decomposition[i]); - } - - // check if partitions size larger zero - // this can be detected by checking for duplicate entries in - // the 1d decompositions - for (DimType i = 0; i < dim_; ++i) { - IndexVector tmp(decomposition[i]); - IndexVector::iterator last = std::unique(tmp.begin(), tmp.end()); - - // todo: this is not sufficient to check for duplicate entries - // if last points to the same element as tmp.end() - // this means all entries in tmp are unique - assert(last == tmp.end() && "some partition in decomposition is of zero size!"); - } -#endif // not def NDEBUG - - decomposition_ = decomposition; - } }; -// end class - -template -MPICartesianUtils DistributedFullGrid::cartesianUtils_; - -// output operator -template -inline std::ostream& operator<<(std::ostream& os, const DistributedFullGrid& dfg) { - // os << dfg.getRank() << " " << dfg.getDimension() << " " << dfg.getLevels() << std::endl; - // os << "bounds " << dfg.getLowerBounds() << " to " << dfg.getUpperBounds() << " to " << dfg.getUpperBoundsCoords() << std::endl; - // os << "offsets " << dfg.getOffsets() << " " << dfg.getLocalOffsets() << std::endl; - // os << "sizes " << dfg.getLocalSizes() << "; " << dfg.getNrLocalElements() << " " << dfg.getNrLocalElements() << "; " << dfg.getNrElements() << std::endl; - // std::vector> decomposition = dfg.getDecomposition(); - // for (auto& dec : decomposition) { - // os << dec; - // } - // os << " decomposition; parallelization " << dfg.getParallelization() << std::endl; - // if(dfg.getNrLocalElements() < 30) - // os << "elements " << dfg.getElementVector() << std::endl; - - dfg.print(); - - return os; -} template class OwningDistributedFullGrid : public DistributedFullGrid { public: OwningDistributedFullGrid() = default; + explicit OwningDistributedFullGrid( DimType dim, const LevelVector& levels, CommunicatorType const& comm, const std::vector& hasBdrPoints, const std::vector& procs, @@ -1617,32 +452,1361 @@ class OwningDistributedFullGrid : public DistributedFullGrid { std::vector ownedDataVector_{}; }; -static inline std::vector downsampleDecomposition( - const std::vector decomposition, const LevelVector& referenceLevel, - const LevelVector& newLevel, const std::vector& boundary) { - auto newDecomposition = decomposition; - if (decomposition.size() > 0) { - for (DimType d = 0 ; d < static_cast(referenceLevel.size()); ++ d) { - // for now, assume that we never want to interpolate on a level finer than referenceLevel - assert(referenceLevel[d] >= newLevel[d]); - auto levelDiff = referenceLevel[d] - newLevel[d]; - auto stepFactor = oneOverPowOfTwo[levelDiff]; - 
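
downsampleDecomposition maps each 1-D partition bound from the reference level to a coarser level: with boundary points the index is simply scaled and rounded up, ceil(idx * 2^-(lref - lnew)); without boundary points the left protrusion of 2^(lref - lnew) - 1 finer points is subtracted first and the result clamped at zero. A small worked sketch of that per-index mapping (hypothetical function name, mirroring the arithmetic above):

#include <algorithm>
#include <cassert>
#include <cmath>
#include <cstdint>

// map a 1-D decomposition index from referenceLevel down to the coarser newLevel
std::int64_t downsampleIndex(std::int64_t idx, int referenceLevel, int newLevel,
                             bool hasBoundary) {
  assert(newLevel <= referenceLevel);
  const double stepFactor =
      1.0 / static_cast<double>(std::int64_t{1} << (referenceLevel - newLevel));
  if (hasBoundary) {
    // boundary points exist on every level -> index 0 maps to 0
    return static_cast<std::int64_t>(std::ceil(static_cast<double>(idx) * stepFactor));
  }
  // without boundary points, the finer level protrudes 2^(lref - lnew) - 1 points
  // to the left of the coarse level's first point
  const std::int64_t leftProtrusion = (std::int64_t{1} << (referenceLevel - newLevel)) - 1;
  return static_cast<std::int64_t>(
      std::max(0.0, std::ceil(static_cast<double>(idx - leftProtrusion) * stepFactor)));
}

int main() {
  // level 4 -> level 2 with boundary: index 8 (x = 0.5 of 0..16) maps to 2 (x = 0.5 of 0..4)
  assert(downsampleIndex(8, 4, 2, true) == 2);
  // level 3 -> level 2 without boundary: index 3 (x = 0.5) maps to 1 (x = 0.5 on the coarse grid)
  assert(downsampleIndex(3, 3, 2, false) == 1);
  return 0;
}
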
if(boundary[d] > 0) { - // all levels contain the boundary points -> point 0 is the same - for (auto& dec: newDecomposition[d]) { - dec = static_cast(std::ceil(static_cast(dec)*stepFactor)); - } +template +DistributedFullGrid::DistributedFullGrid(DimType dim, const LevelVector& levels, + CommunicatorType const& comm, + const std::vector& hasBdrPoints, + FG_ELEMENT* dataPointer, + const std::vector& procs, + bool forwardDecomposition, + const std::vector& decomposition) + : dim_(dim), levels_(levels), hasBoundaryPoints_(hasBdrPoints) { + assert(levels_.size() == dim); + assert(hasBoundaryPoints_.size() == dim); + assert(procs.size() == dim); + + InitMPI(comm, procs); + + IndexVector nrPoints(dim_); + gridSpacing_.resize(dim_); + + // set global num of elements and offsets + IndexType nrElements = 1; + for (DimType j = 0; j < dim_; j++) { + nrPoints[j] = combigrid::getNumDofNodal(levels_[j], hasBoundaryPoints_[j]); + nrElements = nrElements * nrPoints[j]; + if (hasBoundaryPoints_[j] == 1) { + assert(!decomposition.empty() || !forwardDecomposition); + } + assert(levels_[j] < 30); + gridSpacing_[j] = oneOverPowOfTwo[levels_[j]]; + } + + if (decomposition.size() == 0) { + setDecomposition(getDefaultDecomposition( + nrPoints, this->getCartesianUtils().getCartesianDimensions(), forwardDecomposition)); + } else { + setDecomposition(decomposition); + } + globalIndexer_ = TensorIndexer(std::move(nrPoints)); + myPartitionsLowerBounds_ = getLowerBounds(this->getRank()); + myPartitionsFirstGlobalIndex_ = globalIndexer_.sequentialIndex(myPartitionsLowerBounds_); + myPartitionsUpperBounds_ = getUpperBounds(this->getRank()); + + // set local elements and local offsets + auto nrLocalPoints = getUpperBounds() - getLowerBounds(); + localTensor_ = Tensor(dataPointer, std::move(nrLocalPoints)); +} + +template +DistributedFullGrid::~DistributedFullGrid() { + for (size_t i = 0; i < upwardSubarrays_.size(); ++i) { + MPI_Type_free(&upwardSubarrays_[i]); + } + for (size_t i = 0; i < downwardSubarrays_.size(); ++i) { + MPI_Type_free(&downwardSubarrays_[i]); + } +} + +template +double DistributedFullGrid::getPointDistanceToCoordinate( + IndexType oneDimensionalLocalIndex, double coord, DimType d) const { + const auto& h = getGridSpacing(); + auto coordDistance = + static_cast(this->getLowerBounds()[d] + (hasBoundaryPoints_[d] > 0 ? 
0 : 1) + + oneDimensionalLocalIndex) * + h[d] - + coord; +#ifndef NDEBUG + if (std::abs(coordDistance) > h[d]) { + std::cout << "assert bounds " << coordDistance << coord << h << static_cast(d) + << oneDimensionalLocalIndex << std::endl; + assert(false && + "should only be called for coordinates within the support of this point's basis " + "function"); + } +#endif // ndef NDEBUG + return std::abs(coordDistance); +} + +template +FG_ELEMENT DistributedFullGrid::evalIndexAndAllUpperNeighbors( + const IndexVector& localIndex, const std::vector& coords) const { + FG_ELEMENT result = 0.; + auto localLinearIndex = this->getLocalLinearIndex(localIndex); + for (size_t localIndexIterate = 0; + localIndexIterate < combigrid::powerOfTwoByBitshift(this->getDimension()); + ++localIndexIterate) { +#ifndef NDEBUG + auto neighborVectorIndex = localIndex; +#endif + auto neighborIndex = localLinearIndex; + real phi_c = 1.; // value of product of iterate's basis function on coords + for (DimType d = 0; d < dim_; ++d) { + const auto lastIndexInDim = this->getLocalSizes()[d] - 1; + bool isUpperInDim = std::bitset(localIndexIterate).test(d); + auto iterateIndexInThisDimension = localIndex[d] + isUpperInDim; + if (iterateIndexInThisDimension < 0) { + phi_c = 0.; + break; + } else if (isUpperInDim && this->hasBoundaryPoints_[d] == 1 && + // this->getCartesianUtils().isOnLowerBoundaryInDimension(d) && + iterateIndexInThisDimension == this->getGlobalSizes()[d]) { + // if we have a wrap-around AND are on the lowest cartesian process in this dimension + // we need to set the index in this dim to 0 and shift the coordinate by 1. + neighborIndex -= localIndex[d] * this->getLocalOffsets()[d]; + iterateIndexInThisDimension = 0; + auto coordDistance = + getPointDistanceToCoordinate(iterateIndexInThisDimension, coords[d] - 1.0, d); + auto oneOverHinD = powerOfTwo[levels_[d]]; + phi_c *= 1. - coordDistance * oneOverHinD; + } else if (iterateIndexInThisDimension > lastIndexInDim) { + phi_c = 0.; + break; } else { - // all levels do not contain the boundary points -> mid point is the same - auto leftProtrusion = powerOfTwo[levelDiff] - 1; - for (auto& dec: newDecomposition[d]) { - // same as before, but subtract the "left" protrusion on the finer level - dec = static_cast(std::max(0., std::ceil(static_cast(dec - leftProtrusion)*stepFactor))); - } + auto coordDistance = + getPointDistanceToCoordinate(iterateIndexInThisDimension, coords[d], d); + auto oneOverHinD = powerOfTwo[levels_[d]]; + phi_c *= 1. - coordDistance * oneOverHinD; + neighborIndex += isUpperInDim * this->getLocalOffsets()[d]; + } +#ifndef NDEBUG + neighborVectorIndex[d] = iterateIndexInThisDimension; +#endif + } + if (phi_c > 0.) 
{ +#ifndef NDEBUG + auto unlinearizedNeighborIndex = neighborVectorIndex; + this->getLocalVectorIndex(neighborIndex, unlinearizedNeighborIndex); + if (unlinearizedNeighborIndex != neighborVectorIndex) { + std::cerr << "expected " << unlinearizedNeighborIndex << " got " << neighborVectorIndex + << std::endl; + } + if (neighborIndex < 0) { + std::cerr << "expected " << unlinearizedNeighborIndex << " got " << neighborVectorIndex + << " or " << neighborIndex << std::endl; } +#endif + assert(neighborIndex > -1); + assert(neighborIndex < this->getNrLocalElements()); + result += phi_c * this->getData()[neighborIndex]; } } - return newDecomposition; + return result; +} + +template +FG_ELEMENT DistributedFullGrid::evalLocal(const std::vector& coords) const { + FG_ELEMENT value; + evalLocal(coords, value); + return value; +} +template +void DistributedFullGrid::evalLocal(const std::vector& coords, + FG_ELEMENT& value) const { + assert(coords.size() == this->getDimension()); + // get the lowest-index point of the points + // whose basis functions contribute to the interpolated value + const auto& h = getGridSpacing(); + static thread_local IndexVector localIndexLowerNonzeroNeighborPoint; + localIndexLowerNonzeroNeighborPoint.resize(dim_); + for (DimType d = 0; d < dim_; ++d) { +#ifndef NDEBUG + if (coords[d] < 0. || coords[d] > 1.) { + std::cout << "coords " << coords << " out of bounds" << std::endl; + } + assert(coords[d] >= 0. && coords[d] <= 1.); +#endif // ndef NDEBUG + // this is the local index of the point that is lower than the coordinate + // may also be negative if the coordinate is lower than this processes' coordinates + IndexType localIndexLowerNonzeroNeighborIndexInThisDimension = static_cast( + std::floor((coords[d] - this->getLowerBoundsCoord(d)) * this->getInverseGridSpacingIn(d))); + + // check if we even need to evaluate on this process + if (localIndexLowerNonzeroNeighborIndexInThisDimension < -1) { + // index too small + value = 0.; + return; + } else if ((coords[d] >= 1.0 - h[d]) && this->hasBoundaryPoints_[d] == 1 && + this->getCartesianUtils().isOnLowerBoundaryInDimension(d)) { + // if we have periodic boundary and this process is at the lower end of the dimension d + // we need the periodic coordinate => don't return 0 + } else if (localIndexLowerNonzeroNeighborIndexInThisDimension > this->getLocalSizes()[d] - 1) { + // index too high + value = 0.; + return; + } + localIndexLowerNonzeroNeighborPoint[d] = localIndexLowerNonzeroNeighborIndexInThisDimension; + } + // evaluate at those points and sum up according to the basis function + value = evalIndexAndAllUpperNeighbors(localIndexLowerNonzeroNeighborPoint, coords); +} + +template +std::vector DistributedFullGrid::getInterpolatedValues( + const std::vector>& interpolationCoords) const { + auto numValues = interpolationCoords.size(); + std::vector values; + values.resize(numValues); +#pragma omp parallel for default(none) firstprivate(numValues) shared(values, interpolationCoords) \ + schedule(static) + for (size_t i = 0; i < numValues; ++i) { + this->evalLocal(interpolationCoords[i], values[i]); + } + MPI_Allreduce(MPI_IN_PLACE, values.data(), static_cast(numValues), this->getMPIDatatype(), + MPI_SUM, this->getCommunicator()); + return values; +} + +template +void DistributedFullGrid::getCoordsGlobal(IndexType globalLinearIndex, + std::vector& coords) const { + IndexType ind = 0; + IndexType tmp_add = 0; + + coords.resize(dim_); + + for (DimType j = 0; j < dim_; j++) { + ind = globalLinearIndex % this->getGlobalSizes()[j]; + 
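
evalLocal evaluates the d-linear hat-function basis: it locates the grid point just below the query coordinate via floor((x - lowerBoundsCoord) * 1/h) and sums the 2^d surrounding points weighted by products of 1-D hat functions; getInterpolatedValues then MPI_SUM-reduces the per-rank results, which are zero on ranks whose subdomain does not contain the point. The 1-D serial core of that weighting, as a minimal sketch with hypothetical names:

#include <cassert>
#include <cmath>
#include <cstddef>
#include <vector>

// 1-D piecewise-linear (hat function) interpolation on a uniform grid with
// boundary points: values[i] lives at x = i * h, with h = 1 / (values.size() - 1)
double evalHat1d(const std::vector<double>& values, double x) {
  assert(x >= 0.0 && x <= 1.0);
  const double h = 1.0 / static_cast<double>(values.size() - 1);
  // lower of the two grid points whose hat functions are nonzero at x
  auto lower = static_cast<std::size_t>(std::floor(x / h));
  if (lower == values.size() - 1) return values.back();  // x == 1.0
  const double distance = x - static_cast<double>(lower) * h;
  const double weightUpper = distance / h;  // = 1 - |x - x_upper| / h
  return (1.0 - weightUpper) * values[lower] + weightUpper * values[lower + 1];
}

int main() {
  std::vector<double> v{0.0, 1.0, 0.0};  // level-1 grid with boundary on [0, 1]
  assert(std::abs(evalHat1d(v, 0.25) - 0.5) < 1e-12);
  assert(std::abs(evalHat1d(v, 0.5) - 1.0) < 1e-12);
  return 0;
}
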
globalLinearIndex = globalLinearIndex / this->getGlobalSizes()[j]; + // set the coordinate based on if we have boundary points + tmp_add = (hasBoundaryPoints_[j] > 0) ? (0) : (1); + coords[j] = static_cast(ind + tmp_add) * getGridSpacing()[j]; + } +} + +template +void DistributedFullGrid::getCoordsLocal(IndexType localLinearIndex, + std::vector& coords) const { + // todo: probably very inefficient implementation, if crucial for + // performance implement more direct way of computing the coordinates + assert(localLinearIndex < getNrLocalElements()); + + IndexType globalLinearIndex = getGlobalLinearIndex(localLinearIndex); + + getCoordsGlobal(globalLinearIndex, coords); +} + +template +void DistributedFullGrid::getGlobalLI(IndexType elementIndex, LevelVector& levels, + IndexVector& indexes) const { + IndexType startindex, tmp_val; + + assert(elementIndex < this->getNrElements()); + levels.resize(dim_); + indexes.resize(dim_); + + tmp_val = elementIndex; + + // first calculate intermediary indexes + for (DimType k = 0; k < dim_; k++) { + startindex = (hasBoundaryPoints_[k] > 0) ? 0 : 1; + indexes[k] = tmp_val % this->getGlobalSizes()[k] + startindex; + tmp_val = tmp_val / this->getGlobalSizes()[k]; + } + + // The level and index of the element in the hashgridstorage are computed dividing by two the + // index and level in the fullgrid + // until we obtain an impair number for the index, thus obtaining the level and index in the + // hierarchical basis (Aliz Nagy) + // ... + for (DimType k = 0; k < dim_; k++) { + tmp_val = levels_[k]; + + if (indexes[k] != 0) { + // todo: these operations can be optimized + while (indexes[k] % 2 == 0) { + indexes[k] = indexes[k] / 2; + tmp_val--; + } + } else { + tmp_val = 0; + } + + levels[k] = tmp_val; + } +} + +template +void DistributedFullGrid::getGlobalVectorIndex(IndexType globLinIndex, + IndexVector& globAxisIndex) const { + assert(globLinIndex < this->getNrElements()); + assert(globAxisIndex.size() == dim_); + + globAxisIndex = this->globalIndexer_.getVectorIndex(globLinIndex); +} + +template +void DistributedFullGrid::getGlobalVectorIndex(const IndexVector& locAxisIndex, + IndexVector& globAxisIndex) const { + assert(locAxisIndex.size() == dim_); + + globAxisIndex = this->getLowerBounds() + locAxisIndex; +} + +template +void DistributedFullGrid::getLocalVectorIndex(IndexType locLinIndex, + IndexVector& locAxisIndex) const { + locAxisIndex = this->localTensor_.getVectorIndex(locLinIndex); +} + +template +bool DistributedFullGrid::getLocalVectorIndex(const IndexVector& globAxisIndex, + IndexVector& locAxisIndex) const { + assert(globAxisIndex.size() == dim_); + + if (this->isGlobalIndexHere(globAxisIndex)) { + locAxisIndex.assign(globAxisIndex.begin(), globAxisIndex.end()); + std::transform(locAxisIndex.begin(), locAxisIndex.end(), this->getLowerBounds().begin(), + locAxisIndex.begin(), std::minus()); + return true; + } else { + return false; + } +} + +template +IndexType DistributedFullGrid::getGlobalLinearIndex( + const IndexVector& globAxisIndex) const { + return globalIndexer_.sequentialIndex(globAxisIndex); +} + +template +IndexType DistributedFullGrid::getGlobalLinearIndex(IndexType locLinIndex) const { + assert(locLinIndex < this->getNrLocalElements()); + + // convert to local vector index + static thread_local IndexVector locAxisIndex(dim_); + locAxisIndex.resize(dim_); + getLocalVectorIndex(locLinIndex, locAxisIndex); + + // convert to global linear index + IndexType globLinIndex = + this->myPartitionsFirstGlobalIndex_ + 
this->getGlobalLinearIndex(locAxisIndex); + assert(globLinIndex < this->getNrElements()); + return globLinIndex; +} + +template +IndexType DistributedFullGrid::getLocalLinearIndex( + const IndexVector& locAxisIndex) const { + return localTensor_.sequentialIndex(locAxisIndex); +} + +template +IndexType DistributedFullGrid::getLocalLinearIndex(IndexType globLinIndex) const { + assert(globLinIndex < this->getNrElements()); + + // convert to global vector index + static thread_local IndexVector globAxisIndex(dim_); + globAxisIndex.resize(dim_); + getGlobalVectorIndex(globLinIndex, globAxisIndex); + + if (this->isGlobalIndexHere(globAxisIndex)) { + // convert to local linear index + return getLocalLinearIndex(globAxisIndex) - this->myPartitionsFirstGlobalIndex_; + } else { + return -1; + } +} + +template +bool DistributedFullGrid::isGlobalIndexHere(IndexVector globalVectorIndex) const { + return (globalVectorIndex >= this->getLowerBounds()) && + (globalVectorIndex < this->getUpperBounds()); +} + +template +bool DistributedFullGrid::isGlobalIndexHere(IndexType globLinIndex) const { + return isGlobalIndexHere(this->globalIndexer_.getVectorIndex(globLinIndex)); +} + +template +std::tuple, std::vector, std::vector> +DistributedFullGrid::getSizesSubsizesStartsOfSubtensor() const { + std::vector csizes(this->getGlobalSizes().begin(), this->getGlobalSizes().end()); + std::vector csubsizes(this->getLocalSizes().begin(), this->getLocalSizes().end()); + std::vector cstarts(this->getLowerBounds().begin(), this->getLowerBounds().end()); + + return std::make_tuple(csizes, csubsizes, cstarts); +} + +template +double DistributedFullGrid::getInverseGridSpacingIn(DimType inDimension) const { + return powerOfTwo[levels_[inDimension]]; +} + +template +std::vector DistributedFullGrid::getInverseGridSpacing() const { + std::vector oneOverH; + oneOverH.resize(gridSpacing_.size()); + // should be the same as the number of intervals (N) + for (DimType j = 0; j < dim_; j++) { + oneOverH[j] = powerOfTwo[levels_[j]]; + } +#ifndef NDEBUG + std::vector oneOverHByDivision; + oneOverHByDivision.resize(gridSpacing_.size()); + std::transform(gridSpacing_.begin(), gridSpacing_.end(), oneOverHByDivision.begin(), + std::bind(std::divides(), 1, std::placeholders::_1)); + for (DimType j = 0; j < dim_; j++) { + assert(std::abs(oneOverHByDivision[j] - oneOverH[j]) < 1e-10); + assert(oneOverHByDivision[j] == oneOverH[j]); + } +#endif + return oneOverH; +} + +template +double DistributedFullGrid::getInnerNodalBasisFunctionIntegral() const { + const auto& h = this->getGridSpacing(); + return std::accumulate(h.begin(), h.end(), 1., std::multiplies()); +} + +template +void DistributedFullGrid::setZero() { + std::memset(this->getData(), 0, this->getNrLocalElements() * sizeof(FG_ELEMENT)); +} + +template +const IndexVector& DistributedFullGrid::getLowerBounds() const { + // return getLowerBounds(this->getRank()); + return myPartitionsLowerBounds_; +} + +template +IndexVector DistributedFullGrid::getLowerBounds(RankType r) const { + assert(r >= 0 && r < this->getCommunicatorSize()); + // get coords of r in cart comm + IndexVector lowerBounds(dim_); + const auto& coords = cartesianUtils_.getPartitionCoordsOfRank(r); + + for (DimType i = 0; i < dim_; ++i) { + lowerBounds[i] = getDecomposition()[i][coords[i]]; + } + return lowerBounds; +} + +template +real DistributedFullGrid::getLowerBoundsCoord(DimType inDimension) const { + return static_cast(this->getLowerBounds()[inDimension] + + (hasBoundaryPoints_[inDimension] > 0 ? 
0 : 1)) * + this->getGridSpacing()[inDimension]; +} + +template +const IndexVector& DistributedFullGrid::getUpperBounds() const { + // return getUpperBounds(this->getRank()); + return myPartitionsUpperBounds_; +} + +template +IndexVector DistributedFullGrid::getUpperBounds(RankType r) const { + assert(r >= 0 && r < this->getCommunicatorSize()); + IndexVector upperBounds(dim_); + const auto& coords = cartesianUtils_.getPartitionCoordsOfRank(r); + + for (DimType i = 0; i < dim_; ++i) { + RankType n; + std::vector nc(coords); + + if (nc[i] < this->getCartesianUtils().getCartesianDimensions()[i] - 1) { + // get rank of next neighbor in dim i + nc[i] += 1; + n = this->getCartesianUtils().getRankFromPartitionCoords(nc); + upperBounds[i] = getLowerBounds(n)[i]; + } else { + // no neighbor in dim i -> end of domain + upperBounds[i] = this->getGlobalSizes()[i]; + } + } + return upperBounds; +} + +template +std::vector DistributedFullGrid::getUpperBoundsCoords() const { + return getUpperBoundsCoords(this->getRank()); +} + +template +std::vector DistributedFullGrid::getUpperBoundsCoords(RankType r) const { + assert(r >= 0 && r < this->getCommunicatorSize()); + + /* the upper bound index vector can correspond to coordinates outside of + * the domain, thus we cannot use the getGlobalLinearIndex method here. + */ + const IndexVector& ubvi = getUpperBounds(r); + std::vector coords(dim_); + IndexType tmp_add = 0; + + for (DimType j = 0; j < dim_; ++j) { + tmp_add = (hasBoundaryPoints_[j] > 0) ? (0) : (1); + coords[j] = static_cast(ubvi[j] + tmp_add) * getGridSpacing()[j]; + } + return coords; +} + +template +RankType DistributedFullGrid::getNeighbor1dFromAxisIndex(DimType dim, + IndexType idx1d) const { + // if global index is outside of domain return negative value + { + if (idx1d < 0) return -1; + if (idx1d > this->getGlobalSizes()[dim] - 1) return -1; + } + const auto& decomp1d = this->getDecomposition()[dim]; + auto lower = std::lower_bound(decomp1d.begin(), decomp1d.end(), idx1d + 1); + int partitionIdx1d = static_cast(std::distance(decomp1d.begin(), lower)) - 1; + RankType r = this->getCartesianUtils().getNeighbor1dFromPartitionIndex(dim, partitionIdx1d); + + // check if global index vector is actually contained in the domain of rank r + assert(idx1d >= this->getLowerBounds(r)[dim]); + assert(idx1d < this->getUpperBounds(r)[dim]); + assert(r < this->getCommunicatorSize()); + return r; +} + +template +const std::vector& DistributedFullGrid::getParallelization() const { + return this->getCartesianUtils().getCartesianDimensions(); +} + +template +LevelType DistributedFullGrid::getLevel(DimType d, IndexType idx1d) const { + // IndexVector givec(dim_, 0); + // givec[d] = idx1d; + // IndexType idx = getGlobalLinearIndex(givec); + // LevelVector levels(dim_); + // IndexVector tmp(dim_); + // getGlobalLI(idx, levels, tmp); + // return levels[d]; + + if (this->hasBoundaryPoints_[d] == 0) { + ++idx1d; + } + // get level of idx1d by rightmost set bit + // (e.g., all points on the finest level already have a 1 at the rightmost bit) + if (idx1d == 0) { + return 0; + } + const LevelType lmax = this->getLevels()[d]; + // auto set_bit = idx1d & ~(idx1d-1); + // LevelType l = lmax - log2(set_bit); + // builtin is fast and should work with gcc and clang + // if it is not available, use the one above (at a slight performance hit) + // or c++20's std::countr_zero + LevelType l; + if constexpr (sizeof(IndexType) == sizeof(long)) { + l = static_cast(lmax - __builtin_ctzl(static_cast(idx1d))); + } else if constexpr 
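
As an aside on `getNeighbor1dFromAxisIndex` above: the owning partition of a global 1D index is found by a `std::lower_bound` over the sorted 1D decomposition (each partition's first global index) followed by one step back. A minimal standalone sketch of that search with a made-up decomposition; the helper name `ownerPartition` is illustrative, not DisCoTec API.

```cpp
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <iostream>
#include <vector>

using IndexType = std::int64_t;

// Returns the index of the partition that owns global 1D index idx1d, given
// the sorted list of partition start indices (decomposition[0] == 0).
int ownerPartition(const std::vector<IndexType>& decomposition, IndexType idx1d) {
  // lower_bound on idx1d + 1 yields the first start index strictly greater
  // than idx1d; the owning partition is the one directly before it.
  auto lower = std::lower_bound(decomposition.begin(), decomposition.end(), idx1d + 1);
  return static_cast<int>(std::distance(decomposition.begin(), lower)) - 1;
}

int main() {
  // four partitions starting at global indices 0, 3, 7, 12
  std::vector<IndexType> decomposition{0, 3, 7, 12};
  assert(ownerPartition(decomposition, 0) == 0);
  assert(ownerPartition(decomposition, 6) == 1);
  assert(ownerPartition(decomposition, 7) == 2);
  assert(ownerPartition(decomposition, 12) == 3);
  std::cout << "index 9 lives on partition " << ownerPartition(decomposition, 9) << '\n';  // 2
  return 0;
}
```
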
(sizeof(IndexType) == sizeof(long long)) { + l = static_cast(lmax - __builtin_ctzll(static_cast(idx1d))); + } else { + l = static_cast(lmax - __builtin_ctz(static_cast(idx1d))); + } + return l; +} + +template +IndexType DistributedFullGrid::getLeftPredecessor(DimType d, IndexType idx1d) const { + LevelType l = getLevel(d, idx1d); + + // boundary points + if (l == 0) return -1; + + LevelType ldiff = static_cast(levels_[d] - l); + IndexType lpidx = idx1d - combigrid::powerOfTwoByBitshift(ldiff); + + return lpidx; +} + +template +IndexType DistributedFullGrid::getRightPredecessor(DimType d, IndexType idx1d) const { + LevelType l = getLevel(d, idx1d); + + // boundary points + if (l == 0) return -1; + + LevelType ldiff = static_cast(levels_[d] - l); + IndexType rpidx = idx1d + combigrid::powerOfTwoByBitshift(ldiff); + + // check if outside of domain + IndexType numElementsD = this->getGlobalSizes()[d]; + + // in case of periodic, "virtual" domain is larger by one + bool oneSidedBoundary = this->returnBoundaryFlags()[d] == 1; + if (oneSidedBoundary && rpidx == numElementsD) return numElementsD; + + if (rpidx > numElementsD - 1) rpidx = -1; + + return rpidx; +} + +template +void DistributedFullGrid::getPartitionCoords(IndexVector& globalAxisIndex, + std::vector& partitionCoords) const { + partitionCoords.resize(dim_); + + for (DimType d = 0; d < dim_; ++d) { + partitionCoords[d] = -1; + for (int i = 0; i < this->getCartesianUtils().getCartesianDimensions()[d]; ++i) { + if (globalAxisIndex[d] >= getDecomposition()[d][i]) partitionCoords[d] = i; + } + + // check whether the partition coordinates are valid + assert(partitionCoords[d] > -1 && + partitionCoords[d] < this->getCartesianUtils().getCartesianDimensions()[d]); + } +} + +template +void DistributedFullGrid::print() const { + std::visit([&](auto&& arg) { print(arg); }, localTensor_); +} + +template +MPI_Datatype DistributedFullGrid::getMPIDatatype() const { + return abstraction::getMPIDatatype(abstraction::getabstractionDataType()); +} +template +void DistributedFullGrid::gatherFullGrid(FullGrid& fg, RankType root) { + int size = this->getCommunicatorSize(); + int rank = this->getRank(); + CommunicatorType comm = this->getCommunicator(); + + // each rank: send dfg to root + int dest = root; + MPI_Request sendRequest; + MPI_Isend(this->getData(), static_cast(this->getNrLocalElements()), this->getMPIDatatype(), + dest, 0, comm, &sendRequest); + + std::vector requests; + std::vector subarrayTypes; + + // rank r: for each rank create subarray view on fg + if (rank == root) { + if (!fg.isGridCreated()) fg.createFullGrid(); + + for (int r = 0; r < size; ++r) { + IndexVector sizes(fg.getSizes().begin(), fg.getSizes().end()); + IndexVector subsizes = this->getUpperBounds(r) - this->getLowerBounds(r); + IndexVector starts = this->getLowerBounds(r); + + std::vector csizes(sizes.begin(), sizes.end()); + std::vector csubsizes(subsizes.begin(), subsizes.end()); + std::vector cstarts(starts.begin(), starts.end()); + + // create subarray view on data + MPI_Datatype mysubarray; + MPI_Type_create_subarray(static_cast(this->getDimension()), &csizes[0], &csubsizes[0], + &cstarts[0], MPI_ORDER_FORTRAN, this->getMPIDatatype(), &mysubarray); + MPI_Type_commit(&mysubarray); + subarrayTypes.push_back(mysubarray); + + int src = r; + MPI_Request req; + MPI_Irecv(fg.getData(), 1, mysubarray, src, 0, this->getCommunicator(), &req); + requests.push_back(req); + } + } + + MPI_Wait(&sendRequest, MPI_STATUS_IGNORE); + + if (rank == root) { + 
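
`getLevel` above recovers the hierarchical level of a 1D index from its number of trailing zero bits; the comment points to C++20's `std::countr_zero` as the portable replacement for the compiler builtins. A self-contained sketch of that relation, assuming a grid with boundary points (indices 0..2^lmax); `levelOfIndex` is an illustrative helper, not the library function.

```cpp
#include <bit>  // std::countr_zero (C++20)
#include <cassert>
#include <cstdint>

using LevelType = int;
using IndexType = std::int64_t;

// Level of global 1D index idx1d on a grid of maximum level lmax with
// boundary points: index 0 gets level 0 via the explicit check, every other
// index gets lmax minus its number of trailing zero bits (so the right
// boundary point 2^lmax also ends up on level 0).
LevelType levelOfIndex(IndexType idx1d, LevelType lmax) {
  if (idx1d == 0) return 0;
  return static_cast<LevelType>(lmax - std::countr_zero(static_cast<std::uint64_t>(idx1d)));
}

int main() {
  const LevelType lmax = 3;  // indices 0..8
  assert(levelOfIndex(0, lmax) == 0);  // left boundary
  assert(levelOfIndex(8, lmax) == 0);  // right boundary: 8 = 2^3, three trailing zeros
  assert(levelOfIndex(4, lmax) == 1);  // midpoint
  assert(levelOfIndex(2, lmax) == 2);
  assert(levelOfIndex(6, lmax) == 2);
  assert(levelOfIndex(1, lmax) == 3);  // odd indices are on the finest level
  assert(levelOfIndex(5, lmax) == 3);
  return 0;
}
```
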
MPI_Waitall(static_cast(requests.size()), &requests[0], MPI_STATUSES_IGNORE); + } + + for (size_t i = 0; i < subarrayTypes.size(); ++i) MPI_Type_free(&subarrayTypes[i]); +} + +template +void DistributedFullGrid::getFGPointsOfSubspaceRecursive( + DimType d, IndexType localLinearIndexSum, std::vector& oneDIndices, + std::vector& subspaceIndices) const { + assert(d < dim_); + assert(oneDIndices.size() == dim_); + assert(!oneDIndices.empty()); + + for (const auto idx : oneDIndices[d]) { + auto updatedLocalIndexSum = localLinearIndexSum; + updatedLocalIndexSum += this->getLocalOffsets()[d] * idx; + if (d > 0) { + getFGPointsOfSubspaceRecursive(static_cast(d - 1), updatedLocalIndexSum, oneDIndices, + subspaceIndices); + } else { + subspaceIndices.emplace_back(updatedLocalIndexSum); + } + } +} +template +std::vector DistributedFullGrid::getFGPointsOfSubspace( + const LevelVector& l) const { + IndexVector subspaceIndices; + IndexType numPointsOfSubspace = 1; + static thread_local std::vector oneDIndices; + oneDIndices.resize(dim_); + for (DimType d = 0; d < dim_; ++d) { + if (l[d] > levels_[d]) { + return subspaceIndices; + } + get1dIndicesLocal(d, l[d], oneDIndices[d]); + numPointsOfSubspace *= static_cast(oneDIndices[d].size()); + } + if (numPointsOfSubspace > 0) { + subspaceIndices.reserve(numPointsOfSubspace); + + IndexType localLinearIndexSum = 0; + getFGPointsOfSubspaceRecursive(static_cast(dim_ - 1), localLinearIndexSum, oneDIndices, + subspaceIndices); + } + assert(static_cast(subspaceIndices.size()) == numPointsOfSubspace); + return subspaceIndices; +} + +template +template +size_t DistributedFullGrid::extractFromUniformSG( + const DistributedSparseGridUniform& dsg) { + assert(dsg.isSubspaceDataCreated()); + + // all the hierarchical subspaces contained in this full grid + const auto downwardClosedSet = combigrid::getDownSet(levels_); + + // loop over all subspaces (-> somewhat linear access in the sg) + size_t numCopied = 0; + static thread_local IndexVector subspaceIndices; + typename AnyDistributedSparseGrid::SubspaceIndexType sIndex = 0; +#pragma omp parallel for shared(dsg, downwardClosedSet) default(none) schedule(guided) \ + firstprivate(sIndex) reduction(+ : numCopied) + for (const auto& level : downwardClosedSet) { + sIndex = dsg.getIndexInRange(level, sIndex); + bool shouldBeCopied = sIndex > -1 && dsg.getDataSize(sIndex) > 0; + if constexpr (!sparseGridFullyAllocated) { + shouldBeCopied = shouldBeCopied && dsg.isSubspaceCurrentlyAllocated(sIndex); + } + if (shouldBeCopied) { + auto sPointer = dsg.getData(sIndex); + subspaceIndices = std::move(this->getFGPointsOfSubspace(level)); + // #pragma omp simd linear(sPointer : 1) // no simd benefit + for (size_t fIndex = 0; fIndex < subspaceIndices.size(); ++fIndex) { + this->getData()[subspaceIndices[fIndex]] = *sPointer; + ++sPointer; + } + assert(dsg.getDataSize(sIndex) == subspaceIndices.size()); + numCopied += subspaceIndices.size(); + } + } + return numCopied; +} + +template +IndexType DistributedFullGrid::getStrideForThisLevel(LevelType l, DimType d) const { + assert(d < this->getDimension()); + // special treatment for level 1 suspaces with boundary + return (l == 1 && hasBoundaryPoints_[d] > 0) + ? 
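
`gatherFullGrid` above stitches the distributed grid together by giving the root one `MPI_Type_create_subarray` view per contributing rank and posting an `MPI_Irecv` straight into the full-grid buffer. A minimal single-rank sketch of the subarray mechanics under the same Fortran ordering; sizes, values, and the self-sendrecv are illustrative only.

```cpp
// mpic++ subarray_demo.cpp && mpirun -n 1 ./a.out
#include <mpi.h>

#include <cstdio>
#include <vector>

int main(int argc, char** argv) {
  MPI_Init(&argc, &argv);

  // a 4x4 block of doubles, stored in Fortran (column-major) order as in the
  // distributed full grid: element (i, j) lives at data[i + 4 * j]
  const int sizes[2] = {4, 4};
  std::vector<double> data(16);
  for (int j = 0; j < 4; ++j)
    for (int i = 0; i < 4; ++i) data[i + 4 * j] = 10.0 * i + j;

  // subarray view on the highest layer in dimension 0 (i == 3), i.e. the
  // one-element-thick slab that would be sent as an "upward" ghost layer
  const int subsizes[2] = {1, 4};
  const int starts[2] = {3, 0};
  MPI_Datatype slab;
  MPI_Type_create_subarray(2, sizes, subsizes, starts, MPI_ORDER_FORTRAN, MPI_DOUBLE, &slab);
  MPI_Type_commit(&slab);

  // copy that layer into a contiguous receive buffer via a self-sendrecv
  std::vector<double> ghost(4);
  MPI_Sendrecv(data.data(), 1, slab, 0, 0, ghost.data(), 4, MPI_DOUBLE, 0, 0, MPI_COMM_SELF,
               MPI_STATUS_IGNORE);
  for (double v : ghost) std::printf("%g ", v);  // 30 31 32 33
  std::printf("\n");

  MPI_Type_free(&slab);
  MPI_Finalize();
  return 0;
}
```
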
combigrid::powerOfTwoByBitshift(static_cast(levels_[d] - 1)) + : combigrid::powerOfTwoByBitshift(static_cast(levels_[d] - l + 1)); +} + +template +IndexType DistributedFullGrid::getLocalStartForThisLevel( + LevelType l, DimType d, IndexType strideForThisLevel) const { + const auto firstGlobal1dIdx = getFirstGlobal1dIndex(d); + + // get global offset to find indices of this level + // this is the first global index that has level l in dimension d + IndexType offsetForThisLevel; + if (hasBoundaryPoints_[d] > 0) { + if (l == 1) { + offsetForThisLevel = 0; + } else { + // offsetForThisLevel = strideForThisLevel / 2; + offsetForThisLevel = combigrid::powerOfTwoByBitshift(static_cast(levels_[d] - l)); + } + } else { + // offsetForThisLevel = strideForThisLevel / 2 - 1; + offsetForThisLevel = + combigrid::powerOfTwoByBitshift(static_cast(levels_[d] - l)) - 1; + } + assert(offsetForThisLevel > -1); + assert(strideForThisLevel >= offsetForThisLevel); + + // first level-index that is on our partition + const IndexType firstGlobalIndexOnThisPartition = + (firstGlobal1dIdx < offsetForThisLevel) + ? 0 + : (firstGlobal1dIdx - offsetForThisLevel + strideForThisLevel - 1) / strideForThisLevel; + // get global index of first local index which has level l + const IndexType globalStart = + (firstGlobalIndexOnThisPartition)*strideForThisLevel + offsetForThisLevel; + assert(getLevel(d, globalStart) == l || + (getLevel(d, globalStart) == 0 && hasBoundaryPoints_[d] > 0 && l == 1)); + assert(globalStart >= firstGlobal1dIdx); + + return globalStart - firstGlobal1dIdx; +} + +template +IndexType DistributedFullGrid::getNumPointsOnThisPartition( + DimType d, IndexType localStart, IndexType strideForThisLevel) const { + assert(d < this->getDimension()); + return (this->getLocalSizes()[d] - 1 < localStart) + ? 
0 + : (this->getLocalSizes()[d] - 1 - localStart) / strideForThisLevel + 1; +} + +template +IndexType DistributedFullGrid::getNumPointsOnThisPartition(LevelType l, + DimType d) const { + assert(!(l > levels_[d])); + const auto strideForThisLevel = getStrideForThisLevel(l, d); + return getNumPointsOnThisPartition(d, getLocalStartForThisLevel(l, d, strideForThisLevel), + strideForThisLevel); +} + +template +void DistributedFullGrid::get1dIndicesLocal(DimType d, LevelType l, + IndexVector& oneDIndices) const { + assert(l > 0); + assert(d < this->getDimension()); + if (l > levels_[d]) { + oneDIndices.clear(); + return; + } + + const IndexType strideForThisLevel = getStrideForThisLevel(l, d); + IndexType localStart = getLocalStartForThisLevel(l, d, strideForThisLevel); + const IndexType numPointsOnThisPartition = + getNumPointsOnThisPartition(d, localStart, strideForThisLevel); + oneDIndices.resize(numPointsOnThisPartition); + + std::generate(oneDIndices.begin(), oneDIndices.end(), [&localStart, &strideForThisLevel]() { + auto localStartBefore = localStart; + localStart += strideForThisLevel; + return localStartBefore; + }); +} + +template +real DistributedFullGrid::getLpNorm(int p) const { + assert(p >= 0); + // special case maximum norm + MPI_Datatype dtype = abstraction::getMPIDatatype(abstraction::getabstractionDataType()); + if (p == 0) { + real max = 0.0; + auto data = this->getData(); +#pragma omp parallel for reduction(max : max) default(none) shared(data) schedule(static) + for (IndexType i = 0; i < this->getNrLocalElements(); ++i) { + max = std::max(max, std::abs(data[i])); + } + real globalMax(-1); + MPI_Allreduce(&max, &globalMax, 1, dtype, MPI_MAX, getCommunicator()); + return globalMax; + } else { + real p_f = static_cast(p); + auto data = this->getData(); + real res = 0.0; + +#pragma omp parallel for reduction(+ : res) default(none) shared(data, oneOverPowOfTwo) \ + firstprivate(p_f) schedule(static) + for (IndexType i = 0; i < this->getNrLocalElements(); ++i) { + auto isOnBoundary = this->isLocalLinearIndexOnBoundary(i); + auto countBoundary = std::count(isOnBoundary.begin(), isOnBoundary.end(), true); + double hatFcnIntegral = oneOverPowOfTwo[countBoundary]; + // std::cout << hatFcnIntegral << std::endl; + // sumIntegral += hatFcnIntegral; + real abs = std::abs(data[i]); + res += std::pow(abs, p_f) * hatFcnIntegral; + } + // std::cout << " sum " << sumIntegral << std::endl; + real globalRes(0.); + MPI_Allreduce(&res, &globalRes, 1, dtype, MPI_SUM, getCommunicator()); + // TODO this is only correct for 1 norm + // other norms have mixed terms and need additional boundary communication + return std::pow(globalRes * std::pow(this->getInnerNodalBasisFunctionIntegral(), p_f), + 1.0 / p_f); + } +} + +template +void DistributedFullGrid::writePlotFile(const char* filename) const { + auto dim = getDimension(); + + // create subarray data type + auto [csizes, csubsizes, cstarts] = this->getSizesSubsizesStartsOfSubtensor(); + + // create subarray view on data + MPI_Datatype mysubarray; + MPI_Type_create_subarray(static_cast(getDimension()), &csizes[0], &csubsizes[0], &cstarts[0], + MPI_ORDER_FORTRAN, getMPIDatatype(), &mysubarray); + MPI_Type_commit(&mysubarray); + + // open file + MPI_File fh; + MPI_File_open(getCommunicator(), filename, MPI_MODE_WRONLY | MPI_MODE_CREATE, MPI_INFO_NULL, &fh); + + MPI_Offset offset = 0; + + // .raw files can be read by paraview, in that case write the header separately + std::string fn = filename; + if (fn.find(".raw") != std::string::npos) { + // rank 0 
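
`get1dIndicesLocal` above enumerates the level-`l` points of one dimension as an arithmetic sequence `offset + k * stride`, restricted to the local partition; level 1 on a boundary grid is the special case that also collects the two boundary points. A whole-domain (non-distributed) sketch of that enumeration, assuming a boundary grid; `indicesOfLevel` is an illustrative helper.

```cpp
#include <cassert>
#include <cstdint>
#include <vector>

using LevelType = int;
using IndexType = std::int64_t;

// Global 1D indices of hierarchical level l on a boundary grid of level lmax.
// Level 1 is special: it starts at the left boundary (offset 0) with stride
// 2^(lmax-1); all other levels start at 2^(lmax-l) with stride 2^(lmax-l+1).
std::vector<IndexType> indicesOfLevel(LevelType l, LevelType lmax) {
  const IndexType stride =
      (l == 1) ? (IndexType{1} << (lmax - 1)) : (IndexType{1} << (lmax - l + 1));
  const IndexType offset = (l == 1) ? 0 : (IndexType{1} << (lmax - l));
  std::vector<IndexType> indices;
  for (IndexType idx = offset; idx <= (IndexType{1} << lmax); idx += stride) {
    indices.push_back(idx);
  }
  return indices;
}

int main() {
  const LevelType lmax = 3;  // boundary grid with indices 0..8
  assert((indicesOfLevel(1, lmax) == std::vector<IndexType>{0, 4, 8}));
  assert((indicesOfLevel(2, lmax) == std::vector<IndexType>{2, 6}));
  assert((indicesOfLevel(3, lmax) == std::vector<IndexType>{1, 3, 5, 7}));
  return 0;
}
```
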
write human-readable header + if (this->getRank() == 0) { + auto headername = fn + "_header"; + std::ofstream ofs(headername); + + // first line: dimension + ofs << "dimensionality " << getDimension() << std::endl; + + // grid points per dimension in the order + // x_0, x_1, ... , x_d + ofs << "Extents "; + for (auto s : csizes) { + ofs << s << " "; + } + ofs << std::endl; + + // data type + ofs << "Data type size " << sizeof(FG_ELEMENT) << std::endl; + // TODO (pollinta) write endianness and data spacing + } + } else { + // rank 0 write dim and resolution (and data format?) + if (this->getRank() == 0) { + MPI_File_write(fh, &dim, 1, MPI_UNSIGNED_CHAR, MPI_STATUS_IGNORE); + + std::vector res(csizes.begin(), csizes.end()); + MPI_File_write(fh, &res[0], 6, MPI_INT, MPI_STATUS_IGNORE); + } + + // set file view to right offset (in bytes) + offset = (1 + dim) * sizeof(int); + } + + MPI_File_set_view(fh, offset, getMPIDatatype(), mysubarray, "native", MPI_INFO_NULL); + + // write subarray + MPI_File_write_all(fh, getData(), static_cast(getNrLocalElements()), getMPIDatatype(), + MPI_STATUS_IGNORE); + // close file + MPI_File_close(&fh); + MPI_Type_free(&mysubarray); +} + +template +void DistributedFullGrid::writePlotFileVTK(const char* filename) const { + auto dim = getDimension(); + assert(dim < 4); // vtk supports only up to 3D + + // create subarray data type + auto [csizes, csubsizes, cstarts] = this->getSizesSubsizesStartsOfSubtensor(); + + // create subarray view on data + MPI_Datatype mysubarray; + MPI_Type_create_subarray(static_cast(getDimension()), &csizes[0], &csubsizes[0], &cstarts[0], + MPI_ORDER_FORTRAN, getMPIDatatype(), &mysubarray); + MPI_Type_commit(&mysubarray); + + // open file + MPI_File fh; + MPI_File_open(getCommunicator(), filename, MPI_MODE_WRONLY + MPI_MODE_CREATE, MPI_INFO_NULL, &fh); + + std::stringstream vtk_header; + // cf https://lorensen.github.io/VTKExamples/site/VTKFileFormats/ => structured points + vtk_header << "# vtk DataFile Version 2.0.\n" + << "This file contains the combination solution evaluated on a full grid\n" + << "BINARY\n" + << "DATASET STRUCTURED_POINTS\n"; + if (dim == 3) { // TODO change for non-boundary grids using getGridSpacing + vtk_header << "DIMENSIONS " << csizes[0] << " " << csizes[1] << " " << csizes[2] << "\n" + << "ORIGIN 0 0 0\n" + << "SPACING " << 1. / static_cast(csizes[0] - 1) << " " + << 1. / static_cast(csizes[1] - 1) << " " + << 1. / static_cast(csizes[2] - 1) << "\n"; + } else if (dim == 2) { + vtk_header << "DIMENSIONS " << csizes[0] << " " << csizes[1] << " 1\n" + << "ORIGIN 0 0 0\n" + << "SPACING " << 1. / static_cast(csizes[0] - 1) << " " + << 1. / static_cast(csizes[1] - 1) << " 1\n"; + } else if (dim == 1) { + vtk_header << "DIMENSIONS " << csizes[0] << " 1 1\n" + << "ORIGIN 0 0 0\n" + << "SPACING " << 1. 
/ static_cast(csizes[0] - 1) << " 1 1\n"; + } else { + assert(false); + } + vtk_header << "POINT_DATA " + << std::accumulate(csizes.begin(), csizes.end(), 1, std::multiplies()) << "\n" + << "SCALARS quantity double 1\n" + << "LOOKUP_TABLE default\n"; + // TODO set the right data type from combidatatype, for now double by default + [[maybe_unused]] bool rightDataType = std::is_same::value; + assert(rightDataType); + auto header_string = vtk_header.str(); + auto header_size = header_string.size(); + + // rank 0 write header + if (this->getRank() == 0) { + MPI_File_write(fh, header_string.data(), static_cast(header_size), MPI_CHAR, + MPI_STATUS_IGNORE); + } + + // set file view to right offset (in bytes) + MPI_Offset offset = header_size * sizeof(char); + // external32 not supported in OpenMPI < 5. -> writes "native" endianness + // might work with MPICH + MPI_File_set_view(fh, offset, getMPIDatatype(), mysubarray, "external32", MPI_INFO_NULL); + + // write subarray + MPI_File_write_all(fh, getData(), static_cast(getNrLocalElements()), getMPIDatatype(), + MPI_STATUS_IGNORE); + + // close file + MPI_File_close(&fh); + MPI_Type_free(&mysubarray); +} + +template +MPI_Datatype DistributedFullGrid::getUpwardSubarray(DimType d) { + // do index calculations + // set lower bounds of subarray + auto subarrayLowerBounds = this->getLowerBounds(); + auto subarrayUpperBounds = this->getUpperBounds(); + subarrayLowerBounds[d] += this->getLocalSizes()[d] - 1; + + auto subarrayExtents = subarrayUpperBounds - subarrayLowerBounds; + assert(subarrayExtents[d] == 1); + auto subarrayStarts = subarrayLowerBounds - this->getLowerBounds(); + + // create MPI datatype + std::vector sizes(this->getLocalSizes().begin(), this->getLocalSizes().end()); + std::vector subsizes(subarrayExtents.begin(), subarrayExtents.end()); + // the starts are local indices + std::vector starts(subarrayStarts.begin(), subarrayStarts.end()); + + // create subarray view on data + MPI_Datatype mysubarray; + MPI_Type_create_subarray(static_cast(this->getDimension()), sizes.data(), subsizes.data(), + starts.data(), MPI_ORDER_FORTRAN, this->getMPIDatatype(), &mysubarray); + MPI_Type_commit(&mysubarray); + return mysubarray; +} + +template +std::vector DistributedFullGrid::getUpwardSubarrays() { + // initialize upwardSubarrays_ only once + if (upwardSubarrays_.size() == 0) { + upwardSubarrays_.resize(this->getDimension()); + for (DimType d = 0; d < this->getDimension(); ++d) { + upwardSubarrays_[d] = getUpwardSubarray(d); + } + } + return upwardSubarrays_; +} + +template +MPI_Datatype DistributedFullGrid::getDownwardSubarray(DimType d) { + // do index calculations + // set upper bounds of subarray + IndexVector subarrayLowerBounds = this->getLowerBounds(); + IndexVector subarrayUpperBounds = this->getUpperBounds(); + subarrayUpperBounds[d] -= this->getLocalSizes()[d] - 1; + + auto subarrayExtents = subarrayUpperBounds - subarrayLowerBounds; + assert(subarrayExtents[d] == 1); + auto subarrayStarts = subarrayLowerBounds - this->getLowerBounds(); + + // create MPI datatype + // also, the data dimensions are reversed + std::vector sizes(this->getLocalSizes().begin(), this->getLocalSizes().end()); + std::vector subsizes(subarrayExtents.begin(), subarrayExtents.end()); + // the starts are local indices + std::vector starts(subarrayStarts.begin(), subarrayStarts.end()); + + // create subarray view on data + MPI_Datatype mysubarray; + MPI_Type_create_subarray(static_cast(this->getDimension()), sizes.data(), subsizes.data(), + starts.data(), 
MPI_ORDER_FORTRAN, this->getMPIDatatype(), &mysubarray); + MPI_Type_commit(&mysubarray); + return mysubarray; +} + +template +std::vector DistributedFullGrid::getDownwardSubarrays() { + // initialize downwardSubarrays_ only once + if (downwardSubarrays_.size() == 0) { + downwardSubarrays_.resize(this->getDimension()); + for (DimType d = 0; d < this->getDimension(); ++d) { + downwardSubarrays_[d] = getDownwardSubarray(d); + } + } + return downwardSubarrays_; +} + +template +void DistributedFullGrid::getHighestAndLowestNeighbor(DimType d, int& highest, + int& lowest) const { + lowest = MPI_PROC_NULL; + highest = MPI_PROC_NULL; + + auto d_reverse = this->getDimension() - d - 1; + if (!reverseOrderingDFGPartitions) { + d_reverse = d; + } + + MPI_Cart_shift(this->getCommunicator(), static_cast(d_reverse), + static_cast(getParallelization()[d] - 1), &lowest, &highest); + + // assert only boundaries have those neighbors (remove in case of periodicity) + // this assumes no periodicity! + if (!this->getLowerBounds()[d] == 0) { + assert(highest < 0); + } + if (!this->getUpperBounds()[d] == this->getGlobalSizes()[d]) { + assert(lowest < 0); + } +} + +template +void DistributedFullGrid::writeLowerBoundaryToUpperBoundary(DimType d) { + assert(hasBoundaryPoints_[d] == 2); + + // create MPI datatypes + auto downSubarray = getDownwardSubarray(d); + auto upSubarray = getUpwardSubarray(d); + + // if I have the highest neighbor (i. e. I am the lowest rank), I need to send my lowest layer + // in d to them, if I have the lowest neighbor (i. e. I am the highest rank), I can receive it + int lower, higher; + getHighestAndLowestNeighbor(d, higher, lower); + + // TODO asynchronous over d?? + [[maybe_unused]] auto success = MPI_Sendrecv( + this->getData(), 1, downSubarray, higher, TRANSFER_GHOST_LAYER_TAG, this->getData(), 1, + upSubarray, lower, TRANSFER_GHOST_LAYER_TAG, this->getCommunicator(), MPI_STATUS_IGNORE); + assert(success == MPI_SUCCESS); + MPI_Type_free(&downSubarray); + MPI_Type_free(&upSubarray); +} + +template +void DistributedFullGrid::exchangeGhostLayerUpward(DimType d, + std::vector& subarrayExtents, + std::vector& recvbuffer, + MPI_Request* recvRequest) { + subarrayExtents.resize(this->getDimension()); + subarrayExtents.assign(this->getLocalSizes().begin(), this->getLocalSizes().end()); + subarrayExtents[d] = 1; + + // create MPI datatype + auto subarray = getUpwardSubarray(d); + + // if I have a higher neighbor, I need to send my highest layer in d to them, + // if I have a lower neighbor, I can receive it + int lower = MPI_PROC_NULL; + int higher = MPI_PROC_NULL; + + // somehow the cartesian directions in the communicator are reversed + // cf InitMPI(...) 
+ auto d_reverse = this->getDimension() - d - 1; + if (!reverseOrderingDFGPartitions) { + d_reverse = d; + } + MPI_Cart_shift(this->getCommunicator(), static_cast(d_reverse), 1, &lower, &higher); + + if (this->returnBoundaryFlags()[d] == 1) { + // if one-sided boundary, assert that everyone has neighbors + assert(lower >= 0); + assert(higher >= 0); + } else if (this->returnBoundaryFlags()[d] == 2) { + // assert that boundaries have no neighbors + if (this->getLowerBounds()[d] == 0) { + assert(lower < 0); + } + if (this->getUpperBounds()[d] == this->getGlobalSizes()[d]) { + assert(higher < 0); + } + } + + // create recvbuffer + auto numElements = std::accumulate(subarrayExtents.begin(), subarrayExtents.end(), 1, + std::multiplies()); + if (lower < 0) { + numElements = 0; + std::memset(subarrayExtents.data(), 0, subarrayExtents.size() * sizeof(int)); + } + recvbuffer.resize(numElements); + std::memset(recvbuffer.data(), 0, recvbuffer.size() * sizeof(FG_ELEMENT)); + + if (recvRequest != nullptr) { + [[maybe_unused]] auto success = + MPI_Irecv(recvbuffer.data(), numElements, this->getMPIDatatype(), lower, + TRANSFER_GHOST_LAYER_TAG, this->getCommunicator(), recvRequest); + assert(success == MPI_SUCCESS); + success = MPI_Send(this->getData(), 1, subarray, higher, TRANSFER_GHOST_LAYER_TAG, + this->getCommunicator()); + assert(success == MPI_SUCCESS); + } else { + [[maybe_unused]] auto success = + MPI_Sendrecv(this->getData(), 1, subarray, higher, TRANSFER_GHOST_LAYER_TAG, + recvbuffer.data(), numElements, this->getMPIDatatype(), lower, + TRANSFER_GHOST_LAYER_TAG, this->getCommunicator(), MPI_STATUS_IGNORE); + assert(success == MPI_SUCCESS); + } + MPI_Type_free(&subarray); +} + +template +std::vector DistributedFullGrid::exchangeGhostLayerUpward( + DimType d, std::vector& subarrayExtents) { + std::vector recvbuffer{}; + exchangeGhostLayerUpward(d, subarrayExtents, recvbuffer); + return recvbuffer; +} + +template +std::vector DistributedFullGrid::isGlobalLinearIndexOnBoundary( + IndexType globalLinearIndex) const { + // this could likely be done way more efficiently, but it's + // currently not in any performance critical spot + std::vector isOnBoundary(this->getDimension(), false); + + // convert to global vector index + IndexVector globalAxisIndex(dim_); + getGlobalVectorIndex(globalLinearIndex, globalAxisIndex); + for (DimType d = 0; d < this->getDimension(); ++d) { + if (this->returnBoundaryFlags()[d] == 2) { + if (globalAxisIndex[d] == 0 || + globalAxisIndex[d] == this->globalNumPointsInDimension(d) - 1) { + isOnBoundary[d] = true; + } + } + } + return isOnBoundary; +} + +template +std::vector DistributedFullGrid::isLocalLinearIndexOnBoundary( + IndexType localLinearIndex) const { + return isGlobalLinearIndexOnBoundary(getGlobalLinearIndex(localLinearIndex)); +} +template +std::vector DistributedFullGrid::getCornersGlobalVectorIndicesRecursive( + std::vector indicesSoFar, DimType dim) const { + if (dim < this->getDimension()) { + std::vector newIndicesSoFar{}; + for (const auto& indexVec : indicesSoFar) { + newIndicesSoFar.push_back(indexVec); + newIndicesSoFar.back().push_back(0); + newIndicesSoFar.push_back(indexVec); + newIndicesSoFar.back().push_back(this->globalNumPointsInDimension(dim) - 1); + } + assert(newIndicesSoFar.size() == 2 * indicesSoFar.size()); + return getCornersGlobalVectorIndicesRecursive(newIndicesSoFar, static_cast(dim + 1)); + } else { + return indicesSoFar; + } +} + +template +std::vector DistributedFullGrid::getCornersGlobalVectorIndices() const { + auto 
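
The ghost-layer exchange above pairs `MPI_Cart_shift` neighbors with a single `MPI_Sendrecv`, relying on `MPI_PROC_NULL` at non-periodic boundaries to turn the unmatched side into a no-op. A minimal multi-rank sketch of that pattern on a 1D Cartesian communicator; it deliberately ignores the reversed dimension ordering and the subarray datatypes of the real code.

```cpp
// mpic++ cart_shift_demo.cpp && mpirun -n 4 ./a.out
#include <mpi.h>

#include <cstdio>

int main(int argc, char** argv) {
  MPI_Init(&argc, &argv);
  int worldSize = 0;
  MPI_Comm_size(MPI_COMM_WORLD, &worldSize);

  // 1D Cartesian communicator over all ranks, non-periodic like a
  // two-sided-boundary dimension of the distributed full grid
  int dims[1] = {worldSize};
  int periods[1] = {0};
  MPI_Comm cartComm;
  MPI_Cart_create(MPI_COMM_WORLD, 1, dims, periods, /*reorder=*/0, &cartComm);

  int cartRank = 0;
  MPI_Comm_rank(cartComm, &cartRank);

  // neighbors one step down/up in dimension 0; boundary ranks get
  // MPI_PROC_NULL, which makes the Sendrecv below a no-op on that side
  int lower = MPI_PROC_NULL, higher = MPI_PROC_NULL;
  MPI_Cart_shift(cartComm, 0, 1, &lower, &higher);

  // send "my highest layer" (here: one value) upward, receive the lower
  // neighbor's layer -- the same shape as exchangeGhostLayerUpward
  double myTopValue = 100.0 + cartRank;
  double ghostFromBelow = -1.0;
  MPI_Sendrecv(&myTopValue, 1, MPI_DOUBLE, higher, 0, &ghostFromBelow, 1, MPI_DOUBLE, lower, 0,
               cartComm, MPI_STATUS_IGNORE);
  std::printf("rank %d received ghost value %g\n", cartRank, ghostFromBelow);

  MPI_Comm_free(&cartComm);
  MPI_Finalize();
  return 0;
}
```
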
emptyVectorInVector = std::vector(1); + assert(emptyVectorInVector.size() == 1); + assert(emptyVectorInVector[0].size() == 0); + std::vector cornersVectors = + getCornersGlobalVectorIndicesRecursive(emptyVectorInVector, 0); + assert(cornersVectors.size() == static_cast(powerOfTwo[this->getDimension()])); + return cornersVectors; +} + +template +std::vector DistributedFullGrid::getCornersValues() const { + std::vector values(powerOfTwo[this->getDimension()]); + auto corners = getCornersGlobalVectorIndices(); + for (size_t cornerNo = 0; cornerNo < corners.size(); ++cornerNo) { + if (this->isGlobalIndexHere(corners[cornerNo])) { + // convert to local vector index, then to linear index + IndexVector locAxisIndex(this->getDimension()); + [[maybe_unused]] bool present = getLocalVectorIndex(corners[cornerNo], locAxisIndex); + assert(present); + auto index = getLocalLinearIndex(locAxisIndex); + values[cornerNo] = this->getData()[index]; + } + } + MPI_Allreduce(MPI_IN_PLACE, values.data(), static_cast(values.size()), + this->getMPIDatatype(), MPI_SUM, this->getCommunicator()); + return values; +} + +template +void DistributedFullGrid::InitMPI(MPI_Comm comm, const std::vector& procs) { + // check if communicator is already cartesian + int status; + MPI_Topo_test(comm, &status); + + if (status == MPI_CART) { + if (comm != cartesianUtils_.getComm()) { + assert(uniformDecomposition); + cartesianUtils_ = MPICartesianUtils(comm); + } + if (procs != cartesianUtils_.getCartesianDimensions()) { + std::cerr << "unpart" << std::endl; + throw std::runtime_error("The given communicator is not partitioned as desired"); + } + } else { + // MPI_Comm_dup(comm, &communicator_); + // cf. https://www.researchgate.net/publication/220439585_MPI_on_millions_of_cores + // "Figure 3 shows the memory consumption in all these cases after 32 calls to MPI Comm dup" + std::cerr << "undup" << std::endl; + throw std::runtime_error( + "Currently testing to not duplicate communicator (to save memory), \ + if you do want to use this code please take care that \ + MPI_Comm_free(&communicator_) will be called at some point"); + } +} + +template +void DistributedFullGrid::setDecomposition( + const std::vector& decomposition) { +#ifndef NDEBUG + assert(decomposition.size() == dim_); + for (DimType i = 0; i < dim_; ++i) + assert(decomposition[i].size() == + static_cast(this->getCartesianUtils().getCartesianDimensions()[i])); + + // check if 1d bounds given in ascending order + // if not, this might indicate there's something wrong + for (DimType i = 0; i < dim_; ++i) { + IndexVector tmp(decomposition[i]); + std::sort(tmp.begin(), tmp.end()); + assert(tmp == decomposition[i]); + } + + // check if partitions size larger zero + // this can be detected by checking for duplicate entries in + // the 1d decompositions + for (DimType i = 0; i < dim_; ++i) { + IndexVector tmp(decomposition[i]); + IndexVector::iterator last = std::unique(tmp.begin(), tmp.end()); + + // todo: this is not sufficient to check for duplicate entries + // if last points to the same element as tmp.end() + // this means all entries in tmp are unique + assert(last == tmp.end() && "some partition in decomposition is of zero size!"); + } +#endif // not def NDEBUG + + decomposition_ = decomposition; +} + +template +MPICartesianUtils DistributedFullGrid::cartesianUtils_; + +// output operator +template +inline std::ostream& operator<<(std::ostream& os, const DistributedFullGrid& dfg) { + dfg.print(); + + return os; } } // namespace combigrid diff --git 
a/src/fullgrid/MultiArray.hpp b/src/fullgrid/MultiArray.hpp index 8befc84c8..77f90f96e 100644 --- a/src/fullgrid/MultiArray.hpp +++ b/src/fullgrid/MultiArray.hpp @@ -13,7 +13,7 @@ template using MultiArrayRef = boost::multi_array_ref; /* create a multiarray ref to the local data in a distributed fullgrid */ -template +template static MultiArrayRef createMultiArrayRef(DistributedFullGrid& dfg) { /* note that the sizes of dfg are in reverse order compared to shape */ std::vector shape(dfg.getLocalSizes().rbegin(), dfg.getLocalSizes().rend()); @@ -30,7 +30,7 @@ static MultiArrayRef createMultiArrayRef(FullGrid& fg) { assert(fg.isGridCreated()); /* note that the sizes of dfg are in reverse order compared to shape */ - std::vector shape(fg.getSizes().rbegin(), fg.getSizes().rend()); + IndexVector shape(fg.getSizes().rbegin(), fg.getSizes().rend()); if (!reverseOrderingDFGPartitions) { assert(false && "this is not adapted to normal ordering of DFG partitions yet"); } diff --git a/src/fullgrid/Tensor.hpp b/src/fullgrid/Tensor.hpp index 380e2b769..644d53497 100644 --- a/src/fullgrid/Tensor.hpp +++ b/src/fullgrid/Tensor.hpp @@ -26,7 +26,7 @@ class TensorIndexer { localOffsets_[j] = nrElements; nrElements = nrElements * extents_[j]; } - assert(this->size() == nrElements); + assert(this->size() == static_cast(nrElements)); } // have only move constructors for now @@ -58,7 +58,7 @@ class TensorIndexer { } auto size = std::accumulate(this->extents_.begin(), this->extents_.end(), 1U, std::multiplies()); - assert(size < std::numeric_limits::max()); + assert(size < static_cast(std::numeric_limits::max())); return size; } @@ -99,7 +99,7 @@ class Tensor : public TensorIndexer { public: Tensor() = default; explicit Tensor(Type* data, IndexVector&& extents) - : data_(data), TensorIndexer(std::move(extents)) {} + : TensorIndexer(std::move(extents)), data_(data) {} // have only move constructors for now Tensor(Tensor const&) = delete; diff --git a/src/hierarchization/DistributedHierarchization.hpp b/src/hierarchization/DistributedHierarchization.hpp index 04fcd0ba7..2fbd2bb98 100644 --- a/src/hierarchization/DistributedHierarchization.hpp +++ b/src/hierarchization/DistributedHierarchization.hpp @@ -123,10 +123,12 @@ static void sendAndReceiveIndicesBlock(const std::map static void exchangeAllData1d(const DistributedFullGrid& dfg, DimType dim, RemoteDataCollector& remoteData) { // send every index to all neighboring ranks in dimension dim - const auto globalIdxMax = dfg.length(dim); + const auto globalIdxMax = dfg.globalNumPointsInDimension(dim); const IndexType idxMin = dfg.getFirstGlobal1dIndex(dim); const IndexType idxMax = dfg.getLastGlobal1dIndex(dim); @@ -268,15 +270,18 @@ static void exchangeAllData1d(const DistributedFullGrid& dfg, DimTyp } std::map> send1dIndices; std::map> recv1dIndices; - + /* // #pragma omp parallel for schedule(static) default(none) \ -// shared(poleNeighbors, send1dIndices, allMyIndices) + // shared(poleNeighbors, send1dIndices, allMyIndices) + */ for (const auto& r : poleNeighbors) { #pragma omp critical send1dIndices[r] = allMyIndices; } + /* // #pragma omp parallel for schedule(static) default(none) \ -// shared(poleNeighbors, recv1dIndices) + // shared(poleNeighbors, recv1dIndices) + */ for (const auto& r : poleNeighbors) { #pragma omp critical recv1dIndices[r] = {}; @@ -325,11 +330,11 @@ static void exchangeData1d(const DistributedFullGrid& dfg, DimType d std::map> send1dIndices; // main loop - IndexType idxMin = dfg.getFirstGlobal1dIndex(dim); - IndexType idxMax = 
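
The `TensorIndexer` hunk above accumulates column-major offsets (`localOffsets_[j]` is the product of all extents before dimension `j`), so a linear index is just the dot product of a vector index with those offsets. A small sketch of that indexing scheme; `makeOffsets` and `sequentialIndex` are illustrative stand-ins for the member functions.

```cpp
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <numeric>
#include <vector>

using IndexType = std::int64_t;
using IndexVector = std::vector<IndexType>;

// Column-major (Fortran-order) offsets as in TensorIndexer: offset[j] is the
// product of all extents before dimension j, so the first dimension varies fastest.
IndexVector makeOffsets(const IndexVector& extents) {
  IndexVector offsets(extents.size());
  IndexType nrElements = 1;
  for (std::size_t j = 0; j < extents.size(); ++j) {
    offsets[j] = nrElements;
    nrElements *= extents[j];
  }
  return offsets;
}

// sequential (linear) index of a vector index = dot product with the offsets
IndexType sequentialIndex(const IndexVector& vectorIndex, const IndexVector& offsets) {
  return std::inner_product(vectorIndex.begin(), vectorIndex.end(), offsets.begin(), IndexType{0});
}

int main() {
  IndexVector extents{4, 3, 2};                // 24 elements
  IndexVector offsets = makeOffsets(extents);  // {1, 4, 12}
  assert((offsets == IndexVector{1, 4, 12}));
  assert(sequentialIndex({0, 0, 0}, offsets) == 0);
  assert(sequentialIndex({3, 0, 0}, offsets) == 3);   // first dimension is contiguous
  assert(sequentialIndex({0, 1, 0}, offsets) == 4);
  assert(sequentialIndex({1, 2, 1}, offsets) == 21);  // 1 + 2*4 + 1*12
  return 0;
}
```
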
dfg.getLastGlobal1dIndex(dim); - bool oneSidedBoundary = dfg.returnBoundaryFlags()[dim] == 1; - auto globalIdxMax = dfg.length(dim); - LevelType lmax = dfg.getLevels()[dim]; + const IndexType idxMin = dfg.getFirstGlobal1dIndex(dim); + const IndexType idxMax = dfg.getLastGlobal1dIndex(dim); + const bool oneSidedBoundary = dfg.returnBoundaryFlags()[dim] == 1; + const auto globalIdxMax = dfg.globalNumPointsInDimension(dim); + const LevelType lmax = dfg.getLevels()[dim]; IndexType idx = idxMin; @@ -981,7 +986,8 @@ void hierarchizeNoBoundary(DistributedFullGrid& dfg, for (IndexType nn = 0; nn < nbrOfPoles; ++nn) { // integer operations form bottleneck here -- nested loops are twice as slow divresult = std::lldiv(nn, stride); - start = divresult.quot * jump + divresult.rem; // localer lin index start of pole + start = static_cast(divresult.quot * jump + + divresult.rem); // localer lin index start of pole #ifndef NDEBUG IndexVector localIndexVector(dfg.getDimension()); @@ -1063,7 +1069,8 @@ void dehierarchizeNoBoundary(DistributedFullGrid& dfg, for (IndexType nn = 0; nn < nbrOfPoles; ++nn) { // integer operations form bottleneck here -- nested loops are twice as slow divresult = std::lldiv(nn, stride); - start = divresult.quot * jump + divresult.rem; // localer lin index start of pole + start = static_cast(divresult.quot * jump + + divresult.rem); // localer lin index start of pole #ifndef NDEBUG IndexVector localIndexVector(dfg.getDimension()); diff --git a/src/io/BroadcastParameters.cpp b/src/io/BroadcastParameters.cpp index 3c3c0a8c1..a94943438 100644 --- a/src/io/BroadcastParameters.cpp +++ b/src/io/BroadcastParameters.cpp @@ -18,11 +18,9 @@ std::string getFileContentsFromRankZero(const std::string& fileName, const MPI_C // only one rank reads inputs and broadcasts to others auto mpiRank = getCommRank(comm); std::string contentString; - int contentStringSize = 0; if (mpiRank == 0) { std::ifstream ifs(fileName); contentString.assign((std::istreambuf_iterator(ifs)), (std::istreambuf_iterator())); - contentStringSize = contentString.size(); } MPIUtils::broadcastContainer(contentString, 0, comm); return contentString; @@ -57,12 +55,12 @@ std::vector> getCoordinatesFromRankZero(const std::string& h5F auto mpiRank = getCommRank(comm); if (mpiRank == 0) { h5io::readH5Coordinates(coordinates, h5FileName); - coordinateSizes[0] = coordinates.size(); + coordinateSizes[0] = static_cast(coordinates.size()); assert(coordinateSizes[0] > 0); - coordinateSizes[1] = coordinates[0].size(); + coordinateSizes[1] = static_cast(coordinates[0].size()); assert(coordinateSizes[1] > 0); coordinatesSerialized = combigrid::serializeInterpolationCoords(coordinates); - assert(coordinatesSerialized.size() == coordinateSizes[0] * coordinateSizes[1]); + assert(coordinatesSerialized.size() == static_cast(coordinateSizes[0] * coordinateSizes[1])); } // broadcast vector sizes @@ -70,7 +68,7 @@ std::vector> getCoordinatesFromRankZero(const std::string& h5F coordinatesSerialized.resize(coordinateSizes[0] * coordinateSizes[1]); // broadcast serialized vector - MPI_Bcast(&coordinatesSerialized[0], coordinatesSerialized.size(), + MPI_Bcast(&coordinatesSerialized[0], static_cast(coordinatesSerialized.size()), abstraction::getMPIDatatype(abstraction::getabstractionDataType()), 0, comm); // split vector into coordinates diff --git a/src/io/FileInputOutput.cpp b/src/io/FileInputOutput.cpp index b3fdee679..6b472d3ab 100644 --- a/src/io/FileInputOutput.cpp +++ b/src/io/FileInputOutput.cpp @@ -1,54 +1,59 @@ #include 
"io/FileInputOutput.hpp" + #include #include #include #include -namespace combigrid { -void writeConcatenatedFileRootOnly(const char* data, int sizeOfData, const std::string& path, MPI_Comm comm, bool replaceExistingFile) { - int mpi_size, mpi_rank; - MPI_Comm_rank(comm, &mpi_rank); - MPI_Comm_size(comm, &mpi_size); +#include "mpi/MPISystem.hpp" - std::vector recvCounts; +namespace combigrid { +void writeConcatenatedFileRootOnly(const char* data, size_t sizeOfData, const std::string& path, + MPI_Comm comm, bool replaceExistingFile) { + int mpi_size, mpi_rank; + MPI_Comm_rank(comm, &mpi_rank); + MPI_Comm_size(comm, &mpi_size); - if (mpi_rank == 0) { - recvCounts.resize(mpi_size); - } + std::vector recvCounts; - MPI_Gather(&sizeOfData, 1, MPI_INT, recvCounts.data(), 1, MPI_INT, 0, comm); + if (mpi_rank == 0) { + recvCounts.resize(mpi_size); + } + int sizeAsInt = static_cast(sizeOfData); + MPI_Datatype datatype = MPI_INT; + MPI_Gather(&sizeAsInt, 1, datatype, recvCounts.data(), 1, datatype, 0, comm); - std::string recvBuffer; - std::vector displacement; + std::string recvBuffer; + std::vector displacement; - if (mpi_rank == 0) { - displacement.resize(mpi_size); - std::exclusive_scan(recvCounts.begin(), recvCounts.end(), displacement.begin(), 0); - recvBuffer.resize(std::accumulate(recvCounts.begin(),recvCounts.end(), 0)); - } + if (mpi_rank == 0) { + displacement.resize(mpi_size); + std::exclusive_scan(recvCounts.begin(), recvCounts.end(), displacement.begin(), 0); + recvBuffer.resize(std::accumulate(recvCounts.begin(), recvCounts.end(), 0)); + } - MPI_Gatherv(data, sizeOfData, MPI_CHAR, &recvBuffer[0], recvCounts.data(), displacement.data(), MPI_CHAR, 0, comm); + MPI_Gatherv(data, sizeAsInt, MPI_CHAR, &recvBuffer[0], recvCounts.data(), displacement.data(), + MPI_CHAR, 0, comm); - if (mpi_rank == 0) { - if(replaceExistingFile == true){ - bool fileExist = std::filesystem::exists(path); - if(fileExist == true){ - std::remove(path.c_str()); - } + if (mpi_rank == 0) { + if (replaceExistingFile == true) { + bool fileExist = std::filesystem::exists(path); + if (fileExist == true) { + std::remove(path.c_str()); } - std::ofstream out(path.c_str()); - out << recvBuffer; - out.close(); } + std::ofstream out(path.c_str()); + out << recvBuffer; + out.close(); + } } -bool getFileExistsRootOnly(const std::string& fileName, CommunicatorType comm, RankType myRank, - RankType rootRank) { +bool getFileExistsRootOnly(const std::string& fileName, CommunicatorType comm, RankType rootRank) { bool doesItExist = false; - if (myRank == rootRank) { + if (combigrid::getCommRank(comm) == rootRank) { doesItExist = std::filesystem::exists(fileName); } MPI_Bcast(&doesItExist, 1, MPI_C_BOOL, rootRank, comm); return doesItExist; } -} // namespace combigrid \ No newline at end of file +} // namespace combigrid diff --git a/src/io/FileInputOutput.hpp b/src/io/FileInputOutput.hpp index eca5941e1..3b1104928 100644 --- a/src/io/FileInputOutput.hpp +++ b/src/io/FileInputOutput.hpp @@ -9,10 +9,9 @@ #include "utils/Types.hpp" namespace combigrid { -void writeConcatenatedFileRootOnly(const char* data, int sizeOfData, const std::string& path, +void writeConcatenatedFileRootOnly(const char* data, size_t sizeOfData, const std::string& path, MPI_Comm comm, bool replaceExistingFile = false); -bool getFileExistsRootOnly(const std::string& fileName, CommunicatorType comm, RankType myRank, - RankType rootRank = 0); +bool getFileExistsRootOnly(const std::string& fileName, CommunicatorType comm, RankType rootRank = 0); -} // namespace 
combigrid \ No newline at end of file +} // namespace combigrid diff --git a/src/io/H5InputOutput.cpp b/src/io/H5InputOutput.cpp index abd3d0f9a..c45ae11bf 100644 --- a/src/io/H5InputOutput.cpp +++ b/src/io/H5InputOutput.cpp @@ -4,11 +4,11 @@ namespace combigrid { namespace h5io { // some instantiations -void readH5Coordinates(std::vector>& coordinates, std::string saveFilePath) { +void readH5Coordinates(std::vector>& coordinates, const std::string& saveFilePath) { return h5io::readValuesFromH5File(coordinates, saveFilePath); } -void readH5Values(std::vector& values, std::string saveFilePath) { +void readH5Values(std::vector& values, const std::string& saveFilePath) { return h5io::readValuesFromH5File(values, saveFilePath); } diff --git a/src/io/H5InputOutput.hpp b/src/io/H5InputOutput.hpp index db57430ba..f695f8a0b 100644 --- a/src/io/H5InputOutput.hpp +++ b/src/io/H5InputOutput.hpp @@ -62,9 +62,9 @@ void readValuesFromH5File(T& values, const std::string& fileName) { } // some instantiations -void readH5Coordinates(std::vector>& coordinates, std::string saveFilePath); +void readH5Coordinates(std::vector>& coordinates, const std::string& saveFilePath); -void readH5Values(std::vector& values, std::string saveFilePath); +void readH5Values(std::vector& values, const std::string& saveFilePath); } // namespace h5io } // namespace combigrid \ No newline at end of file diff --git a/src/manager/CombiParameters.hpp b/src/manager/CombiParameters.hpp index dadec05e8..928f8bc27 100644 --- a/src/manager/CombiParameters.hpp +++ b/src/manager/CombiParameters.hpp @@ -17,7 +17,7 @@ class CombiParameters { CombiParameters(DimType dim, LevelVector lmin, LevelVector lmax, std::vector& boundary, std::vector& levels, std::vector& coeffs, std::vector& taskIDs, - IndexType numberOfCombinations, IndexType numGrids = 1, + size_t numberOfCombinations, IndexType numGrids = 1, CombinationVariant combinationVariant = CombinationVariant::sparseGridReduce, const std::vector& parallelization = {0}, LevelVector reduceCombinationDimsLmin = LevelVector(0), @@ -56,7 +56,7 @@ class CombiParameters { // constructor variant w/o combination scheme specified -- the workers have their partial list // imlpicitly as tasks vector CombiParameters(DimType dim, LevelVector lmin, LevelVector lmax, - std::vector& boundary, IndexType numberOfCombinations, + std::vector& boundary, size_t numberOfCombinations, IndexType numGrids = 1, CombinationVariant combinationVariant = CombinationVariant::sparseGridReduce, const std::vector& parallelization = {0}, @@ -95,7 +95,7 @@ class CombiParameters { CombiParameters(DimType dim, LevelVector lmin, LevelVector lmax, const std::vector& boundary, std::vector& levels, std::vector& coeffs, std::vector& hierarchizationDims, - std::vector& taskIDs, IndexType numberOfCombinations, + std::vector& taskIDs, size_t numberOfCombinations, IndexType numGrids = 1, CombinationVariant combinationVariant = CombinationVariant::sparseGridReduce, LevelVector reduceCombinationDimsLmin = LevelVector(0), @@ -213,11 +213,12 @@ class CombiParameters { inline DimType getDim() const { return dim_; } inline size_t getNumLevels() const { return levels_.size(); } + /** * this method returns the number of grids a task contains * in case we have multiple grids in our simulation */ - inline IndexType getNumGrids() const { return numGridsPerTask_; } + inline size_t getNumGrids() const { return numGridsPerTask_; } inline const std::vector& getHierarchizationDims() const { return hierarchizationDims_; } @@ -264,7 +265,7 @@ class 
CombiParameters { return procs_; } - inline const IndexType& getNumberOfCombinations() const { return numberOfCombinations_; } + inline const size_t& getNumberOfCombinations() const { return numberOfCombinations_; } inline CombinationVariant getCombinationVariant() const { return combinationVariant_; } @@ -313,7 +314,7 @@ class CombiParameters { assert(decomposition[d][0] == 0); auto numPoints = combigrid::getNumDofNodal(lmax_[d], boundary_[d]); assert(decomposition[d].back() < numPoints); - assert(procs_[d] == decomposition[d].size()); + assert(static_cast(procs_[d]) == decomposition[d].size()); } #endif // not def NDEBUG } @@ -357,13 +358,11 @@ class CombiParameters { bool forwardDecomposition_; friend class boost::serialization::access; - IndexType numberOfCombinations_; // total number of combinations - IndexType numGridsPerTask_; // number of grids per task + size_t numberOfCombinations_; // total number of combinations + size_t numGridsPerTask_; // number of grids per task CombinationVariant combinationVariant_; - uint32_t sizeForChunkedCommunicationInMebibyte_; - /** * This level vector indicates which dimension of lmin should be decreased by how many levels * for constructing the distributed sparse grid. @@ -385,6 +384,7 @@ class CombiParameters { */ LevelVector reduceCombinationDimsLmax_; + uint32_t sizeForChunkedCommunicationInMebibyte_; std::string thirdLevelHost_; @@ -451,29 +451,6 @@ inline static void setCombiParametersHierarchicalBasesUniform(CombiParameters& c } } -inline static std::vector getStandardDecomposition(LevelVector lref, - std::vector procsRef) { - assert(lref.size() == procsRef.size()); - std::vector decomposition; - for (DimType d = 0; d < static_cast(lref.size()); ++d) { - IndexVector di; - if (procsRef[d] == 1) { - di = {0}; - } else if (procsRef[d] == 2) { - di = {0, powerOfTwo[lref[d]] / procsRef[d] + 1}; - } else if (procsRef[d] == 3) { - di = {0, powerOfTwo[lref[d]] / procsRef[d] + 1, 2 * powerOfTwo[lref[d]] / procsRef[d] + 1}; - } else if (procsRef[d] == 4) { - di = {0, powerOfTwo[lref[d]] / procsRef[d] + 1, 2 * powerOfTwo[lref[d]] / procsRef[d] + 1, - 3 * powerOfTwo[lref[d]] / procsRef[d] + 1}; - } else { - throw std::runtime_error("please implement a test decomposition matching procs and lref"); - } - decomposition.push_back(di); - } - return decomposition; -} - } // namespace combigrid #endif /* SRC_SGPP_COMBIGRID_MANAGER_COMBIPARAMETERS_HPP_ */ diff --git a/src/manager/InterpolationWorker.hpp b/src/manager/InterpolationWorker.hpp index 28b2d06fb..8000a2240 100644 --- a/src/manager/InterpolationWorker.hpp +++ b/src/manager/InterpolationWorker.hpp @@ -90,17 +90,16 @@ static void writeInterpolatedValuesSingleFile( static void writeVTKPlotFilesOfAllTasks(const std::vector>& tasks, int numberOfGrids) { - for (const auto& task : tasks) { #ifdef USE_VTK + for (const auto& task : tasks) { for (int g = 0; g < numberOfGrids; ++g) { DistributedFullGrid& dfg = task->getDistributedFullGrid(g); DFGPlotFileWriter writer{dfg, g}; writer.writePlotFile(); } + } #else - std::cout << "Warning: no vtk output produced as DisCoTec was compiled without VTK." - << std::endl; + std::cout << "Warning: no vtk output produced as DisCoTec was compiled without VTK." 
<< std::endl; #endif /* USE_VTK */ - } } } /* namespace combigrid */ diff --git a/src/manager/ProcessGroupManager.cpp b/src/manager/ProcessGroupManager.cpp index f49019914..94ffff804 100644 --- a/src/manager/ProcessGroupManager.cpp +++ b/src/manager/ProcessGroupManager.cpp @@ -200,9 +200,9 @@ bool ProcessGroupManager::pretendCombineThirdLevelForWorkers(CombiParameters& pa const CommunicatorType& comm = thirdLevelComms[params.getThirdLevelPG()]; // exchange dsgus - IndexType numGrids = params.getNumGrids(); + auto numGrids = params.getNumGrids(); std::vector dsguData; - for (IndexType g = 0; g < numGrids; g++) { + for (size_t g = 0; g < numGrids; g++) { for (RankType p = 0; p < (RankType)theMPISystem()->getNumProcs(); p++) { // we assume here that all dsgus have the same size otherwise size collection must change size_t dsguSize = (size_t)(dsguDataSizePerWorker_[(size_t)p] / numGrids); @@ -332,7 +332,7 @@ bool ProcessGroupManager::pretendReduceLocalAndRemoteSubspaceSizes( from = to; } assert(to == sendBuff.end()); - for (const auto& dataSize : formerDsguDataSizePerWorker_) { + for ([[maybe_unused]] const auto& dataSize : formerDsguDataSizePerWorker_) { assert(dataSize > 0); } @@ -365,9 +365,9 @@ void ProcessGroupManager::exchangeDsgus(const ThirdLevelUtils& thirdLevel, Combi const CommunicatorType& comm = thirdLevelComms[params.getThirdLevelPG()]; // exchange dsgus - IndexType numGrids = params.getNumGrids(); + auto numGrids = params.getNumGrids(); std::vector dsguData; - for (IndexType g = 0; g < numGrids; g++) { + for (size_t g = 0; g < numGrids; g++) { for (RankType p = 0; p < (RankType)theMPISystem()->getNumProcs(); p++) { // we assume here that all dsgus have the same size otherwise size collection must change size_t dsguSize = (size_t)(dsguDataSizePerWorker_[(size_t)p] / numGrids); @@ -583,7 +583,7 @@ void ProcessGroupManager::writeSparseGridMinMaxCoefficients(const std::string& f } void ProcessGroupManager::doDiagnostics(size_t taskID) { - auto status = waitStatus(); + [[maybe_unused]] auto status = waitStatus(); assert(status == PROCESS_GROUP_WAIT); for (auto task : tasks_) { if (task->getID() == taskID) { @@ -659,10 +659,10 @@ std::vector ProcessGroupManager::evalErrorOnDFG(const LevelVector& leval void ProcessGroupManager::interpolateValues(const std::vector& interpolationCoordsSerial, std::vector& values, - MPI_Request* request, std::string filenamePrefix) { + MPI_Request* request, const std::string& filenamePrefix) { assert(interpolationCoordsSerial.size() < static_cast(std::numeric_limits::max()) && "needs chunking!"); - for (const auto& coord : interpolationCoordsSerial) { + for ([[maybe_unused]] const auto& coord : interpolationCoordsSerial) { assert(coord >= 0.0 && coord <= 1.0); } // if no request was passed, assume we are not waiting for a reply from that group diff --git a/src/manager/ProcessGroupManager.hpp b/src/manager/ProcessGroupManager.hpp index a47f89a82..f3fb1302e 100644 --- a/src/manager/ProcessGroupManager.hpp +++ b/src/manager/ProcessGroupManager.hpp @@ -96,7 +96,7 @@ class ProcessGroupManager { void interpolateValues(const std::vector& interpolationCoordsSerial, std::vector& values, MPI_Request* request = nullptr, - std::string filenamePrefix = ""); + const std::string& filenamePrefix = ""); void writeInterpolatedValuesPerGrid(const std::vector& interpolationCoordsSerial, const std::string& filenamePrefix); diff --git a/src/manager/ProcessGroupWorker.cpp b/src/manager/ProcessGroupWorker.cpp index 8959549ce..7d37f9710 100644 --- 
a/src/manager/ProcessGroupWorker.cpp +++ b/src/manager/ProcessGroupWorker.cpp @@ -34,7 +34,7 @@ std::string receiveStringFromManagerAndBroadcastToGroup() { } void sendNormsToManager(const std::vector lpnorms) { - for (int p = 0; p < lpnorms.size(); ++p) { + for (size_t p = 0; p < lpnorms.size(); ++p) { // send from master to manager MASTER_EXCLUSIVE_SECTION { MPI_Send(&lpnorms[p], 1, MPI_DOUBLE, theMPISystem()->getManagerRank(), TRANSFER_NORM_TAG, @@ -77,7 +77,7 @@ std::vector> receiveAndBroadcastInterpolationCoords(DimType di MASTER_EXCLUSIVE_SECTION { MPI_Status status; status.MPI_ERROR = MPI_SUCCESS; - int result = MPI_Probe(theMPISystem()->getManagerRank(), TRANSFER_INTERPOLATION_TAG, + [[maybe_unused]] int result = MPI_Probe(theMPISystem()->getManagerRank(), TRANSFER_INTERPOLATION_TAG, theMPISystem()->getGlobalComm(), &status); #ifndef NDEBUG assert(result == MPI_SUCCESS); @@ -110,7 +110,7 @@ std::vector> receiveAndBroadcastInterpolationCoords(DimType di std::cout << "error recv: " << errorString << std::endl; } assert(status.MPI_ERROR == MPI_SUCCESS); - for (const auto& coord : interpolationCoordsSerial) { + for ([[maybe_unused]] const auto& coord : interpolationCoordsSerial) { assert(coord >= 0.0 && coord <= 1.0); } #endif // NDEBUG @@ -121,7 +121,7 @@ std::vector> receiveAndBroadcastInterpolationCoords(DimType di interpolationCoordsSerial.resize(coordsSize); MPI_Bcast(interpolationCoordsSerial.data(), coordsSize, realType, theMPISystem()->getMasterRank(), theMPISystem()->getLocalComm()); - for (const auto& coord : interpolationCoordsSerial) { + for ([[maybe_unused]] const auto& coord : interpolationCoordsSerial) { assert(coord >= 0.0 && coord <= 1.0); } // split vector into coordinates @@ -153,7 +153,8 @@ SignalType ProcessGroupWorker::wait() { Stats::startEvent("run first"); auto& currentTask = this->getTaskWorker().getLastTask(); currentTask->run(theMPISystem()->getLocalComm()); - Stats::Event e = Stats::stopEvent("run first"); + // Stats::Event e = Stats::stopEvent("run first"); + Stats::stopEvent("run first"); } break; case RUN_NEXT: { assert(this->getTaskWorker().getTasks().size() > 0); @@ -254,7 +255,7 @@ SignalType ProcessGroupWorker::wait() { case WRITE_DSGS_TO_DISK: { Stats::startEvent("write to disk"); std::string filenamePrefix = receiveStringFromManagerAndBroadcastToGroup(); - writeDSGsToDisk(filenamePrefix); + writeDSGsToDisk(filenamePrefix, ""); Stats::stopEvent("write to disk"); } break; case READ_DSGS_FROM_DISK: { @@ -341,7 +342,7 @@ SignalType ProcessGroupWorker::wait() { interpolateValues(receiveAndBroadcastInterpolationCoords(combiParameters_.getDim())); // send result MASTER_EXCLUSIVE_SECTION { - MPI_Send(values.data(), values.size(), + MPI_Send(values.data(), static_cast(values.size()), abstraction::getMPIDatatype(abstraction::getabstractionDataType()), theMPISystem()->getManagerRank(), TRANSFER_INTERPOLATION_TAG, theMPISystem()->getGlobalComm()); @@ -720,8 +721,6 @@ void ProcessGroupWorker::combineThirdLevel() { assert(theMPISystem()->getThirdLevelComms().size() == 1 && "init thirdLevel communicator failed"); const CommunicatorType& managerComm = theMPISystem()->getThirdLevelComms()[0]; - const CommunicatorType& globalReduceComm = theMPISystem()->getGlobalReduceComm(); - const RankType& globalReduceRank = theMPISystem()->getGlobalReduceRank(); const RankType& manager = theMPISystem()->getThirdLevelManagerRank(); std::vector requests; @@ -769,7 +768,7 @@ void ProcessGroupWorker::combineThirdLevel() { // wait for bcasts to other pgs in globalReduceComm 
Stats::startEvent("wait for bcasts"); for (MPI_Request& request : requests) { - auto returnedValue = MPI_Wait(&request, MPI_STATUS_IGNORE); + [[maybe_unused]] auto returnedValue = MPI_Wait(&request, MPI_STATUS_IGNORE); assert(returnedValue == MPI_SUCCESS); } Stats::stopEvent("wait for bcasts"); @@ -777,36 +776,17 @@ void ProcessGroupWorker::combineThirdLevel() { int ProcessGroupWorker::combineThirdLevelFileBasedWrite( const std::string& filenamePrefixToWrite, const std::string& writeCompleteTokenFileName) { - assert(this->getSparseGridWorker().getNumberOfGrids() > 0); - assert(combiParametersSet_); - - // write sparse grid and corresponding token file - Stats::startEvent("write SG"); - int numWritten = this->writeDSGsToDisk(filenamePrefixToWrite); - MASTER_EXCLUSIVE_SECTION { std::ofstream tokenFile(writeCompleteTokenFileName); } - Stats::stopEvent("write SG"); - return numWritten; -} + OUTPUT_GROUP_EXCLUSIVE_SECTION { + assert(this->getSparseGridWorker().getNumberOfGrids() > 0); + assert(combiParametersSet_); -void ProcessGroupWorker::removeReadingFiles(const std::string& filenamePrefixToRead, - const std::string& startReadingTokenFileName, bool keepSparseGridFiles) const { - OUTPUT_GROUP_EXCLUSIVE_SECTION{ - MASTER_EXCLUSIVE_SECTION { - // remove reading token - std::filesystem::remove(startReadingTokenFileName); - // remove sparse grid file(s) - if (!keepSparseGridFiles) { - std::filesystem::remove(filenamePrefixToRead + "_" + std::to_string(0)); - for (const auto& entry : std::filesystem::directory_iterator(".")) { - if (entry.path().string().find(filenamePrefixToRead + "_" + std::to_string(0) + ".part") != - std::string::npos) { - assert(entry.is_regular_file()); - std::filesystem::remove(entry.path()); - } - } - } - } - } else { + // write sparse grid and corresponding token file + Stats::startEvent("write SG"); + int numWritten = this->writeDSGsToDisk(filenamePrefixToWrite, writeCompleteTokenFileName); + Stats::stopEvent("write SG"); + return numWritten; + } + else { throw std::runtime_error("should only be called from output group"); } } @@ -828,36 +808,19 @@ void ProcessGroupWorker::waitForTokenFile(const std::string& startReadingTokenFi } } -int ProcessGroupWorker::readReduce(const std::string& filenamePrefixToRead, bool overwrite) { - return getSparseGridWorker().readReduce( - filenamePrefixToRead, this->combiParameters_.getChunkSizeInMebibybtePerThread(), overwrite); +int ProcessGroupWorker::readReduce(const std::vector& filenamePrefixesToRead, + const std::vector& startReadingTokenFileNames, + bool overwriteInMemory) { + return getSparseGridWorker().readReduce(filenamePrefixesToRead, startReadingTokenFileNames, + this->combiParameters_.getChunkSizeInMebibybtePerThread(), + overwriteInMemory); } void ProcessGroupWorker::combineThirdLevelFileBasedReadReduce( const std::vector& filenamePrefixesToRead, - const std::vector& startReadingTokenFileNames, bool overwrite, + const std::vector& startReadingTokenFileNames, bool overwriteInMemory, bool keepSparseGridFiles) { - // wait until we can start to read any of the files - std::set indicesStillToReadReduce; - for (size_t i = 0; i < filenamePrefixesToRead.size(); ++i) { - indicesStillToReadReduce.insert(i); - } - while (!indicesStillToReadReduce.empty()) { - for (auto it = indicesStillToReadReduce.begin(); it != indicesStillToReadReduce.end(); ++it) { - if (combigrid::getFileExistsRootOnly(startReadingTokenFileNames[*it], - theMPISystem()->getOutputGroupComm(), - theMPISystem()->getOutputGroupRank())) { - overwrite ? 
Stats::startEvent("read SG") : Stats::startEvent("read/reduce SG"); - int numRead = this->readReduce(filenamePrefixesToRead[*it], overwrite); - assert(numRead > 0); - overwrite ? Stats::stopEvent("read SG") : Stats::stopEvent("read/reduce SG"); - indicesStillToReadReduce.erase(it); - break; // because iterators may be invalidated - } - } - // wait for 200ms - std::this_thread::sleep_for(std::chrono::milliseconds(200)); - } + this->readReduce(filenamePrefixesToRead, startReadingTokenFileNames, overwriteInMemory); MPI_Request request = MPI_REQUEST_NULL; if (this->combiParameters_.getCombinationVariant() == @@ -875,23 +838,22 @@ void ProcessGroupWorker::combineThirdLevelFileBasedReadReduce( // update fgs updateFullFromCombinedSparseGrids(); } - for (size_t i = 0; i < filenamePrefixesToRead.size(); ++i) { - // remove reading token and sparse grid file(s) - this->removeReadingFiles(filenamePrefixesToRead[i], startReadingTokenFileNames[i], - keepSparseGridFiles); - } - auto returnedValue = MPI_Wait(&request, MPI_STATUS_IGNORE); + // remove reading token and sparse grid file(s) + this->getSparseGridWorker().removeReadingFiles(filenamePrefixesToRead, startReadingTokenFileNames, + keepSparseGridFiles); + + [[maybe_unused]] auto returnedValue = MPI_Wait(&request, MPI_STATUS_IGNORE); assert(returnedValue == MPI_SUCCESS); } void ProcessGroupWorker::combineReadDistributeSystemWide( const std::vector& filenamePrefixesToRead, - const std::vector& startReadingTokenFileNames, bool overwrite, + const std::vector& startReadingTokenFileNames, bool overwriteInMemory, bool keepSparseGridFiles) { OUTPUT_GROUP_EXCLUSIVE_SECTION { this->combineThirdLevelFileBasedReadReduce(filenamePrefixesToRead, startReadingTokenFileNames, - overwrite, keepSparseGridFiles); + overwriteInMemory, keepSparseGridFiles); } else { if (combiParameters_.getCombinationVariant() == chunkedOutgroupSparseGridReduce) { @@ -968,7 +930,7 @@ int ProcessGroupWorker::reduceExtraSubspaceSizesFileBased( const std::string& filenamePrefixToWrite, const std::string& writeCompleteTokenFileName, const std::vector& filenamePrefixesToRead, const std::vector& startReadingTokenFileNames) { - int numSizesWritten = 0; + [[maybe_unused]] int numSizesWritten = 0; int numSizesReduced = 0; // we only need to write and read something if we are the I/O group OUTPUT_GROUP_EXCLUSIVE_SECTION { setExtraSparseGrid(true); } @@ -1015,13 +977,12 @@ void ProcessGroupWorker::waitForThirdLevelCombiResult(bool fromOutputGroup) { // receive third level combi result from third level pgroup (global reduce comm) broadcastSender = (RankType)combiParameters_.getThirdLevelPG(); } - CommunicatorType globalReduceComm = theMPISystem()->getGlobalReduceComm(); Stats::startEvent("wait for bcasts"); MPI_Request request; this->getSparseGridWorker().startSingleBroadcastDSGs(combiParameters_.getCombinationVariant(), broadcastSender, &request); - auto returnedValue = MPI_Wait(&request, MPI_STATUS_IGNORE); + [[maybe_unused]] auto returnedValue = MPI_Wait(&request, MPI_STATUS_IGNORE); assert(returnedValue == MPI_SUCCESS); Stats::stopEvent("wait for bcasts"); @@ -1032,14 +993,17 @@ void ProcessGroupWorker::zeroDsgsData() { this->getSparseGridWorker().zeroDsgsData(combiParameters_.getCombinationVariant()); } -int ProcessGroupWorker::writeDSGsToDisk(const std::string& filenamePrefix) { - return this->getSparseGridWorker().writeDSGsToDisk( +int ProcessGroupWorker::writeDSGsToDisk(const std::string& filenamePrefix, + const std::string& writeCompleteTokenFileName) { + auto numWritten = 
this->getSparseGridWorker().writeDSGsToDisk( filenamePrefix, this->getCombiParameters().getCombinationVariant()); + this->getSparseGridWorker().writeTokenFiles(writeCompleteTokenFileName); + return numWritten; } int ProcessGroupWorker::readDSGsFromDisk(const std::string& filenamePrefix, bool alwaysReadFullDSG) { - return this->getSparseGridWorker().readDSGsFromDisk(filenamePrefix, alwaysReadFullDSG); + return this->getSparseGridWorker().readDSGsFromDisk(filenamePrefix, true, alwaysReadFullDSG); } } /* namespace combigrid */ diff --git a/src/manager/ProcessGroupWorker.hpp b/src/manager/ProcessGroupWorker.hpp index 7715aed2c..d73fe5f54 100644 --- a/src/manager/ProcessGroupWorker.hpp +++ b/src/manager/ProcessGroupWorker.hpp @@ -73,7 +73,8 @@ class ProcessGroupWorker { void writeSparseGridMinMaxCoefficients(const std::string& fileNamePrefix) const; /** write extra SGs to disk (binary w/ MPI-IO) */ - int writeDSGsToDisk(const std::string& filenamePrefix); + int writeDSGsToDisk(const std::string& filenamePrefix, + const std::string& writeCompleteTokenFileName); /** read extra SGs from disk (binary w/ MPI-IO) */ int readDSGsFromDisk(const std::string& filenamePrefix, bool alwaysReadFullDSG = false); @@ -95,11 +96,8 @@ class ProcessGroupWorker { void waitForTokenFile(const std::string& startReadingTokenFileName) const; - void removeReadingFiles(const std::string& filenamePrefixToRead, - const std::string& startReadingTokenFileName, - bool keepSparseGridFiles) const; - - int readReduce(const std::string& filenamePrefixToRead, bool overwrite); + int readReduce(const std::vector& filenamePrefixesToRead, + const std::vector& startReadingTokenFileNames, bool overwrite); void combineThirdLevelFileBasedReadReduce( const std::vector& filenamePrefixesToRead, diff --git a/src/manager/ProcessManager.cpp b/src/manager/ProcessManager.cpp index fc5d749f2..07e2bddbb 100644 --- a/src/manager/ProcessManager.cpp +++ b/src/manager/ProcessManager.cpp @@ -57,7 +57,7 @@ void ProcessManager::receiveDurationsOfTasksFromGroupMasters(size_t numDurations } for (size_t i = 0; i < numDurationsToReceive; ++i) { DurationInformation recvbuf; - for(const auto& t : pgroups_[i]->getTaskContainer()){ + for([[maybe_unused]] const auto& t : pgroups_[i]->getTaskContainer()){ // this assumes that the manager rank is the highest in globalComm MPIUtils::receiveClass(&recvbuf, i, theMPISystem()->getGlobalComm()); @@ -104,7 +104,7 @@ void ProcessManager::exit() { // send exit signal to each group for (size_t i = 0; i < pgroups_.size(); ++i) { - bool success = pgroups_[i]->exit(); + [[maybe_unused]] bool success = pgroups_[i]->exit(); assert(success); } } @@ -125,7 +125,7 @@ void ProcessManager::initDsgus() { // tell groups to init Dsgus for (size_t i = 0; i < pgroups_.size(); ++i) { - bool success = pgroups_[i]->initDsgus(); + [[maybe_unused]] bool success = pgroups_[i]->initDsgus(); assert(success); } @@ -136,13 +136,13 @@ void ProcessManager::initDsgus() { void ProcessManager::updateCombiParameters() { Stats::startEvent("manager update parameters"); { - bool fail = waitAllFinished(); + [[maybe_unused]] bool fail = waitAllFinished(); assert(!fail && "should not fail here"); } for (auto g : pgroups_) g->updateCombiParameters(params_); { - bool fail = waitAllFinished(); + [[maybe_unused]] bool fail = waitAllFinished(); assert(!fail && "should not fail here"); } Stats::stopEvent("manager update parameters"); @@ -411,7 +411,7 @@ void ProcessManager::waitForAllGroupsToWait() const { void ProcessManager::parallelEval(const LevelVector& leval, 
std::string& filename, size_t groupID) { // actually it would be enough to wait for the group which does the eval { - bool fail = waitAllFinished(); + [[maybe_unused]] bool fail = waitAllFinished(); assert(!fail && "should not fail here"); } @@ -422,7 +422,7 @@ void ProcessManager::parallelEval(const LevelVector& leval, std::string& filenam g->parallelEval(leval, filename); { - bool fail = waitAllFinished(); + [[maybe_unused]] bool fail = waitAllFinished(); assert(!fail && "should not fail here"); } @@ -494,7 +494,7 @@ std::vector ProcessManager::interpolateValues( } void ProcessManager::writeInterpolatedValuesPerGrid( - const std::vector>& interpolationCoords, std::string filenamePrefix) { + const std::vector>& interpolationCoords, const std::string& filenamePrefix) { // send interpolation coords as a single array std::vector interpolationCoordsSerial = serializeInterpolationCoords(interpolationCoords); @@ -504,7 +504,7 @@ void ProcessManager::writeInterpolatedValuesPerGrid( } void ProcessManager::writeInterpolatedValuesSingleFile( - const std::vector>& interpolationCoords, std::string filenamePrefix) { + const std::vector>& interpolationCoords, const std::string& filenamePrefix) { Stats::startEvent("manager write interpolated"); // send interpolation coords as a single array std::vector interpolationCoordsSerial = serializeInterpolationCoords(interpolationCoords); @@ -521,7 +521,7 @@ void ProcessManager::writeInterpolatedValuesSingleFile( } void ProcessManager::writeInterpolationCoordinates( - const std::vector>& interpolationCoords, std::string filenamePrefix) const { + const std::vector>& interpolationCoords, const std::string& filenamePrefix) const { std::string saveFilePath = filenamePrefix + "_coords.h5"; h5io::writeValuesToH5File(interpolationCoords, saveFilePath, "manager", "coordinates"); } @@ -630,12 +630,12 @@ void ProcessManager::writeCombigridsToVTKPlotFile(ProcessGroupManagerID pg) { #endif /* defined(USE_VTK) */ } -void ProcessManager::writeDSGsToDisk(std::string filenamePrefix) { +void ProcessManager::writeDSGsToDisk(const std::string& filenamePrefix) { thirdLevelPGroup_->writeDSGsToDisk(filenamePrefix); waitForPG(thirdLevelPGroup_); } -void ProcessManager::readDSGsFromDisk(std::string filenamePrefix) { +void ProcessManager::readDSGsFromDisk(const std::string& filenamePrefix) { thirdLevelPGroup_->readDSGsFromDisk(filenamePrefix); waitForPG(thirdLevelPGroup_); } diff --git a/src/manager/ProcessManager.hpp b/src/manager/ProcessManager.hpp index ee8fbc935..96271520d 100644 --- a/src/manager/ProcessManager.hpp +++ b/src/manager/ProcessManager.hpp @@ -134,13 +134,13 @@ class ProcessManager { const std::vector>& interpolationCoords); void writeInterpolatedValuesSingleFile(const std::vector>& interpolationCoords, - std::string filenamePrefix); + const std::string& filenamePrefix); void writeInterpolatedValuesPerGrid(const std::vector>& interpolationCoords, - std::string filenamePrefix); + const std::string& filenamePrefix); void writeInterpolationCoordinates(const std::vector>& interpolationCoords, - std::string filenamePrefix) const; + const std::string& filenamePrefix) const; void writeSparseGridMinMaxCoefficients(const std::string& filename); @@ -175,9 +175,9 @@ class ProcessManager { void writeCombigridsToVTKPlotFile(ProcessGroupManagerID pg); - void writeDSGsToDisk(std::string filenamePrefix); + void writeDSGsToDisk(const std::string& filenamePrefix); - void readDSGsFromDisk(std::string filenamePrefix); + void readDSGsFromDisk(const std::string& filenamePrefix); private: 
ProcessGroupManagerContainer& pgroups_; @@ -267,7 +267,7 @@ void ProcessManager::combine() { // send signal to each group for (size_t i = 0; i < pgroups_.size(); ++i) { - bool success = pgroups_[i]->combine(); + [[maybe_unused]] bool success = pgroups_[i]->combine(); assert(success); } @@ -290,7 +290,7 @@ void ProcessManager::combineSystemWide() { // tell groups to combine local and global for (size_t i = 0; i < pgroups_.size(); ++i) { - bool success = pgroups_[i]->combineSystemWide(); + [[maybe_unused]] bool success = pgroups_[i]->combineSystemWide(); assert(success); } diff --git a/src/manager/SparseGridWorker.hpp b/src/manager/SparseGridWorker.hpp index 1cca2d500..45d2c38fe 100644 --- a/src/manager/SparseGridWorker.hpp +++ b/src/manager/SparseGridWorker.hpp @@ -1,5 +1,7 @@ #pragma once +#include + #include "combicom/CombiCom.hpp" #include "fullgrid/DistributedFullGrid.hpp" #include "hierarchization/DistributedHierarchization.hpp" @@ -8,7 +10,6 @@ #include "mpi/MPIUtils.hpp" #include "sparsegrid/DistributedSparseGridIO.hpp" #include "sparsegrid/DistributedSparseGridUniform.hpp" - namespace combigrid { // template @@ -26,9 +27,9 @@ class SparseGridWorker { uint32_t maxMiBToSendPerThread, bool collectMinMaxCoefficients = false); - inline void copyFromExtraDsgToPartialDSG(int gridNumber = 0); + inline void copyFromExtraDsgToPartialDSG(size_t gridNumber = 0); - inline void copyFromPartialDsgToExtraDSG(int gridNumber = 0); + inline void copyFromPartialDsgToExtraDSG(size_t gridNumber = 0); /* free DSG memory as intermediate step */ inline void deleteDsgsData(); @@ -53,13 +54,13 @@ class SparseGridWorker { inline const std::vector>>& getExtraUniDSGVector() const; - inline int getNumberOfGrids() const; + inline size_t getNumberOfGrids() const; inline std::unique_ptr>& getSparseGridToUseForThirdLevel(bool thirdLevelExtraSparseGrid); inline void initCombinedUniDSGVector(const LevelVector& lmin, LevelVector lmax, - const LevelVector& reduceLmaxByVector, int numGrids, + const LevelVector& reduceLmaxByVector, size_t numGrids, CombinationVariant combinationVariant, bool clearLevels = false); @@ -71,14 +72,17 @@ class SparseGridWorker { inline void maxReduceSubspaceSizesInOutputGroup(); - inline int readDSGsFromDisk(const std::string& filenamePrefix, bool alwaysReadFullDSG = false); + inline int readDSGsFromDisk(const std::string& filenamePrefixToRead, + bool addSpeciesNumberToFileName, bool alwaysReadFullDSG = false); inline int readDSGsFromDiskAndReduce(const std::string& filenamePrefixToRead, uint32_t maxMiBToReadPerThread, + bool addSpeciesNumberToFileName, bool alwaysReadFullDSG = false); - inline int readReduce(const std::string& filenamePrefixToRead, uint32_t maxMiBToReadPerThread, - bool overwrite); + inline int readReduce(const std::vector& filenamePrefixesToRead, + const std::vector& startReadingTokenFileNames, + uint32_t maxMiBToReadPerThread, bool overwrite); inline int reduceExtraSubspaceSizes(const std::vector& filenamesToRead, CombinationVariant combinationVariant, bool overwrite); @@ -90,6 +94,10 @@ class SparseGridWorker { inline void reduceSubspaceSizesBetweenGroups(CombinationVariant combinationVariant); + inline void removeReadingFiles(const std::vector& filenamePrefixToRead, + const std::vector& startReadingTokenFileName, + bool keepSparseGridFiles) const; + inline void setExtraSparseGrid(bool initializeSizes = true); inline void startSingleBroadcastDSGs(CombinationVariant combinationVariant, @@ -102,6 +110,8 @@ class SparseGridWorker { inline void writeMinMaxCoefficients(const 
std::string& fileNamePrefix) const; + inline void writeTokenFiles(const std::string& writeCompleteTokenFileName) const; + inline int writeSubspaceSizesToFile(const std::string& filenamePrefixToWrite) const; inline void zeroDsgsData(CombinationVariant combinationVariant); @@ -126,7 +136,7 @@ class SparseGridWorker { * @param g the dimension index (in the case that there are multiple different full grids per * task) */ - inline void fillDFGFromDSGU(DistributedFullGrid& dfg, int g, + inline void fillDFGFromDSGU(DistributedFullGrid& dfg, size_t g, const std::vector& boundary, const std::vector& hierarchizationDims, const std::vector& hierarchicalBases, @@ -150,7 +160,7 @@ inline void SparseGridWorker::collectReduceDistribute(CombinationVariant combina auto chunkSize = combigrid::CombiCom::getGlobalReduceChunkSize(maxMiBToSendPerThread); - for (int g = 0; g < numGrids; ++g) { + for (size_t g = 0; g < numGrids; ++g) { auto& dsg = this->getCombinedUniDSGVector()[g]; if (!dsg->getSubspacesByCommunicator().empty()) { auto& chunkedSubspaces = combigrid::CombiCom::getChunkedSubspaces( @@ -158,7 +168,7 @@ inline void SparseGridWorker::collectReduceDistribute(CombinationVariant combina for (auto& subspaceChunk : chunkedSubspaces) { // allocate new subspace vector dsg->allocateDifferentSubspaces(std::move(subspaceChunk)); - assert(dsg->getRawDataSize() <= chunkSize); + assert(dsg->getRawDataSize() <= static_cast(chunkSize)); #ifndef NDEBUG auto myRawDataSize = dsg->getRawDataSize(); decltype(myRawDataSize) maxRawDataSize = 0; @@ -174,8 +184,7 @@ inline void SparseGridWorker::collectReduceDistribute(CombinationVariant combina // local reduce (fg -> sg, within rank) for (const auto& t : this->taskWorkerRef_.getTasks()) { - const DistributedFullGrid& dfg = - t->getDistributedFullGrid(static_cast(g)); + const DistributedFullGrid& dfg = t->getDistributedFullGrid(g); dsg->addDistributedFullGrid(dfg, t->getCoefficient()); } // global reduce (across process groups) @@ -227,7 +236,7 @@ inline void SparseGridWorker::collectReduceDistribute(CombinationVariant combina } } -inline void SparseGridWorker::copyFromPartialDsgToExtraDSG(int gridNumber) { +inline void SparseGridWorker::copyFromPartialDsgToExtraDSG(size_t gridNumber) { assert(gridNumber == 0); assert(this->getCombinedUniDSGVector().size() == 1); assert(this->getExtraUniDSGVector().size() == 1); @@ -242,14 +251,14 @@ inline void SparseGridWorker::copyFromPartialDsgToExtraDSG(int gridNumber) { std::remove_if(subspacesToCopy.begin(), subspacesToCopy.end(), [&extraDSG](const auto& s) { return extraDSG->getDataSize(s) == 0; }), subspacesToCopy.end()); - for (const auto& s : subspacesToCopy) { + for ([[maybe_unused]] const auto& s : subspacesToCopy) { assert(extraDSG->getDataSize(s) == extraDSG->getAllocatedDataSize(s)); assert(dsg->getDataSize(s) == dsg->getAllocatedDataSize(s)); } extraDSG->copyDataFrom(*dsg, subspacesToCopy); } -inline void SparseGridWorker::copyFromExtraDsgToPartialDSG(int gridNumber) { +inline void SparseGridWorker::copyFromExtraDsgToPartialDSG(size_t gridNumber) { assert(gridNumber == 0); assert(this->getCombinedUniDSGVector().size() == 1); assert(this->getExtraUniDSGVector().size() == 1); @@ -261,7 +270,7 @@ inline void SparseGridWorker::copyFromExtraDsgToPartialDSG(int gridNumber) { std::remove_if(subspacesToCopy.begin(), subspacesToCopy.end(), [&extraDSG](const auto& s) { return extraDSG->getDataSize(s) == 0; }), subspacesToCopy.end()); - for (const auto& s : subspacesToCopy) { + for ([[maybe_unused]] const auto& s : subspacesToCopy) { 
assert(extraDSG->getDataSize(s) == extraDSG->getAllocatedDataSize(s)); assert(dsg->getDataSize(s) == dsg->getAllocatedDataSize(s)); } @@ -310,7 +319,7 @@ inline void SparseGridWorker::distributeChunkedBroadcasts(uint32_t maxMiBToSendP assert(this->getCombinedUniDSGVector().size() == 1); for (auto& dsg : this->getCombinedUniDSGVector()) { // make sure data sizes are set - for (auto& subspace : subspaces) { + for ([[maybe_unused]] auto& subspace : subspaces) { assert(dsg->getDataSize(subspace) > 0); } @@ -332,7 +341,7 @@ inline void SparseGridWorker::distributeChunkedBroadcasts(uint32_t maxMiBToSendP else { // non-output ranks need to wait before they can extract if (roundNumber == 0) Stats::startEvent("wait 1st bcast"); - auto returnedValue = MPI_Wait(&request, MPI_STATUS_IGNORE); + [[maybe_unused]] auto returnedValue = MPI_Wait(&request, MPI_STATUS_IGNORE); if (roundNumber == 0) Stats::stopEvent("wait 1st bcast"); assert(returnedValue == MPI_SUCCESS); } @@ -343,7 +352,7 @@ inline void SparseGridWorker::distributeChunkedBroadcasts(uint32_t maxMiBToSendP } OUTPUT_GROUP_EXCLUSIVE_SECTION { // output ranks can wait later - auto returnedValue = MPI_Wait(&request, MPI_STATUS_IGNORE); + [[maybe_unused]] auto returnedValue = MPI_Wait(&request, MPI_STATUS_IGNORE); assert(returnedValue == MPI_SUCCESS); } ++roundNumber; @@ -363,7 +372,7 @@ inline void SparseGridWorker::distributeChunkedBroadcasts(uint32_t maxMiBToSendP } for (auto& dsg : this->getCombinedUniDSGVector()) { // make sure data sizes are set - for (auto& subspace : subspaces) { + for ([[maybe_unused]] auto& subspace : subspaces) { assert(dsg->getDataSize(subspace) > 0); } @@ -386,7 +395,7 @@ inline void SparseGridWorker::distributeCombinedSolutionToTasks() { // not better than the parallel loop within extractFromUniformSG // #pragma omp parallel for collapse(2) default(none) schedule(static) for (auto& taskToUpdate : this->taskWorkerRef_.getTasks()) { - for (int g = 0; g < this->getNumberOfGrids(); ++g) { + for (size_t g = 0; g < this->getNumberOfGrids(); ++g) { // fill dfg with hierarchical coefficients from distributed sparse grid taskToUpdate->getDistributedFullGrid(g).extractFromUniformSG( *this->getCombinedUniDSGVector()[g]); @@ -395,7 +404,7 @@ inline void SparseGridWorker::distributeCombinedSolutionToTasks() { } inline void SparseGridWorker::fillDFGFromDSGU( - DistributedFullGrid& dfg, int g, const std::vector& boundary, + DistributedFullGrid& dfg, size_t g, const std::vector& boundary, const std::vector& hierarchizationDims, const std::vector& hierarchicalBases, const LevelVector& lmin) const { // fill dfg with hierarchical coefficients from distributed sparse grid @@ -416,7 +425,7 @@ inline void SparseGridWorker::fillDFGFromDSGU( inline void SparseGridWorker::fillDFGFromDSGU( Task& t, const std::vector& hierarchizationDims, const std::vector& hierarchicalBases, const LevelVector& lmin) const { - for (int g = 0; g < this->getNumberOfGrids(); g++) { + for (size_t g = 0; g < this->getNumberOfGrids(); g++) { assert(this->getCombinedUniDSGVector()[g] != nullptr); this->fillDFGFromDSGU(t.getDistributedFullGrid(g), g, t.getBoundary(), hierarchizationDims, hierarchicalBases, lmin); @@ -443,7 +452,9 @@ SparseGridWorker::getExtraUniDSGVector() const { return extraUniDSGVector_; } -inline int SparseGridWorker::getNumberOfGrids() const { return this->combinedUniDSGVector_.size(); } +inline size_t SparseGridWorker::getNumberOfGrids() const { + return this->combinedUniDSGVector_.size(); +} inline std::unique_ptr>& 
SparseGridWorker::getSparseGridToUseForThirdLevel(bool thirdLevelExtraSparseGrid) { @@ -466,7 +477,7 @@ SparseGridWorker::getSparseGridToUseForThirdLevel(bool thirdLevelExtraSparseGrid */ inline void SparseGridWorker::initCombinedUniDSGVector(const LevelVector& lmin, LevelVector lmax, const LevelVector& reduceLmaxByVector, - int numGrids, + size_t numGrids, CombinationVariant combinationVariant, bool clearLevels) { if (this->taskWorkerRef_.getTasks().size() == 0) { @@ -481,7 +492,7 @@ inline void SparseGridWorker::initCombinedUniDSGVector(const LevelVector& lmin, // get all subspaces in the (optimized) combischeme, create dsgs combinedUniDSGVector_.reserve(static_cast(numGrids)); - for (int g = 0; g < numGrids; ++g) { + for (size_t g = 0; g < numGrids; ++g) { combinedUniDSGVector_.emplace_back(std::unique_ptr>( new DistributedSparseGridUniform(static_cast(lmax.size()), lmax, lmin, theMPISystem()->getLocalComm()))); @@ -525,7 +536,7 @@ inline void SparseGridWorker::interpolateAndPlotOnLevel( OwningDistributedFullGrid dfg(static_cast(levelToEvaluate.size()), levelToEvaluate, theMPISystem()->getLocalComm(), boundary, parallelization, false, decomposition); - for (IndexType g = 0; g < this->getNumberOfGrids(); ++g) { // loop over all grids and plot them + for (size_t g = 0; g < this->getNumberOfGrids(); ++g) { // loop over all grids and plot them this->fillDFGFromDSGU(dfg, g, boundary, hierarchizationDims, hierarchicalBases, lmin); // save dfg to file with MPI-IO if (endsWith(filename, ".vtk")) { @@ -560,19 +571,22 @@ inline void SparseGridWorker::maxReduceSubspaceSizesInOutputGroup() { } } -inline int SparseGridWorker::readDSGsFromDisk(const std::string& filenamePrefix, +inline int SparseGridWorker::readDSGsFromDisk(const std::string& filenamePrefixToRead, + bool addSpeciesNumberToFileName, bool alwaysReadFullDSG) { int numRead = 0; for (size_t i = 0; i < this->getNumberOfGrids(); ++i) { auto uniDsg = this->getCombinedUniDSGVector()[i].get(); auto dsgToUse = uniDsg; + auto filename = filenamePrefixToRead; + if (addSpeciesNumberToFileName) { + filename += "_" + std::to_string(i); + } if (this->getExtraUniDSGVector().size() > 0 && !alwaysReadFullDSG) { dsgToUse = this->getExtraUniDSGVector()[i].get(); - numRead += DistributedSparseGridIO::readSomeFiles(*dsgToUse, - filenamePrefix + "_" + std::to_string(i)); + numRead += DistributedSparseGridIO::readSomeFiles(*dsgToUse, filename); } else { - numRead += - DistributedSparseGridIO::readOneFile(*dsgToUse, filenamePrefix + "_" + std::to_string(i)); + numRead += DistributedSparseGridIO::readOneFile(*dsgToUse, filename); } if (this->getExtraUniDSGVector().size() > 0 && uniDsg->isSubspaceDataCreated()) { // copy partial data from extraDSG back to uniDSG @@ -584,18 +598,23 @@ inline int SparseGridWorker::readDSGsFromDisk(const std::string& filenamePrefix, inline int SparseGridWorker::readDSGsFromDiskAndReduce(const std::string& filenamePrefixToRead, uint32_t maxMiBToReadPerThread, + bool addSpeciesNumberToFileName, bool alwaysReadFullDSG) { int numReduced = 0; for (size_t i = 0; i < this->getNumberOfGrids(); ++i) { + auto filename = filenamePrefixToRead; + if (addSpeciesNumberToFileName) { + filename += "_" + std::to_string(i); + } auto uniDsg = this->getCombinedUniDSGVector()[i].get(); auto dsgToUse = uniDsg; if (this->getExtraUniDSGVector().size() > 0 && !alwaysReadFullDSG) { dsgToUse = this->getExtraUniDSGVector()[i].get(); - numReduced += DistributedSparseGridIO::readSomeFilesAndReduce( - *dsgToUse, filenamePrefixToRead + "_" + std::to_string(i), 
maxMiBToReadPerThread); + numReduced += DistributedSparseGridIO::readSomeFilesAndReduce(*dsgToUse, filename, + maxMiBToReadPerThread); } else { - numReduced += DistributedSparseGridIO::readOneFileAndReduce( - *dsgToUse, filenamePrefixToRead + "_" + std::to_string(i), maxMiBToReadPerThread); + numReduced += + DistributedSparseGridIO::readOneFileAndReduce(*dsgToUse, filename, maxMiBToReadPerThread); } if (this->getExtraUniDSGVector().size() > 0 && uniDsg->isSubspaceDataCreated()) { // copy partial data from extraDSG back to uniDSG @@ -605,13 +624,55 @@ inline int SparseGridWorker::readDSGsFromDiskAndReduce(const std::string& filena return numReduced; } -inline int SparseGridWorker::readReduce(const std::string& filenamePrefixToRead, +inline int SparseGridWorker::readReduce(const std::vector& filenamePrefixesToRead, + const std::vector& startReadingTokenFileNames, uint32_t maxMiBToReadPerThread, bool overwrite) { int numRead = 0; - if (overwrite) { - numRead = this->readDSGsFromDisk(filenamePrefixToRead); - } else { - numRead = this->readDSGsFromDiskAndReduce(filenamePrefixToRead, maxMiBToReadPerThread); + assert(this->getNumberOfGrids() == 1); // for more species, todo loop + + // wait until we can start to read any of the files + std::set indicesStillToReadReduce; + for (size_t i = 0; i < filenamePrefixesToRead.size(); ++i) { + indicesStillToReadReduce.insert(i); + } + auto filePart = theMPISystem()->getFilePartNumber(); + bool firstTry = true; + while (!indicesStillToReadReduce.empty()) { + std::string filePartTokenToRead; + std::string filePartNameToRead; + for (auto it = indicesStillToReadReduce.begin(); it != indicesStillToReadReduce.end(); ++it) { + if (combigrid::theMPISystem()->getOutputComm() != MPI_COMM_NULL) { + filePartTokenToRead = startReadingTokenFileNames[*it] + ".part" + std::to_string(filePart); + } else { + filePartTokenToRead = startReadingTokenFileNames[*it]; + } + if (firstTry) { + std::cout << "trying to read " << filePartTokenToRead << std::endl; + firstTry = false; + } + if (combigrid::getFileExistsRootOnly(filePartTokenToRead, theMPISystem()->getOutputComm())) { + if (combigrid::theMPISystem()->getOutputComm() != MPI_COMM_NULL) { + filePartNameToRead = + filenamePrefixesToRead[*it] + "_0" + ".part" + std::to_string(filePart); + } else { + filePartNameToRead = filenamePrefixesToRead[*it] + "_0"; + } + std::cout << " read from " << filePartNameToRead << std::endl; + overwrite ? Stats::startEvent("read SG") : Stats::startEvent("read/reduce SG"); + if (overwrite) { + numRead += this->readDSGsFromDisk(filePartNameToRead, false); + } else { + numRead += + this->readDSGsFromDiskAndReduce(filePartNameToRead, maxMiBToReadPerThread, false); + } + assert(numRead > 0); + overwrite ? 
Stats::stopEvent("read SG") : Stats::stopEvent("read/reduce SG"); + indicesStillToReadReduce.erase(it); + break; // because iterators may be invalidated + } + } + // wait for 200ms + std::this_thread::sleep_for(std::chrono::milliseconds(200)); } if (this->getNumberOfGrids() != 1) { throw std::runtime_error("Combining more than one DSG is not implemented yet"); @@ -710,14 +771,14 @@ inline void SparseGridWorker::reduceLocalAndGlobal(CombinationVariant combinatio this->zeroDsgsData(combinationVariant); // local reduce (within rank) for (const auto& t : this->taskWorkerRef_.getTasks()) { - for (int g = 0; g < numGrids; ++g) { + for (size_t g = 0; g < numGrids; ++g) { const DistributedFullGrid& dfg = t->getDistributedFullGrid(static_cast(g)); this->getCombinedUniDSGVector()[g]->addDistributedFullGrid(dfg, t->getCoefficient()); } } // global reduce (across process groups) - for (int g = 0; g < numGrids; ++g) { + for (size_t g = 0; g < numGrids; ++g) { if (combinationVariant == CombinationVariant::sparseGridReduce) { CombiCom::distributedGlobalSparseGridReduce( *this->getCombinedUniDSGVector()[g], maxMiBToSendPerThread, globalReduceRankThatCollects); @@ -756,6 +817,34 @@ inline void SparseGridWorker::reduceSubspaceSizesBetweenGroups( } } +void SparseGridWorker::removeReadingFiles( + const std::vector& filenamePrefixesToRead, + const std::vector& startReadingTokenFileNames, bool keepSparseGridFiles) const { + assert(filenamePrefixesToRead.size() == startReadingTokenFileNames.size()); + OUTPUT_ROOT_EXCLUSIVE_SECTION { + for (size_t i = 0; i < filenamePrefixesToRead.size(); ++i) { + auto filePart = theMPISystem()->getFilePartNumber(); + // remove reading token + auto partTokenString = startReadingTokenFileNames[i]; + if (combigrid::theMPISystem()->getOutputComm() != MPI_COMM_NULL) { + partTokenString += ".part" + std::to_string(filePart); + } + std::filesystem::remove(partTokenString); + // remove sparse grid file(s) + if (!keepSparseGridFiles) { + std::filesystem::remove(filenamePrefixesToRead[i] + "_" + std::to_string(0)); + for (const auto& entry : std::filesystem::directory_iterator(".")) { + if (entry.path().string().find(filenamePrefixesToRead[i] + "_" + std::to_string(0) + + ".part" + std::to_string(filePart)) != std::string::npos) { + assert(entry.is_regular_file()); + std::filesystem::remove(entry.path()); + } + } + } + } + } +} + inline void SparseGridWorker::setExtraSparseGrid(bool initializeSizes) { if (this->getNumberOfGrids() != 1) { throw std::runtime_error("this->getCombinedUniDSGVector() is empty"); @@ -778,7 +867,8 @@ inline void SparseGridWorker::setExtraSparseGrid(bool initializeSizes) { // create Kahan buffer now (at zero size), because summation is not needed on this sparse grid extraUniDSG->createKahanBuffer(); if (initializeSizes) { - for (size_t i = 0; i < extraUniDSG->getNumSubspaces(); ++i) { + for (typename AnyDistributedSparseGrid::SubspaceIndexType i = 0; + i < extraUniDSG->getNumSubspaces(); ++i) { extraUniDSG->setDataSize(i, this->getCombinedUniDSGVector()[0]->getDataSize(i)); } } @@ -813,6 +903,7 @@ inline int SparseGridWorker::writeDSGsToDisk(const std::string& filenamePrefix, auto uniDsg = this->getCombinedUniDSGVector()[i].get(); auto dsgToUse = uniDsg; if (this->getExtraUniDSGVector().size() > 0) { + assert(theMPISystem()->getOutputComm() != MPI_COMM_NULL); dsgToUse = this->getExtraUniDSGVector()[i].get(); if (combinationVariant == CombinationVariant::sparseGridReduce || combinationVariant == CombinationVariant::outgroupSparseGridReduce) { @@ -844,6 +935,18 @@ 
inline void SparseGridWorker::writeMinMaxCoefficients(const std::string& fileNam } } +inline void SparseGridWorker::writeTokenFiles(const std::string& writeCompleteTokenFileName) const { + OUTPUT_ROOT_EXCLUSIVE_SECTION { + if (combigrid::theMPISystem()->getOutputComm() == MPI_COMM_NULL) { + // write single token + std::ofstream tokenFile(writeCompleteTokenFileName); + } else { + auto filePart = theMPISystem()->getFilePartNumber(); + std::ofstream tokenFile(writeCompleteTokenFileName + ".part" + std::to_string(filePart)); + } + } +} + inline int SparseGridWorker::writeSubspaceSizesToFile( const std::string& filenamePrefixToWrite) const { return DistributedSparseGridIO::writeSubspaceSizesToFile(*this->getCombinedUniDSGVector()[0], diff --git a/src/mpi/MPICartesianUtils.hpp b/src/mpi/MPICartesianUtils.hpp index eba767046..51d38541a 100644 --- a/src/mpi/MPICartesianUtils.hpp +++ b/src/mpi/MPICartesianUtils.hpp @@ -82,7 +82,7 @@ class MPICartesianUtils { assert(!partitionCoords_.empty()); // find rank r in partitionCoords_ auto findIt = std::find(partitionCoords_.begin(), partitionCoords_.end(), r); - IndexType rIndex = std::distance(partitionCoords_.begin(), findIt); + IndexType rIndex = static_cast(std::distance(partitionCoords_.begin(), findIt)); return partitionCoordsTensor_.getVectorIndex(rIndex); } @@ -145,7 +145,7 @@ class MPICartesianUtils { } inline int getCommunicatorSize() const { - return partitionCoords_.size(); + return static_cast(partitionCoords_.size()); } inline RankType getCommunicatorRank() const { return rank_; } diff --git a/src/mpi/MPIMemory.cpp b/src/mpi/MPIMemory.cpp index ee77e2e94..b2c01ab3d 100644 --- a/src/mpi/MPIMemory.cpp +++ b/src/mpi/MPIMemory.cpp @@ -2,6 +2,8 @@ #include "mpi/MPISystem.hpp" +#include + namespace combigrid { namespace mpimemory { /* @@ -18,8 +20,8 @@ int get_memory_usage_kb(unsigned long* vmrss_kb, unsigned long* vmsize_kb) { FILE* procfile = fopen("/proc/self/status", "r"); long to_read = 8192; - char buffer[to_read]; - fread(buffer, sizeof(char), to_read, procfile); + std::vector buffer(to_read); + fread(buffer.data(), sizeof(char), to_read, procfile); fclose(procfile); short found_vmrss = 0; @@ -28,7 +30,7 @@ int get_memory_usage_kb(unsigned long* vmrss_kb, unsigned long* vmsize_kb) { /* Look through proc status contents line by line */ char delims[] = "\n"; - char* line = strtok(buffer, delims); + char* line = strtok(buffer.data(), delims); while (line != NULL && (found_vmrss == 0 || found_vmsize == 0)) { search_result = strstr(line, "VmRSS:"); diff --git a/src/mpi/MPISystem.cpp b/src/mpi/MPISystem.cpp index 3e51f2e5b..e306fa871 100644 --- a/src/mpi/MPISystem.cpp +++ b/src/mpi/MPISystem.cpp @@ -139,7 +139,7 @@ void MPISystem::init(size_t ngroup, size_t nprocs, bool withWorldManager) { initGlobalReduceCommm(); - initOuputGroupComm(); + initOutputGroupComm(); if (withWorldManager) { /* @@ -183,7 +183,7 @@ void MPISystem::init(size_t ngroup, size_t nprocs, CommunicatorType lcomm, bool initGlobalReduceCommm(); - initOuputGroupComm(); + initOutputGroupComm(); if (withWorldManager) { initThirdLevelComms(); @@ -212,7 +212,7 @@ void MPISystem::initWorldReusable(CommunicatorType wcomm, size_t ngroup, size_t initGlobalReduceCommm(); - initOuputGroupComm(); + initOutputGroupComm(); /* create global communicator which contains only the manager and the master * process of each process group @@ -291,7 +291,7 @@ void MPISystem::storeLocalComm(CommunicatorType lcomm) { // in principle this does not have to be 0 masterRank_ = 0; - int localSize = 
getCommSize(localComm_); + [[maybe_unused]] int localSize = getCommSize(localComm_); assert(masterRank_ < localSize); } @@ -319,7 +319,7 @@ void MPISystem::initGlobalComm(bool withWorldManager) { } if (withWorldManager) { assert(managerRankWorld_ >= 0); - assert(managerRankWorld_ < nprocs_ * ngroup_ + 1); + assert(managerRankWorld_ < static_cast(nprocs_ * ngroup_ + 1)); ranks.push_back(managerRankWorld_); } @@ -334,12 +334,12 @@ void MPISystem::initGlobalComm(bool withWorldManager) { int worldSize = 0; MPI_Group_size(worldGroup, &worldSize); - assert(worldSize == nprocs_ * ngroup_ + (withWorldManager ? 1 : 0)); + assert(worldSize == static_cast(nprocs_ * ngroup_ + (withWorldManager ? 1 : 0))); for (const auto& r : ranks) { assert(r >= 0); assert(r < worldSize); } - assert(ranks.size() <= worldSize); + assert(ranks.size() <= static_cast(worldSize)); #endif MPI_Group globalGroup; @@ -447,7 +447,7 @@ std::vector& getDiagonalRanks(size_t nprocs, size_t ngroup) { return ranks; } -void MPISystem::initOuputGroupComm(uint16_t numFileParts) { +void MPISystem::initOutputGroupComm(uint16_t numFileParts) { assert(numFileParts > 0); if (outputComm_ != MPI_COMM_NULL && outputComm_ != outputGroupComm_) { MPI_Comm_free(&outputComm_); @@ -881,7 +881,7 @@ bool MPISystem::recoverCommunicators(bool groupAlive, deleteCommFTAndCcomm(&globalReduceCommFT_, &globalReduceComm_); initGlobalReduceCommm(); - initOuputGroupComm(); + initOutputGroupComm(); // toDo return fixed process group IDs std::cout << "returning \n"; diff --git a/src/mpi/MPISystem.hpp b/src/mpi/MPISystem.hpp index be097f928..c7dd7bc34 100644 --- a/src/mpi/MPISystem.hpp +++ b/src/mpi/MPISystem.hpp @@ -5,6 +5,7 @@ // to resolve https://github.com/open-mpi/ompi/issues/5157 #define OMPI_SKIP_MPICXX 1 #include + #include #include #include @@ -18,17 +19,28 @@ #define FIRST_GROUP_EXCLUSIVE_SECTION if (combigrid::theMPISystem()->getProcessGroupNumber() == 0) // output group does output (in no-manager case) -#define OUTPUT_GROUP_EXCLUSIVE_SECTION if (combigrid::theMPISystem()->getOutputGroupComm() != MPI_COMM_NULL) +#define OUTPUT_GROUP_EXCLUSIVE_SECTION \ + if (combigrid::theMPISystem()->getOutputGroupComm() != MPI_COMM_NULL) + +// output root: either, the split Output comm is not set (then the master of output group), +// or the zeroth rank of each split of the output group comm +#define OUTPUT_ROOT_EXCLUSIVE_SECTION \ + if ((combigrid::theMPISystem()->getOutputGroupComm() != MPI_COMM_NULL) && \ + ((combigrid::theMPISystem()->getOutputComm() == MPI_COMM_NULL && \ + combigrid::theMPISystem()->getOutputGroupRank() == 0) || \ + (combigrid::getCommRank(combigrid::theMPISystem()->getOutputComm()) == 0))) // last group does some other (potentially concurrent) output -#define OTHER_OUTPUT_GROUP_EXCLUSIVE_SECTION \ - if (combigrid::theMPISystem()->getProcessGroupNumber() == combigrid::theMPISystem()->getNumGroups() - 1) +#define OTHER_OUTPUT_GROUP_EXCLUSIVE_SECTION \ + if (combigrid::theMPISystem()->getProcessGroupNumber() == \ + static_cast(combigrid::theMPISystem()->getNumGroups() - 1)) // middle process can do command line output -#define MIDDLE_PROCESS_EXCLUSIVE_SECTION \ - if (combigrid::theMPISystem()->getWorldRank() == \ - (combigrid::theMPISystem()->getNumGroups() * combigrid::theMPISystem()->getNumProcs()) / 2) - +#define MIDDLE_PROCESS_EXCLUSIVE_SECTION \ + if (combigrid::theMPISystem()->getWorldRank() == \ + static_cast( \ + (combigrid::theMPISystem()->getNumGroups() * combigrid::theMPISystem()->getNumProcs()) / \ + 2)) #define 
GLOBAL_MANAGER_EXCLUSIVE_SECTION if (combigrid::theMPISystem()->isGlobalManager()) @@ -160,6 +172,8 @@ class MPISystem { inline const CommunicatorType& getOutputComm() const; + inline RankType getFilePartNumber() const; + inline const std::vector& getThirdLevelComms() const; /** @@ -312,7 +326,7 @@ class MPISystem { void storeLocalComm(CommunicatorType lcomm); /* let the output "group" be distributed across the actual process groups */ - void initOuputGroupComm(uint16_t numFileParts = 1); + void initOutputGroupComm(uint16_t numFileParts = 1); private: explicit MPISystem(); @@ -494,6 +508,18 @@ class MPISystem { std::vector reusableRanks_; }; +static inline int getCommSize(const CommunicatorType& comm) { + int commSize; + MPI_Comm_size(comm, &commSize); + return commSize; +} + +static inline int getCommRank(const CommunicatorType& comm) { + int commRank; + MPI_Comm_rank(comm, &commRank); + return commRank; +} + /*!\name MPI communication system setup functions */ //@{ inline MPISystemID theMPISystem(); @@ -548,19 +574,31 @@ inline const CommunicatorType& MPISystem::getGlobalReduceComm() const { return globalReduceComm_; } -inline const CommunicatorType& MPISystem::getOutputGroupComm() const { - return outputGroupComm_; -} +inline const CommunicatorType& MPISystem::getOutputGroupComm() const { return outputGroupComm_; } inline const CommunicatorType& MPISystem::getOutputComm() const { + OUTPUT_GROUP_EXCLUSIVE_SECTION { return outputComm_; } + else { + throw std::runtime_error("Called from outside output group!"); + } +} + +inline RankType MPISystem::getFilePartNumber() const { + int filePart = -1; OUTPUT_GROUP_EXCLUSIVE_SECTION { - return outputComm_; - } else { + if (combigrid::theMPISystem()->getOutputComm() != MPI_COMM_NULL) { + auto comm = theMPISystem()->getOutputComm(); + auto outputGroupSize = combigrid::getCommSize(comm); + filePart = theMPISystem()->getOutputGroupRank() / outputGroupSize; + } + } + else { throw std::runtime_error("Called from outside output group!"); } + return filePart; } -inline const std::vector& MPISystem::getThirdLevelComms() const{ +inline const std::vector& MPISystem::getThirdLevelComms() const { checkPreconditions(); return thirdLevelComms_; @@ -613,7 +651,7 @@ inline const RankType& MPISystem::getOutputGroupRank() const{ inline RankType MPISystem::getOutputRankInGlobalReduceComm() const { // my local rank gives the index of the global reduce comm, // and the output rank in the global reduce comm is the index modulo the number of groups - return localRank_ % ngroup_; + return static_cast(localRank_ % ngroup_); } inline const RankType& MPISystem::getGlobalRank() const { @@ -676,17 +714,6 @@ inline size_t MPISystem::getNumProcs() const { inline bool MPISystem::isInitialized() const { return initialized_; } -static inline int getCommSize(const CommunicatorType& comm) { - int commSize; - MPI_Comm_size(comm, &commSize); - return commSize; -} - -static inline int getCommRank(const CommunicatorType& comm) { - int commRank; - MPI_Comm_rank(comm, &commRank); - return commRank; -} inline RankType MPISystem::getWorldSize() const { int worldSize = getCommSize(getWorldComm()); diff --git a/src/sparsegrid/AnyDistributedSparseGrid.cpp b/src/sparsegrid/AnyDistributedSparseGrid.cpp index a0eaf1c65..e6f31dea3 100644 --- a/src/sparsegrid/AnyDistributedSparseGrid.cpp +++ b/src/sparsegrid/AnyDistributedSparseGrid.cpp @@ -12,7 +12,7 @@ namespace combigrid { AnyDistributedSparseGrid::AnyDistributedSparseGrid(size_t numSubspaces, CommunicatorType comm) : comm_(comm), 
subspacesDataSizes_(numSubspaces) { // make sure numSubspaces fits into SubspaceIndexType - assert(numSubspaces <= std::numeric_limits::max()); + assert(numSubspaces <= static_cast(std::numeric_limits::max())); MPI_Comm_rank(comm_, &rank_); } size_t AnyDistributedSparseGrid::getAccumulatedDataSize() const { @@ -56,6 +56,10 @@ SubspaceSizeType AnyDistributedSparseGrid::getDataSize(SubspaceIndexType i) cons return subspacesDataSizes_[i]; } +SubspaceSizeType AnyDistributedSparseGrid::getDataSize(size_t i) const { + return this->getDataSize(static_cast(i)); +} + std::set& AnyDistributedSparseGrid::getIngroupSubspaces() const { assert(this->getSubspacesByCommunicator().size() < 2); @@ -134,7 +138,7 @@ std::vector getSubspaceVote( // allocate vector of long long std::vector subspaceVote(subspacesDataSizes.size(), 0); // set to mySummand if we have data in this subspace - for (AnyDistributedSparseGrid::SubspaceIndexType i = 0; i < subspacesDataSizes.size(); ++i) { + for (size_t i = 0; i < subspacesDataSizes.size(); ++i) { if (subspacesDataSizes[i] > 0) { subspaceVote[i] = mySummand; } diff --git a/src/sparsegrid/AnyDistributedSparseGrid.hpp b/src/sparsegrid/AnyDistributedSparseGrid.hpp index c79aeb078..b09fa90f4 100644 --- a/src/sparsegrid/AnyDistributedSparseGrid.hpp +++ b/src/sparsegrid/AnyDistributedSparseGrid.hpp @@ -40,6 +40,7 @@ class AnyDistributedSparseGrid { // data size of the subspace at index i SubspaceSizeType getDataSize(SubspaceIndexType i) const; + SubspaceSizeType getDataSize(size_t i) const; std::set& getIngroupSubspaces() const; diff --git a/src/sparsegrid/DistributedSparseGridIO.hpp b/src/sparsegrid/DistributedSparseGridIO.hpp index 6e110ad52..58cd3ffca 100644 --- a/src/sparsegrid/DistributedSparseGridIO.hpp +++ b/src/sparsegrid/DistributedSparseGridIO.hpp @@ -17,15 +17,15 @@ namespace DistributedSparseGridIO { template void reduceVectorTowardsMe(std::vector& vectorToReduce, MPI_Comm comm, MPI_Op operation) { MPI_Datatype dataType = getMPIDatatype(abstraction::getabstractionDataType()); - MPI_Reduce(MPI_IN_PLACE, vectorToReduce.data(), vectorToReduce.size(), dataType, operation, 0, - comm); + MPI_Reduce(MPI_IN_PLACE, vectorToReduce.data(), static_cast(vectorToReduce.size()), dataType, + operation, 0, comm); } template void reduceVectorTowardsThem(std::vector& vectorToReduce, MPI_Comm comm, MPI_Op operation) { MPI_Datatype dataType = getMPIDatatype(abstraction::getabstractionDataType()); - MPI_Reduce(vectorToReduce.data(), MPI_IN_PLACE, vectorToReduce.size(), dataType, operation, 0, - comm); + MPI_Reduce(vectorToReduce.data(), MPI_IN_PLACE, static_cast(vectorToReduce.size()), dataType, + operation, 0, comm); } template @@ -131,18 +131,19 @@ int readOneFileAndReduce(SparseGridType& dsg, const std::string& fileName, uint32_t maxMiBToReadPerThread) { auto comm = dsg.getCommunicator(); - const int numElementsToBuffer = - CombiCom::getGlobalReduceChunkSize( - maxMiBToReadPerThread); - // get offset in file const MPI_Offset len = dsg.getRawDataSize(); auto data = dsg.getRawData(); #ifdef DISCOTEC_USE_LZ4 - int numReduced = mpiio::readReduceCompressedValuesConsecutive( - data, len, fileName, comm, //numElementsToBuffer, - std::plus{}); + int numReduced = + mpiio::readReduceCompressedValuesConsecutive( + data, len, fileName, comm, // numElementsToBuffer, + std::plus{}); #else + const int numElementsToBuffer = + CombiCom::getGlobalReduceChunkSize( + maxMiBToReadPerThread); + int numReduced = mpiio::readReduceValuesConsecutive( data, len, fileName, comm, numElementsToBuffer, 
std::plus{}); @@ -172,11 +173,8 @@ int writeSomeFiles(const SparseGridType& dsg, const std::string& fileName, } template -int readSomeFiles(SparseGridType& dsg, const std::string& fileName) { +int readSomeFiles(SparseGridType& dsg, const std::string& filePartName) { auto comm = theMPISystem()->getOutputComm(); - auto outputGroupSize = getCommSize(comm); - auto filePart = theMPISystem()->getOutputGroupRank() / outputGroupSize; - std::string filePartName = fileName + ".part" + std::to_string(filePart); // get offset in file MPI_Offset len = dsg.getRawDataSize(); @@ -192,26 +190,22 @@ int readSomeFiles(SparseGridType& dsg, const std::string& fileName) { } template -int readSomeFilesAndReduce(SparseGridType& dsg, const std::string& fileName, +int readSomeFilesAndReduce(SparseGridType& dsg, const std::string& filePartName, uint32_t maxMiBToReadPerThread) { auto comm = theMPISystem()->getOutputComm(); - auto outputGroupSize = getCommSize(comm); - auto filePart = theMPISystem()->getOutputGroupRank() / outputGroupSize; - std::string filePartName = fileName + ".part" + std::to_string(filePart); - - const int numElementsToBuffer = - CombiCom::getGlobalReduceChunkSize( - maxMiBToReadPerThread); - // get offset in file const MPI_Offset len = dsg.getRawDataSize(); auto data = dsg.getRawData(); #ifdef DISCOTEC_USE_LZ4 - int numReduced = mpiio::readReduceCompressedValuesConsecutive( - data, len, filePartName, comm, //numElementsToBuffer, - std::plus{}); + int numReduced = + mpiio::readReduceCompressedValuesConsecutive( + data, len, filePartName, comm, // numElementsToBuffer, + std::plus{}); #else + const int numElementsToBuffer = + CombiCom::getGlobalReduceChunkSize( + maxMiBToReadPerThread); int numReduced = mpiio::readReduceValuesConsecutive( data, len, filePartName, comm, numElementsToBuffer, std::plus{}); @@ -246,7 +240,7 @@ int readReduceSubspaceSizesFromFile(SparseGridType& dsg, const std::string& file auto comm = dsg.getCommunicator(); MPI_Offset len = dsg.getNumSubspaces(); if (numElementsToBuffer == 0) { - numElementsToBuffer = len; + numElementsToBuffer = static_cast(len); } int numReduced = mpiio::readReduceValuesConsecutive( @@ -263,7 +257,7 @@ int readReduceSubspaceSizesFromFiles(SparseGridType& dsg, const std::vector(len); } int numReduced = mpiio::readMultipleReduceValuesConsecutive( diff --git a/src/sparsegrid/DistributedSparseGridUniform.hpp b/src/sparsegrid/DistributedSparseGridUniform.hpp index c350f4992..d8b90df64 100644 --- a/src/sparsegrid/DistributedSparseGridUniform.hpp +++ b/src/sparsegrid/DistributedSparseGridUniform.hpp @@ -53,7 +53,7 @@ class DistributedSparseGridDataContainer { SubspaceSizeType offset = 0; for (size_t i = 0; i < subspaces_.size(); i++) { subspaces_[i] = subspacesData_.data() + offset; - if (subspacesWithData_.find(i) != subspacesWithData_.end()) { + if (subspacesWithData_.find(static_cast(i)) != subspacesWithData_.end()) { offset += dsgu_.getSubspaceDataSizes()[i]; } } @@ -84,7 +84,7 @@ class DistributedSparseGridDataContainer { SubspaceSizeType offset = 0; for (size_t i = 0; i < kahanDataBegin_.size(); i++) { kahanDataBegin_[i] = kahanData_.data() + offset; - if (subspacesWithData_.find(i) != subspacesWithData_.end()) { + if (subspacesWithData_.find(static_cast(i)) != subspacesWithData_.end()) { offset += dsgu_.getSubspaceDataSizes()[i]; } } @@ -213,6 +213,7 @@ class DistributedSparseGridUniform : public AnyDistributedSparseGrid { // return level vector of subspace i inline const LevelVector& getLevelVector(SubspaceIndexType i) const; + inline const 
LevelVector& getLevelVector(size_t i) const; inline SubspaceIndexType getIndexInRange(const LevelVector& l, IndexType lowerBound) const; @@ -422,7 +423,7 @@ std::vector DistributedSparseGridUniform::createLevels( combigrid::createTruncatedHierarchicalLevels(nmax, lmin, created); // std::sort(created.begin(), created.end()); assert(std::is_sorted(created.begin(), created.end())); - if (created.size() > std::numeric_limits::max()) { + if (created.size() > static_cast(std::numeric_limits::max())) { throw std::runtime_error("number of subspaces exceeds the maximum value of SubspaceIndexType"); } return created; @@ -442,6 +443,11 @@ inline const LevelVector& DistributedSparseGridUniform::getLevelVect return *levelIterator; } +template +inline const LevelVector& DistributedSparseGridUniform::getLevelVector(size_t i) const { + return this->getLevelVector(static_cast(i)); +} + template typename DistributedSparseGridUniform::SubspaceIndexType DistributedSparseGridUniform::getIndexInRange(const LevelVector& l, @@ -473,7 +479,7 @@ template inline FG_ELEMENT* DistributedSparseGridUniform::getData(SubspaceIndexType i) { #ifndef NDEBUG assert(isSubspaceDataCreated()); - assert(i < this->subspacesDataContainer_.subspaces_.size()); + assert(static_cast(i) < this->subspacesDataContainer_.subspaces_.size()); assert(this->subspacesDataContainer_.subspaces_[i] <= &(*(this->subspacesDataContainer_.subspacesData_.end()))); // if (this->subspacesDataContainer_.subspaces_[i] == @@ -489,7 +495,7 @@ inline const FG_ELEMENT* DistributedSparseGridUniform::getData( SubspaceIndexType i) const { #ifndef NDEBUG assert(isSubspaceDataCreated()); - assert(i < this->subspacesDataContainer_.subspaces_.size()); + assert(static_cast(i) < this->subspacesDataContainer_.subspaces_.size()); assert(this->subspacesDataContainer_.subspaces_[i] <= &(*(this->subspacesDataContainer_.subspacesData_.end()))); if (this->subspacesDataContainer_.subspaces_[i] == @@ -617,7 +623,7 @@ void DistributedSparseGridUniform::accumulateMinMaxCoefficients() { this->getCurrentlyAllocatedSubspaces().cend()); #pragma omp parallel for default(none) schedule(guided) \ shared(currentlyAllocatedSubspaceIndices, smaller_real) - for (SubspaceIndexType iAllocated = 0; iAllocated < currentlyAllocatedSubspaceIndices.size(); + for (size_t iAllocated = 0; iAllocated < currentlyAllocatedSubspaceIndices.size(); ++iAllocated) { SubspaceIndexType i = currentlyAllocatedSubspaceIndices[iAllocated]; if (this->getSubspaceDataSizes()[i] > 0) { @@ -664,7 +670,7 @@ inline void DistributedSparseGridUniform::registerDistributedFullGri if (subSgDataSize == 0) { this->setDataSize(index, numPointsOfSubspace); } else { - ASSERT(subSgDataSize == numPointsOfSubspace, + ASSERT(static_cast(subSgDataSize) == numPointsOfSubspace, "subSgDataSize: " << subSgDataSize << ", numPointsOfSubspace: " << numPointsOfSubspace << " , level " << level << " , rank " << this->rank_ << std::endl); @@ -713,7 +719,7 @@ inline void DistributedSparseGridUniform::addDistributedFullGrid( assert(sDataSize == this->getDataSize(sIndex)); assert(sDataSize == this->getAllocatedDataSize(sIndex)); assert(std::distance(this->getData(0), sPointer) < - this->subspacesDataContainer_.subspacesData_.size()); + static_cast(this->subspacesDataContainer_.subspacesData_.size())); auto kDataSize = this->subspacesDataContainer_.kahanDataBegin_[sIndex + 1] - kPointer; assert(kDataSize == this->getDataSize(sIndex)); } diff --git a/src/task/Task.cpp b/src/task/Task.cpp index 1417afcb2..d106b89ed 100644 --- a/src/task/Task.cpp +++ 
b/src/task/Task.cpp @@ -28,7 +28,7 @@ Task::Task(const LevelVector& l, const std::vector& boundary, real Task::~Task() { delete faultCriterion_; } -const DistributedFullGrid& Task::getDistributedFullGrid(int n) const { +const DistributedFullGrid& Task::getDistributedFullGrid(size_t n) const { throw std::runtime_error("const getDistributedFullGrid called but not implemented"); } diff --git a/src/task/Task.hpp b/src/task/Task.hpp index 2e2ef5574..9b12e7842 100644 --- a/src/task/Task.hpp +++ b/src/task/Task.hpp @@ -77,7 +77,7 @@ class Task { } virtual void init(CommunicatorType lcomm, - std::vector decomposition = std::vector()) = 0; + const std::vector& decomposition = std::vector()) = 0; // inline real estimateRuntime() const; @@ -90,9 +90,9 @@ class Task { int n = 0) = 0; // This method returns the local part of the n-th distributedFullGrid - virtual DistributedFullGrid& getDistributedFullGrid(int n = 0) = 0; + virtual DistributedFullGrid& getDistributedFullGrid(size_t n = 0) = 0; - virtual const DistributedFullGrid& getDistributedFullGrid(int n) const; + virtual const DistributedFullGrid& getDistributedFullGrid(size_t n) const; virtual void setZero() = 0; diff --git a/src/third_level/NetworkUtils.cpp b/src/third_level/NetworkUtils.cpp index e38fa9d9a..1c744fc15 100644 --- a/src/third_level/NetworkUtils.cpp +++ b/src/third_level/NetworkUtils.cpp @@ -94,7 +94,6 @@ bool ClientSocket::recvall(std::string& mesg, size_t len, int flags) const { assert(isInitialized() && "Client Socket not initialized"); assert(len > 0); //std::cout << "Trying to receive " << len << "Bytes" << std::endl; - ssize_t recvd = -1; std::unique_ptr buff(new char[len]); auto ret = this->recvall(buff.get(), len, flags); mesg = std::string(buff.get(), len); diff --git a/src/third_level/NetworkUtils.hpp b/src/third_level/NetworkUtils.hpp index 605d86324..0794a375e 100644 --- a/src/third_level/NetworkUtils.hpp +++ b/src/third_level/NetworkUtils.hpp @@ -189,7 +189,7 @@ bool ClientSocket::recvallBinaryAndCorrectInPlace(FG_ELEMENT* buff, assert(isInitialized() && "Client Socket not initialized"); // receive endianness char temp = ' '; - auto recvAllSuccess = recvall(&temp, 1); + [[maybe_unused]] auto recvAllSuccess = recvall(&temp, 1); assert(recvAllSuccess && "Receiving Endianess failed"); bool hasSameEndianness = bool(temp) == NetworkUtils::isLittleEndian(); @@ -300,7 +300,7 @@ bool ClientSocket::recvallBinaryAndReduceInPlace(FG_ELEMENT* buff, assert(isInitialized() && "Client Socket not initialized"); // receive endianness char temp = ' '; - auto recvSuccess = recvall(&temp, 1); + [[maybe_unused]] auto recvSuccess = recvall(&temp, 1); assert(recvSuccess && "Receiving Endianess failed"); bool hasSameEndianness = bool(temp) == NetworkUtils::isLittleEndian(); diff --git a/src/third_level/ThirdLevelUtils.hpp b/src/third_level/ThirdLevelUtils.hpp index c3ffe32cb..b5212e09f 100644 --- a/src/third_level/ThirdLevelUtils.hpp +++ b/src/third_level/ThirdLevelUtils.hpp @@ -94,9 +94,9 @@ namespace combigrid { { assert(isConnected_); size_t rawSize = receiveSize(); - size_t recvSize = (rawSize - 1) / sizeof(FG_ELEMENT); // - 1 due to endianness + [[maybe_unused]] size_t recvSize = (rawSize - 1) / sizeof(FG_ELEMENT); // - 1 due to endianness assert(recvSize == size && "Size mismatch receiving data size does not match expected"); - bool success = connection_->recvallBinaryAndCorrectInPlace(data, size); + [[maybe_unused]] bool success = connection_->recvallBinaryAndCorrectInPlace(data, size); assert(success && "receiving dsgu data failed"); 
} @@ -106,9 +106,9 @@ namespace combigrid { { assert(isConnected_); size_t rawSize = receiveSize(); - size_t recvSize = (rawSize - 1) / sizeof(FG_ELEMENT); // - 1 due to endianness + [[maybe_unused]] size_t recvSize = (rawSize - 1) / sizeof(FG_ELEMENT); // - 1 due to endianness assert(recvSize == size && "Size mismatch cannot add vectors of different size"); - bool success = connection_->recvallBinaryAndReduceInPlace(data, size, + [[maybe_unused]] bool success = connection_->recvallBinaryAndReduceInPlace(data, size, [](const FG_ELEMENT& lhs, const FG_ELEMENT& rhs) -> FG_ELEMENT {return lhs + rhs;}); assert(success && "receiving dsgu data failed"); } diff --git a/src/utils/DecompositionUtils.hpp b/src/utils/DecompositionUtils.hpp new file mode 100644 index 000000000..b5201ad6b --- /dev/null +++ b/src/utils/DecompositionUtils.hpp @@ -0,0 +1,94 @@ +#pragma once +#include + +#include "utils/Types.hpp" +namespace combigrid { + +/* a regular (equidistant) domain decompositioning for an even number of processes + * leads to grid points on the (geometrical) process boundaries. + * with the forwardDecomposition flag it can be decided if the grid points on + * the process boundaries belong to the process on the right-hand side (true) + * of the process boundary, or to the one on the left-hand side (false). + */ +static inline std::vector getDefaultDecomposition( + const IndexVector& globalNumPointsPerDimension, + const std::vector& cartesianProcsPerDimension, bool forwardDecomposition) { + auto dim = static_cast(globalNumPointsPerDimension.size()); + assert(cartesianProcsPerDimension.size() == dim); + + // create decomposition vectors + std::vector decomposition(dim); + for (DimType i = 0; i < dim; ++i) { + if (cartesianProcsPerDimension[i] > globalNumPointsPerDimension[i]) { + throw std::invalid_argument( + "change to coarser parallelization! 
currently not all processes can have points."); + } + IndexVector& llbnd = decomposition[i]; + llbnd.resize(cartesianProcsPerDimension[i]); + + for (int j = 0; j < cartesianProcsPerDimension[i]; ++j) { + double tmp = static_cast(globalNumPointsPerDimension[i]) * static_cast(j) / + static_cast(cartesianProcsPerDimension[i]); + + if (forwardDecomposition) + llbnd[j] = static_cast(std::ceil(tmp)); + else + llbnd[j] = static_cast(std::floor(tmp)); + } + } + return decomposition; +} + +inline static std::vector getStandardDecomposition(LevelVector lref, + std::vector& procsRef) { + assert(lref.size() == procsRef.size()); + std::vector decomposition; + for (DimType d = 0; d < static_cast(lref.size()); ++d) { + IndexVector di; + if (procsRef[d] == 1) { + di = {0}; + } else if (procsRef[d] == 2) { + di = {0, powerOfTwo[lref[d]] / procsRef[d] + 1}; + } else if (procsRef[d] == 3) { + di = {0, powerOfTwo[lref[d]] / procsRef[d] + 1, 2 * powerOfTwo[lref[d]] / procsRef[d] + 1}; + } else if (procsRef[d] == 4) { + di = {0, powerOfTwo[lref[d]] / procsRef[d] + 1, 2 * powerOfTwo[lref[d]] / procsRef[d] + 1, + 3 * powerOfTwo[lref[d]] / procsRef[d] + 1}; + } else { + throw std::runtime_error("please implement a test decomposition matching procs and lref"); + } + decomposition.push_back(di); + } + return decomposition; +} + +static inline std::vector downsampleDecomposition( + const std::vector& decomposition, const LevelVector& referenceLevel, + const LevelVector& newLevel, const std::vector& boundary) { + auto newDecomposition = decomposition; + if (decomposition.size() > 0) { + for (DimType d = 0; d < static_cast(referenceLevel.size()); ++d) { + // for now, assume that we never want to interpolate on a level finer than referenceLevel + assert(referenceLevel[d] >= newLevel[d]); + auto levelDiff = referenceLevel[d] - newLevel[d]; + auto stepFactor = oneOverPowOfTwo[levelDiff]; + if (boundary[d] > 0) { + // all levels contain the boundary points -> point 0 is the same + for (auto& dec : newDecomposition[d]) { + dec = static_cast(std::ceil(static_cast(dec) * stepFactor)); + } + } else { + // all levels do not contain the boundary points -> mid point is the same + auto leftProtrusion = powerOfTwo[levelDiff] - 1; + for (auto& dec : newDecomposition[d]) { + // same as before, but subtract the "left" protrusion on the finer level + dec = static_cast( + std::max(0., std::ceil(static_cast(dec - leftProtrusion) * stepFactor))); + } + } + } + } + return newDecomposition; +} + +} // namespace combigrid \ No newline at end of file diff --git a/src/utils/LevelSetUtils.cpp b/src/utils/LevelSetUtils.cpp index 8eb17cc56..fcabc46bc 100644 --- a/src/utils/LevelSetUtils.cpp +++ b/src/utils/LevelSetUtils.cpp @@ -11,8 +11,10 @@ void getDownSetRecursively(combigrid::LevelVector const& l, combigrid::LevelVect } else { // for the whole range of values in dimension d size_t d = fixedDimensions.size(); -// #pragma omp parallel for shared(l, fixedDimensions, downSet) firstprivate(d) default(none) \ -// schedule(static) //leads to unordered downSet, avoid in favor of registration + /* + // #pragma omp parallel for shared(l, fixedDimensions, downSet) firstprivate(d) default(none) \ + // schedule(static) //leads to unordered downSet, avoid in favor of registration + */ for (LevelType i = 1; i <= l[d]; ++i) { // levels start at 1 here, in compliance with our // definition of DistributedSparseGrid auto moreDimensionsFixed = fixedDimensions; diff --git a/src/utils/LevelSetUtils.hpp b/src/utils/LevelSetUtils.hpp index a6f9c4f3d..a0c89fec6 
100644 --- a/src/utils/LevelSetUtils.hpp +++ b/src/utils/LevelSetUtils.hpp @@ -116,7 +116,7 @@ class HypercubeDownSetGenerator { { if (!this->isFinished()) { // find first dimension that can be increased - for (int i = (levelUpTo_.size() - 1); i > -1; --i) { + for (auto i = static_cast(levelUpTo_.size() - 1); i > -1; --i) { if (currentLevel_[i] < levelUpTo_[i]) { ++currentLevel_[i]; break; diff --git a/src/utils/MonteCarlo.cpp b/src/utils/MonteCarlo.cpp index e92093f17..b587cb142 100644 --- a/src/utils/MonteCarlo.cpp +++ b/src/utils/MonteCarlo.cpp @@ -50,7 +50,7 @@ std::vector> getRandomCoordinates(int numCoordinates, size_t d }; for (auto & coord : randomCoords) { std::generate(begin(coord), end(coord), gen); - for (auto & c : coord){ + for ([[maybe_unused]] const auto & c : coord){ assert( c <= 1. && c >=0. ); } } diff --git a/tests/TaskConst.hpp b/tests/TaskConst.hpp index 3e3a84a01..03a4c74bb 100644 --- a/tests/TaskConst.hpp +++ b/tests/TaskConst.hpp @@ -17,7 +17,7 @@ class TaskConst : public combigrid::Task { assert(l.size() == 2); } - void init(CommunicatorType lcomm, std::vector decomposition) { + void init(CommunicatorType lcomm, const std::vector& decomposition) { // parallelization // assert(dfg_ == nullptr); auto nprocs = getCommSize(lcomm); @@ -65,7 +65,7 @@ class TaskConst : public combigrid::Task { dfg_->gatherFullGrid(fg, r); } - DistributedFullGrid& getDistributedFullGrid(int n = 0) { return *dfg_; } + DistributedFullGrid& getDistributedFullGrid(size_t n = 0) override { return *dfg_; } void setZero() { BOOST_CHECK(true); } diff --git a/tests/TaskConstParaboloid.hpp b/tests/TaskConstParaboloid.hpp index 686f94f4f..06435cac0 100644 --- a/tests/TaskConstParaboloid.hpp +++ b/tests/TaskConstParaboloid.hpp @@ -59,7 +59,7 @@ class TaskConstParaboloid : public combigrid::Task { BOOST_TEST_CHECKPOINT("TaskConstParaboloid constructor"); } - void init(CommunicatorType lcomm, std::vector decomposition) override { + void init(CommunicatorType lcomm, const std::vector& decomposition) override { // parallelization // assert(dfg_ == nullptr); auto nprocs = getCommSize(lcomm); @@ -107,18 +107,18 @@ class TaskConstParaboloid : public combigrid::Task { dfg_->gatherFullGrid(fg, r); } - DistributedFullGrid& getDistributedFullGrid(int n = 0) override { + DistributedFullGrid& getDistributedFullGrid(size_t n = 0) override { BOOST_TEST_CHECKPOINT("TaskConstParaboloid getDFG"); return *dfg_; } - const DistributedFullGrid& getDistributedFullGrid(int n = 0) const override { + const DistributedFullGrid& getDistributedFullGrid(size_t n = 0) const override { BOOST_TEST_CHECKPOINT("TaskConstParaboloid getDFG const"); return *dfg_; } real getCurrentTime() const override { - return nsteps_; + return static_cast(nsteps_); } void setZero() override { BOOST_CHECK(true); } diff --git a/tests/TaskCount.hpp b/tests/TaskCount.hpp index 0af089078..44bdd6bbb 100644 --- a/tests/TaskCount.hpp +++ b/tests/TaskCount.hpp @@ -36,7 +36,7 @@ class TaskCount : public combigrid::Task { BOOST_TEST_CHECKPOINT("TaskCount constructor"); } - void init(CommunicatorType lcomm, std::vector decomposition) override { + void init(CommunicatorType lcomm, const std::vector& decomposition) override { auto nprocs = getCommSize(lcomm); std::vector p; if (decomposition.size() == 0) { @@ -85,7 +85,7 @@ class TaskCount : public combigrid::Task { dfg_->gatherFullGrid(fg, r); } - DistributedFullGrid& getDistributedFullGrid(int n = 0) override { return *dfg_; } + DistributedFullGrid& getDistributedFullGrid(size_t n = 0) override { return *dfg_; } 
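Reviewer note (not part of the patch): the HypercubeDownSetGenerator change above replaces a plain int loop index with a cast of levelUpTo_.size() - 1; the cast's target type is not visible in this rendering, so std::int64_t in the sketch below is only an assumption. The point of the change is that a descending loop compared against -1 needs a signed index: with an unsigned index, 'i > -1' promotes -1 to the maximum unsigned value and the body never runs, while 'i >= 0' never terminates.

#include <cstdint>
#include <iostream>
#include <vector>

int main() {
  std::vector<int> v{1, 2, 3};
  // Cast the unsigned size to a signed type before counting down to -1.
  for (auto i = static_cast<std::int64_t>(v.size()) - 1; i > -1; --i) {
    std::cout << v[static_cast<std::size_t>(i)] << '\n';
  }
  return 0;
}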
void setZero() override { BOOST_TEST_CHECKPOINT("TaskCount setZero"); diff --git a/tests/test_distributedfullgrid.cpp b/tests/test_distributedfullgrid.cpp index 8ccc6bbc8..19fb08e8a 100644 --- a/tests/test_distributedfullgrid.cpp +++ b/tests/test_distributedfullgrid.cpp @@ -229,7 +229,7 @@ void checkDistributedFullgrid(LevelVector& levels, std::vector& procs, // check that InnerNodalBasisFunctionIntegral is correct double numFullInnerBasisFcns = 1.; for (DimType d = 0; d < dim; ++d) { - numFullInnerBasisFcns *= static_cast(dfg.length(d) - 1); + numFullInnerBasisFcns *= static_cast(dfg.globalNumPointsInDimension(d) - 1); } BOOST_CHECK_CLOSE(dfg.getInnerNodalBasisFunctionIntegral(), 1. / numFullInnerBasisFcns, TestHelper::tolerance); @@ -271,7 +271,7 @@ void checkDistributedFullgrid(LevelVector& levels, std::vector& procs, --globAxisIndex[d]; if (boundary[d] == 1 && globAxisIndex[d] < 0) { // wrap around - globAxisIndex[d] += dfg.length(d); + globAxisIndex[d] += dfg.globalNumPointsInDimension(d); } std::vector coords(dim); dfg.getCoordsGlobal(dfg.getGlobalLinearIndex(globAxisIndex), coords); diff --git a/tests/test_distributedsparsegrid.cpp b/tests/test_distributedsparsegrid.cpp index e25a16a2f..f441bfac1 100644 --- a/tests/test_distributedsparsegrid.cpp +++ b/tests/test_distributedsparsegrid.cpp @@ -19,6 +19,7 @@ #include "sparsegrid/DistributedSparseGridIO.hpp" #include "sparsegrid/DistributedSparseGridUniform.hpp" #include "sparsegrid/SGrid.hpp" +#include "utils/DecompositionUtils.hpp" #include "utils/IndexVector.hpp" #include "utils/LevelSetUtils.hpp" #include "utils/MonteCarlo.hpp" @@ -185,7 +186,7 @@ void checkDistributedSparsegrid(LevelVector& lmin, LevelVector& lmax, std::vecto } auto reduceSuccess = DistributedSparseGridIO::readOneFileAndReduce(*uniDSGfromSubspaces, "test_sg_all", 1); - BOOST_CHECK_EQUAL(readSuccess, uniDSGfromSubspaces->getRawDataSize()); + BOOST_CHECK_EQUAL(reduceSuccess, uniDSGfromSubspaces->getRawDataSize()); if (readSuccess) { BOOST_TEST_CHECKPOINT("compare double values"); for (size_t i = 0; i < uniDSG->getRawDataSize(); ++i) { @@ -1037,9 +1038,7 @@ BOOST_AUTO_TEST_CASE(test_sparseGridAndSubspaceReduce) { const auto& subspacesByComm = uniDSG->getSubspacesByCommunicator(); std::set checkedSubspaces{}; for (const auto& subspaces : subspacesByComm) { - auto commSize = combigrid::getCommSize(subspaces.first); for (const auto& subspace : subspaces.second) { - auto subspaceStart = uniDSG->getData(subspace); for (SubspaceSizeType j = 0; j < uniDSG->getDataSize(subspace); ++j) { // BOOST_CHECK_EQUAL(subspaceStart[j], ??); } @@ -1082,9 +1081,7 @@ BOOST_AUTO_TEST_CASE(test_sparseGridAndSubspaceReduce) { // check that the data is correct checkedSubspaces.clear(); for (const auto& subspaces : subspacesByComm) { - auto commSize = combigrid::getCommSize(subspaces.first); for (const auto& subspace : subspaces.second) { - auto subspaceStart = uniDSG->getData(subspace); for (SubspaceSizeType j = 0; j < uniDSG->getDataSize(subspace); ++j) { // BOOST_CHECK_EQUAL(subspaceStart[j], ??); } diff --git a/tests/test_ftolerance.cpp b/tests/test_ftolerance.cpp index ac6cbbd8a..cac7a5712 100644 --- a/tests/test_ftolerance.cpp +++ b/tests/test_ftolerance.cpp @@ -65,7 +65,7 @@ class TaskAdvFDM : public combigrid::Task { } void init(CommunicatorType lcomm, - std::vector decomposition = std::vector()) { + const std::vector& decomposition = std::vector()) override { // only use one process per group std::vector p(getDim(), 1); @@ -94,8 +94,8 @@ class TaskAdvFDM : public combigrid::Task { // 
gradient of phi std::vector dphi(getDim()); - IndexType l0 = dfg_->length(0); - IndexType l1 = dfg_->length(1); + IndexType l0 = dfg_->globalNumPointsInDimension(0); + IndexType l1 = dfg_->globalNumPointsInDimension(1); double h0 = 1.0 / (double)l0; double h1 = 1.0 / (double)l1; @@ -139,7 +139,7 @@ class TaskAdvFDM : public combigrid::Task { dfg_->gatherFullGrid(fg, r); } - DistributedFullGrid& getDistributedFullGrid(int n = 0) { return *dfg_; } + DistributedFullGrid& getDistributedFullGrid(size_t n = 0) override { return *dfg_; } void setZero() {} @@ -189,7 +189,7 @@ class TaskAdvFDM : public combigrid::Task { //real t = dt_ * nsteps_ * combiStep_; if (combiStep_ != 0 && faultCriterion_->failNow(combiStep_, -1.0, globalRank)){ std::cout<<"Rank "<< globalRank <<" failed at iteration "<getManagerRank(), TRANSFER_STATUS_TAG, theMPISystem()->getGlobalCommFT() ); diff --git a/tests/test_helper.hpp b/tests/test_helper.hpp index c76e1762c..5b8bfc20c 100644 --- a/tests/test_helper.hpp +++ b/tests/test_helper.hpp @@ -39,6 +39,9 @@ namespace TestHelper{ return MPI_COMM_NULL; } } + static inline MPI_Comm getComm(size_t nprocs) { + return getComm(static_cast(nprocs)); + } static inline MPI_Comm getCommSelfAsCartesian(combigrid::DimType dimensionality) { std::vector dims(dimensionality, 1); diff --git a/tests/test_hierarchization.cpp b/tests/test_hierarchization.cpp index 9caf0bfaf..5939459c7 100644 --- a/tests/test_hierarchization.cpp +++ b/tests/test_hierarchization.cpp @@ -192,7 +192,7 @@ real checkConservationOfMomentum(DistributedFullGrid& dfg, BOOST_CHECK(std::all_of(boundary.begin(), boundary.end(), [](BoundaryType b) { return b == 2; })); real mcMassBefore = getMonteCarloMass(dfg, nPointsMonteCarlo); auto mcMomentaBefore = getMonteCarloMomenta(dfg, nPointsMonteCarlo); - real mcMomentumBefore = mcMomentaBefore.back(); + // real mcMomentumBefore = mcMomentaBefore.back(); BOOST_TEST_CHECKPOINT("begin hierarchization"); auto dim = dfg.getDimension(); @@ -1428,7 +1428,7 @@ BOOST_AUTO_TEST_CASE(momentum) { fillDFGbyFunction(constFctn, dfg3); // usually, momentum will not be conserved for periodic BC, but for constant functions it // should work - auto momentum = checkConservationOfMomentum( + [[maybe_unused]] auto momentum = checkConservationOfMomentum( dfg3, DistributedHierarchization::hierarchizeBiorthogonalPeriodic); } { diff --git a/tests/test_integration.cpp b/tests/test_integration.cpp index 691f2a405..c7a46565b 100644 --- a/tests/test_integration.cpp +++ b/tests/test_integration.cpp @@ -358,7 +358,7 @@ void checkPassingHierarchicalBases(size_t ngroup = 1, size_t nprocs = 1) { combischeme.createAdaptiveCombischeme(); std::vector levels = combischeme.getCombiSpaces(); std::vector coeffs = combischeme.getCoeffs(); - auto numDOF = printCombiDegreesOfFreedom(levels, boundary); + [[maybe_unused]] auto numDOF = printCombiDegreesOfFreedom(levels, boundary); // create Tasks TaskContainer tasks; diff --git a/tests/test_io.cpp b/tests/test_io.cpp index b9b37a0bf..4d59cb9d9 100644 --- a/tests/test_io.cpp +++ b/tests/test_io.cpp @@ -86,7 +86,7 @@ void checkCompressionWithHeader(size_t numValues = 1000000) { std::string filename = "compression_testfile_" + std::to_string(theMPISystem()->getProcessGroupNumber()); - auto numBytesWritten = mpiio::writeCompressedValuesConsecutive( + mpiio::writeCompressedValuesConsecutive( originalValues.data(), originalValues.size(), filename, localComm); std::vector decompressedValues(originalValues.size()); diff --git a/tests/test_reduce.cpp b/tests/test_reduce.cpp index 
c07ad53f5..8d805e8ae 100644 --- a/tests/test_reduce.cpp +++ b/tests/test_reduce.cpp @@ -37,7 +37,7 @@ class TaskConst : public combigrid::Task { assert(l.size() == 2); } - void init(CommunicatorType lcomm, std::vector decomposition) { + void init(CommunicatorType lcomm, const std::vector& decomposition) { // parallelization // assert(dfg_ == nullptr); auto nprocs = getCommSize(lcomm); @@ -87,7 +87,7 @@ class TaskConst : public combigrid::Task { dfg_->gatherFullGrid(fg, r); } - DistributedFullGrid& getDistributedFullGrid(int n = 0) { return *dfg_; } + DistributedFullGrid& getDistributedFullGrid(size_t n = 0) override { return *dfg_; } void setZero() {} diff --git a/tests/test_rescheduling.cpp b/tests/test_rescheduling.cpp index 6dcd02f9b..b4e13081b 100644 --- a/tests/test_rescheduling.cpp +++ b/tests/test_rescheduling.cpp @@ -77,7 +77,7 @@ class TestingTask : public combigrid::Task { assert(l.size() == 2); } - void init(CommunicatorType lcomm, std::vector decomposition) override { + void init(CommunicatorType lcomm, const std::vector& decomposition) override { // parallelization // assert(dfg_ == nullptr); auto nprocs = getCommSize(lcomm); @@ -113,7 +113,7 @@ class TestingTask : public combigrid::Task { dfg_->gatherFullGrid(fg, r); } - DistributedFullGrid& getDistributedFullGrid(int n = 0) override { return *dfg_; } + DistributedFullGrid& getDistributedFullGrid(size_t n = 0) override { return *dfg_; } void setZero() override {} diff --git a/tests/test_task.cpp b/tests/test_task.cpp index b6cd770f9..7d482b6a7 100644 --- a/tests/test_task.cpp +++ b/tests/test_task.cpp @@ -26,7 +26,7 @@ class TaskTest : public combigrid::Task { : Task(l, boundary, coeff, loadModel), test(t) {} void init(CommunicatorType lcomm, - std::vector decomposition = std::vector()) override { + const std::vector& decomposition = std::vector()) override { // create dummy dfg std::vector p(getDim(), 1); dfg_ = new OwningDistributedFullGrid(getDim(), getLevelVector(), lcomm, @@ -39,7 +39,7 @@ class TaskTest : public combigrid::Task { dfg_->gatherFullGrid(fg, r); } - DistributedFullGrid& getDistributedFullGrid(int n = 0) override { return *dfg_; } + DistributedFullGrid& getDistributedFullGrid(size_t n = 0) override { return *dfg_; } void setZero() override {} diff --git a/tests/test_tensor.cpp b/tests/test_tensor.cpp index 131ad2808..168a550ff 100644 --- a/tests/test_tensor.cpp +++ b/tests/test_tensor.cpp @@ -102,7 +102,7 @@ BOOST_AUTO_TEST_CASE(test_iterate_lower_boundaries_6d) { IndexType numberOfPointsVisited = 0; const auto strideInThisDimension = tensor.getOffsetsVector()[d]; const IndexType jump = strideInThisDimension * tensor.getExtentsVector()[d]; - const IndexType numberOfPolesHigherDimensions = tensor.size() / jump; + const IndexType numberOfPolesHigherDimensions = static_cast(tensor.size()) / jump; IndexType tensorLowestLayerIteratedIndex = 0; BOOST_TEST_CHECKPOINT("iterate lowest layer of tensor"); diff --git a/tests/test_thirdLevel.cpp b/tests/test_thirdLevel.cpp index d5b73299c..b257fc87f 100644 --- a/tests/test_thirdLevel.cpp +++ b/tests/test_thirdLevel.cpp @@ -22,6 +22,7 @@ #include "sparsegrid/DistributedSparseGridUniform.hpp" #include "task/Task.hpp" #include "utils/Config.hpp" +#include "utils/DecompositionUtils.hpp" #include "utils/MonteCarlo.hpp" #include "utils/Types.hpp" #include "stdlib.h" @@ -581,7 +582,7 @@ void testCombineThirdLevelWithoutManagers( true); if (testParams.nprocs > 1 && thirdLevelExtraSparseGrid) { - theMPISystem()->initOuputGroupComm(testParams.nprocs / 2); + 
theMPISystem()->initOutputGroupComm(testParams.nprocs / 2); } auto loadmodel = std::unique_ptr(new LinearLoadModel()); std::vector boundary(testParams.dim, testParams.boundary); @@ -612,7 +613,7 @@ void testCombineThirdLevelWithoutManagers( for (size_t i = 0; i < systemLevels.size(); ++i) { // assign round-robin to process groups - if (i % theMPISystem()->getNumGroups() == theMPISystem()->getProcessGroupNumber()) { + if (static_cast(i % theMPISystem()->getNumGroups()) == theMPISystem()->getProcessGroupNumber()) { // find index in full list auto position = std::find(systemLevels.begin(), systemLevels.end(), systemLevels[i]); BOOST_REQUIRE(position != systemLevels.end()); @@ -790,9 +791,7 @@ void testPretendThirdLevel(TestParams& testParams) { BOOST_CHECK(testParams.comm != MPI_COMM_NULL); combigrid::Stats::initialize(); - - size_t procsPerSys = testParams.ngroup * testParams.nprocs + 1; - + // size_t procsPerSys = testParams.ngroup * testParams.nprocs + 1; theMPISystem()->initWorldReusable(testParams.comm, testParams.ngroup, testParams.nprocs); WORLD_MANAGER_EXCLUSIVE_SECTION { diff --git a/tests/test_worker_only.cpp b/tests/test_worker_only.cpp index cdde15666..c3539c46c 100644 --- a/tests/test_worker_only.cpp +++ b/tests/test_worker_only.cpp @@ -199,7 +199,7 @@ void checkWorkerOnly(size_t ngroup = 1, size_t nprocs = 1, BoundaryType boundary if (pretendThirdLevel || params.getCombinationVariant() == CombinationVariant::sparseGridReduce) { BOOST_TEST_CHECKPOINT("write DSGS " + filename); - numWritten = worker.writeDSGsToDisk(filename); + numWritten = worker.writeDSGsToDisk(filename, ""); BOOST_CHECK(numWritten > 0); } } diff --git a/third_level_manager/test_uftp.cpp b/third_level_manager/test_uftp.cpp index 7b89636f3..ccd2f514b 100644 --- a/third_level_manager/test_uftp.cpp +++ b/third_level_manager/test_uftp.cpp @@ -24,7 +24,7 @@ using namespace combigrid; -void mockUpDSGWriteToDisk(std::string filePrefix, +void mockUpDSGWriteToDisk(const std::string& filePrefix, const std::vector& dsgPartitionSizes) { for (size_t partitionIndex = 0; partitionIndex < dsgPartitionSizes.size(); ++partitionIndex) { std::string myFilename = filePrefix + std::to_string(partitionIndex); @@ -42,7 +42,7 @@ void mockUpDSGWriteToDisk(std::string filePrefix, } } -void writeRandomDataToDisk(std::string filePrefix, +void writeRandomDataToDisk(const std::string& filePrefix, const std::vector& dsgPartitionSizes) { Stats::startEvent("uftp write"); auto actualPartitionSizes = dsgPartitionSizes; @@ -53,7 +53,7 @@ void writeRandomDataToDisk(std::string filePrefix, auto sum = std::accumulate(actualPartitionSizes.begin(), actualPartitionSizes.end(), 0); actualPartitionSizes.back() += (dsgPartitionSizes[0] - sum); } - auto sumDOF = std::accumulate(actualPartitionSizes.begin(), actualPartitionSizes.end(), 0); + // [[maybe_unused]] auto sumDOF = std::accumulate(actualPartitionSizes.begin(), actualPartitionSizes.end(), 0); std::string myFilename = filePrefix + std::to_string(0); // cf. https://stackoverflow.com/a/47742514 { @@ -75,7 +75,7 @@ void writeRandomDataToDisk(std::string filePrefix, Stats::stopEvent("uftp write"); } -void createLargeFile(std::string filePrefix, const std::vector& dsgPartitionSizes) { +void createLargeFile(const std::string& filePrefix, const std::vector& dsgPartitionSizes) { Stats::startEvent("uftp create file"); // cf. 
https://stackoverflow.com/a/47742514 { @@ -90,7 +90,7 @@ void createLargeFile(std::string filePrefix, const std::vector& d Stats::stopEvent("uftp create file"); } -void validateExchangedData(std::string filePrefix, std::string tokenToWaitFor, +void validateExchangedData(const std::string& filePrefix, const std::string& tokenToWaitFor, const std::vector& dsgPartitionSizes) { // generate data on heap std::unique_ptr> mockUpData(new std::vector(dsgPartitionSizes[0])); @@ -118,7 +118,7 @@ void validateExchangedData(std::string filePrefix, std::string tokenToWaitFor, } } -void checkSizeOfFile(std::string filePrefix, std::string tokenToWaitFor, +void checkSizeOfFile(const std::string& filePrefix, const std::string& tokenToWaitFor, const std::vector& dsgPartitionSizes) { Stats::startEvent("uftp wait check size"); Stats::startEvent("uftp wait"); @@ -146,7 +146,7 @@ void checkSizeOfFile(std::string filePrefix, std::string tokenToWaitFor, Stats::stopEvent("uftp wait check size"); } -void readAndInvertDSGFromDisk(std::string filePrefixIn, std::string filePrefixOut, +void readAndInvertDSGFromDisk(const std::string& filePrefixIn, const std::string& filePrefixOut, std::string tokenToWaitFor, const std::vector& dsgPartitionSizes) { std::unique_ptr> readData(new std::vector(dsgPartitionSizes[0])); @@ -224,7 +224,6 @@ int main(int argc, char** argv) { // read in third level parameters if available std::string thirdLevelHost, thirdLevelSSHCommand = ""; unsigned int systemNumber = 0, numSystems = 1; - unsigned short thirdLevelPort = 0; bool hasThirdLevel = static_cast(cfg.get_child_optional("thirdLevel")); std::vector fractionsOfScheme; if (hasThirdLevel) { @@ -232,7 +231,6 @@ int main(int argc, char** argv) { thirdLevelHost = cfg.get("thirdLevel.host"); systemNumber = cfg.get("thirdLevel.systemNumber"); numSystems = cfg.get("thirdLevel.numSystems"); - thirdLevelPort = cfg.get("thirdLevel.port"); thirdLevelSSHCommand = cfg.get("thirdLevel.sshCommand", ""); bool hasFractions = static_cast(cfg.get_child_optional("thirdLevel.fractionsOfScheme")); if (hasFractions) { @@ -261,8 +259,7 @@ int main(int argc, char** argv) { std::vector levels; std::vector coeffs; std::vector taskNumbers; // only used in case of static task assignment - bool useStaticTaskAssignment = false; - long long int ctDOF = 0; + [[maybe_unused]] long long int ctDOF = 0; if (ctschemeFile == "") { /* generate a list of levelvectors and coefficients * CombiMinMaxScheme will create a classical combination scheme. 
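Reviewer note (not part of the patch): the signature changes in test_uftp.cpp above all follow one pattern, taking read-only std::string parameters by const reference so each call avoids a copy of the string. A minimal before/after sketch with illustrative names, not functions from the codebase:

#include <iostream>
#include <string>

// before: void printPrefixed(std::string prefix, int index);  // copies on every call
// after: read-only access, binds to the caller's string without copying
static void printPrefixed(const std::string& prefix, int index) {
  std::cout << prefix << index << '\n';
}

int main() {
  const std::string prefix = "dsg_part_";
  for (int i = 0; i < 3; ++i) {
    printPrefixed(prefix, i);  // no per-call allocation
  }
  return 0;
}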
@@ -288,10 +285,10 @@ int main(int argc, char** argv) { new CombiMinMaxSchemeFromFile(dim, lmin, lmax, ctschemeFile)); const auto& pgNumbers = scheme->getProcessGroupNumbers(); if (pgNumbers.size() > 0) { - useStaticTaskAssignment = true; + // useStaticTaskAssignment = true; const auto& allCoeffs = scheme->getCoeffs(); const auto& allLevels = scheme->getCombiSpaces(); - const auto [itMin, itMax] = std::minmax_element(pgNumbers.begin(), pgNumbers.end()); + [[maybe_unused]] const auto [itMin, itMax] = std::minmax_element(pgNumbers.begin(), pgNumbers.end()); assert(*itMin == 0); // make sure it starts with 0 // assert(*itMax == ngroup - 1); // and goes up to the maximum group //TODO // filter out only those tasks that belong to "our" process group diff --git a/third_level_manager/thirdLevelManager.cpp b/third_level_manager/thirdLevelManager.cpp index f7475a6a2..88562cfd9 100644 --- a/third_level_manager/thirdLevelManager.cpp +++ b/third_level_manager/thirdLevelManager.cpp @@ -77,7 +77,7 @@ void ThirdLevelManager::init() std::cout << "ThirdLevelManager::init(): Params are not loaded!" << std::endl; exit(0); } - auto serverInitSuccess = server_.init(); + [[maybe_unused]] auto serverInitSuccess = server_.init(); assert(serverInitSuccess); if (not server_.isInitialized()) { @@ -292,7 +292,7 @@ size_t ThirdLevelManager::forwardData(const System& sender, const System& receiv return dataSize; } -void ThirdLevelManager::writeStatistics(std::string filename) +void ThirdLevelManager::writeStatistics(const std::string& filename) { size_t walltime = stats_.getWallTime(); size_t numCombinations = stats_.getNumCombinations(); diff --git a/third_level_manager/thirdLevelManager.hpp b/third_level_manager/thirdLevelManager.hpp index dcb153ed9..9bd933ec7 100644 --- a/third_level_manager/thirdLevelManager.hpp +++ b/third_level_manager/thirdLevelManager.hpp @@ -44,7 +44,7 @@ class ThirdLevelManager void runtimeLoop(); - void writeStatistics(std::string filename = ""); + void writeStatistics(const std::string& filename = ""); }; } diff --git a/tools/errorCalc.cpp b/tools/errorCalc.cpp index 961056148..917b39a2c 100644 --- a/tools/errorCalc.cpp +++ b/tools/errorCalc.cpp @@ -58,8 +58,8 @@ int main(int argc, char** argv) { real tmp1 = 1.0 / l2norm1; real tmp2 = 1.0 / l2norm2; if (mode[0] == 'n') { - for (auto i = 0; i < data1.size(); ++i) data1[i] *= tmp1; - for (auto i = 0; i < data2.size(); ++i) data2[i] *= tmp2; + for (size_t i = 0; i < data1.size(); ++i) data1[i] *= tmp1; + for (size_t i = 0; i < data2.size(); ++i) data2[i] *= tmp2; } else if (mode[0] == 'a') { ; } else { @@ -76,7 +76,7 @@ int main(int argc, char** argv) { // calc l2 norm of values real err = 0.0; - for (auto i = 0; i < data1.size(); ++i) { + for (size_t i = 0; i < data1.size(); ++i) { real tmp = std::abs(data1[i] - data2[i]); if (std::abs(tmp) / std::abs(data1[i]) > 1e-12) @@ -121,7 +121,7 @@ void readPlotFile(const char* pltFileName, std::vector& data, pltFile.read((char*)&dim, sizeof(int)); std::vector res(dim); - for (size_t i = 0; i < dim; ++i) pltFile.read((char*)&res[i], sizeof(int)); + for (int i = 0; i < dim; ++i) pltFile.read((char*)&res[i], sizeof(int)); resolution.assign(res.begin(), res.end()); std::cout << "resolution " << resolution << std::endl; @@ -147,12 +147,12 @@ void readPlotFile(const char* pltFileName, std::vector& data, // copy data from local checkpoint to dfg // note that on the last process in some dimensions dfg is larger than the // local checkpoint - for (size_t n = 0; n < shape[0]; ++n) { - for (size_t m = 0; m < 
shape[1]; ++m) { - for (size_t l = 0; l < shape[2]; ++l) { - for (size_t k = 0; k < shape[3]; ++k) { - for (size_t j = 0; j < shape[4]; ++j) { - for (size_t i = 0; i < shape[5]; ++i) { + for (int n = 0; n < shape[0]; ++n) { + for (int m = 0; m < shape[1]; ++m) { + for (int l = 0; l < shape[2]; ++l) { + for (int k = 0; k < shape[3]; ++k) { + for (int j = 0; j < shape[4]; ++j) { + for (int i = 0; i < shape[5]; ++i) { data.push_back(grid[n][m][l][k][j][i]); } } @@ -162,11 +162,11 @@ void readPlotFile(const char* pltFileName, std::vector& data, } } else if (dim == 5) { auto grid = createMultiArrayRef(&tmp[0], shape); - for (size_t n = 0; n < shape[0]; ++n) { - for (size_t m = 0; m < shape[1]; ++m) { - for (size_t l = 0; l < shape[2]; ++l) { - for (size_t k = 0; k < shape[3]; ++k) { - for (size_t j = 0; j < shape[4]; ++j) { + for (int n = 0; n < shape[0]; ++n) { + for (int m = 0; m < shape[1]; ++m) { + for (int l = 0; l < shape[2]; ++l) { + for (int k = 0; k < shape[3]; ++k) { + for (int j = 0; j < shape[4]; ++j) { data.push_back(grid[n][m][l][k][j]); } } @@ -175,10 +175,10 @@ void readPlotFile(const char* pltFileName, std::vector& data, } } else if (dim == 4) { auto grid = createMultiArrayRef(&tmp[0], shape); - for (size_t n = 0; n < shape[0]; ++n) { - for (size_t m = 0; m < shape[1]; ++m) { - for (size_t l = 0; l < shape[2]; ++l) { - for (size_t k = 0; k < shape[3]; ++k) { + for (int n = 0; n < shape[0]; ++n) { + for (int m = 0; m < shape[1]; ++m) { + for (int l = 0; l < shape[2]; ++l) { + for (int k = 0; k < shape[3]; ++k) { data.push_back(grid[n][m][l][k]); } } @@ -186,17 +186,17 @@ void readPlotFile(const char* pltFileName, std::vector& data, } } else if (dim == 3) { auto grid = createMultiArrayRef(&tmp[0], shape); - for (size_t n = 0; n < shape[0]; ++n) { - for (size_t m = 0; m < shape[1]; ++m) { - for (size_t l = 0; l < shape[2]; ++l) { + for (int n = 0; n < shape[0]; ++n) { + for (int m = 0; m < shape[1]; ++m) { + for (int l = 0; l < shape[2]; ++l) { data.push_back(grid[n][m][l]); } } } } else if (dim == 2) { auto grid = createMultiArrayRef(&tmp[0], shape); - for (size_t n = 0; n < shape[0]; ++n) { - for (size_t m = 0; m < shape[1]; ++m) { + for (int n = 0; n < shape[0]; ++n) { + for (int m = 0; m < shape[1]; ++m) { data.push_back(grid[n][m]); } } diff --git a/tools/subspace_writer/subspace_writer.cpp b/tools/subspace_writer/subspace_writer.cpp index 1232ed817..41dcbd8fa 100644 --- a/tools/subspace_writer/subspace_writer.cpp +++ b/tools/subspace_writer/subspace_writer.cpp @@ -130,7 +130,6 @@ int main(int argc, char** argv) { // read in first CT scheme std::unique_ptr scheme( new CombiMinMaxSchemeFromFile(dim, lmin, lmax, ctschemeFile)); - const auto& allLevels = scheme->getCombiSpaces(); // generate distributed sparse grid uniDSGs.emplace_back(schemeFileToSparseGridAndSizesFile(