diff --git a/maintainer/benchmarks/lb.py b/maintainer/benchmarks/lb.py index 04453c5ea7..7d47461bad 100644 --- a/maintainer/benchmarks/lb.py +++ b/maintainer/benchmarks/lb.py @@ -50,6 +50,18 @@ parser.add_argument("--output", metavar="FILEPATH", action="store", type=str, required=False, default="benchmarks.csv", help="Output file (default: benchmarks.csv)") +parser.add_argument("--divided_block", action="store", + type=int, default=1, required=False, + help="blocks^(1/3) per mpi rank") +parser.add_argument("--divided_block_x", action="store", + type=int, default=0, required=False, + help="The number of divided blocks for x direction") +parser.add_argument("--divided_block_y", action="store", + type=int, default=0, required=False, + help="The number of divided blocks for x direction") +parser.add_argument("--divided_block_z", action="store", + type=int, default=0, required=False, + help="The number of divided blocks for x direction") args = parser.parse_args() @@ -85,7 +97,10 @@ n_proc = system.cell_system.get_state()["n_nodes"] n_part = n_proc * args.particles_per_core if n_part == 0: - box_l = 3 * args.box_l if len(args.box_l) == 1 else args.box_l + if len(args.box_l) == 1: + box_l = 3 * args.box_l + elif len(args.box_l) == 3: + box_l = args.box_l agrid = 1. lb_grid = box_l measurement_steps = 80 @@ -101,13 +116,21 @@ lb_grid = 3 * [lb_grid] box_l = 3 * [box_l] -print(f"box length: {box_l}") -print(f"LB shape: {lb_grid}") -print(f"LB agrid: {agrid:.3f}") +divided_block_x = args.divided_block_x +divided_block_y = args.divided_block_y +divided_block_z = args.divided_block_z +if divided_block_x != 0 and divided_block_y != 0 and divided_block_z != 0: + blocks_per_mpi_rank = [divided_block_x, + divided_block_y, divided_block_z] +else: + divided_block = args.divided_block + blocks_per_mpi_rank = [divided_block] * 3 # System ############################################################# -system.box_l = box_l +system.box_l = box_l * system.cell_system.node_grid +print(f"LB agrid: {agrid:.3f}") +print("LB shape", system.box_l) # Integration parameters ############################################################# @@ -145,7 +168,7 @@ if args.multi_gpu: system.cuda_init_handle.call_method("set_device_id_per_rank") lbf = lb_class(agrid=agrid, tau=system.time_step, kinematic_viscosity=1., - density=1., single_precision=args.single_precision) + density=1., single_precision=args.single_precision, blocks_per_mpi_rank=blocks_per_mpi_rank) system.lb = lbf if n_part: system.thermostat.set_lb(LB_fluid=lbf, gamma=1., seed=42) diff --git a/src/core/integrate.cpp b/src/core/integrate.cpp index 9ed1d628a4..badbc8f142 100644 --- a/src/core/integrate.cpp +++ b/src/core/integrate.cpp @@ -633,12 +633,18 @@ int System::System::integrate(int n_steps, int reuse_forces) { ek.propagate(); } } else if (lb_active) { +#ifdef CALIPER + CALI_MARK_BEGIN("LB.PROPAGATE"); +#endif auto const md_steps_per_lb_step = calc_md_steps_per_tau(lb.get_tau()); propagation.lb_skipped_md_steps += 1; if (propagation.lb_skipped_md_steps >= md_steps_per_lb_step) { propagation.lb_skipped_md_steps = 0; lb.propagate(); } +#ifdef CALIPER + CALI_MARK_END("LB.PROPAGATE"); +#endif } else if (ek_active) { auto const md_steps_per_ek_step = calc_md_steps_per_tau(ek.get_tau()); propagation.ek_skipped_md_steps += 1; diff --git a/src/core/lb/LBWalberla.cpp b/src/core/lb/LBWalberla.cpp index 9944d05408..37f3d78e64 100644 --- a/src/core/lb/LBWalberla.cpp +++ b/src/core/lb/LBWalberla.cpp @@ -40,6 +40,10 @@ #include #include +#ifdef CALIPER +#include +#endif + namespace LB { bool LBWalberla::is_gpu() const { return lb_fluid->is_gpu(); } @@ -50,7 +54,15 @@ Utils::VectorXd<9> LBWalberla::get_pressure_tensor() const { return lb_fluid->get_pressure_tensor(); } -void LBWalberla::propagate() { lb_fluid->integrate(); } +void LBWalberla::propagate() { +#ifdef CALIPER + CALI_MARK_BEGIN("LBWalberla.PROPAGATE"); +#endif + lb_fluid->integrate(); +#ifdef CALIPER + CALI_MARK_END("LBWalberla.PROPAGATE"); +#endif +} void LBWalberla::ghost_communication() { lb_fluid->ghost_communication(); } diff --git a/src/core/lb/Solver.cpp b/src/core/lb/Solver.cpp index 758f36c4d7..9a75558057 100644 --- a/src/core/lb/Solver.cpp +++ b/src/core/lb/Solver.cpp @@ -47,6 +47,10 @@ #include #include +#ifdef CALIPER +#include +#endif + namespace LB { Solver::Solver() { impl = std::make_unique(); } @@ -69,8 +73,14 @@ void Solver::reset() { } void Solver::propagate() { +#ifdef CALIPER + CALI_MARK_BEGIN("SOLVER.PROPAGATE"); +#endif check_solver(impl); std::visit([](auto &ptr) { ptr->propagate(); }, *impl->solver); +#ifdef CALIPER + CALI_MARK_END("SOLVER.PROPAGATE"); +#endif } void Solver::ghost_communication() { diff --git a/src/core/unit_tests/ek_interface_test.cpp b/src/core/unit_tests/ek_interface_test.cpp index 0abe8917bd..a80b2fa2fa 100644 --- a/src/core/unit_tests/ek_interface_test.cpp +++ b/src/core/unit_tests/ek_interface_test.cpp @@ -83,7 +83,8 @@ static auto make_ek_actor() { auto constexpr n_ghost_layers = 1u; auto constexpr single_precision = true; ek_lattice = std::make_shared( - params.grid_dimensions, ::communicator.node_grid, n_ghost_layers); + params.grid_dimensions, ::communicator.node_grid, + ::communicator.node_grid, n_ghost_layers); ek_container = std::make_shared( params.tau, walberla::new_ek_poisson_none(ek_lattice, single_precision)); ek_reactions = std::make_shared(); diff --git a/src/core/unit_tests/lb_particle_coupling_test.cpp b/src/core/unit_tests/lb_particle_coupling_test.cpp index 97e0f4c2e8..4b6c875360 100644 --- a/src/core/unit_tests/lb_particle_coupling_test.cpp +++ b/src/core/unit_tests/lb_particle_coupling_test.cpp @@ -102,7 +102,8 @@ static auto make_lb_actor() { auto constexpr single_precision = false; lb_params = std::make_shared(params.agrid, params.tau); lb_lattice = std::make_shared( - params.grid_dimensions, ::communicator.node_grid, n_ghost_layers); + params.grid_dimensions, ::communicator.node_grid, + ::communicator.node_grid, n_ghost_layers); lb_fluid = new_lb_walberla_cpu(lb_lattice, params.viscosity, params.density, single_precision); lb_fluid->set_collision_model(params.kT, params.seed); @@ -535,7 +536,8 @@ bool test_lb_domain_mismatch_local() { auto const params = std::make_shared(0.5, 0.01); ::communicator.node_grid = node_grid_reversed; auto const lattice = std::make_shared( - Utils::Vector3i{12, 12, 12}, node_grid_original, n_ghost_layers); + Utils::Vector3i{12, 12, 12}, node_grid_original, node_grid_original, + n_ghost_layers); auto const ptr = new_lb_walberla_cpu(lattice, 1.0, 1.0, false); ptr->set_collision_model(0.0, 0); ::communicator.node_grid = node_grid_original; diff --git a/src/python/espressomd/detail/walberla.py b/src/python/espressomd/detail/walberla.py index 6ec64dc94a..964832cc4a 100644 --- a/src/python/espressomd/detail/walberla.py +++ b/src/python/espressomd/detail/walberla.py @@ -47,7 +47,7 @@ def __init__(self, *args, **kwargs): super().__init__(**kwargs) def valid_keys(self): - return {"agrid", "n_ghost_layers"} + return {"agrid", "n_ghost_layers", "blocks_per_mpi_rank"} def required_keys(self): return self.valid_keys() diff --git a/src/python/espressomd/lb.py b/src/python/espressomd/lb.py index e4b870a307..5b7f588edb 100644 --- a/src/python/espressomd/lb.py +++ b/src/python/espressomd/lb.py @@ -58,14 +58,14 @@ def validate_params(self, params): def valid_keys(self): return {"agrid", "tau", "density", "ext_force_density", - "kinematic_viscosity", "lattice", "kT", "seed"} + "kinematic_viscosity", "lattice", "kT", "seed", "blocks_per_mpi_rank"} def required_keys(self): return {"lattice", "density", "kinematic_viscosity", "tau"} def default_params(self): return {"lattice": None, "seed": 0, "kT": 0., - "ext_force_density": [0.0, 0.0, 0.0]} + "ext_force_density": [0.0, 0.0, 0.0], "blocks_per_mpi_rank": [1, 1, 1]} def mach_limit(self): """ @@ -141,6 +141,8 @@ class LBFluidWalberla(HydrodynamicInteraction, Required for a thermalized fluid. Must be positive. single_precision : :obj:`bool`, optional Use single-precision floating-point arithmetic. + blocks_per_mpi_rank : (3,) array_like of :obj:`int`, optional + Ditribute more than one block to each CPU. Methods ------- @@ -240,7 +242,7 @@ def validate_params(self, params): if "agrid" not in params: raise ValueError("missing argument 'lattice' or 'agrid'") params["lattice"] = LatticeWalberla( - agrid=params.pop("agrid"), n_ghost_layers=1) + agrid=params.pop("agrid"), n_ghost_layers=1, blocks_per_mpi_rank=params.get("blocks_per_mpi_rank")) elif "agrid" in params: raise ValueError("cannot provide both 'lattice' and 'agrid'") diff --git a/src/script_interface/walberla/LBFluid.cpp b/src/script_interface/walberla/LBFluid.cpp index 5b3bf4cabc..954fa3fce8 100644 --- a/src/script_interface/walberla/LBFluid.cpp +++ b/src/script_interface/walberla/LBFluid.cpp @@ -139,6 +139,12 @@ void LBFluidGPU::make_instance(VariantMap const ¶ms) { auto const visc = get_value(params, "kinematic_viscosity"); auto const dens = get_value(params, "density"); auto const precision = get_value(params, "single_precision"); + auto const blocks_per_mpi_rank = get_value_or( + params, "blocks_per_mpi_rank", Utils::Vector3i{{1, 1, 1}}); + if (blocks_per_mpi_rank != Utils::Vector3i{{1, 1, 1}}) { + throw std::runtime_error( + "GPU architecture PROHIBITED allocating many blocks to 1 CPU."); + } auto const lb_lattice = m_lattice->lattice(); auto const lb_visc = m_conv_visc * visc; auto const lb_dens = m_conv_dens * dens; diff --git a/src/script_interface/walberla/LatticeWalberla.hpp b/src/script_interface/walberla/LatticeWalberla.hpp index 999513f0a7..d438bee616 100644 --- a/src/script_interface/walberla/LatticeWalberla.hpp +++ b/src/script_interface/walberla/LatticeWalberla.hpp @@ -43,6 +43,7 @@ class LatticeWalberla : public AutoParameters { std::shared_ptr<::LatticeWalberla> m_lattice; double m_agrid; Utils::Vector3d m_box_l; + Utils::Vector3i m_blocks_per_mpi_rank; public: LatticeWalberla() { @@ -53,6 +54,8 @@ class LatticeWalberla : public AutoParameters { {"shape", AutoParameter::read_only, [this]() { return m_lattice->get_grid_dimensions(); }}, {"_box_l", AutoParameter::read_only, [this]() { return m_box_l; }}, + {"blocks_per_mpi_rank", AutoParameter::read_only, + [this]() { return m_blocks_per_mpi_rank; }}, }); } @@ -60,7 +63,16 @@ class LatticeWalberla : public AutoParameters { auto const &box_geo = *::System::get_system().box_geo; m_agrid = get_value(args, "agrid"); m_box_l = get_value_or(args, "_box_l", box_geo.length()); + m_blocks_per_mpi_rank = get_value_or( + args, "blocks_per_mpi_rank", Utils::Vector3i{{1, 1, 1}}); auto const n_ghost_layers = get_value(args, "n_ghost_layers"); + auto const block_grid = + Utils::Vector3i{{static_cast(::communicator.node_grid[0] * + m_blocks_per_mpi_rank[0]), + static_cast(::communicator.node_grid[1] * + m_blocks_per_mpi_rank[1]), + static_cast(::communicator.node_grid[2] * + m_blocks_per_mpi_rank[2])}}; context()->parallel_try_catch([&]() { if (m_agrid <= 0.) { @@ -72,7 +84,7 @@ class LatticeWalberla : public AutoParameters { auto const grid_dim = ::LatticeWalberla::calc_grid_dimensions(m_box_l, m_agrid); m_lattice = std::make_shared<::LatticeWalberla>( - grid_dim, ::communicator.node_grid, + grid_dim, ::communicator.node_grid, block_grid, static_cast(n_ghost_layers)); }); } diff --git a/src/walberla_bridge/include/walberla_bridge/LatticeWalberla.hpp b/src/walberla_bridge/include/walberla_bridge/LatticeWalberla.hpp index 03c5ff6291..b49693e848 100644 --- a/src/walberla_bridge/include/walberla_bridge/LatticeWalberla.hpp +++ b/src/walberla_bridge/include/walberla_bridge/LatticeWalberla.hpp @@ -52,6 +52,7 @@ class LatticeWalberla { public: LatticeWalberla(Utils::Vector3i const &grid_dimensions, Utils::Vector3i const &node_grid, + Utils::Vector3i const &block_grid, unsigned int n_ghost_layers); // Grid, domain, halo diff --git a/src/walberla_bridge/src/BoundaryPackInfo.hpp b/src/walberla_bridge/src/BoundaryPackInfo.hpp index 83a26fa91d..48e3d4258c 100644 --- a/src/walberla_bridge/src/BoundaryPackInfo.hpp +++ b/src/walberla_bridge/src/BoundaryPackInfo.hpp @@ -96,7 +96,7 @@ class BoundaryPackInfo : public PackInfo { WALBERLA_ASSERT_EQUAL(bSize, buf_size); #endif - auto const offset = std::get<0>(m_lattice->get_local_grid_range()); + auto const offset = to_vector3i(receiver->getAABB().min()); typename Boundary_T::value_type value; for (auto it = begin(flag_field); it != flag_field->end(); ++it) { if (isFlagSet(it, boundary_flag)) { @@ -133,7 +133,7 @@ class BoundaryPackInfo : public PackInfo { << buf_size; #endif - auto const offset = std::get<0>(m_lattice->get_local_grid_range()); + auto const offset = to_vector3i(sender->getAABB().min()); for (auto it = begin(flag_field); it != flag_field->end(); ++it) { if (isFlagSet(it, boundary_flag)) { auto const node = offset + Utils::Vector3i{{it.x(), it.y(), it.z()}}; diff --git a/src/walberla_bridge/src/LatticeWalberla.cpp b/src/walberla_bridge/src/LatticeWalberla.cpp index 2dc2943a40..6551da010a 100644 --- a/src/walberla_bridge/src/LatticeWalberla.cpp +++ b/src/walberla_bridge/src/LatticeWalberla.cpp @@ -40,6 +40,7 @@ LatticeWalberla::LatticeWalberla(Utils::Vector3i const &grid_dimensions, Utils::Vector3i const &node_grid, + Utils::Vector3i const &block_grid, unsigned int n_ghost_layers) : m_grid_dimensions{grid_dimensions}, m_n_ghost_layers{n_ghost_layers} { using walberla::real_t; @@ -50,21 +51,28 @@ LatticeWalberla::LatticeWalberla(Utils::Vector3i const &grid_dimensions, throw std::runtime_error( "Lattice grid dimensions and MPI node grid are not compatible."); } + if (m_grid_dimensions[i] % block_grid[i] != 0) { + throw std::runtime_error( + "Lattice grid dimensions and block grid are not compatible."); + } } auto constexpr lattice_constant = real_t{1}; - auto const cells_block = Utils::hadamard_division(grid_dimensions, node_grid); + auto const cells_block = + Utils::hadamard_division(grid_dimensions, block_grid); m_blocks = walberla::blockforest::createUniformBlockGrid( // number of blocks in each direction - uint_c(node_grid[0]), uint_c(node_grid[1]), uint_c(node_grid[2]), + uint_c(block_grid[0]), uint_c(block_grid[1]), uint_c(block_grid[2]), // number of cells per block in each direction uint_c(cells_block[0]), uint_c(cells_block[1]), uint_c(cells_block[2]), lattice_constant, // number of cpus per direction uint_c(node_grid[0]), uint_c(node_grid[1]), uint_c(node_grid[2]), // periodicity - true, true, true); + true, true, true, + // keep global block information + false); for (IBlock &block : *m_blocks) { m_cached_blocks.push_back(&block); } @@ -73,11 +81,44 @@ LatticeWalberla::LatticeWalberla(Utils::Vector3i const &grid_dimensions, [[nodiscard]] std::pair LatticeWalberla::get_local_domain() const { using walberla::to_vector3d; - // We only have one block per mpi rank - assert(++(m_blocks->begin()) == m_blocks->end()); - - auto const ab = m_blocks->begin()->getAABB(); - return {to_vector3d(ab.min()), to_vector3d(ab.max())}; + // Get upper and lower corner of BlockForest assigned to a mpi rank. + // Since we can allocate multiple blocks per mpi rank, + // the corners of all Blocks are compared. + int64_t const stride_y = m_grid_dimensions[2]; + int64_t const stride_x = m_grid_dimensions[1] * stride_y; + auto aa = m_blocks->begin()->getAABB(); + auto bb = m_blocks->begin()->getAABB(); + int64_t aa_index = stride_x * static_cast(aa.min()[0]) + + stride_y * static_cast(aa.min()[1]) + + static_cast(aa.min()[2]); + int64_t bb_index = stride_x * static_cast(bb.max()[0]) + + stride_y * static_cast(bb.max()[1]) + + static_cast(bb.max()[2]); + for (auto b = m_blocks->begin(); b != m_blocks->end(); ++b) { + auto cc = b->getAABB(); + for (auto const i : {0u, 1u, 2u}) { + if ((cc.max()[i] - cc.min()[i]) != 0) { + assert(m_grid_dimensions[i] % + static_cast(cc.max()[i] - cc.min()[i]) == + 0); + } + } + int64_t min_index = stride_x * static_cast(cc.min()[0]) + + stride_y * static_cast(cc.min()[1]) + + static_cast(cc.min()[2]); + int64_t max_index = stride_x * static_cast(cc.max()[0]) + + stride_y * static_cast(cc.max()[1]) + + static_cast(cc.max()[2]); + if (min_index < aa_index) { + aa = cc; + aa_index = min_index; + } + if (max_index > bb_index) { + bb = cc; + bb_index = max_index; + } + } + return {to_vector3d(aa.min()), to_vector3d(bb.max())}; } [[nodiscard]] bool diff --git a/src/walberla_bridge/src/lattice_boltzmann/LBWalberlaImpl.hpp b/src/walberla_bridge/src/lattice_boltzmann/LBWalberlaImpl.hpp index 3c1b11219f..8cbd6981ea 100644 --- a/src/walberla_bridge/src/lattice_boltzmann/LBWalberlaImpl.hpp +++ b/src/walberla_bridge/src/lattice_boltzmann/LBWalberlaImpl.hpp @@ -378,6 +378,7 @@ class LBWalberlaImpl : public LBWalberlaBase { std::shared_ptr> m_host_field_allocator; #endif + // Interval within not global but mpi rank [[nodiscard]] std::optional get_interval(Utils::Vector3i const &lower_corner, Utils::Vector3i const &upper_corner) const { @@ -389,8 +390,56 @@ class LBWalberlaImpl : public LBWalberlaBase { if (not lower_bc or not upper_bc) { return std::nullopt; } - assert(&(*(lower_bc->block)) == &(*(upper_bc->block))); - return {CellInterval(lower_bc->cell, upper_bc->cell)}; + Cell const global_lower_cell = lower_bc->cell; + Cell const global_upper_cell = + Cell(static_cast(upper_bc->cell[0] + + upper_bc->block->getAABB().min()[0] - + lower_bc->block->getAABB().min()[0]), + static_cast(upper_bc->cell[1] + + upper_bc->block->getAABB().min()[1] - + lower_bc->block->getAABB().min()[1]), + static_cast(upper_bc->cell[2] + + upper_bc->block->getAABB().min()[2] - + lower_bc->block->getAABB().min()[2])); + return {CellInterval(global_lower_cell, global_upper_cell)}; + } + + // Interval within local block + [[nodiscard]] std::optional get_block_interval( + Utils::Vector3i const &lower_corner, Utils::Vector3i const &upper_corner, + Utils::Vector3i const &local_offset, IBlock const &block) const { + auto block_lower_corner = to_vector3i(block.getAABB().min()); + if (upper_corner[0] < block_lower_corner[0] or + upper_corner[1] < block_lower_corner[1] or + upper_corner[2] < block_lower_corner[2]) { + return std::nullopt; + } + for (uint_t f = 0u; f < 3u; ++f) { + if (block_lower_corner[f] < lower_corner[f]) { + block_lower_corner[f] = lower_corner[f]; + } + } + auto block_upper_corner = to_vector3i(block.getAABB().max()); + if (lower_corner[0] > block_upper_corner[0] or + lower_corner[1] > block_upper_corner[1] or + lower_corner[2] > block_upper_corner[2]) { + return std::nullopt; + } + for (uint_t f = 0u; f < 3u; ++f) { + if (block_upper_corner[f] > upper_corner[f]) { + block_upper_corner[f] = upper_corner[f]; + } + } + block_upper_corner -= Utils::Vector3i::broadcast(1); + Cell const block_lower_cell = + Cell(static_cast(block_lower_corner[0] - local_offset[0]), + static_cast(block_lower_corner[1] - local_offset[1]), + static_cast(block_lower_corner[2] - local_offset[2])); + Cell const block_upper_cell = + Cell(static_cast(block_upper_corner[0] - local_offset[0]), + static_cast(block_upper_corner[1] - local_offset[1]), + static_cast(block_upper_corner[2] - local_offset[2])); + return {CellInterval(block_lower_cell, block_upper_cell)}; } /** @@ -614,6 +663,7 @@ class LBWalberlaImpl : public LBWalberlaBase { if (m_has_boundaries) { integrate_boundaries(blocks); } + // LB stream integrate_stream(blocks); // Mark pending ghost layer updates @@ -868,40 +918,62 @@ class LBWalberlaImpl : public LBWalberlaBase { get_slice_velocity(Utils::Vector3i const &lower_corner, Utils::Vector3i const &upper_corner) const override { std::vector out; + uint_t values_size = 0; if (auto const ci = get_interval(lower_corner, upper_corner)) { + out = std::vector(static_cast(3u * ci->numCells())); + int64_t const stride_y = (ci->max().z() - ci->min().z() + 1u); + int64_t const stride_x = (ci->max().y() - ci->min().y() + 1u) * stride_y; auto const &lattice = get_lattice(); - auto const &block = *(lattice.get_blocks()->begin()); - auto const field = - block.template getData(m_velocity_field_id); - auto const values = lbm::accessor::Vector::get(field, *ci); - assert(++(lattice.get_blocks()->begin()) == lattice.get_blocks()->end()); - assert(values.size() == 3u * ci->numCells()); - if constexpr (std::is_same_v) { - out = std::move(values); - } else { - out = std::vector(values.begin(), values.end()); - } - auto const local_offset = std::get<0>(lattice.get_local_grid_range()); - auto const lower_cell = ci->min(); - auto const upper_cell = ci->max(); - auto it = out.begin(); - for (auto x = lower_cell.x(); x <= upper_cell.x(); ++x) { - for (auto y = lower_cell.y(); y <= upper_cell.y(); ++y) { - for (auto z = lower_cell.z(); z <= upper_cell.z(); ++z) { - auto const node = local_offset + Utils::Vector3i{{x, y, z}}; - if (m_boundary->node_is_boundary(node)) { - auto const &vec = m_boundary->get_node_value_at_boundary(node); - for (uint_t f = 0u; f < 3u; ++f) { - (*it) = double_c(vec[f]); - std::advance(it, 1l); + for (auto b = lattice.get_blocks()->begin(); + b != lattice.get_blocks()->end(); ++b) { + auto const &block = *b; + auto const local_offset = to_vector3i(block.getAABB().min()); + if (auto const bci = get_block_interval(lower_corner, upper_corner, + local_offset, block)) { + auto const field = + block.template getData(m_velocity_field_id); + auto const values = lbm::accessor::Vector::get(field, *bci); + assert(values.size() == 3u * bci->numCells()); + values_size += 3u * bci->numCells(); + int64_t const stride_ly = (bci->max().z() - bci->min().z() + 1u); + int64_t const stride_lx = + (bci->max().y() - bci->min().y() + 1u) * stride_ly; + auto const lower_cell = bci->min(); + auto const upper_cell = bci->max(); + // The field data "values" knows about block-local indices + // In the loop, x,y,z are in block-local coordinates + // It is converted to BlockForest (lattice) coordinates assigned to a + // mpi rank The same applies to other get_slice methods + for (auto x = lower_cell.x(); x <= upper_cell.x(); ++x) { + for (auto y = lower_cell.y(); y <= upper_cell.y(); ++y) { + for (auto z = lower_cell.z(); z <= upper_cell.z(); ++z) { + auto const node = local_offset + Utils::Vector3i{{x, y, z}}; + auto const index = stride_x * (node[0] - lower_corner[0]) + + stride_y * (node[1] - lower_corner[1]) + + node[2] - lower_corner[2]; + auto const local_index = stride_lx * (x - lower_cell.x()) + + stride_ly * (y - lower_cell.y()) + z - + lower_cell.z(); + if (m_boundary->node_is_boundary(node)) { + auto const &vec = + m_boundary->get_node_value_at_boundary(node); + for (uint_t f = 0u; f < 3u; ++f) { + out[static_cast(3u * index + f)] = + double_c(vec[f]); + } + } else { + for (uint_t f = 0u; f < 3u; ++f) { + out[static_cast(3u * index + f)] = + double_c(values[static_cast( + 3u * local_index + f)]); + } + } } - } else { - std::advance(it, 3l); } } } } + assert(values_size == 3u * ci->numCells()); } return out; } @@ -912,17 +984,55 @@ class LBWalberlaImpl : public LBWalberlaBase { m_pending_ghost_comm.set(GhostComm::PDF); m_pending_ghost_comm.set(GhostComm::VEL); if (auto const ci = get_interval(lower_corner, upper_corner)) { - auto const &lattice = get_lattice(); - auto &block = *(lattice.get_blocks()->begin()); - auto pdf_field = block.template getData(m_pdf_field_id); - auto force_field = - block.template getData(m_last_applied_force_field_id); - auto vel_field = block.template getData(m_velocity_field_id); - assert(++(lattice.get_blocks()->begin()) == lattice.get_blocks()->end()); assert(velocity.size() == 3u * ci->numCells()); - std::vector const values(velocity.begin(), velocity.end()); - lbm::accessor::Velocity::set(pdf_field, vel_field, force_field, values, - *ci); + int64_t const stride_y = (ci->max().z() - ci->min().z() + 1u); + int64_t const stride_x = (ci->max().y() - ci->min().y() + 1u) * stride_y; + auto const &lattice = get_lattice(); + for (auto b = lattice.get_blocks()->begin(); + b != lattice.get_blocks()->end(); ++b) { + auto &block = *b; + auto const local_offset = to_vector3i(block.getAABB().min()); + if (auto const bci = get_block_interval(lower_corner, upper_corner, + local_offset, block)) { + auto pdf_field = block.template getData(m_pdf_field_id); + auto force_field = block.template getData( + m_last_applied_force_field_id); + auto vel_field = + block.template getData(m_velocity_field_id); + std::vector values = std::vector( + static_cast(3u * bci->numCells())); + int64_t const stride_ly = (bci->max().z() - bci->min().z() + 1u); + int64_t const stride_lx = + (bci->max().y() - bci->min().y() + 1u) * stride_ly; + auto const lower_cell = bci->min(); + auto const upper_cell = bci->max(); + // In the loop, x,y,z are in block-local coordinates + // The field data given in the argument knows about BlockForest + // (lattice) indices from lower_corner to upper_corner It is converted + // to block-local coordinates The same applies to other set_slice + // methods + for (auto x = lower_cell.x(); x <= upper_cell.x(); ++x) { + for (auto y = lower_cell.y(); y <= upper_cell.y(); ++y) { + for (auto z = lower_cell.z(); z <= upper_cell.z(); ++z) { + auto const node = local_offset + Utils::Vector3i{{x, y, z}}; + auto const index = stride_x * (node[0] - lower_corner[0]) + + stride_y * (node[1] - lower_corner[1]) + + node[2] - lower_corner[2]; + auto const local_index = stride_lx * (x - lower_cell.x()) + + stride_ly * (y - lower_cell.y()) + z - + lower_cell.z(); + for (uint_t f = 0u; f < 3u; ++f) { + values[static_cast(3u * local_index + f)] = + numeric_cast( + velocity[static_cast(3u * index + f)]); + } + } + } + } + lbm::accessor::Velocity::set(pdf_field, vel_field, force_field, + values, *bci); + } + } } } @@ -1073,8 +1183,11 @@ class LBWalberlaImpl : public LBWalberlaBase { return false; auto const force_at_node = [this, &force](std::array const node, double weight) { - auto const bc = - get_block_and_cell(get_lattice(), Utils::Vector3i(node), true); + auto bc = get_block_and_cell(get_lattice(), Utils::Vector3i(node), false); + if (!bc) { + bc = get_block_and_cell(get_lattice(), Utils::Vector3i(node), true); + } + if (bc) { auto const weighted_force = to_vector3(weight * force); auto force_field = @@ -1137,18 +1250,47 @@ class LBWalberlaImpl : public LBWalberlaBase { Utils::Vector3i const &upper_corner) const override { std::vector out; if (auto const ci = get_interval(lower_corner, upper_corner)) { + out = std::vector(static_cast(3u * ci->numCells())); + int64_t const stride_y = (ci->max().z() - ci->min().z() + 1u); + int64_t const stride_x = (ci->max().y() - ci->min().y() + 1u) * stride_y; auto const &lattice = get_lattice(); - auto const &block = *(lattice.get_blocks()->begin()); - auto const field = - block.template getData(m_last_applied_force_field_id); - auto const values = lbm::accessor::Vector::get(field, *ci); - assert(++(lattice.get_blocks()->begin()) == lattice.get_blocks()->end()); - assert(values.size() == 3u * ci->numCells()); - if constexpr (std::is_same_v) { - out = std::move(values); - } else { - out = std::vector(values.begin(), values.end()); + for (auto b = lattice.get_blocks()->begin(); + b != lattice.get_blocks()->end(); ++b) { + auto const &block = *b; + auto const local_offset = to_vector3i(block.getAABB().min()); + if (auto const bci = get_block_interval(lower_corner, upper_corner, + local_offset, block)) { + auto const field = block.template getData( + m_last_applied_force_field_id); + auto const values = lbm::accessor::Vector::get(field, *bci); + assert(values.size() == 3u * bci->numCells()); + int64_t const stride_ly = (bci->max().z() - bci->min().z() + 1u); + int64_t const stride_lx = + (bci->max().y() - bci->min().y() + 1u) * stride_ly; + auto const lower_cell = bci->min(); + auto const upper_cell = bci->max(); + // The field data "values" knows about block-local indices + // In the loop, x,y,z are in block-local coordinates + // It is converted to BlockForest (lattice) coordinates assigned to a + // mpi rank The same applies to other get_slice methods + for (auto x = lower_cell.x(); x <= upper_cell.x(); ++x) { + for (auto y = lower_cell.y(); y <= upper_cell.y(); ++y) { + for (auto z = lower_cell.z(); z <= upper_cell.z(); ++z) { + auto const node = local_offset + Utils::Vector3i{{x, y, z}}; + auto const index = stride_x * (node[0] - lower_corner[0]) + + stride_y * (node[1] - lower_corner[1]) + + node[2] - lower_corner[2]; + auto const local_index = stride_lx * (x - lower_cell.x()) + + stride_ly * (y - lower_cell.y()) + z - + lower_cell.z(); + for (uint_t f = 0u; f < 3u; ++f) { + out[static_cast(3u * index + f)] = + values[static_cast(3u * local_index + f)]; + } + } + } + } + } } } return out; @@ -1160,16 +1302,55 @@ class LBWalberlaImpl : public LBWalberlaBase { m_pending_ghost_comm.set(GhostComm::VEL); m_pending_ghost_comm.set(GhostComm::LAF); if (auto const ci = get_interval(lower_corner, upper_corner)) { - auto const &lattice = get_lattice(); - auto &block = *(lattice.get_blocks()->begin()); - auto pdf_field = block.template getData(m_pdf_field_id); - auto force_field = - block.template getData(m_last_applied_force_field_id); - auto vel_field = block.template getData(m_velocity_field_id); - assert(++(lattice.get_blocks()->begin()) == lattice.get_blocks()->end()); assert(force.size() == 3u * ci->numCells()); - std::vector const values(force.begin(), force.end()); - lbm::accessor::Force::set(pdf_field, vel_field, force_field, values, *ci); + int64_t const stride_y = (ci->max().z() - ci->min().z() + 1u); + int64_t const stride_x = (ci->max().y() - ci->min().y() + 1u) * stride_y; + auto const &lattice = get_lattice(); + for (auto b = lattice.get_blocks()->begin(); + b != lattice.get_blocks()->end(); ++b) { + auto &block = *b; + auto const local_offset = to_vector3i(block.getAABB().min()); + if (auto const bci = get_block_interval(lower_corner, upper_corner, + local_offset, block)) { + auto pdf_field = block.template getData(m_pdf_field_id); + auto force_field = block.template getData( + m_last_applied_force_field_id); + auto vel_field = + block.template getData(m_velocity_field_id); + std::vector values = std::vector( + static_cast(3u * bci->numCells())); + int64_t const stride_ly = (bci->max().z() - bci->min().z() + 1u); + int64_t const stride_lx = + (bci->max().y() - bci->min().y() + 1u) * stride_ly; + auto const lower_cell = bci->min(); + auto const upper_cell = bci->max(); + // In the loop, x,y,z are in block-local coordinates + // The field data given in the argument knows about BlockForest + // (lattice) indices from lower_corner to upper_corner It is converted + // to block-local coordinates The same applies to other set_slice + // methods + for (auto x = lower_cell.x(); x <= upper_cell.x(); ++x) { + for (auto y = lower_cell.y(); y <= upper_cell.y(); ++y) { + for (auto z = lower_cell.z(); z <= upper_cell.z(); ++z) { + auto const node = local_offset + Utils::Vector3i{{x, y, z}}; + auto const index = stride_x * (node[0] - lower_corner[0]) + + stride_y * (node[1] - lower_corner[1]) + + node[2] - lower_corner[2]; + auto const local_index = stride_lx * (x - lower_cell.x()) + + stride_ly * (y - lower_cell.y()) + z - + lower_cell.z(); + for (uint_t f = 0u; f < 3u; ++f) { + values[static_cast(3u * local_index + f)] = + numeric_cast( + force[static_cast(3u * index + f)]); + } + } + } + } + lbm::accessor::Force::set(pdf_field, vel_field, force_field, values, + *bci); + } + } } } @@ -1220,17 +1401,49 @@ class LBWalberlaImpl : public LBWalberlaBase { Utils::Vector3i const &upper_corner) const override { std::vector out; if (auto const ci = get_interval(lower_corner, upper_corner)) { + out = std::vector( + static_cast(stencil_size() * ci->numCells())); + int64_t const stride_y = (ci->max().z() - ci->min().z() + 1u); + int64_t const stride_x = (ci->max().y() - ci->min().y() + 1u) * stride_y; auto const &lattice = get_lattice(); - auto const &block = *(lattice.get_blocks()->begin()); - auto const pdf_field = block.template getData(m_pdf_field_id); - auto const values = lbm::accessor::Population::get(pdf_field, *ci); - assert(++(lattice.get_blocks()->begin()) == lattice.get_blocks()->end()); - assert(values.size() == stencil_size() * ci->numCells()); - if constexpr (std::is_same_v) { - out = std::move(values); - } else { - out = std::vector(values.begin(), values.end()); + for (auto b = lattice.get_blocks()->begin(); + b != lattice.get_blocks()->end(); ++b) { + auto const &block = *b; + auto const local_offset = to_vector3i(block.getAABB().min()); + if (auto const bci = get_block_interval(lower_corner, upper_corner, + local_offset, block)) { + auto const pdf_field = + block.template getData(m_pdf_field_id); + auto const values = lbm::accessor::Population::get(pdf_field, *bci); + assert(values.size() == stencil_size() * bci->numCells()); + int64_t const stride_ly = (bci->max().z() - bci->min().z() + 1u); + int64_t const stride_lx = + (bci->max().y() - bci->min().y() + 1u) * stride_ly; + auto const lower_cell = bci->min(); + auto const upper_cell = bci->max(); + // The field data "values" knows about block-local indices + // In the loop, x,y,z are in block-local coordinates + // It is converted to BlockForest (lattice) coordinates assigned to a + // mpi rank The same applies to other get_slice methods + for (auto x = lower_cell.x(); x <= upper_cell.x(); ++x) { + for (auto y = lower_cell.y(); y <= upper_cell.y(); ++y) { + for (auto z = lower_cell.z(); z <= upper_cell.z(); ++z) { + auto const node = local_offset + Utils::Vector3i{{x, y, z}}; + auto const index = stride_x * (node[0] - lower_corner[0]) + + stride_y * (node[1] - lower_corner[1]) + + node[2] - lower_corner[2]; + auto const local_index = stride_lx * (x - lower_cell.x()) + + stride_ly * (y - lower_cell.y()) + z - + lower_cell.z(); + for (uint_t f = 0u; f < stencil_size(); ++f) { + out[static_cast(stencil_size() * index + f)] = + values[static_cast( + stencil_size() * local_index + f)]; + } + } + } + } + } } } return out; @@ -1240,17 +1453,57 @@ class LBWalberlaImpl : public LBWalberlaBase { Utils::Vector3i const &upper_corner, std::vector const &population) override { if (auto const ci = get_interval(lower_corner, upper_corner)) { - auto const &lattice = get_lattice(); - auto &block = *(lattice.get_blocks()->begin()); - auto pdf_field = block.template getData(m_pdf_field_id); - auto force_field = - block.template getData(m_last_applied_force_field_id); - auto vel_field = block.template getData(m_velocity_field_id); assert(population.size() == stencil_size() * ci->numCells()); - assert(++(lattice.get_blocks()->begin()) == lattice.get_blocks()->end()); - std::vector const values(population.begin(), population.end()); - lbm::accessor::Population::set(pdf_field, vel_field, force_field, values, - *ci); + int64_t const stride_y = (ci->max().z() - ci->min().z() + 1u); + int64_t const stride_x = (ci->max().y() - ci->min().y() + 1u) * stride_y; + auto const &lattice = get_lattice(); + for (auto b = lattice.get_blocks()->begin(); + b != lattice.get_blocks()->end(); ++b) { + auto &block = *b; + auto const local_offset = to_vector3i(block.getAABB().min()); + if (auto const bci = get_block_interval(lower_corner, upper_corner, + local_offset, block)) { + auto pdf_field = block.template getData(m_pdf_field_id); + auto force_field = block.template getData( + m_last_applied_force_field_id); + auto vel_field = + block.template getData(m_velocity_field_id); + std::vector values = std::vector( + static_cast(stencil_size() * bci->numCells())); + int64_t const stride_ly = (bci->max().z() - bci->min().z() + 1u); + int64_t const stride_lx = + (bci->max().y() - bci->min().y() + 1u) * stride_ly; + auto const lower_cell = bci->min(); + auto const upper_cell = bci->max(); + // In the loop, x,y,z are in block-local coordinates + // The field data given in the argument knows about BlockForest + // (lattice) indices from lower_corner to upper_corner It is converted + // to block-local coordinates The same applies to other set_slice + // methods + for (auto x = lower_cell.x(); x <= upper_cell.x(); ++x) { + for (auto y = lower_cell.y(); y <= upper_cell.y(); ++y) { + for (auto z = lower_cell.z(); z <= upper_cell.z(); ++z) { + auto const node = local_offset + Utils::Vector3i{{x, y, z}}; + auto const index = stride_x * (node[0] - lower_corner[0]) + + stride_y * (node[1] - lower_corner[1]) + + node[2] - lower_corner[2]; + auto const local_index = stride_lx * (x - lower_cell.x()) + + stride_ly * (y - lower_cell.y()) + z - + lower_cell.z(); + for (uint_t f = 0u; f < stencil_size(); ++f) { + values[static_cast( + stencil_size() * local_index + f)] = + numeric_cast( + population[static_cast( + stencil_size() * index + f)]); + } + } + } + } + lbm::accessor::Population::set(pdf_field, vel_field, force_field, + values, *bci); + } + } } } @@ -1286,17 +1539,44 @@ class LBWalberlaImpl : public LBWalberlaBase { Utils::Vector3i const &upper_corner) const override { std::vector out; if (auto const ci = get_interval(lower_corner, upper_corner)) { + out = std::vector(ci->numCells()); + int64_t const stride_y = (ci->max().z() - ci->min().z() + 1u); + int64_t const stride_x = (ci->max().y() - ci->min().y() + 1u) * stride_y; auto const &lattice = get_lattice(); - auto const &block = *(lattice.get_blocks()->begin()); - auto const pdf_field = block.template getData(m_pdf_field_id); - auto const values = lbm::accessor::Density::get(pdf_field, *ci); - assert(++(lattice.get_blocks()->begin()) == lattice.get_blocks()->end()); - assert(values.size() == ci->numCells()); - if constexpr (std::is_same_v) { - out = std::move(values); - } else { - out = std::vector(values.begin(), values.end()); + for (auto b = lattice.get_blocks()->begin(); + b != lattice.get_blocks()->end(); ++b) { + auto const &block = *b; + auto const local_offset = to_vector3i(block.getAABB().min()); + if (auto const bci = get_block_interval(lower_corner, upper_corner, + local_offset, block)) { + auto const pdf_field = + block.template getData(m_pdf_field_id); + auto const values = lbm::accessor::Density::get(pdf_field, *bci); + assert(values.size() == bci->numCells()); + int64_t const stride_ly = (bci->max().z() - bci->min().z() + 1u); + int64_t const stride_lx = + (bci->max().y() - bci->min().y() + 1u) * stride_ly; + auto const lower_cell = bci->min(); + auto const upper_cell = bci->max(); + // The field data "values" knows about block-local indices + // In the loop, x,y,z are in block-local coordinates + // It is converted to BlockForest (lattice) coordinates assigned to a + // mpi rank The same applies to other get_slice methods + for (auto x = lower_cell.x(); x <= upper_cell.x(); ++x) { + for (auto y = lower_cell.y(); y <= upper_cell.y(); ++y) { + for (auto z = lower_cell.z(); z <= upper_cell.z(); ++z) { + auto const node = local_offset + Utils::Vector3i{{x, y, z}}; + auto const index = stride_x * (node[0] - lower_corner[0]) + + stride_y * (node[1] - lower_corner[1]) + + node[2] - lower_corner[2]; + auto const local_index = stride_lx * (x - lower_cell.x()) + + stride_ly * (y - lower_cell.y()) + z - + lower_cell.z(); + out[index] = values[local_index]; + } + } + } + } } } return out; @@ -1307,13 +1587,46 @@ class LBWalberlaImpl : public LBWalberlaBase { std::vector const &density) override { m_pending_ghost_comm.set(GhostComm::PDF); if (auto const ci = get_interval(lower_corner, upper_corner)) { - auto const &lattice = get_lattice(); - auto &block = *(lattice.get_blocks()->begin()); - auto pdf_field = block.template getData(m_pdf_field_id); assert(density.size() == ci->numCells()); - assert(++(lattice.get_blocks()->begin()) == lattice.get_blocks()->end()); - std::vector const values(density.begin(), density.end()); - lbm::accessor::Density::set(pdf_field, values, *ci); + int64_t const stride_y = (ci->max().z() - ci->min().z() + 1u); + int64_t const stride_x = (ci->max().y() - ci->min().y() + 1u) * stride_y; + auto const &lattice = get_lattice(); + for (auto b = lattice.get_blocks()->begin(); + b != lattice.get_blocks()->end(); ++b) { + auto &block = *b; + auto const local_offset = to_vector3i(block.getAABB().min()); + if (auto const bci = get_block_interval(lower_corner, upper_corner, + local_offset, block)) { + auto pdf_field = block.template getData(m_pdf_field_id); + std::vector values = + std::vector(bci->numCells()); + int64_t const stride_ly = (bci->max().z() - bci->min().z() + 1u); + int64_t const stride_lx = + (bci->max().y() - bci->min().y() + 1u) * stride_ly; + auto const lower_cell = bci->min(); + auto const upper_cell = bci->max(); + // In the loop, x,y,z are in block-local coordinates + // The field data given in the argument knows about BlockForest + // (lattice) indices from lower_corner to upper_corner It is converted + // to block-local coordinates The same applies to other set_slice + // methods + for (auto x = lower_cell.x(); x <= upper_cell.x(); ++x) { + for (auto y = lower_cell.y(); y <= upper_cell.y(); ++y) { + for (auto z = lower_cell.z(); z <= upper_cell.z(); ++z) { + auto const node = local_offset + Utils::Vector3i{{x, y, z}}; + auto const index = stride_x * (node[0] - lower_corner[0]) + + stride_y * (node[1] - lower_corner[1]) + + node[2] - lower_corner[2]; + auto const local_index = stride_lx * (x - lower_cell.x()) + + stride_ly * (y - lower_cell.y()) + z - + lower_cell.z(); + values[local_index] = numeric_cast(density[index]); + } + } + } + lbm::accessor::Density::set(pdf_field, values, *bci); + } + } } } @@ -1332,7 +1645,10 @@ class LBWalberlaImpl : public LBWalberlaBase { Utils::Vector3d const &velocity) override { on_boundary_add(); m_pending_ghost_comm.set(GhostComm::UBB); - auto bc = get_block_and_cell(get_lattice(), node, true); + auto bc = get_block_and_cell(get_lattice(), node, false); + if (!bc) { + bc = get_block_and_cell(get_lattice(), node, true); + } if (bc) { m_boundary->set_node_value_at_boundary( node, to_vector3(velocity), *bc); @@ -1345,26 +1661,40 @@ class LBWalberlaImpl : public LBWalberlaBase { Utils::Vector3i const &upper_corner) const override { std::vector> out; if (auto const ci = get_interval(lower_corner, upper_corner)) { + out = std::vector>(ci->numCells()); + int64_t const stride_y = (ci->max().z() - ci->min().z() + 1u); + int64_t const stride_x = (ci->max().y() - ci->min().y() + 1u) * stride_y; auto const &lattice = get_lattice(); - auto const local_offset = std::get<0>(lattice.get_local_grid_range()); - auto const lower_cell = ci->min(); - auto const upper_cell = ci->max(); - auto const n_values = ci->numCells(); - out.reserve(n_values); - for (auto x = lower_cell.x(); x <= upper_cell.x(); ++x) { - for (auto y = lower_cell.y(); y <= upper_cell.y(); ++y) { - for (auto z = lower_cell.z(); z <= upper_cell.z(); ++z) { - auto const node = local_offset + Utils::Vector3i{{x, y, z}}; - if (m_boundary->node_is_boundary(node)) { - out.emplace_back( - to_vector3d(m_boundary->get_node_value_at_boundary(node))); - } else { - out.emplace_back(std::nullopt); + for (auto b = lattice.get_blocks()->begin(); + b != lattice.get_blocks()->end(); ++b) { + auto const &block = *b; + auto const local_offset = to_vector3i(block.getAABB().min()); + if (auto const bci = get_block_interval(lower_corner, upper_corner, + local_offset, block)) { + auto const lower_cell = bci->min(); + auto const upper_cell = bci->max(); + // In the loop, x,y,z are in block-local coordinates + // It is converted to BlockForest (lattice) coordinates assigned to a + // mpi rank The same applies to other get_slice methods + for (auto x = lower_cell.x(); x <= upper_cell.x(); ++x) { + for (auto y = lower_cell.y(); y <= upper_cell.y(); ++y) { + for (auto z = lower_cell.z(); z <= upper_cell.z(); ++z) { + auto const node = local_offset + Utils::Vector3i{{x, y, z}}; + auto const index = stride_x * (node[0] - lower_corner[0]) + + stride_y * (node[1] - lower_corner[1]) + + node[2] - lower_corner[2]; + if (m_boundary->node_is_boundary(node)) { + out[index] = + to_vector3d(m_boundary->get_node_value_at_boundary(node)); + } else { + out[index] = std::nullopt; + } + } } } } } - assert(out.size() == n_values); + assert(out.size() == ci->numCells()); } return out; } @@ -1375,25 +1705,41 @@ class LBWalberlaImpl : public LBWalberlaBase { on_boundary_add(); m_pending_ghost_comm.set(GhostComm::UBB); if (auto const ci = get_interval(lower_corner, upper_corner)) { - auto const &lattice = get_lattice(); - auto const local_offset = std::get<0>(lattice.get_local_grid_range()); - auto const lower_cell = ci->min(); - auto const upper_cell = ci->max(); - auto it = velocity.begin(); assert(velocity.size() == ci->numCells()); - for (auto x = lower_cell.x(); x <= upper_cell.x(); ++x) { - for (auto y = lower_cell.y(); y <= upper_cell.y(); ++y) { - for (auto z = lower_cell.z(); z <= upper_cell.z(); ++z) { - auto const node = local_offset + Utils::Vector3i{{x, y, z}}; - auto const bc = get_block_and_cell(lattice, node, false); - auto const &opt = *it; - if (opt) { - m_boundary->set_node_value_at_boundary( - node, to_vector3(*opt), *bc); - } else { - m_boundary->remove_node_from_boundary(node, *bc); + int64_t const stride_y = (ci->max().z() - ci->min().z() + 1u); + int64_t const stride_x = (ci->max().y() - ci->min().y() + 1u) * stride_y; + auto const &lattice = get_lattice(); + for (auto b = lattice.get_blocks()->begin(); + b != lattice.get_blocks()->end(); ++b) { + auto &block = *b; + auto const local_offset = to_vector3i(block.getAABB().min()); + if (auto const bci = get_block_interval(lower_corner, upper_corner, + local_offset, block)) { + auto const lower_cell = bci->min(); + auto const upper_cell = bci->max(); + // In the loop, x,y,z are in block-local coordinates + // The field data given in the argument knows about BlockForest + // (lattice) indices from lower_corner to upper_corner It is converted + // to block-local coordinates The same applies to other set_slice + // methods + for (auto x = lower_cell.x(); x <= upper_cell.x(); ++x) { + for (auto y = lower_cell.y(); y <= upper_cell.y(); ++y) { + for (auto z = lower_cell.z(); z <= upper_cell.z(); ++z) { + auto const node = local_offset + Utils::Vector3i{{x, y, z}}; + auto const index = stride_x * (node[0] - lower_corner[0]) + + stride_y * (node[1] - lower_corner[1]) + + node[2] - lower_corner[2]; + auto const bc = get_block_and_cell(lattice, node, false); + assert(bc->block->getAABB() == block.getAABB()); + auto const &opt = velocity[index]; + if (opt) { + m_boundary->set_node_value_at_boundary( + node, to_vector3(*opt), *bc); + } else { + m_boundary->remove_node_from_boundary(node, *bc); + } + } } - ++it; } } } @@ -1410,7 +1756,10 @@ class LBWalberlaImpl : public LBWalberlaBase { } bool remove_node_from_boundary(Utils::Vector3i const &node) override { - auto bc = get_block_and_cell(get_lattice(), node, true); + auto bc = get_block_and_cell(get_lattice(), node, false); + if (!bc) { + bc = get_block_and_cell(get_lattice(), node, true); + } if (bc) { m_boundary->remove_node_from_boundary(node, *bc); } @@ -1433,21 +1782,35 @@ class LBWalberlaImpl : public LBWalberlaBase { Utils::Vector3i const &upper_corner) const override { std::vector out; if (auto const ci = get_interval(lower_corner, upper_corner)) { + out = std::vector(ci->numCells()); + int64_t const stride_y = (ci->max().z() - ci->min().z() + 1u); + int64_t const stride_x = (ci->max().y() - ci->min().y() + 1u) * stride_y; auto const &lattice = get_lattice(); - auto const local_offset = std::get<0>(lattice.get_local_grid_range()); - auto const lower_cell = ci->min(); - auto const upper_cell = ci->max(); - auto const n_values = ci->numCells(); - out.reserve(n_values); - for (auto x = lower_cell.x(); x <= upper_cell.x(); ++x) { - for (auto y = lower_cell.y(); y <= upper_cell.y(); ++y) { - for (auto z = lower_cell.z(); z <= upper_cell.z(); ++z) { - auto const node = local_offset + Utils::Vector3i{x, y, z}; - out.emplace_back(m_boundary->node_is_boundary(node)); + for (auto b = lattice.get_blocks()->begin(); + b != lattice.get_blocks()->end(); ++b) { + auto const &block = *b; + auto const local_offset = to_vector3i(block.getAABB().min()); + if (auto const bci = get_block_interval(lower_corner, upper_corner, + local_offset, block)) { + auto const lower_cell = bci->min(); + auto const upper_cell = bci->max(); + // In the loop, x,y,z are in block-local coordinates + // It is converted to BlockForest (lattice) coordinates assigned to a + // mpi rank The same applies to other get_slice methods + for (auto x = lower_cell.x(); x <= upper_cell.x(); ++x) { + for (auto y = lower_cell.y(); y <= upper_cell.y(); ++y) { + for (auto z = lower_cell.z(); z <= upper_cell.z(); ++z) { + auto const node = local_offset + Utils::Vector3i{{x, y, z}}; + auto const index = stride_x * (node[0] - lower_corner[0]) + + stride_y * (node[1] - lower_corner[1]) + + node[2] - lower_corner[2]; + out[index] = m_boundary->node_is_boundary(node); + } + } } } } - assert(out.size() == n_values); + assert(out.size() == ci->numCells()); } return out; } @@ -1501,20 +1864,49 @@ class LBWalberlaImpl : public LBWalberlaBase { Utils::Vector3i const &upper_corner) const override { std::vector out; if (auto const ci = get_interval(lower_corner, upper_corner)) { + out = std::vector(static_cast(9u * ci->numCells())); + int64_t const stride_y = (ci->max().z() - ci->min().z() + 1u); + int64_t const stride_x = (ci->max().y() - ci->min().y() + 1u) * stride_y; auto const &lattice = get_lattice(); - auto const &block = *(lattice.get_blocks()->begin()); - auto const pdf_field = block.template getData(m_pdf_field_id); - auto values = lbm::accessor::PressureTensor::get(pdf_field, *ci); - assert(++(lattice.get_blocks()->begin()) == lattice.get_blocks()->end()); - assert(values.size() == 9u * ci->numCells()); - for (auto it = values.begin(); it != values.end(); std::advance(it, 9l)) { - pressure_tensor_correction(std::span(it, 9ul)); - } - if constexpr (std::is_same_v) { - out = std::move(values); - } else { - out = std::vector(values.begin(), values.end()); + for (auto b = lattice.get_blocks()->begin(); + b != lattice.get_blocks()->end(); ++b) { + auto const &block = *b; + auto const local_offset = to_vector3i(block.getAABB().min()); + if (auto const bci = get_block_interval(lower_corner, upper_corner, + local_offset, block)) { + auto const pdf_field = + block.template getData(m_pdf_field_id); + auto values = lbm::accessor::PressureTensor::get(pdf_field, *bci); + assert(values.size() == 9u * bci->numCells()); + int64_t const stride_ly = (bci->max().z() - bci->min().z() + 1u); + int64_t const stride_lx = + (bci->max().y() - bci->min().y() + 1u) * stride_ly; + auto const lower_cell = bci->min(); + auto const upper_cell = bci->max(); + // The field data "values" knows about block-local indices + // In the loop, x,y,z are in block-local coordinates + // It is converted to BlockForest (lattice) coordinates assigned to a + // mpi rank The same applies to other get_slice methods + for (auto x = lower_cell.x(); x <= upper_cell.x(); ++x) { + for (auto y = lower_cell.y(); y <= upper_cell.y(); ++y) { + for (auto z = lower_cell.z(); z <= upper_cell.z(); ++z) { + auto const node = local_offset + Utils::Vector3i{{x, y, z}}; + auto const index = stride_x * (node[0] - lower_corner[0]) + + stride_y * (node[1] - lower_corner[1]) + + node[2] - lower_corner[2]; + auto const local_index = stride_lx * (x - lower_cell.x()) + + stride_ly * (y - lower_cell.y()) + z - + lower_cell.z(); + pressure_tensor_correction(std::span( + &values[static_cast(9u * local_index)], 9ul)); + for (uint_t f = 0u; f < 9u; ++f) { + out[static_cast(9u * index + f)] = + values[static_cast(9u * local_index + f)]; + } + } + } + } + } } } return out; diff --git a/src/walberla_bridge/src/utils/boundary.hpp b/src/walberla_bridge/src/utils/boundary.hpp index 719c028aa4..dbd2a9ab25 100644 --- a/src/walberla_bridge/src/utils/boundary.hpp +++ b/src/walberla_bridge/src/utils/boundary.hpp @@ -85,15 +85,15 @@ void set_boundary_from_grid(BoundaryModel &boundary, auto const &conv = es2walberla; auto const grid_size = lattice.get_grid_dimensions(); - auto const offset = lattice.get_local_grid_range().first; auto const gl = static_cast(lattice.get_ghost_layers()); assert(raster_flat.size() == static_cast(Utils::product(grid_size))); auto const n_y = static_cast(grid_size[1]); auto const n_z = static_cast(grid_size[2]); - for (auto const &block : *lattice.get_blocks()) { + for (auto &block : *lattice.get_blocks()) { auto const [size_i, size_j, size_k] = boundary.block_dims(block); + auto const offset = to_vector3i(block.getAABB().min()); // Get field data which knows about the indices // In the loop, i,j,k are in block-local coordinates for (int i = -gl; i < size_i + gl; ++i) { @@ -106,8 +106,9 @@ void set_boundary_from_grid(BoundaryModel &boundary, static_cast(idx[2]); if (raster_flat[index]) { auto const &value = data_flat[index]; - auto const bc = get_block_and_cell(lattice, node, true); - assert(bc.has_value()); + std::optional bc; + bc->block = █ + bc->cell = Cell(i, j, k); boundary.set_node_value_at_boundary(node, conv(value), *bc); } } diff --git a/src/walberla_bridge/src/utils/types_conversion.hpp b/src/walberla_bridge/src/utils/types_conversion.hpp index 6f196cb57a..72968a25de 100644 --- a/src/walberla_bridge/src/utils/types_conversion.hpp +++ b/src/walberla_bridge/src/utils/types_conversion.hpp @@ -68,6 +68,10 @@ inline Utils::VectorXd<9> to_vector9d(Matrix3 const &m) { double_c(m[3]), double_c(m[4]), double_c(m[5]), double_c(m[6]), double_c(m[7]), double_c(m[8])}; } +inline Utils::Vector3i to_vector3i(Vector3 const &v) { + return Utils::Vector3i{ + {static_cast(v[0]), static_cast(v[1]), static_cast(v[2])}}; +} template void interpolate_bspline_at_pos(Utils::Vector3d const &pos, Function const &f) { diff --git a/src/walberla_bridge/tests/CMakeLists.txt b/src/walberla_bridge/tests/CMakeLists.txt index 7b3a85ab1b..c5d7805960 100644 --- a/src/walberla_bridge/tests/CMakeLists.txt +++ b/src/walberla_bridge/tests/CMakeLists.txt @@ -30,9 +30,12 @@ function(ESPRESSO_ADD_TEST) espresso::walberla_codegen_cuda) endif() if(${TEST_SRC} MATCHES ".*\.cu$") - target_link_libraries(${TEST_NAME} PRIVATE espresso::walberla::cuda_flags) + target_link_libraries( + ${TEST_NAME} PRIVATE espresso::walberla::cuda_flags + espresso::walberla_cuda espresso::config) else() - target_link_libraries(${TEST_NAME} PRIVATE espresso::walberla::cpp_flags) + target_link_libraries(${TEST_NAME} PRIVATE espresso::walberla::cpp_flags + espresso::config) endif() set_target_properties(${TEST_NAME} PROPERTIES CXX_CLANG_TIDY "") target_include_directories(${TEST_NAME} PRIVATE ${WALBERLA_INCLUDE_DIRS} @@ -50,6 +53,7 @@ espresso_add_test(SRC LBWalberlaImpl_unit_tests.cpp DEPENDS Boost::mpi NUM_PROC espresso_add_test(SRC LBWalberlaImpl_bspline_tests.cpp DEPENDS Boost::mpi NUM_PROC 2) espresso_add_test(SRC LBWalberlaImpl_flow_tests.cpp DEPENDS Boost::mpi) +espresso_configure_walberla_target(espresso_walberla_codegen) espresso_add_test(SRC LBWalberlaImpl_lees_edwards_tests.cpp DEPENDS Boost::mpi) espresso_add_test(SRC EKinWalberlaImpl_unit_tests.cpp DEPENDS Boost::mpi NUM_PROC 2) diff --git a/src/walberla_bridge/tests/EKinWalberlaImpl_unit_tests.cpp b/src/walberla_bridge/tests/EKinWalberlaImpl_unit_tests.cpp index 210b5edb57..3e086d7c63 100644 --- a/src/walberla_bridge/tests/EKinWalberlaImpl_unit_tests.cpp +++ b/src/walberla_bridge/tests/EKinWalberlaImpl_unit_tests.cpp @@ -570,8 +570,8 @@ int main(int argc, char **argv) { params.ext_efield = Vector3d{0.01, 0.02, 0.03}; params.grid_dimensions = Vector3i{12, 12, 18}; params.box_dimensions = Vector3d{12, 12, 18}; - params.lattice = - std::make_shared(params.grid_dimensions, mpi_shape, 1u); + params.lattice = std::make_shared(params.grid_dimensions, + mpi_shape, mpi_shape, 1u); auto const res = boost::unit_test::unit_test_main(init_unit_test, argc, argv); MPI_Finalize(); diff --git a/src/walberla_bridge/tests/LBWalberlaImpl_bspline_tests.cpp b/src/walberla_bridge/tests/LBWalberlaImpl_bspline_tests.cpp index a0123cbe67..3be29c54d1 100644 --- a/src/walberla_bridge/tests/LBWalberlaImpl_bspline_tests.cpp +++ b/src/walberla_bridge/tests/LBWalberlaImpl_bspline_tests.cpp @@ -156,8 +156,8 @@ int main(int argc, char **argv) { params.density = 1.4; params.grid_dimensions = Vector3i{12, 6, 9}; params.box_dimensions = Vector3d{12, 6, 9}; - params.lattice = - std::make_shared(params.grid_dimensions, mpi_shape, 1u); + params.lattice = std::make_shared(params.grid_dimensions, + mpi_shape, mpi_shape, 1u); auto const res = boost::unit_test::unit_test_main(init_unit_test, argc, argv); MPI_Finalize(); diff --git a/src/walberla_bridge/tests/LBWalberlaImpl_field_accessors_tests.cu b/src/walberla_bridge/tests/LBWalberlaImpl_field_accessors_tests.cu index 30c98ab2e7..f02c76c188 100644 --- a/src/walberla_bridge/tests/LBWalberlaImpl_field_accessors_tests.cu +++ b/src/walberla_bridge/tests/LBWalberlaImpl_field_accessors_tests.cu @@ -106,11 +106,13 @@ boost::test_tools::predicate_result almost_equal(R const &val, R const &ref, for (auto i = 0ul; i < val.size(); ++i) { if (auto const diff = std::abs(val[i] - ref[i]); diff > atol) { res = false; - res.message() << "val{" << print_first_n(val) << "} and " << "ref{" - << print_first_n(ref) << "} mismatch: " << "val[" << i - << "]{" << val[i] << "} != " << "ref[" << i << "]{" - << ref[i] << "} " << "(difference{" << diff << "} > delta{" - << atol << "})"; + // clang-format off + res.message() << "val{" << print_first_n(val) << "} and " + << "ref{" << print_first_n(ref) << "} mismatch: " + << "val[" << i << "]{" << val[i] << "} != " + << "ref[" << i << "]{" << ref[i] << "} " + << "(difference{" << diff << "} > delta{" << atol << "})"; + // clang-format on break; } } @@ -156,7 +158,8 @@ template struct Fixture { auto const grid_dim = Utils::Vector3i::broadcast(4); auto const viscosity = FT(1.5); auto const density = FT(0.9); - lattice = std::make_shared<::LatticeWalberla>(grid_dim, mpi_shape, 1u); + lattice = + std::make_shared<::LatticeWalberla>(grid_dim, mpi_shape, mpi_shape, 1u); lbfluid = std::make_shared>( lattice, viscosity, density); } diff --git a/src/walberla_bridge/tests/LBWalberlaImpl_flow_tests.cpp b/src/walberla_bridge/tests/LBWalberlaImpl_flow_tests.cpp index 36526ee3ce..96049bff27 100644 --- a/src/walberla_bridge/tests/LBWalberlaImpl_flow_tests.cpp +++ b/src/walberla_bridge/tests/LBWalberlaImpl_flow_tests.cpp @@ -167,8 +167,8 @@ int main(int argc, char **argv) { params.density = 1.4; params.grid_dimensions = Vector3i{12, 12, 18}; params.box_dimensions = Vector3d{6, 6, 9}; - params.lattice = - std::make_shared(params.grid_dimensions, mpi_shape, 1u); + params.lattice = std::make_shared(params.grid_dimensions, + mpi_shape, mpi_shape, 1u); auto const res = boost::unit_test::unit_test_main(init_unit_test, argc, argv); MPI_Finalize(); diff --git a/src/walberla_bridge/tests/LBWalberlaImpl_lees_edwards_tests.cpp b/src/walberla_bridge/tests/LBWalberlaImpl_lees_edwards_tests.cpp index 8e66ed037e..44667b4fa0 100644 --- a/src/walberla_bridge/tests/LBWalberlaImpl_lees_edwards_tests.cpp +++ b/src/walberla_bridge/tests/LBWalberlaImpl_lees_edwards_tests.cpp @@ -71,8 +71,8 @@ BOOST_AUTO_TEST_CASE(test_transient_shear) { using LBImplementation = walberla::LBWalberlaImpl; double density = 1; double viscosity = 1. / 7.; - auto lattice = - std::make_shared(Vector3i{8, 64, 8}, mpi_shape, 1); + auto lattice = std::make_shared(Vector3i{8, 64, 8}, + mpi_shape, mpi_shape, 1); auto lb = LBImplementation(lattice, viscosity, density); auto le_pack = std::make_unique( 0u, 1u, []() { return 0.0; }, [=]() { return v0; }); @@ -96,8 +96,8 @@ static auto setup_lb_with_offset(double offset) { using LBImplementation = walberla::LBWalberlaImpl; auto density = 1.; auto viscosity = 1. / 7.; - auto lattice = - std::make_shared(Vector3i{10, 10, 10}, mpi_shape, 1); + auto lattice = std::make_shared(Vector3i{10, 10, 10}, + mpi_shape, mpi_shape, 1); auto lb = std::make_shared(lattice, viscosity, density); auto le_pack = std::make_unique( 0u, 1u, [=]() { return offset; }, []() { return 0.0; }); diff --git a/src/walberla_bridge/tests/LBWalberlaImpl_statistical_tests.cpp b/src/walberla_bridge/tests/LBWalberlaImpl_statistical_tests.cpp index 9732bc8a71..30e4b4b695 100644 --- a/src/walberla_bridge/tests/LBWalberlaImpl_statistical_tests.cpp +++ b/src/walberla_bridge/tests/LBWalberlaImpl_statistical_tests.cpp @@ -132,8 +132,8 @@ int main(int argc, char **argv) { params.density = 1.4; params.grid_dimensions = Vector3i{12, 12, 18}; params.box_dimensions = Vector3d{6, 6, 9}; - params.lattice = - std::make_shared(params.grid_dimensions, mpi_shape, 1u); + params.lattice = std::make_shared(params.grid_dimensions, + mpi_shape, mpi_shape, 1u); auto const res = boost::unit_test::unit_test_main(init_unit_test, argc, argv); MPI_Finalize(); diff --git a/src/walberla_bridge/tests/LBWalberlaImpl_unit_tests.cpp b/src/walberla_bridge/tests/LBWalberlaImpl_unit_tests.cpp index c3352fcbed..c473a4fc78 100644 --- a/src/walberla_bridge/tests/LBWalberlaImpl_unit_tests.cpp +++ b/src/walberla_bridge/tests/LBWalberlaImpl_unit_tests.cpp @@ -587,8 +587,8 @@ BOOST_DATA_TEST_CASE(vtk_exceptions, BOOST_AUTO_TEST_CASE(lb_exceptions) { using LB = walberla::LBWalberlaImpl; - auto lb_lattice_without_ghosts = - std::make_shared(params.grid_dimensions, mpi_shape, 0u); + auto lb_lattice_without_ghosts = std::make_shared( + params.grid_dimensions, mpi_shape, mpi_shape, 0u); BOOST_CHECK_THROW(LB(lb_lattice_without_ghosts, 1., 1.), std::runtime_error); } @@ -630,8 +630,8 @@ int main(int argc, char **argv) { params.density = 1.4; params.grid_dimensions = Vector3i{12, 12, 18}; params.box_dimensions = Vector3d{12, 12, 18}; - params.lattice = - std::make_shared(params.grid_dimensions, mpi_shape, 1u); + params.lattice = std::make_shared(params.grid_dimensions, + mpi_shape, mpi_shape, 1u); auto const res = boost::unit_test::unit_test_main(init_unit_test, argc, argv); MPI_Finalize(); diff --git a/src/walberla_bridge/tests/LatticeWalberla_unit_tests.cpp b/src/walberla_bridge/tests/LatticeWalberla_unit_tests.cpp index 977586ad89..8385981e93 100644 --- a/src/walberla_bridge/tests/LatticeWalberla_unit_tests.cpp +++ b/src/walberla_bridge/tests/LatticeWalberla_unit_tests.cpp @@ -52,8 +52,8 @@ static LatticeTestParameters params; // populated in main() static Vector3i mpi_shape; // populated in main BOOST_DATA_TEST_CASE(domain_and_halo, bdata::xrange(3u), n_ghost_layers) { - auto const lattice = - LatticeWalberla(params.grid_dimensions, mpi_shape, n_ghost_layers); + auto const lattice = LatticeWalberla(params.grid_dimensions, mpi_shape, + mpi_shape, n_ghost_layers); auto const [my_left, my_right] = lattice.get_local_domain(); for (auto const &n : all_nodes_incl_ghosts(lattice)) { @@ -104,7 +104,7 @@ BOOST_AUTO_TEST_CASE(exceptions) { auto grid_dims = Vector3i::broadcast(1); grid_dims[i] = 3; node_grid[i] = 2; - BOOST_CHECK_THROW(LatticeWalberla(grid_dims, node_grid, 1u), + BOOST_CHECK_THROW(LatticeWalberla(grid_dims, node_grid, node_grid, 1u), std::runtime_error); } } diff --git a/testsuite/python/lb.py b/testsuite/python/lb.py index a16ff6c5a3..8fad535b3b 100644 --- a/testsuite/python/lb.py +++ b/testsuite/python/lb.py @@ -517,7 +517,11 @@ def test_agrid_rounding(self): phi = 0.05 lj_sig = 1.0 l = (n_part * 4. / 3. * np.pi * (lj_sig / 2.)**3 / phi)**(1. / 3.) - system.box_l = [l] * 3 * np.array(system.cell_system.node_grid) + if hasattr(self, 'blocks_per_mpi_rank'): + system.box_l = [ + l] * 3 * np.array(system.cell_system.node_grid) * np.array(self.blocks_per_mpi_rank) + else: + system.box_l = [l] * 3 * np.array(system.cell_system.node_grid) lbf = self.lb_class(agrid=l / 31, density=1, kinematic_viscosity=1, kT=0, tau=system.time_step, **self.lb_params) system.lb = lbf @@ -825,6 +829,25 @@ def params_with_tau(tau): np.testing.assert_allclose(v1, v2, rtol=1e-2) np.testing.assert_allclose(f1, f2, rtol=1e-2) + def test_raise_block_grid_mismatch(self): + if not hasattr(self, 'blocks_per_mpi_rank'): + self.skipTest( + "Skipping test: this test is only for the systme allocating multiple blocks to one mpi rank") + with self.assertRaisesRegex(RuntimeError, "Lattice grid dimensions and block grid are not compatible"): + self.lb_class( + **self.params, single_precision=self.lb_params["single_precision"], blocks_per_mpi_rank=[11, 1, 1]) + + @utx.skipIfMissingGPU() + def test_raise_blocks_for_GPU(self): + if self.lb_class != espressomd.lb.LBFluidWalberlaGPU: + self.skipTest( + "Skipping test: this test is only for LBFluidWalberlaGPU") + blocks_per_mpi_rank = [2, 2, 2] + self.lb_params = {"single_precision": False, + "blocks_per_mpi_rank": blocks_per_mpi_rank} + with self.assertRaisesRegex(RuntimeError, "GPU architecture PROHIBITED allocating many blocks to 1 CPU"): + self.lb_class(**self.params, **self.lb_params) + @utx.skipIfMissingFeatures("WALBERLA") class LBTestWalberlaDoublePrecisionCPU(LBTest, ut.TestCase): @@ -864,5 +887,27 @@ class LBTestWalberlaSinglePrecisionGPU(LBTest, ut.TestCase): rtol = 2e-4 +@utx.skipIfMissingFeatures("WALBERLA") +class LBTestWalberlaDoublePrecisionBlocksCPU(LBTest, ut.TestCase): + lb_class = espressomd.lb.LBFluidWalberla + lb_lattice_class = espressomd.lb.LatticeWalberla + blocks_per_mpi_rank = [2, 2, 2] + lb_params = {"single_precision": False, + "blocks_per_mpi_rank": blocks_per_mpi_rank} + atol = 1e-10 + rtol = 1e-7 + + +@utx.skipIfMissingFeatures("WALBERLA") +class LBTestWalberlaSinglePrecisionBlocksCPU(LBTest, ut.TestCase): + lb_class = espressomd.lb.LBFluidWalberla + lb_lattice_class = espressomd.lb.LatticeWalberla + blocks_per_mpi_rank = [2, 2, 2] + lb_params = {"single_precision": True, + "blocks_per_mpi_rank": blocks_per_mpi_rank} + atol = 1e-6 + rtol = 2e-4 + + if __name__ == "__main__": ut.main() diff --git a/testsuite/python/lb_boundary.py b/testsuite/python/lb_boundary.py index 6ad5a6c0ad..7d46007335 100644 --- a/testsuite/python/lb_boundary.py +++ b/testsuite/python/lb_boundary.py @@ -125,5 +125,11 @@ class LBBoundariesWalberlaSinglePrecisionGPU(LBBoundariesBase, ut.TestCase): lb_params = {"single_precision": True} +@utx.skipIfMissingFeatures(["WALBERLA"]) +class LBBoundariesWalberlaDoublePrecisionCPU(LBBoundariesBase, ut.TestCase): + lb_class = espressomd.lb.LBFluidWalberla + lb_params = {"single_precision": False, "blocks_per_mpi_rank": [2, 1, 1]} + + if __name__ == "__main__": ut.main() diff --git a/testsuite/python/lb_boundary_ghost_layer.py b/testsuite/python/lb_boundary_ghost_layer.py index 84ce9180f0..46bcb36d3f 100644 --- a/testsuite/python/lb_boundary_ghost_layer.py +++ b/testsuite/python/lb_boundary_ghost_layer.py @@ -117,5 +117,12 @@ class LBPoiseuilleWalberlaDoublePrecisionGPU(TestCommon, ut.TestCase): lb_params = {"single_precision": False} +@utx.skipIfMissingFeatures(["WALBERLA"]) +# @ut.skipIf(TestCommon.n_nodes != 2, "only runs for 2 MPI ranks") +class LBPoiseuilleWalberlaDoublePrecisionBlocksCPU(TestCommon, ut.TestCase): + lb_class = espressomd.lb.LBFluidWalberla + lb_params = {"single_precision": False, "blocks_per_mpi_rank": [2, 1, 1]} + + if __name__ == "__main__": ut.main() diff --git a/testsuite/python/lb_boundary_volume_force.py b/testsuite/python/lb_boundary_volume_force.py index 76beda388f..9f402839ba 100644 --- a/testsuite/python/lb_boundary_volume_force.py +++ b/testsuite/python/lb_boundary_volume_force.py @@ -111,5 +111,11 @@ class LBBoundaryForceWalberlaSinglePrecision( lb_params = {"single_precision": True} +@utx.skipIfMissingFeatures(["WALBERLA"]) +class LBBoundaryForceWalberlaBlocks(LBBoundaryForceCommon, ut.TestCase): + lb_class = espressomd.lb.LBFluidWalberla + lb_params = {"single_precision": False, "blocks_per_mpi_rank": [2, 2, 2]} + + if __name__ == '__main__': ut.main() diff --git a/testsuite/python/lb_circular_couette.py b/testsuite/python/lb_circular_couette.py index f16b6bbf24..2c9b1a1ad7 100644 --- a/testsuite/python/lb_circular_couette.py +++ b/testsuite/python/lb_circular_couette.py @@ -172,5 +172,17 @@ class LBCircularCouetteWalberlaSinglePrecisionGPU(LBCouetteTest, ut.TestCase): lb_params = {"single_precision": True} +@utx.skipIfMissingFeatures(["WALBERLA"]) +class LBCircularCouetteWalberlaDoublePRecisionBlocksCPU(LBCouetteTest, ut.TestCase): + lb_class = espressomd.lb.LBFluidWalberla + lb_params = {"single_precision": False, "blocks_per_mpi_rank": [2, 2, 2]} + + +@utx.skipIfMissingFeatures(["WALBERLA"]) +class LBCircularCouetteWalberlaSinglePRecisionBlocksCPU(LBCouetteTest, ut.TestCase): + lb_class = espressomd.lb.LBFluidWalberla + lb_params = {"single_precision": True, "blocks_per_mpi_rank": [2, 2, 2]} + + if __name__ == "__main__": ut.main() diff --git a/testsuite/python/lb_couette_xy.py b/testsuite/python/lb_couette_xy.py new file mode 100644 index 0000000000..742f03ff2c --- /dev/null +++ b/testsuite/python/lb_couette_xy.py @@ -0,0 +1,147 @@ +# +# Copyright (C) 2021-2023 The ESPResSo project +# +# This file is part of ESPResSo. +# +# ESPResSo is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# ESPResSo is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +# + +import espressomd.lb +import espressomd.lees_edwards + +import unittest as ut +import unittest_decorators as utx +import numpy as np + + +def analytical(x, t, nu, v, h, k_max): + """ + Analytical solution with Fourier series of the Navier-Stokes equation. + + Parameters + ---------- + x : :obj:`float` + Height within the channel + t : :obj:`float` + Time since the start up of the shear flow + nu: :obj:`float` + Kinematic kinematic_viscosity + v: :obj:`float` + Shearing velocity + h : :obj:`float` + Distance between shear planes + k_max : :obj:`int` + Upper limit of sums for sinus series + + """ + u = x / h - 0.5 + for k in np.arange(1, k_max + 1): + wave = 2 * np.pi * k / h + u += np.exp(-nu * wave ** 2 * t) * np.sin(wave * x) / (np.pi * k) + return v * u + + +LB_PARAMS = {'agrid': 1., + 'density': 1., + 'kinematic_viscosity': 1. / 6., + 'tau': 1.} + +system = espressomd.System(box_l=[32, 32, 32]) +system.time_step = LB_PARAMS['tau'] +system.cell_system.skin = 0.1 +system.cell_system.set_n_square() +n_nodes = np.prod(system.cell_system.node_grid) + +coord_indexes = {"x": 0, "y": 1, "z": 2} + + +class LBCouetteFlowCommon: + + def setUp(self): + system.time = 0. + + # def tearDown(self): + system.lb = None + system.lees_edwards.protocol = None + + def check_profile(self, u_getter, **kwargs): + # carefully select the domain decomposition + assert kwargs["shear_plane_normal"] == "y" + assert system.cell_system.node_grid[coord_indexes[kwargs["shear_direction"]]] == 1 + h = system.box_l[coord_indexes[kwargs["shear_plane_normal"]]] + shear_velocity = 0.05 + k_max = 100 + + protocol = espressomd.lees_edwards.LinearShear( + shear_velocity=shear_velocity, initial_pos_offset=0., time_0=0.) + system.lees_edwards.set_boundary_conditions( + protocol=protocol, **kwargs) + agrid = LB_PARAMS["agrid"] + + lbf = self.lb_class(**LB_PARAMS, **self.lb_params) + system.lb = lbf + + # warmup + system.integrator.run(8) + + # sampling + for i in range(4, 9): + steps = (2**i - 2**(i - 1)) + system.integrator.run(steps) + pos = np.array(range(int(h))) + agrid / 2. + u_ref = analytical(pos, system.time - 1., lbf.kinematic_viscosity, + shear_velocity, h, k_max) + u_lbf = np.copy(u_getter(lbf).reshape([-1])) + np.testing.assert_allclose(u_lbf, u_ref, atol=1e-4, rtol=0.) + + @ut.skipIf(n_nodes == 1, "test is designed to run on multiple MPI ranks") + @ut.expectedFailure + def test_profile_xy_divided_shear_direction(self): + system.cell_system.node_grid = [n_nodes, 1, 1] + self.check_profile(lambda lbf: lbf[5, :, 0].velocity[:, 0], + shear_direction="x", shear_plane_normal="y") + + @ut.skip("TODO: LB+Lees Edwards doesnt'work for certian node grids") # TODO + @ut.skipIf(n_nodes == 1, "test is designed to run on multiple MPI ranks") + def test_profile_xy_divided_normal_direction(self): + system.cell_system.node_grid = [1, n_nodes, 1] + self.check_profile(lambda lbf: lbf[5, :, 0].velocity[:, 0], + shear_direction="x", shear_plane_normal="y") + + def test_profile_xy_divided_z_direction(self): + system.cell_system.node_grid = [1, 1, n_nodes] + self.check_profile(lambda lbf: lbf[5, :, 0].velocity[:, 0], + shear_direction="x", shear_plane_normal="y") + + +@utx.skipIfMissingFeatures(["WALBERLA"]) +class LBCouetteFlowWalberla(LBCouetteFlowCommon, ut.TestCase): + + """Test for the Walberla implementation of the LB in double-precision.""" + + lb_class = espressomd.lb.LBFluidWalberla + lb_params = {"single_precision": False} + + +@utx.skipIfMissingFeatures(["WALBERLA"]) +class LBCouetteFlowWalberlaSinglePrecision(LBCouetteFlowCommon, ut.TestCase): + + """Test for the Walberla implementation of the LB in single-precision.""" + + lb_class = espressomd.lb.LBFluidWalberla + lb_params = {"single_precision": True} + + +if __name__ == '__main__': + ut.main() diff --git a/testsuite/python/lb_interpolation.py b/testsuite/python/lb_interpolation.py index 3a2bd9a491..f67fcdf268 100644 --- a/testsuite/python/lb_interpolation.py +++ b/testsuite/python/lb_interpolation.py @@ -55,6 +55,7 @@ class LBInterpolation: system = espressomd.System(box_l=[BOX_L] * 3) system.cell_system.skin = 0.4 * AGRID system.time_step = TIME_STEP + system.periodicity = [False, True, True] def setUp(self): self.lbf = self.lb_class(**LB_PARAMETERS, **self.lb_params) @@ -187,5 +188,17 @@ class LBInterpolationWalberlaSinglePrecisionGPU(LBInterpolation, ut.TestCase): lb_params = {"single_precision": True} +@utx.skipIfMissingFeatures(["WALBERLA"]) +class LBInterpolationWalberlaDoublePrecisionBlocksCPU(LBInterpolation, ut.TestCase): + lb_class = espressomd.lb.LBFluidWalberla + lb_params = {"single_precision": False, "blocks_per_mpi_rank": [2, 2, 2]} + + +@utx.skipIfMissingFeatures(["WALBERLA"]) +class LBInterpolationWalberlaSinglePrecisionBlocksCPU(LBInterpolation, ut.TestCase): + lb_class = espressomd.lb.LBFluidWalberla + lb_params = {"single_precision": True, "blocks_per_mpi_rank": [2, 2, 2]} + + if __name__ == "__main__": ut.main() diff --git a/testsuite/python/lb_mass_conservation.py b/testsuite/python/lb_mass_conservation.py index fcbbab66b6..15d4be7f29 100644 --- a/testsuite/python/lb_mass_conservation.py +++ b/testsuite/python/lb_mass_conservation.py @@ -41,7 +41,7 @@ class LBMassCommon: """Check the lattice-Boltzmann mass conservation.""" - system = espressomd.System(box_l=[3.0, 3.0, 3.0]) + system = espressomd.System(box_l=[6.0, 6.0, 6.0]) system.time_step = TIME_STEP system.cell_system.skin = 0.4 * AGRID @@ -96,5 +96,14 @@ class LBMassWalberlaSinglePrecisionGPU(LBMassCommon, ut.TestCase): atol = 5e-7 +@utx.skipIfMissingFeatures(["WALBERLA"]) +class LBMassWalberlaDoublePrecisionBlocksCPU(LBMassCommon, ut.TestCase): + lb_class = espressomd.lb.LBFluidWalberla + blocks_per_mpi_rank = [2, 2, 2] + lb_params = {"single_precision": False, + "blocks_per_mpi_rank": blocks_per_mpi_rank} + atol = 1e-10 + + if __name__ == '__main__': ut.main() diff --git a/testsuite/python/lb_momentum_conservation.py b/testsuite/python/lb_momentum_conservation.py index 0d72f83ec5..f64c0543a5 100644 --- a/testsuite/python/lb_momentum_conservation.py +++ b/testsuite/python/lb_momentum_conservation.py @@ -218,5 +218,19 @@ def set_cellsystem(self): self.system.cell_system.set_n_square() +@ut.skipIf(TestLBMomentumConservation.n_nodes != 1, + "LB with regular decomposition already tested with 2 MPI ranks") +@utx.skipIfMissingFeatures(["WALBERLA", "EXTERNAL_FORCES"]) +class TestLBMomentumConservationRegularDoublePrecisionWalberlaBlocksCPU( + TestLBMomentumConservation, ut.TestCase): + + lb_class = espressomd.lb.LBFluidWalberla + lb_params = {"single_precision": False, "blocks_per_mpi_rank": [2, 2, 2]} + atol = 1.2e-4 + + def set_cellsystem(self): + self.system.cell_system.set_regular_decomposition() + + if __name__ == "__main__": ut.main() diff --git a/testsuite/python/lb_planar_couette.py b/testsuite/python/lb_planar_couette.py index 7295128b86..6edda76921 100644 --- a/testsuite/python/lb_planar_couette.py +++ b/testsuite/python/lb_planar_couette.py @@ -116,6 +116,9 @@ def test_profile_xy(self): @ut.skipIf(n_nodes > 1, "Skipping test: only runs for n_nodes == 1") def test_profile_zy(self): + if hasattr(self, 'blocks_per_mpi_rank'): + self.skipTest( + "Skipping test: only runs for blocks_per_mpi_rank=[1,1,1]") self.check_profile(lambda lbf: lbf[0, :, 5].velocity[:, 0], shear_direction="z", shear_plane_normal="y") @@ -142,5 +145,18 @@ class LBCouetteFlowWalberlaSinglePrecision(LBCouetteFlowCommon, ut.TestCase): lb_params = {"single_precision": True} +@utx.skipIfMissingFeatures(["WALBERLA"]) +@ut.skipIf(LBCouetteFlowCommon.n_nodes > 2, + "Skipping test: only runs for n_nodes <= 2") +class LBCouetteFlowWalberlaBlocks(LBCouetteFlowCommon, ut.TestCase): + + """Test for the Walberla implementation of the LB in double-precision.""" + + lb_class = espressomd.lb.LBFluidWalberla + blocks_per_mpi_rank = [2, 1, 1] + lb_params = {"single_precision": False, + "blocks_per_mpi_rank": blocks_per_mpi_rank} + + if __name__ == '__main__': ut.main() diff --git a/testsuite/python/lb_poiseuille.py b/testsuite/python/lb_poiseuille.py index 9a4178d7af..4b259653e7 100644 --- a/testsuite/python/lb_poiseuille.py +++ b/testsuite/python/lb_poiseuille.py @@ -117,6 +117,7 @@ def test_profile(self): EXT_FORCE, KINEMATIC_VISC * DENS) np.testing.assert_allclose(v_measured, v_expected, rtol=5E-5) + # np.testing.assert_allclose(v_measured, v_expected, rtol=5E-5, atol=8E-4) @utx.skipIfMissingFeatures(["WALBERLA"]) @@ -145,5 +146,17 @@ class LBPoiseuilleWalberlaSinglePrecisionGPU(LBPoiseuilleCommon, ut.TestCase): lb_params = {"single_precision": True} +@utx.skipIfMissingFeatures(["WALBERLA"]) +class LBPoiseuilleWalberlaDoublePrecisionBlocksCPU(LBPoiseuilleCommon, ut.TestCase): + lb_class = espressomd.lb.LBFluidWalberla + lb_params = {"single_precision": False, "blocks_per_mpi_rank": [2, 2, 2]} + + +@utx.skipIfMissingFeatures(["WALBERLA"]) +class LBPoiseuilleWalberlaSinglePrecisionBlocksCPU(LBPoiseuilleCommon, ut.TestCase): + lb_class = espressomd.lb.LBFluidWalberla + lb_params = {"single_precision": True, "blocks_per_mpi_rank": [2, 2, 2]} + + if __name__ == '__main__': ut.main() diff --git a/testsuite/python/lb_poiseuille_cylinder.py b/testsuite/python/lb_poiseuille_cylinder.py index 4499f8661d..aa6493b48c 100644 --- a/testsuite/python/lb_poiseuille_cylinder.py +++ b/testsuite/python/lb_poiseuille_cylinder.py @@ -222,5 +222,11 @@ class LBPoiseuilleWalberlaSinglePrecisionGPU(LBPoiseuilleCommon, ut.TestCase): lb_params = {"single_precision": True} +@utx.skipIfMissingFeatures(["WALBERLA"]) +class LBPoiseuilleWalberlaDoublePrecisionBlocksCPU(LBPoiseuilleCommon, ut.TestCase): + lb_class = espressomd.lb.LBFluidWalberla + lb_params = {"single_precision": False, "blocks_per_mpi_rank": [2, 2, 2]} + + if __name__ == '__main__': ut.main() diff --git a/testsuite/python/lb_pressure_tensor.py b/testsuite/python/lb_pressure_tensor.py index 0c0b4df3a6..263ff9295f 100644 --- a/testsuite/python/lb_pressure_tensor.py +++ b/testsuite/python/lb_pressure_tensor.py @@ -156,6 +156,14 @@ class TestLBPressureTensorCPU(TestLBPressureTensor, ut.TestCase): steps = 5000 +@utx.skipIfMissingFeatures("WALBERLA") +class TestLBPressureTensorBlocksCPU(TestLBPressureTensor, ut.TestCase): + + lb_class = espressomd.lb.LBFluidWalberla + lb_params = {"single_precision": True, "blocks_per_mpi_rank": [2, 2, 2]} + steps = 5000 + + # TODO WALBERLA """ @utx.skipIfMissingFeatures("WALBERLA") diff --git a/testsuite/python/lb_shear.py b/testsuite/python/lb_shear.py index fef0838ba6..1b7cf59a1f 100644 --- a/testsuite/python/lb_shear.py +++ b/testsuite/python/lb_shear.py @@ -31,7 +31,7 @@ # Box size will be H +2 AGRID to make room for walls. # The number of grid cells should be divisible by four and 3 in all directions # for testing on multiple mpi nodes. -H = 12 * AGRID +H = 10 * AGRID W = 6 * AGRID SHEAR_VELOCITY = 0.3 @@ -85,7 +85,7 @@ class LBShearCommon: system.cell_system.skin = 0.4 * AGRID def setUp(self): - self.lbf = self.lb_class(**LB_PARAMS, **self.lb_params) + self.system.lb = None def tearDown(self): self.system.lb = None @@ -96,9 +96,14 @@ def check_profile(self, shear_plane_normal, shear_direction): the exact solution. """ self.tearDown() - self.system.box_l = np.max( - ((W, W, W), shear_plane_normal * (H + 2 * AGRID)), 0) - self.setUp() + if hasattr(self, 'blocks_per_mpi_rank'): + self.system.box_l = np.max( + ((W, W, W) * np.array(self.blocks_per_mpi_rank), + shear_plane_normal * (H + 2 * AGRID) * np.array(self.blocks_per_mpi_rank)), 0) + else: + self.system.box_l = np.max( + ((W, W, W), shear_plane_normal * (H + 2 * AGRID)), 0) + self.lbf = self.lb_class(**LB_PARAMS, **self.lb_params) self.system.lb = self.lbf self.lbf.clear_boundaries() @@ -204,5 +209,18 @@ class LBShearWalberlaSinglePrecision(LBShearCommon, ut.TestCase): rtol = 5e-3 +@utx.skipIfMissingFeatures(["WALBERLA"]) +class LBShearWalberlaBlocks(LBShearCommon, ut.TestCase): + + """Test for the Walberla implementation of the LB in double-precision.""" + + lb_class = espressomd.lb.LBFluidWalberla + blocks_per_mpi_rank = [2, 2, 2] + lb_params = {"single_precision": False, + "blocks_per_mpi_rank": blocks_per_mpi_rank} + atol = 5e-5 + rtol = 5e-4 + + if __name__ == '__main__': ut.main() diff --git a/testsuite/python/lb_slice.py b/testsuite/python/lb_slice.py index 09a49dc4bd..c2a43def65 100644 --- a/testsuite/python/lb_slice.py +++ b/testsuite/python/lb_slice.py @@ -200,5 +200,19 @@ class LBTestWalberlaSinglePrecisionGPU(LBTest, ut.TestCase): lb_params = {"single_precision": True} +@utx.skipIfMissingFeatures("WALBERLA") +class LBTestWalberlaDoublePrecisionBlocksCPU(LBTest, ut.TestCase): + lb_class = espressomd.lb.LBFluidWalberla + lb_lattice_class = espressomd.lb.LatticeWalberla + lb_params = {"single_precision": False, "blocks_per_mpi_rank": [1, 1, 2]} + + +@utx.skipIfMissingFeatures(["WALBERLA"]) +class LBTestWalberlaSinglePrecisionBlocksCPU(LBTest, ut.TestCase): + lb_class = espressomd.lb.LBFluidWalberla + lb_lattice_class = espressomd.lb.LatticeWalberla + lb_params = {"single_precision": True, "blocks_per_mpi_rank": [1, 1, 2]} + + if __name__ == "__main__": ut.main() diff --git a/testsuite/python/lb_streaming.py b/testsuite/python/lb_streaming.py index ad9fefa350..8798d1474f 100644 --- a/testsuite/python/lb_streaming.py +++ b/testsuite/python/lb_streaming.py @@ -163,5 +163,13 @@ class LBStreamingWalberlaSinglePrecisionGPU(LBStreamingCommon, ut.TestCase): rtol = 1e-5 +@utx.skipIfMissingFeatures(["WALBERLA"]) +class LBStreamingWalberlaDoublePrecisionBlocksCPU(LBStreamingCommon, ut.TestCase): + lb_class = espressomd.lb.LBFluidWalberla + lb_params = {"single_precision": False, "blocks_per_mpi_rank": [1, 2, 2]} + box_l = [3., 2., 2.] + rtol = 1e-10 + + if __name__ == "__main__": ut.main() diff --git a/testsuite/python/lb_thermostat.py b/testsuite/python/lb_thermostat.py index 53de0ea8ca..a5027cf699 100644 --- a/testsuite/python/lb_thermostat.py +++ b/testsuite/python/lb_thermostat.py @@ -245,5 +245,11 @@ class LBThermostatWalberlaSinglePrecisionGPU(LBThermostatCommon, ut.TestCase): lb_params = {"single_precision": True} +@utx.skipIfMissingFeatures(["WALBERLA"]) +class LBThermostatWalberlaDoublePrecisionBlocksCPU(LBThermostatCommon, ut.TestCase): + lb_class = espressomd.lb.LBFluidWalberla + lb_params = {"single_precision": False, "blocks_per_mpi_rank": [2, 2, 2]} + + if __name__ == '__main__': ut.main()