Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add new P3M tuning options #5017

Merged
merged 4 commits into from
Dec 12, 2024
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion doc/doxygen/Doxyfile.in
Original file line number Diff line number Diff line change
Expand Up @@ -1511,7 +1511,7 @@ MATHJAX_RELPATH = https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.5/
# MATHJAX_EXTENSIONS = TeX/AMSmath TeX/AMSsymbols
# This tag requires that the tag USE_MATHJAX is set to YES.

MATHJAX_EXTENSIONS =
MATHJAX_EXTENSIONS = TeX/AMSmath

# The MATHJAX_CODEFILE tag can be used to specify a file with javascript pieces
# of code that will be used on startup of the MathJax code. See the MathJax site
Expand Down
8 changes: 4 additions & 4 deletions src/core/electrostatics/elc.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -368,10 +368,6 @@ ElectrostaticLayerCorrection::z_energy(ParticleRange const &particles) const {
auto const &box_geo = *get_system().box_geo;
auto const xy_area_inv = box_geo.length_inv()[0] * box_geo.length_inv()[1];
auto const pref = prefactor * 2. * std::numbers::pi * xy_area_inv;
auto const delta = elc.delta_mid_top * elc.delta_mid_bot;
auto const fac_delta_mid_bot = elc.delta_mid_bot / (1. - delta);
auto const fac_delta_mid_top = elc.delta_mid_top / (1. - delta);
auto const fac_delta = delta / (1. - delta);

/* for non-neutral systems, this shift gives the background contribution
* (rsp. for this shift, the DM of the background is zero) */
Expand All @@ -397,6 +393,10 @@ ElectrostaticLayerCorrection::z_energy(ParticleRange const &particles) const {
}
} else {
// metallic boundaries
auto const delta = elc.delta_mid_top * elc.delta_mid_bot;
auto const fac_delta_mid_bot = elc.delta_mid_bot / (1. - delta);
auto const fac_delta_mid_top = elc.delta_mid_top / (1. - delta);
auto const fac_delta = delta / (1. - delta);
clear_vec(gblcblk, size);
auto const h = elc.box_h;
ImageSum const image_sum{delta, shift, lz};
Expand Down
56 changes: 51 additions & 5 deletions src/core/electrostatics/p3m.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@
#include <utils/integral_parameter.hpp>
#include <utils/math/int_pow.hpp>
#include <utils/math/sqr.hpp>
#include <utils/serialization/array.hpp>

#include <boost/mpi/collectives/all_reduce.hpp>
#include <boost/mpi/collectives/broadcast.hpp>
Expand Down Expand Up @@ -126,7 +127,7 @@ void CoulombP3MImpl<FloatType, Architecture>::count_charged_particles() {
template <typename FloatType, Arch Architecture>
void CoulombP3MImpl<FloatType, Architecture>::calc_influence_function_force() {
auto const [KX, KY, KZ] = p3m.fft->get_permutations();
p3m.g_force = grid_influence_function<FloatType, 1>(
p3m.g_force = grid_influence_function<FloatType, 1, P3M_BRILLOUIN>(
p3m.params, p3m.mesh.start, p3m.mesh.stop, KX, KY, KZ,
get_system().box_geo->length_inv());
}
Expand All @@ -137,7 +138,7 @@ void CoulombP3MImpl<FloatType, Architecture>::calc_influence_function_force() {
template <typename FloatType, Arch Architecture>
void CoulombP3MImpl<FloatType, Architecture>::calc_influence_function_energy() {
auto const [KX, KY, KZ] = p3m.fft->get_permutations();
p3m.g_energy = grid_influence_function<FloatType, 0>(
p3m.g_energy = grid_influence_function<FloatType, 0, P3M_BRILLOUIN>(
p3m.params, p3m.mesh.start, p3m.mesh.stop, KX, KY, KZ,
get_system().box_geo->length_inv());
}
Expand Down Expand Up @@ -597,14 +598,17 @@ class CoulombTuningAlgorithm : public TuningAlgorithm {
double m_mesh_density_min = -1., m_mesh_density_max = -1.;
// indicates if mesh should be tuned
bool m_tune_mesh = false;
std::pair<std::optional<int>, std::optional<int>> m_tune_limits;

protected:
P3MParameters &get_params() override { return p3m.params; }

public:
CoulombTuningAlgorithm(System::System &system, auto &input_p3m,
double prefactor, int timings)
: TuningAlgorithm(system, prefactor, timings), p3m{input_p3m} {}
double prefactor, int timings,
decltype(m_tune_limits) tune_limits)
: TuningAlgorithm(system, prefactor, timings), p3m{input_p3m},
m_tune_limits{std::move(tune_limits)} {}

void on_solver_change() const override { m_system.on_coulomb_change(); }

Expand Down Expand Up @@ -632,6 +636,38 @@ class CoulombTuningAlgorithm : public TuningAlgorithm {
return {};
}

std::optional<std::string> fft_decomposition_veto(
Utils::Vector3i const &mesh_size_r_space) const override {
#ifdef CUDA
if constexpr (Architecture == Arch::GPU) {
return std::nullopt;
}
#endif
using Array3i = Utils::Array<int, 3u>;
auto const [KX, KY, KZ] = p3m.fft->get_permutations();
auto valid_decomposition = false;
Array3i mesh_size_k_space = {};
boost::mpi::reduce(
::comm_cart, Array3i(p3m.mesh.stop), mesh_size_k_space,
[](Array3i const &lhs, Array3i const &rhs) {
return Array3i{{std::max(lhs[0u], rhs[0u]),
std::max(lhs[1u], rhs[1u]),
std::max(lhs[2u], rhs[2u])}};
},
0);
if (::this_node == 0) {
valid_decomposition = (mesh_size_r_space[0u] == mesh_size_k_space[KX] and
mesh_size_r_space[1u] == mesh_size_k_space[KY] and
mesh_size_r_space[2u] == mesh_size_k_space[KZ]);
}
boost::mpi::broadcast(::comm_cart, valid_decomposition, 0);
std::optional<std::string> retval{"conflict with FFT domain decomposition"};
if (valid_decomposition) {
retval = std::nullopt;
}
return retval;
}

std::tuple<double, double, double, double>
calculate_accuracy(Utils::Vector3i const &mesh, int cao,
double r_cut_iL) const override {
Expand Down Expand Up @@ -689,6 +725,16 @@ class CoulombTuningAlgorithm : public TuningAlgorithm {
max_npart_per_dim, std::cbrt(static_cast<double>(p3m.sum_qpart)));
m_mesh_density_min = min_npart_per_dim / normalized_box_dim;
m_mesh_density_max = max_npart_per_dim / normalized_box_dim;
if (m_tune_limits.first or m_tune_limits.second) {
auto const &box_l = box_geo.length();
auto const dim = std::max({box_l[0], box_l[1], box_l[2]});
if (m_tune_limits.first) {
m_mesh_density_min = static_cast<double>(*m_tune_limits.first) / dim;
}
if (m_tune_limits.second) {
m_mesh_density_max = static_cast<double>(*m_tune_limits.second) / dim;
}
}
m_tune_mesh = true;
} else {
m_mesh_density_min = m_mesh_density_max = mesh_density;
Expand Down Expand Up @@ -772,7 +818,7 @@ void CoulombP3MImpl<FloatType, Architecture>::tune() {
}
try {
CoulombTuningAlgorithm<FloatType, Architecture> parameters(
system, p3m, prefactor, tune_timings);
system, p3m, prefactor, tune_timings, tune_limits);
parameters.setup_logger(tune_verbose);
// parameter ranges
parameters.determine_mesh_limits();
Expand Down
6 changes: 4 additions & 2 deletions src/core/electrostatics/p3m.impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
#include <utils/Vector.hpp>

#include <memory>
#include <optional>
#include <stdexcept>
#include <type_traits>
#include <utility>
Expand Down Expand Up @@ -69,6 +70,7 @@ struct CoulombP3MImpl : public CoulombP3M {
/** @brief Coulomb P3M meshes and FFT algorithm. */
std::unique_ptr<p3m_data_struct_coulomb<FloatType>> p3m_impl;
int tune_timings;
std::pair<std::optional<int>, std::optional<int>> tune_limits;
bool tune_verbose;
bool check_complex_residuals;
bool m_is_tuned;
Expand All @@ -77,10 +79,10 @@ struct CoulombP3MImpl : public CoulombP3M {
CoulombP3MImpl(
std::unique_ptr<p3m_data_struct_coulomb<FloatType>> &&p3m_handle,
double prefactor, int tune_timings, bool tune_verbose,
bool check_complex_residuals)
decltype(tune_limits) tune_limits, bool check_complex_residuals)
: CoulombP3M(p3m_handle->params), p3m{*p3m_handle},
p3m_impl{std::move(p3m_handle)}, tune_timings{tune_timings},
tune_verbose{tune_verbose},
tune_limits{std::move(tune_limits)}, tune_verbose{tune_verbose},
check_complex_residuals{check_complex_residuals} {

if (tune_timings <= 0) {
Expand Down
25 changes: 18 additions & 7 deletions src/core/magnetostatics/dp3m.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,7 @@
#include <sstream>
#include <stdexcept>
#include <tuple>
#include <utility>
#include <vector>

#ifdef FFTW3_H
Expand Down Expand Up @@ -115,8 +116,9 @@ double
DipolarP3MImpl<FloatType, Architecture>::calc_average_self_energy_k_space()
const {
auto const &box_geo = *get_system().box_geo;
auto const node_phi = grid_influence_function_self_energy(
dp3m.params, dp3m.mesh.start, dp3m.mesh.stop, dp3m.g_energy);
auto const node_phi =
grid_influence_function_self_energy<FloatType, P3M_BRILLOUIN>(
dp3m.params, dp3m.mesh.start, dp3m.mesh.stop, dp3m.g_energy);

double phi = 0.;
boost::mpi::reduce(comm_cart, node_phi, phi, std::plus<>(), 0);
Expand Down Expand Up @@ -518,14 +520,14 @@ double DipolarP3MImpl<FloatType, Architecture>::calc_surface_term(

template <typename FloatType, Arch Architecture>
void DipolarP3MImpl<FloatType, Architecture>::calc_influence_function_force() {
dp3m.g_force = grid_influence_function<FloatType, 3>(
dp3m.g_force = grid_influence_function<FloatType, 3, P3M_BRILLOUIN>(
dp3m.params, dp3m.mesh.start, dp3m.mesh.stop,
get_system().box_geo->length_inv());
}

template <typename FloatType, Arch Architecture>
void DipolarP3MImpl<FloatType, Architecture>::calc_influence_function_energy() {
dp3m.g_energy = grid_influence_function<FloatType, 2>(
dp3m.g_energy = grid_influence_function<FloatType, 2, P3M_BRILLOUIN>(
dp3m.params, dp3m.mesh.start, dp3m.mesh.stop,
get_system().box_geo->length_inv());
}
Expand All @@ -534,11 +536,14 @@ template <typename FloatType, Arch Architecture>
class DipolarTuningAlgorithm : public TuningAlgorithm {
p3m_data_struct_dipoles<FloatType> &dp3m;
int m_mesh_max = -1, m_mesh_min = -1;
std::pair<std::optional<int>, std::optional<int>> m_tune_limits;

public:
DipolarTuningAlgorithm(System::System &system, decltype(dp3m) &input_dp3m,
double prefactor, int timings)
: TuningAlgorithm(system, prefactor, timings), dp3m{input_dp3m} {}
double prefactor, int timings,
decltype(m_tune_limits) tune_limits)
: TuningAlgorithm(system, prefactor, timings), dp3m{input_dp3m},
m_tune_limits{std::move(tune_limits)} {}

P3MParameters &get_params() override { return dp3m.params; }

Expand Down Expand Up @@ -603,6 +608,12 @@ class DipolarTuningAlgorithm : public TuningAlgorithm {
m_mesh_min = static_cast<int>(std::round(std::pow(2., std::floor(expo))));
/* avoid using more than 1 GB of FFT arrays */
m_mesh_max = 128;
if (m_tune_limits.first) {
m_mesh_min = *m_tune_limits.first;
}
if (m_tune_limits.second) {
m_mesh_max = *m_tune_limits.second;
}
} else {
m_mesh_min = m_mesh_max = dp3m.params.mesh[0];
m_logger->report_fixed_mesh(dp3m.params.mesh);
Expand Down Expand Up @@ -662,7 +673,7 @@ void DipolarP3MImpl<FloatType, Architecture>::tune() {
}
try {
DipolarTuningAlgorithm<FloatType, Architecture> parameters(
system, dp3m, prefactor, tune_timings);
system, dp3m, prefactor, tune_timings, tune_limits);
parameters.setup_logger(tune_verbose);
// parameter ranges
parameters.determine_mesh_limits();
Expand Down
7 changes: 5 additions & 2 deletions src/core/magnetostatics/dp3m.impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
#include <utils/Vector.hpp>

#include <memory>
#include <optional>
#include <stdexcept>
#include <type_traits>
#include <utility>
Expand Down Expand Up @@ -72,16 +73,18 @@ struct DipolarP3MImpl : public DipolarP3M {
/** @brief Coulomb P3M meshes and FFT algorithm. */
std::unique_ptr<p3m_data_struct_dipoles<FloatType>> dp3m_impl;
int tune_timings;
std::pair<std::optional<int>, std::optional<int>> tune_limits;
bool tune_verbose;
bool m_is_tuned;

public:
DipolarP3MImpl(
std::unique_ptr<p3m_data_struct_dipoles<FloatType>> &&dp3m_handle,
double prefactor, int tune_timings, bool tune_verbose)
double prefactor, int tune_timings, bool tune_verbose,
decltype(tune_limits) tune_limits)
: DipolarP3M(dp3m_handle->params), dp3m{*dp3m_handle},
dp3m_impl{std::move(dp3m_handle)}, tune_timings{tune_timings},
tune_verbose{tune_verbose} {
tune_limits{std::move(tune_limits)}, tune_verbose{tune_verbose} {

if (tune_timings <= 0) {
throw std::domain_error("Parameter 'timings' must be > 0");
Expand Down
20 changes: 15 additions & 5 deletions src/core/p3m/TuningAlgorithm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,8 @@ static auto constexpr P3M_TUNE_CAO_TOO_LARGE = 1.;
static auto constexpr P3M_TUNE_ELC_GAP_SIZE = 2.;
/** could not achieve target accuracy */
static auto constexpr P3M_TUNE_ACCURACY_TOO_LARGE = 3.;
/** conflict with FFT domain decomposition */
static auto constexpr P3M_TUNE_FFT_MESH_SIZE = 4.;
/**@}*/

/** @brief Precision threshold for a non-zero real-space cutoff. */
Expand Down Expand Up @@ -110,7 +112,7 @@ void TuningAlgorithm::commit(Utils::Vector3i const &mesh, int cao,
* @param[in,out] tuned_accuracy @copybrief P3MParameters::accuracy
*
* @returns The integration time in case of success, otherwise
* -@ref P3M_TUNE_ACCURACY_TOO_LARGE,
* -@ref P3M_TUNE_ACCURACY_TOO_LARGE, -@ref P3M_TUNE_FFT_MESH_SIZE,
* -@ref P3M_TUNE_CAO_TOO_LARGE, or -@ref P3M_TUNE_ELC_GAP_SIZE
*/
double TuningAlgorithm::get_mc_time(Utils::Vector3i const &mesh, int cao,
Expand Down Expand Up @@ -171,17 +173,25 @@ double TuningAlgorithm::get_mc_time(Utils::Vector3i const &mesh, int cao,
* we know that the desired minimal accuracy is obtained */
tuned_r_cut_iL = r_cut_iL = r_cut_iL_max;

auto const report_veto = [&](auto const &veto) {
if (veto) {
m_logger->log_skip(*veto, mesh[0], cao, r_cut_iL, tuned_alpha_L,
tuned_accuracy, rs_err, ks_err);
}
return static_cast<bool>(veto);
};

/* if we are running P3M+ELC, check that r_cut is compatible */
auto const r_cut = r_cut_iL * box_geo.length()[0];
auto const veto = layer_correction_veto_r_cut(r_cut);
if (veto) {
m_logger->log_skip(*veto, mesh[0], cao, r_cut_iL, tuned_alpha_L,
tuned_accuracy, rs_err, ks_err);
if (report_veto(layer_correction_veto_r_cut(r_cut))) {
return -P3M_TUNE_ELC_GAP_SIZE;
}

commit(mesh, cao, r_cut_iL, tuned_alpha_L);
on_solver_change();
if (report_veto(fft_decomposition_veto(mesh))) {
return -P3M_TUNE_FFT_MESH_SIZE;
}
auto const int_time = benchmark_integration_step(m_system, m_timings);

std::tie(tuned_accuracy, rs_err, ks_err, tuned_alpha_L) =
Expand Down
6 changes: 6 additions & 0 deletions src/core/p3m/TuningAlgorithm.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,12 @@ class TuningAlgorithm {
virtual std::optional<std::string>
layer_correction_veto_r_cut(double r_cut) const = 0;

/** @brief Veto FFT decomposition in non-cubic boxes. */
virtual std::optional<std::string>
fft_decomposition_veto(Utils::Vector3i const &) const {
return std::nullopt;
}

/** @brief Write tuned parameters to the P3M parameter struct. */
void commit(Utils::Vector3i const &mesh, int cao, double r_cut_iL,
double alpha_L);
Expand Down
11 changes: 5 additions & 6 deletions src/core/p3m/common.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ auto constexpr P3M_EPSILON_METALLIC = 0.0;
#include <span>
#include <stdexcept>

/** @brief P3M kernel architecture. */
enum class Arch { CPU, GPU };

/** @brief Structure to hold P3M parameters and some dependent variables. */
Expand All @@ -65,8 +66,8 @@ struct P3MParameters {
/** cutoff radius for real space electrostatics (>0), rescaled to
* @p r_cut_iL = @p r_cut * @p box_l_i. */
double r_cut_iL;
/** number of mesh points per coordinate direction (>0). */
Utils::Vector3i mesh = {};
/** number of mesh points per coordinate direction (>0), in real space. */
Utils::Vector3i mesh;
/** offset of the first mesh point (lower left corner) from the
* coordinate origin ([0,1[). */
Utils::Vector3d mesh_off;
Expand Down Expand Up @@ -237,15 +238,14 @@ template <typename FloatType> struct P3MFFTMesh {

#endif // defined(P3M) or defined(DP3M)

namespace detail {
/** Calculate indices that shift @ref P3MParameters::mesh "mesh" by `mesh/2`.
/** @brief Calculate indices that shift @ref P3MParameters::mesh by `mesh/2`.
* For each mesh size @f$ n @f$ in @c mesh_size, create a sequence of integer
* values @f$ \left( 0, \ldots, \lfloor n/2 \rfloor, -\lfloor n/2 \rfloor,
* \ldots, -1\right) @f$ if @c zero_out_midpoint is false, otherwise
* @f$ \left( 0, \ldots, \lfloor n/2 - 1 \rfloor, 0, -\lfloor n/2 \rfloor,
* \ldots, -1\right) @f$.
*/
std::array<std::vector<int>, 3> inline calc_meshift(
std::array<std::vector<int>, 3> inline calc_p3m_mesh_shift(
Utils::Vector3i const &mesh_size, bool zero_out_midpoint = false) {
std::array<std::vector<int>, 3> ret{};

Expand All @@ -262,4 +262,3 @@ std::array<std::vector<int>, 3> inline calc_meshift(

return ret;
}
} // namespace detail
2 changes: 1 addition & 1 deletion src/core/p3m/data_struct.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ template <typename FloatType> struct p3m_data_struct {
* i.e. the prefactor @f$ 2i\pi/L @f$ is missing!
*/
void calc_differential_operator() {
d_op = detail::calc_meshift(params.mesh, true);
d_op = calc_p3m_mesh_shift(params.mesh, true);
}

/** @brief Force optimised influence function (k-space) */
Expand Down
Loading
Loading