espressomd · kodiakhq · Dec 12, 2024 · Dec 9, 2024 · Dec 10, 2024 · Dec 11, 2024
diff --git a/doc/doxygen/Doxyfile.in b/doc/doxygen/Doxyfile.in
@@ -1511,7 +1511,7 @@ MATHJAX_RELPATH        = https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.5/
 # MATHJAX_EXTENSIONS = TeX/AMSmath TeX/AMSsymbols
 # This tag requires that the tag USE_MATHJAX is set to YES.
 
-MATHJAX_EXTENSIONS     =
+MATHJAX_EXTENSIONS     = TeX/AMSmath
 
 # The MATHJAX_CODEFILE tag can be used to specify a file with javascript pieces
 # of code that will be used on startup of the MathJax code. See the MathJax site

diff --git a/src/core/electrostatics/elc.cpp b/src/core/electrostatics/elc.cpp
@@ -368,10 +368,6 @@ ElectrostaticLayerCorrection::z_energy(ParticleRange const &particles) const {
   auto const &box_geo = *get_system().box_geo;
   auto const xy_area_inv = box_geo.length_inv()[0] * box_geo.length_inv()[1];
   auto const pref = prefactor * 2. * std::numbers::pi * xy_area_inv;
-  auto const delta = elc.delta_mid_top * elc.delta_mid_bot;
-  auto const fac_delta_mid_bot = elc.delta_mid_bot / (1. - delta);
-  auto const fac_delta_mid_top = elc.delta_mid_top / (1. - delta);
-  auto const fac_delta = delta / (1. - delta);
 
   /* for non-neutral systems, this shift gives the background contribution
    * (rsp. for this shift, the DM of the background is zero) */
@@ -397,6 +393,10 @@ ElectrostaticLayerCorrection::z_energy(ParticleRange const &particles) const {
       }
     } else {
       // metallic boundaries
+      auto const delta = elc.delta_mid_top * elc.delta_mid_bot;
+      auto const fac_delta_mid_bot = elc.delta_mid_bot / (1. - delta);
+      auto const fac_delta_mid_top = elc.delta_mid_top / (1. - delta);
+      auto const fac_delta = delta / (1. - delta);
       clear_vec(gblcblk, size);
       auto const h = elc.box_h;
       ImageSum const image_sum{delta, shift, lz};

diff --git a/src/core/electrostatics/p3m.cpp b/src/core/electrostatics/p3m.cpp
@@ -66,6 +66,7 @@
 #include <utils/integral_parameter.hpp>
 #include <utils/math/int_pow.hpp>
 #include <utils/math/sqr.hpp>
+#include <utils/serialization/array.hpp>
 
 #include <boost/mpi/collectives/all_reduce.hpp>
 #include <boost/mpi/collectives/broadcast.hpp>
@@ -126,7 +127,7 @@ void CoulombP3MImpl<FloatType, Architecture>::count_charged_particles() {
 template <typename FloatType, Arch Architecture>
 void CoulombP3MImpl<FloatType, Architecture>::calc_influence_function_force() {
   auto const [KX, KY, KZ] = p3m.fft->get_permutations();
-  p3m.g_force = grid_influence_function<FloatType, 1>(
+  p3m.g_force = grid_influence_function<FloatType, 1, P3M_BRILLOUIN>(
       p3m.params, p3m.mesh.start, p3m.mesh.stop, KX, KY, KZ,
       get_system().box_geo->length_inv());
 }
@@ -137,7 +138,7 @@ void CoulombP3MImpl<FloatType, Architecture>::calc_influence_function_force() {
 template <typename FloatType, Arch Architecture>
 void CoulombP3MImpl<FloatType, Architecture>::calc_influence_function_energy() {
   auto const [KX, KY, KZ] = p3m.fft->get_permutations();
-  p3m.g_energy = grid_influence_function<FloatType, 0>(
+  p3m.g_energy = grid_influence_function<FloatType, 0, P3M_BRILLOUIN>(
       p3m.params, p3m.mesh.start, p3m.mesh.stop, KX, KY, KZ,
       get_system().box_geo->length_inv());
 }
@@ -597,14 +598,17 @@ class CoulombTuningAlgorithm : public TuningAlgorithm {
   double m_mesh_density_min = -1., m_mesh_density_max = -1.;
   // indicates if mesh should be tuned
   bool m_tune_mesh = false;
+  std::pair<std::optional<int>, std::optional<int>> m_tune_limits;
 
 protected:
   P3MParameters &get_params() override { return p3m.params; }
 
 public:
   CoulombTuningAlgorithm(System::System &system, auto &input_p3m,
-                         double prefactor, int timings)
-      : TuningAlgorithm(system, prefactor, timings), p3m{input_p3m} {}
+                         double prefactor, int timings,
+                         decltype(m_tune_limits) tune_limits)
+      : TuningAlgorithm(system, prefactor, timings), p3m{input_p3m},
+        m_tune_limits{std::move(tune_limits)} {}
 
   void on_solver_change() const override { m_system.on_coulomb_change(); }
 
@@ -632,6 +636,38 @@ class CoulombTuningAlgorithm : public TuningAlgorithm {
     return {};
   }
 
+  std::optional<std::string> fft_decomposition_veto(
+      Utils::Vector3i const &mesh_size_r_space) const override {
+#ifdef CUDA
+    if constexpr (Architecture == Arch::GPU) {
+      return std::nullopt;
+    }
+#endif
+    using Array3i = Utils::Array<int, 3u>;
+    auto const [KX, KY, KZ] = p3m.fft->get_permutations();
+    auto valid_decomposition = false;
+    Array3i mesh_size_k_space = {};
+    boost::mpi::reduce(
+        ::comm_cart, Array3i(p3m.mesh.stop), mesh_size_k_space,
+        [](Array3i const &lhs, Array3i const &rhs) {
+          return Array3i{{std::max(lhs[0u], rhs[0u]),
+                          std::max(lhs[1u], rhs[1u]),
+                          std::max(lhs[2u], rhs[2u])}};
+        },
+        0);
+    if (::this_node == 0) {
+      valid_decomposition = (mesh_size_r_space[0u] == mesh_size_k_space[KX] and
+                             mesh_size_r_space[1u] == mesh_size_k_space[KY] and
+                             mesh_size_r_space[2u] == mesh_size_k_space[KZ]);
+    }
+    boost::mpi::broadcast(::comm_cart, valid_decomposition, 0);
+    std::optional<std::string> retval{"conflict with FFT domain decomposition"};
+    if (valid_decomposition) {
+      retval = std::nullopt;
+    }
+    return retval;
+  }
+
   std::tuple<double, double, double, double>
   calculate_accuracy(Utils::Vector3i const &mesh, int cao,
                      double r_cut_iL) const override {
@@ -689,6 +725,16 @@ class CoulombTuningAlgorithm : public TuningAlgorithm {
           max_npart_per_dim, std::cbrt(static_cast<double>(p3m.sum_qpart)));
       m_mesh_density_min = min_npart_per_dim / normalized_box_dim;
       m_mesh_density_max = max_npart_per_dim / normalized_box_dim;
+      if (m_tune_limits.first or m_tune_limits.second) {
+        auto const &box_l = box_geo.length();
+        auto const dim = std::max({box_l[0], box_l[1], box_l[2]});
+        if (m_tune_limits.first) {
+          m_mesh_density_min = static_cast<double>(*m_tune_limits.first) / dim;
+        }
+        if (m_tune_limits.second) {
+          m_mesh_density_max = static_cast<double>(*m_tune_limits.second) / dim;
+        }
+      }
       m_tune_mesh = true;
     } else {
       m_mesh_density_min = m_mesh_density_max = mesh_density;
@@ -772,7 +818,7 @@ void CoulombP3MImpl<FloatType, Architecture>::tune() {
     }
     try {
       CoulombTuningAlgorithm<FloatType, Architecture> parameters(
-          system, p3m, prefactor, tune_timings);
+          system, p3m, prefactor, tune_timings, tune_limits);
       parameters.setup_logger(tune_verbose);
       // parameter ranges
       parameters.determine_mesh_limits();

diff --git a/src/core/electrostatics/p3m.impl.hpp b/src/core/electrostatics/p3m.impl.hpp
@@ -36,6 +36,7 @@
 #include <utils/Vector.hpp>
 
 #include <memory>
+#include <optional>
 #include <stdexcept>
 #include <type_traits>
 #include <utility>
@@ -69,6 +70,7 @@ struct CoulombP3MImpl : public CoulombP3M {
   /** @brief Coulomb P3M meshes and FFT algorithm. */
   std::unique_ptr<p3m_data_struct_coulomb<FloatType>> p3m_impl;
   int tune_timings;
+  std::pair<std::optional<int>, std::optional<int>> tune_limits;
   bool tune_verbose;
   bool check_complex_residuals;
   bool m_is_tuned;
@@ -77,10 +79,10 @@ struct CoulombP3MImpl : public CoulombP3M {
   CoulombP3MImpl(
       std::unique_ptr<p3m_data_struct_coulomb<FloatType>> &&p3m_handle,
       double prefactor, int tune_timings, bool tune_verbose,
-      bool check_complex_residuals)
+      decltype(tune_limits) tune_limits, bool check_complex_residuals)
       : CoulombP3M(p3m_handle->params), p3m{*p3m_handle},
         p3m_impl{std::move(p3m_handle)}, tune_timings{tune_timings},
-        tune_verbose{tune_verbose},
+        tune_limits{std::move(tune_limits)}, tune_verbose{tune_verbose},
         check_complex_residuals{check_complex_residuals} {
 
     if (tune_timings <= 0) {

diff --git a/src/core/magnetostatics/dp3m.cpp b/src/core/magnetostatics/dp3m.cpp
@@ -74,6 +74,7 @@
 #include <sstream>
 #include <stdexcept>
 #include <tuple>
+#include <utility>
 #include <vector>
 
 #ifdef FFTW3_H
@@ -115,8 +116,9 @@ double
 DipolarP3MImpl<FloatType, Architecture>::calc_average_self_energy_k_space()
     const {
   auto const &box_geo = *get_system().box_geo;
-  auto const node_phi = grid_influence_function_self_energy(
-      dp3m.params, dp3m.mesh.start, dp3m.mesh.stop, dp3m.g_energy);
+  auto const node_phi =
+      grid_influence_function_self_energy<FloatType, P3M_BRILLOUIN>(
+          dp3m.params, dp3m.mesh.start, dp3m.mesh.stop, dp3m.g_energy);
 
   double phi = 0.;
   boost::mpi::reduce(comm_cart, node_phi, phi, std::plus<>(), 0);
@@ -518,14 +520,14 @@ double DipolarP3MImpl<FloatType, Architecture>::calc_surface_term(
 
 template <typename FloatType, Arch Architecture>
 void DipolarP3MImpl<FloatType, Architecture>::calc_influence_function_force() {
-  dp3m.g_force = grid_influence_function<FloatType, 3>(
+  dp3m.g_force = grid_influence_function<FloatType, 3, P3M_BRILLOUIN>(
       dp3m.params, dp3m.mesh.start, dp3m.mesh.stop,
       get_system().box_geo->length_inv());
 }
 
 template <typename FloatType, Arch Architecture>
 void DipolarP3MImpl<FloatType, Architecture>::calc_influence_function_energy() {
-  dp3m.g_energy = grid_influence_function<FloatType, 2>(
+  dp3m.g_energy = grid_influence_function<FloatType, 2, P3M_BRILLOUIN>(
       dp3m.params, dp3m.mesh.start, dp3m.mesh.stop,
       get_system().box_geo->length_inv());
 }
@@ -534,11 +536,14 @@ template <typename FloatType, Arch Architecture>
 class DipolarTuningAlgorithm : public TuningAlgorithm {
   p3m_data_struct_dipoles<FloatType> &dp3m;
   int m_mesh_max = -1, m_mesh_min = -1;
+  std::pair<std::optional<int>, std::optional<int>> m_tune_limits;
 
 public:
   DipolarTuningAlgorithm(System::System &system, decltype(dp3m) &input_dp3m,
-                         double prefactor, int timings)
-      : TuningAlgorithm(system, prefactor, timings), dp3m{input_dp3m} {}
+                         double prefactor, int timings,
+                         decltype(m_tune_limits) tune_limits)
+      : TuningAlgorithm(system, prefactor, timings), dp3m{input_dp3m},
+        m_tune_limits{std::move(tune_limits)} {}
 
   P3MParameters &get_params() override { return dp3m.params; }
 
@@ -603,6 +608,12 @@ class DipolarTuningAlgorithm : public TuningAlgorithm {
       m_mesh_min = static_cast<int>(std::round(std::pow(2., std::floor(expo))));
       /* avoid using more than 1 GB of FFT arrays */
       m_mesh_max = 128;
+      if (m_tune_limits.first) {
+        m_mesh_min = *m_tune_limits.first;
+      }
+      if (m_tune_limits.second) {
+        m_mesh_max = *m_tune_limits.second;
+      }
     } else {
       m_mesh_min = m_mesh_max = dp3m.params.mesh[0];
       m_logger->report_fixed_mesh(dp3m.params.mesh);
@@ -662,7 +673,7 @@ void DipolarP3MImpl<FloatType, Architecture>::tune() {
     }
     try {
       DipolarTuningAlgorithm<FloatType, Architecture> parameters(
-          system, dp3m, prefactor, tune_timings);
+          system, dp3m, prefactor, tune_timings, tune_limits);
       parameters.setup_logger(tune_verbose);
       // parameter ranges
       parameters.determine_mesh_limits();

diff --git a/src/core/magnetostatics/dp3m.impl.hpp b/src/core/magnetostatics/dp3m.impl.hpp
@@ -36,6 +36,7 @@
 #include <utils/Vector.hpp>
 
 #include <memory>
+#include <optional>
 #include <stdexcept>
 #include <type_traits>
 #include <utility>
@@ -72,16 +73,18 @@ struct DipolarP3MImpl : public DipolarP3M {
   /** @brief Coulomb P3M meshes and FFT algorithm. */
   std::unique_ptr<p3m_data_struct_dipoles<FloatType>> dp3m_impl;
   int tune_timings;
+  std::pair<std::optional<int>, std::optional<int>> tune_limits;
   bool tune_verbose;
   bool m_is_tuned;
 
 public:
   DipolarP3MImpl(
       std::unique_ptr<p3m_data_struct_dipoles<FloatType>> &&dp3m_handle,
-      double prefactor, int tune_timings, bool tune_verbose)
+      double prefactor, int tune_timings, bool tune_verbose,
+      decltype(tune_limits) tune_limits)
       : DipolarP3M(dp3m_handle->params), dp3m{*dp3m_handle},
         dp3m_impl{std::move(dp3m_handle)}, tune_timings{tune_timings},
-        tune_verbose{tune_verbose} {
+        tune_limits{std::move(tune_limits)}, tune_verbose{tune_verbose} {
 
     if (tune_timings <= 0) {
       throw std::domain_error("Parameter 'timings' must be > 0");

diff --git a/src/core/p3m/TuningAlgorithm.cpp b/src/core/p3m/TuningAlgorithm.cpp
@@ -49,6 +49,8 @@ static auto constexpr P3M_TUNE_CAO_TOO_LARGE = 1.;
 static auto constexpr P3M_TUNE_ELC_GAP_SIZE = 2.;
 /** could not achieve target accuracy */
 static auto constexpr P3M_TUNE_ACCURACY_TOO_LARGE = 3.;
+/** conflict with FFT domain decomposition */
+static auto constexpr P3M_TUNE_FFT_MESH_SIZE = 4.;
 /**@}*/
 
 /** @brief Precision threshold for a non-zero real-space cutoff. */
@@ -110,7 +112,7 @@ void TuningAlgorithm::commit(Utils::Vector3i const &mesh, int cao,
  * @param[in,out] tuned_accuracy  @copybrief P3MParameters::accuracy
  *
  * @returns The integration time in case of success, otherwise
- *          -@ref P3M_TUNE_ACCURACY_TOO_LARGE,
+ *          -@ref P3M_TUNE_ACCURACY_TOO_LARGE, -@ref P3M_TUNE_FFT_MESH_SIZE,
  *          -@ref P3M_TUNE_CAO_TOO_LARGE, or -@ref P3M_TUNE_ELC_GAP_SIZE
  */
 double TuningAlgorithm::get_mc_time(Utils::Vector3i const &mesh, int cao,
@@ -171,17 +173,25 @@ double TuningAlgorithm::get_mc_time(Utils::Vector3i const &mesh, int cao,
    * we know that the desired minimal accuracy is obtained */
   tuned_r_cut_iL = r_cut_iL = r_cut_iL_max;
 
+  auto const report_veto = [&](auto const &veto) {
+    if (veto) {
+      m_logger->log_skip(*veto, mesh[0], cao, r_cut_iL, tuned_alpha_L,
+                         tuned_accuracy, rs_err, ks_err);
+    }
+    return static_cast<bool>(veto);
+  };
+
   /* if we are running P3M+ELC, check that r_cut is compatible */
   auto const r_cut = r_cut_iL * box_geo.length()[0];
-  auto const veto = layer_correction_veto_r_cut(r_cut);
-  if (veto) {
-    m_logger->log_skip(*veto, mesh[0], cao, r_cut_iL, tuned_alpha_L,
-                       tuned_accuracy, rs_err, ks_err);
+  if (report_veto(layer_correction_veto_r_cut(r_cut))) {
     return -P3M_TUNE_ELC_GAP_SIZE;
   }
 
   commit(mesh, cao, r_cut_iL, tuned_alpha_L);
   on_solver_change();
+  if (report_veto(fft_decomposition_veto(mesh))) {
+    return -P3M_TUNE_FFT_MESH_SIZE;
+  }
   auto const int_time = benchmark_integration_step(m_system, m_timings);
 
   std::tie(tuned_accuracy, rs_err, ks_err, tuned_alpha_L) =

diff --git a/src/core/p3m/TuningAlgorithm.hpp b/src/core/p3m/TuningAlgorithm.hpp
@@ -147,6 +147,12 @@ class TuningAlgorithm {
   virtual std::optional<std::string>
   layer_correction_veto_r_cut(double r_cut) const = 0;
 
+  /** @brief Veto FFT decomposition in non-cubic boxes. */
+  virtual std::optional<std::string>
+  fft_decomposition_veto(Utils::Vector3i const &) const {
+    return std::nullopt;
+  }
+
   /** @brief Write tuned parameters to the P3M parameter struct. */
   void commit(Utils::Vector3i const &mesh, int cao, double r_cut_iL,
               double alpha_L);

diff --git a/src/core/p3m/common.hpp b/src/core/p3m/common.hpp
@@ -53,6 +53,7 @@ auto constexpr P3M_EPSILON_METALLIC = 0.0;
 #include <span>
 #include <stdexcept>
 
+/** @brief P3M kernel architecture. */
 enum class Arch { CPU, GPU };
 
 /** @brief Structure to hold P3M parameters and some dependent variables. */
@@ -65,8 +66,8 @@ struct P3MParameters {
   /** cutoff radius for real space electrostatics (>0), rescaled to
    *  @p r_cut_iL = @p r_cut * @p box_l_i. */
   double r_cut_iL;
-  /** number of mesh points per coordinate direction (>0). */
-  Utils::Vector3i mesh = {};
+  /** number of mesh points per coordinate direction (>0), in real space. */
+  Utils::Vector3i mesh;
   /** offset of the first mesh point (lower left corner) from the
    *  coordinate origin ([0,1[). */
   Utils::Vector3d mesh_off;
@@ -237,15 +238,14 @@ template <typename FloatType> struct P3MFFTMesh {
 
 #endif // defined(P3M) or defined(DP3M)
 
-namespace detail {
-/** Calculate indices that shift @ref P3MParameters::mesh "mesh" by `mesh/2`.
+/** @brief Calculate indices that shift @ref P3MParameters::mesh by `mesh/2`.
  *  For each mesh size @f$ n @f$ in @c mesh_size, create a sequence of integer
  *  values @f$ \left( 0, \ldots, \lfloor n/2 \rfloor, -\lfloor n/2 \rfloor,
  *  \ldots, -1\right) @f$ if @c zero_out_midpoint is false, otherwise
  *  @f$ \left( 0, \ldots, \lfloor n/2 - 1 \rfloor, 0, -\lfloor n/2 \rfloor,
  *  \ldots, -1\right) @f$.
  */
-std::array<std::vector<int>, 3> inline calc_meshift(
+std::array<std::vector<int>, 3> inline calc_p3m_mesh_shift(
     Utils::Vector3i const &mesh_size, bool zero_out_midpoint = false) {
   std::array<std::vector<int>, 3> ret{};
 
@@ -262,4 +262,3 @@ std::array<std::vector<int>, 3> inline calc_meshift(
 
   return ret;
 }
-} // namespace detail
diff --git a/src/core/p3m/data_struct.hpp b/src/core/p3m/data_struct.hpp
@@ -66,7 +66,7 @@ template <typename FloatType> struct p3m_data_struct {
    *  i.e. the prefactor @f$ 2i\pi/L @f$ is missing!
    */
   void calc_differential_operator() {
-    d_op = detail::calc_meshift(params.mesh, true);
+    d_op = calc_p3m_mesh_shift(params.mesh, true);
   }
 
   /** @brief Force optimised influence function (k-space) */