From df5284e11995b644cde50f56ab0b8e43c9a83fd7 Mon Sep 17 00:00:00 2001
From: Francois-Xavier Coudert <fxcoudert@gmail.com>
Date: Wed, 24 Apr 2024 11:48:19 +0200
Subject: [PATCH 1/5] lj_potential: use numpy array operations

numpy array operations are faster, so coerce necessary data into numpy
arrays and avoid doing math or linalg operations inside Python loops.

Tested on a representative MOF (QETBUP_clean.cif) on my system, with all
default parameters: the wall time goes down from 5.89 s to 4.86 s, so
that's 17% faster. Voxel statistics are otherwise identical.
---
 src/moxel/utils.py | 17 +++++++++--------
 1 file changed, 9 insertions(+), 8 deletions(-)

diff --git a/src/moxel/utils.py b/src/moxel/utils.py
index 13717c0..20c2cb2 100644
--- a/src/moxel/utils.py
+++ b/src/moxel/utils.py
@@ -212,14 +212,15 @@ def lj_potential(self, coords):
 
         energy = 0
         if len(neighbors) != 0:
-            for atom in neighbors:
-                r_ij = np.linalg.norm(cartesian_coords - atom.coords)
-                if r_ij <= 1e-3:
-                    energy += 1000
-                else:
-                    e_j, s_j = lj_params[atom.species_string]
-                    x = (0.5 * (s_j + self.sigma)) / r_ij
-                    energy += 4 * np.sqrt(e_j * self.epsilon) * (x**12 - x**6)
+            neigh_coords = np.stack([atom.coords for atom in neighbors])
+            r_ij = np.linalg.norm(cartesian_coords - neigh_coords, axis=1)
+            if np.any(r_ij < 1e-3):
+                return 0.
+
+            es_j = np.array([lj_params[atom.species_string] for atom in neighbors])
+            x = (0.5 * (es_j[:,1] + self.sigma)) / r_ij
+            e = 4 * np.sqrt(es_j[:,0] * self.epsilon)
+            energy = sum(e * (x**12 - x**6))
 
         # This should be changed with clipping in future versions.
         return np.exp(-(1 / 298) * energy)  # For numerical stability.

From 821ee2af84358fe569a065b25aa18b323714ddd9 Mon Sep 17 00:00:00 2001
From: Francois-Xavier Coudert <fxcoudert@gmail.com>
Date: Wed, 24 Apr 2024 23:05:38 +0200
Subject: [PATCH 2/5] No need to calculate distances!

The distance of each atom to the central point is already provided by
the get_sites_in_sphere() function, in the nn_distance field.
---
 src/moxel/utils.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/src/moxel/utils.py b/src/moxel/utils.py
index 20c2cb2..38e0f28 100644
--- a/src/moxel/utils.py
+++ b/src/moxel/utils.py
@@ -212,8 +212,7 @@ def lj_potential(self, coords):
 
         energy = 0
         if len(neighbors) != 0:
-            neigh_coords = np.stack([atom.coords for atom in neighbors])
-            r_ij = np.linalg.norm(cartesian_coords - neigh_coords, axis=1)
+            r_ij = np.stack([atom.nn_distance for atom in neighbors])
             if np.any(r_ij < 1e-3):
                 return 0.
 

From 5f3f294d89b138c32775eea8459c84d65d3ab965 Mon Sep 17 00:00:00 2001
From: Francois-Xavier Coudert <fxcoudert@gmail.com>
Date: Thu, 25 Apr 2024 09:50:56 +0200
Subject: [PATCH 3/5] Use lower-level function `get_points_in_sphere()`

This speeds up the calculation significantly by avoiding a lot of
overhead from pymatgen Structure objects.
---
 src/moxel/utils.py | 24 +++++++++++-------------
 1 file changed, 11 insertions(+), 13 deletions(-)

diff --git a/src/moxel/utils.py b/src/moxel/utils.py
index 38e0f28..0d80628 100644
--- a/src/moxel/utils.py
+++ b/src/moxel/utils.py
@@ -178,6 +178,8 @@ def calculate(self, cubic_box=False, length=30, potential='lj', n_jobs=None):
             self._simulation_box = self.structure * scale
         
         if potential == 'lj':
+            # Cache LJ parameters for all atoms in the simulation box
+            self._lj_params = np.array([lj_params[atom.species_string] for atom in self._simulation_box])
             # Embarrassingly parallel.
             with Pool(processes=n_jobs) as p:
                 energies = p.map(
@@ -205,21 +207,17 @@ def lj_potential(self, coords):
         """
         if self.cubic_box:
             cartesian_coords = coords
-            neighbors = self._simulation_box.get_sites_in_sphere(cartesian_coords, self.cutoff)
         else:
             cartesian_coords = self._simulation_box.lattice.get_cartesian_coords(coords)
-            neighbors = self._simulation_box.get_sites_in_sphere(cartesian_coords, self.cutoff)
-
-        energy = 0
-        if len(neighbors) != 0:
-            r_ij = np.stack([atom.nn_distance for atom in neighbors])
-            if np.any(r_ij < 1e-3):
-                return 0.
-
-            es_j = np.array([lj_params[atom.species_string] for atom in neighbors])
-            x = (0.5 * (es_j[:,1] + self.sigma)) / r_ij
-            e = 4 * np.sqrt(es_j[:,0] * self.epsilon)
-            energy = sum(e * (x**12 - x**6))
+
+        _, r_ij, indices, _ = self._simulation_box._lattice.get_points_in_sphere(self._simulation_box.frac_coords, cartesian_coords, self.cutoff, zip_results=False)
+        if np.any(r_ij < 1e-3):
+            return 0.
+
+        es_j = self._lj_params[indices]
+        x = (0.5 * (es_j[:,1] + self.sigma)) / r_ij
+        e = 4 * np.sqrt(es_j[:,0] * self.epsilon)
+        energy = sum(e * (x**12 - x**6))
 
         # This should be changed with clipping in future versions.
         return np.exp(-(1 / 298) * energy)  # For numerical stability.

From ac709b83e121c9bbeed5e99efae8798f98a02310 Mon Sep 17 00:00:00 2001
From: Francois-Xavier Coudert <fxcoudert@gmail.com>
Date: Thu, 25 Apr 2024 10:02:13 +0200
Subject: [PATCH 4/5] Cache the box fractional coordinates

---
 src/moxel/utils.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/src/moxel/utils.py b/src/moxel/utils.py
index 0d80628..a588114 100644
--- a/src/moxel/utils.py
+++ b/src/moxel/utils.py
@@ -180,6 +180,9 @@ def calculate(self, cubic_box=False, length=30, potential='lj', n_jobs=None):
         if potential == 'lj':
             # Cache LJ parameters for all atoms in the simulation box
             self._lj_params = np.array([lj_params[atom.species_string] for atom in self._simulation_box])
+            # Cache fractional coordinates, since this is a slow function in pymatgen
+            self._frac_coords = self._simulation_box.frac_coords
+
             # Embarrassingly parallel.
             with Pool(processes=n_jobs) as p:
                 energies = p.map(
@@ -210,7 +213,7 @@ def lj_potential(self, coords):
         else:
             cartesian_coords = self._simulation_box.lattice.get_cartesian_coords(coords)
 
-        _, r_ij, indices, _ = self._simulation_box._lattice.get_points_in_sphere(self._simulation_box.frac_coords, cartesian_coords, self.cutoff, zip_results=False)
+        _, r_ij, indices, _ = self._simulation_box._lattice.get_points_in_sphere(self._frac_coords, cartesian_coords, self.cutoff, zip_results=False)
         if np.any(r_ij < 1e-3):
             return 0.
 

From 856471e09f44f6986c88da1fda1dbebe37ee406e Mon Sep 17 00:00:00 2001
From: Francois-Xavier Coudert <fxcoudert@gmail.com>
Date: Wed, 1 May 2024 12:05:13 +0200
Subject: [PATCH 5/5] Work around pymatgen issue when no neighbor is found

---
 src/moxel/utils.py | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/src/moxel/utils.py b/src/moxel/utils.py
index a588114..8b35faa 100644
--- a/src/moxel/utils.py
+++ b/src/moxel/utils.py
@@ -214,6 +214,13 @@ def lj_potential(self, coords):
             cartesian_coords = self._simulation_box.lattice.get_cartesian_coords(coords)
 
         _, r_ij, indices, _ = self._simulation_box._lattice.get_points_in_sphere(self._frac_coords, cartesian_coords, self.cutoff, zip_results=False)
+
+        # No neighbor, zero energy
+        # Need to check for length of r_ij because of https://github.com/materialsproject/pymatgen/issues/3794
+        if len(r_ij) == 0:
+            return 1.
+
+        # Close contact, infinite energy
         if np.any(r_ij < 1e-3):
             return 0.