From df5284e11995b644cde50f56ab0b8e43c9a83fd7 Mon Sep 17 00:00:00 2001 From: Francois-Xavier Coudert Date: Wed, 24 Apr 2024 11:48:19 +0200 Subject: [PATCH 1/5] lj_potential: use numpy array operations numpy array operations are faster, so coerce necessary data into numpy arrays and avoid doing math or linalg operations inside Python loops. Tested on a representative MOF (QETBUP_clean.cif) on my system, with all default parameters: the wall time goes down from 5.89 s to 4.86 s, so that's 17% faster. Voxel statistics are otherwise identical. --- src/moxel/utils.py | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/src/moxel/utils.py b/src/moxel/utils.py index 13717c0..20c2cb2 100644 --- a/src/moxel/utils.py +++ b/src/moxel/utils.py @@ -212,14 +212,15 @@ def lj_potential(self, coords): energy = 0 if len(neighbors) != 0: - for atom in neighbors: - r_ij = np.linalg.norm(cartesian_coords - atom.coords) - if r_ij <= 1e-3: - energy += 1000 - else: - e_j, s_j = lj_params[atom.species_string] - x = (0.5 * (s_j + self.sigma)) / r_ij - energy += 4 * np.sqrt(e_j * self.epsilon) * (x**12 - x**6) + neigh_coords = np.stack([atom.coords for atom in neighbors]) + r_ij = np.linalg.norm(cartesian_coords - neigh_coords, axis=1) + if np.any(r_ij < 1e-3): + return 0. + + es_j = np.array([lj_params[atom.species_string] for atom in neighbors]) + x = (0.5 * (es_j[:,1] + self.sigma)) / r_ij + e = 4 * np.sqrt(es_j[:,0] * self.epsilon) + energy = sum(e * (x**12 - x**6)) # This should be changed with clipping in future versions. return np.exp(-(1 / 298) * energy) # For numerical stability. From 821ee2af84358fe569a065b25aa18b323714ddd9 Mon Sep 17 00:00:00 2001 From: Francois-Xavier Coudert Date: Wed, 24 Apr 2024 23:05:38 +0200 Subject: [PATCH 2/5] No need to calculate distances! The distance of each atom to the central point is already provided by the get_sites_in_sphere() function, in the nn_distance field. --- src/moxel/utils.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/moxel/utils.py b/src/moxel/utils.py index 20c2cb2..38e0f28 100644 --- a/src/moxel/utils.py +++ b/src/moxel/utils.py @@ -212,8 +212,7 @@ def lj_potential(self, coords): energy = 0 if len(neighbors) != 0: - neigh_coords = np.stack([atom.coords for atom in neighbors]) - r_ij = np.linalg.norm(cartesian_coords - neigh_coords, axis=1) + r_ij = np.stack([atom.nn_distance for atom in neighbors]) if np.any(r_ij < 1e-3): return 0. From 5f3f294d89b138c32775eea8459c84d65d3ab965 Mon Sep 17 00:00:00 2001 From: Francois-Xavier Coudert Date: Thu, 25 Apr 2024 09:50:56 +0200 Subject: [PATCH 3/5] Use lower-level function `get_points_in_sphere()` This speeds up the calculation significantly by avoiding a lot of overhead from pymatgen Structure objects. --- src/moxel/utils.py | 24 +++++++++++------------- 1 file changed, 11 insertions(+), 13 deletions(-) diff --git a/src/moxel/utils.py b/src/moxel/utils.py index 38e0f28..0d80628 100644 --- a/src/moxel/utils.py +++ b/src/moxel/utils.py @@ -178,6 +178,8 @@ def calculate(self, cubic_box=False, length=30, potential='lj', n_jobs=None): self._simulation_box = self.structure * scale if potential == 'lj': + # Cache LJ parameters for all atoms in the simulation box + self._lj_params = np.array([lj_params[atom.species_string] for atom in self._simulation_box]) # Embarrassingly parallel. with Pool(processes=n_jobs) as p: energies = p.map( @@ -205,21 +207,17 @@ def lj_potential(self, coords): """ if self.cubic_box: cartesian_coords = coords - neighbors = self._simulation_box.get_sites_in_sphere(cartesian_coords, self.cutoff) else: cartesian_coords = self._simulation_box.lattice.get_cartesian_coords(coords) - neighbors = self._simulation_box.get_sites_in_sphere(cartesian_coords, self.cutoff) - - energy = 0 - if len(neighbors) != 0: - r_ij = np.stack([atom.nn_distance for atom in neighbors]) - if np.any(r_ij < 1e-3): - return 0. - - es_j = np.array([lj_params[atom.species_string] for atom in neighbors]) - x = (0.5 * (es_j[:,1] + self.sigma)) / r_ij - e = 4 * np.sqrt(es_j[:,0] * self.epsilon) - energy = sum(e * (x**12 - x**6)) + + _, r_ij, indices, _ = self._simulation_box._lattice.get_points_in_sphere(self._simulation_box.frac_coords, cartesian_coords, self.cutoff, zip_results=False) + if np.any(r_ij < 1e-3): + return 0. + + es_j = self._lj_params[indices] + x = (0.5 * (es_j[:,1] + self.sigma)) / r_ij + e = 4 * np.sqrt(es_j[:,0] * self.epsilon) + energy = sum(e * (x**12 - x**6)) # This should be changed with clipping in future versions. return np.exp(-(1 / 298) * energy) # For numerical stability. From ac709b83e121c9bbeed5e99efae8798f98a02310 Mon Sep 17 00:00:00 2001 From: Francois-Xavier Coudert Date: Thu, 25 Apr 2024 10:02:13 +0200 Subject: [PATCH 4/5] Cache the box fractional coordinates --- src/moxel/utils.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/moxel/utils.py b/src/moxel/utils.py index 0d80628..a588114 100644 --- a/src/moxel/utils.py +++ b/src/moxel/utils.py @@ -180,6 +180,9 @@ def calculate(self, cubic_box=False, length=30, potential='lj', n_jobs=None): if potential == 'lj': # Cache LJ parameters for all atoms in the simulation box self._lj_params = np.array([lj_params[atom.species_string] for atom in self._simulation_box]) + # Cache fractional coordinates, since this is a slow function in pymatgen + self._frac_coords = self._simulation_box.frac_coords + # Embarrassingly parallel. with Pool(processes=n_jobs) as p: energies = p.map( @@ -210,7 +213,7 @@ def lj_potential(self, coords): else: cartesian_coords = self._simulation_box.lattice.get_cartesian_coords(coords) - _, r_ij, indices, _ = self._simulation_box._lattice.get_points_in_sphere(self._simulation_box.frac_coords, cartesian_coords, self.cutoff, zip_results=False) + _, r_ij, indices, _ = self._simulation_box._lattice.get_points_in_sphere(self._frac_coords, cartesian_coords, self.cutoff, zip_results=False) if np.any(r_ij < 1e-3): return 0. From 856471e09f44f6986c88da1fda1dbebe37ee406e Mon Sep 17 00:00:00 2001 From: Francois-Xavier Coudert Date: Wed, 1 May 2024 12:05:13 +0200 Subject: [PATCH 5/5] Work around pymatgen issue when no neighbor is found --- src/moxel/utils.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/moxel/utils.py b/src/moxel/utils.py index a588114..8b35faa 100644 --- a/src/moxel/utils.py +++ b/src/moxel/utils.py @@ -214,6 +214,13 @@ def lj_potential(self, coords): cartesian_coords = self._simulation_box.lattice.get_cartesian_coords(coords) _, r_ij, indices, _ = self._simulation_box._lattice.get_points_in_sphere(self._frac_coords, cartesian_coords, self.cutoff, zip_results=False) + + # No neighbor, zero energy + # Need to check for length of r_ij because of https://github.com/materialsproject/pymatgen/issues/3794 + if len(r_ij) == 0: + return 1. + + # Close contact, infinite energy if np.any(r_ij < 1e-3): return 0.