Skip to content

Commit

Permalink
Merge pull request #67 from MorrowChem/regularisation-cleanup
Browse files Browse the repository at this point in the history
Regularisation cleanup
  • Loading branch information
JaGeo authored Jun 12, 2024
2 parents 618afcc + 5af0089 commit e6eb066
Show file tree
Hide file tree
Showing 4 changed files with 129 additions and 67 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/python-package.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ name: Testing Linux

on:
push:
branches: [ main ]
branches: '*'
pull_request:
branches: [ main ]

Expand Down
2 changes: 1 addition & 1 deletion autoplex/fitting/common/flows.py
Original file line number Diff line number Diff line change
Expand Up @@ -268,7 +268,7 @@ def make(
ase.io.write("train_wo_sigma.extxyz", atoms, format="extxyz")
atoms_with_sigma = set_sigma(
atoms,
etup=[(0.1, 1), (0.001, 0.1), (0.0316, 0.316), (0.0632, 0.632)],
reg_minmax=[(0.1, 1), (0.001, 0.1), (0.0316, 0.316), (0.0632, 0.632)],
)
ase.io.write("train.extxyz", atoms_with_sigma, format="extxyz")
if self.separated:
Expand Down
137 changes: 72 additions & 65 deletions autoplex/fitting/common/regularization.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
"""Functions for automatic regularization and weighting of training data."""

# adapted from MorrowChem's RSS routines.
from __future__ import annotations

import traceback
Expand All @@ -11,27 +12,31 @@

def set_sigma(
atoms,
etup,
reg_minmax,
isol_es: dict | None = None,
scheme="linear-hull",
energy_name="REF_energy",
force_name="REF_forces",
virial_name="REF_virial",
element_order=None,
max_energy=20.0,
config_type_override=None,
):
"""
Handle automatic regularisation based on distance to convex hull, amongst other things.
Need to make sure this works for multi-stoichiometry systems.
TODO: Need to make sure this works for full multi-stoichiometry systems.
Parameters
----------
atoms: (list of ase.Atoms)
list of atoms objects to set reg. for. Usually fitting database
etup: (list of tuples)
reg_minmax: (list of tuples)
list of tuples of (min, max) values for energy, force, virial sigmas
scheme: (str)
scheme to use for regularisation. Options are: linear_hull, volume-stoichiometry
method to use for regularization. Options are: linear_hull, volume-stoichiometry
linear_hull: for single-composition system, use 2D convex hull (E, V)
volume-stoichiometry: for multi-composition system, use 3D convex hull of (E, V, mole-fraction)
energy_name: (str)
name of energy key in atoms.info
force_name: (str)
Expand All @@ -44,42 +49,50 @@ def set_sigma(
for SiO2
element_order: (list)
list of atomic numbers in order of choice (e.g. [42, 16] for MoS2)
max_energy: (float)
ignore any structures with energy above hull greater than this value (eV)
config_type_override: (dict)
give custom regularization for specific config types
e.g. etup = [(0.1, 1), (0.001, 0.1), (0.0316, 0.316), (0.0632, 0.632)]
e.g. reg_minmax = [(0.1, 1), (0.001, 0.1), (0.0316, 0.316), (0.0632, 0.632)]
[(emin, emax), (semin, semax), (sfmin, sfmax), (ssvmin, svmax)]
"""
sigs = [[], [], []] # type: ignore
atoms_modi = []

if config_type_override is None:
config_type_override = {
"IsolatedAtom": (1e-4, 0.0, 0.0),
"dimer": (0.1, 0.5, 0.0),
}

for at in atoms:
for config_type, sigs_override in config_type_override.items():
if at.info["config_type"] == config_type:
at.info["energy_sigma"] = sigs_override[0]
at.info["force_sigma"] = sigs_override[1]
at.info["virial_sigma"] = sigs_override[2]
atoms_modi.append(at)

if at.info["config_type"] == "IsolatedAtom":
# isol_es
at.info["energy_sigma"] = 0.0001
at.calc = None # TODO side-effect alert
atoms_modi.append(at)
del at.info["force_sigma"]
del at.info["virial_sigma"]
continue

if at.info["config_type"] == "dimer":
at.info["energy_sigma"] = 0.1
at.info["force_sigma"] = 0.5
with suppress(Exception):
del at.info[virial_name]

atoms_modi.append(at)
continue

if scheme == "linear-hull":
# Use this one for a simple single-composition system.
# Makes a 2D convex hull of volume vs. energy and calculates distance to it
print("Regularising with linear hull")
hull, p = get_convex_hull(atoms, energy_name=energy_name)
# print("got hull pts", [p1 for p1 in zip(hull[:, 0], hull[:, 1])])
get_e_distance_func = get_e_distance_to_hull

elif scheme == "volume-stoichiometry":
# Use this one for a binary or pseudo-binary-composition system.
# Makes a 3D convex hull of volume vs. mole fraction vs. energy and calculates distance to it
print("Regularising with 3D volume-mole fraction hull")
if isol_es == {}:
raise ValueError("Need to supply dictionary of isolated energies.")
Expand All @@ -89,7 +102,6 @@ def set_sigma(
) # label atoms with volume and mole fraction
hull = calculate_hull_3D(p) # calculate 3D convex hull
get_e_distance_func = get_e_distance_to_hull_3D # type: ignore
# function to calculate distance to hull (in energy)

p = {}
for group in sorted(
Expand Down Expand Up @@ -118,48 +130,50 @@ def set_sigma(
hull, val, energy_name=energy_name, isol_es=isol_es
)

if de > 20.0:
if de > max_energy:
# don't even fit if too high
continue
if val.info["config_type"] != "IsolatedAtom":
if val.info["config_type"] not in config_type_override:
if group == "initial":
sigs[0].append(etup[1][1])
sigs[1].append(etup[2][1])
sigs[2].append(etup[3][1])
val.info["energy_sigma"] = etup[1][1]
val.info["force_sigma"] = etup[2][1]
val.info["virial_sigma"] = etup[3][1]
sigs[0].append(reg_minmax[1][1])
sigs[1].append(reg_minmax[2][1])
sigs[2].append(reg_minmax[3][1])
val.info["energy_sigma"] = reg_minmax[1][1]
val.info["force_sigma"] = reg_minmax[2][1]
val.info["virial_sigma"] = reg_minmax[3][1]
atoms_modi.append(val)
continue

if de <= etup[0][0]:
sigs[0].append(etup[1][0])
sigs[1].append(etup[2][0])
sigs[2].append(etup[3][0])
val.info["energy_sigma"] = etup[1][0]
val.info["force_sigma"] = etup[2][0]
val.info["virial_sigma"] = etup[3][0]
if de <= reg_minmax[0][0]:
sigs[0].append(reg_minmax[1][0])
sigs[1].append(reg_minmax[2][0])
sigs[2].append(reg_minmax[3][0])
val.info["energy_sigma"] = reg_minmax[1][0]
val.info["force_sigma"] = reg_minmax[2][0]
val.info["virial_sigma"] = reg_minmax[3][0]
atoms_modi.append(val)

elif de >= etup[0][1]:
sigs[0].append(etup[1][1])
sigs[1].append(etup[2][1])
sigs[2].append(etup[3][1])
val.info["energy_sigma"] = etup[1][1]
val.info["force_sigma"] = etup[2][1]
val.info["virial_sigma"] = etup[3][1]
elif de >= reg_minmax[0][1]:
sigs[0].append(reg_minmax[1][1])
sigs[1].append(reg_minmax[2][1])
sigs[2].append(reg_minmax[3][1])
val.info["energy_sigma"] = reg_minmax[1][1]
val.info["force_sigma"] = reg_minmax[2][1]
val.info["virial_sigma"] = reg_minmax[3][1]
atoms_modi.append(val)

else:
# rat = (de-etup[0][0]) / (etup[0][1]-etup[0][0])
# e = rat*(etup[1][1]-etup[1][0]) + etup[1][0]
# f = rat*(etup[2][1]-etup[2][0]) + etup[2][0]
# v = rat*(etup[3][1]-etup[3][0]) + etup[3][0]
[e, f, v] = piecewise_linear(
de,
[
(0.1, [etup[1][0], etup[2][0], etup[3][0]]),
(1.0, [etup[1][1], etup[2][1], etup[3][1]]),
(
0.1,
[reg_minmax[1][0], reg_minmax[2][0], reg_minmax[3][0]],
),
(
1.0,
[reg_minmax[1][1], reg_minmax[2][1], reg_minmax[3][1]],
),
],
)
sigs[0].append(e)
Expand All @@ -174,15 +188,17 @@ def set_sigma(
data_type = [np.array(sig) for sig in sigs] # [e, f, v]

for label, data in zip(labels, data_type):
if len(data) == 0:
print("No automatic regularisation performed (no structures requested)")
continue
if label == "E":
# Print header
# Report of the regularisation statistics
print(f"Automatic regularisation statistics for {len(data)} structures:\n")
print(
"{:>20s}{:>20s}{:>20s}{:>20s}{:>20s}".format(
"", "Mean", "Std", "Nmin", "Nmax"
)
)

print(
"{:>20s}{:>20.4f}{:>20.4f}{:>20d}{:>20d}".format(
label,
Expand All @@ -198,7 +214,7 @@ def set_sigma(

def get_convex_hull(atoms, energy_name="energy", **kwargs):
"""
Calculate simple linear convex hull of volume vs. energy.
Calculate simple linear (E,V) convex hull.
Parameters
----------
Expand All @@ -209,7 +225,8 @@ def get_convex_hull(atoms, energy_name="energy", **kwargs):
Returns
-------
the list of points in the convex hull (lower half only), and additionally all the points for testing purposes
the list of points in the convex hull (lower half only),
and additionally all the points for testing purposes
"""
p = []
Expand All @@ -225,7 +242,8 @@ def get_convex_hull(atoms, energy_name="energy", **kwargs):
p.append((v, e))
except Exception:
ct += 1
print(f"Convex hull failed to include {ct}/{len(atoms)} structures")
if ct > 0:
print(f"Convex hull failed to include {ct}/{len(atoms)} structures")
p = np.array(p)
p = p.T[:, np.argsort(p.T[0])].T # sort in volume axis

Expand All @@ -251,8 +269,6 @@ def get_convex_hull(atoms, energy_name="energy", **kwargs):
lower_half_hull_points[:, 1] <= np.max(lower_half_hull_points[:, 1])
]

# plot_convex_hull(p, lower_half_hull_points)

return lower_half_hull_points, p


Expand All @@ -273,7 +289,6 @@ def get_e_distance_to_hull(hull, at, energy_name="energy", **kwargs):
volume = at.get_volume() / len(at)
energy = at.info[energy_name] / len(at)
tp = np.array([volume, energy])

hull_ps = hull.points if isinstance(hull, ConvexHull) else hull

if any(
Expand All @@ -288,8 +303,8 @@ def get_e_distance_to_hull(hull, at, energy_name="energy", **kwargs):
return (
energy
- get_intersect(
tp, # get intersection of the vertical line (energy axis) and the line between the nearest hull points
tp + np.array([0, 1]),
tp, # get intersection of the vertical line (energy axis)
tp + np.array([0, 1]), # and the line between the nearest hull points
hull_ps[(nearest - 1) % len(hull_ps.T[0])],
hull_ps[nearest % len(hull_ps.T[0])],
)[1]
Expand Down Expand Up @@ -340,7 +355,6 @@ def get_x(at, element_order=None):
el, cts = np.unique(at.get_atomic_numbers(), return_counts=True)

if element_order is None and len(el) < 3: # compatibility with old version
# print('using old version of get_x')
x = cts[1] / sum(cts) if len(el) == 2 else 1

else: # new version, requires element_order, recommended for all new calculations
Expand All @@ -355,9 +369,7 @@ def get_x(at, element_order=None):
el = np.array([el[np.argwhere(el == i).squeeze()] for i in element_order])

x = cts[1:] / sum(cts)
# print(x, at)

# print(at)
return x


Expand All @@ -370,23 +382,22 @@ def label_stoichiometry_volume(ats, isol_es, e_name, element_order=None):
ats: (list)
list of atoms objects
isol_es: (dict)
dictionary of isolated atom energies
dictionary of isolated atom energies {atomic_number: energy}
e_name: (str)
name of energy key in atoms.info (typically a DFT energy)
element_order: (list)
list of atomic numbers in order of choice (e.g. [42, 16] for MoS2)
"""
p = []
for _ct, at in enumerate(ats):
for at in ats:
try:
v = at.get_volume() / len(at)
# make energy relative to isolated atoms
e = (
at.info[e_name] - sum([isol_es[j] for j in at.get_atomic_numbers()])
) / len(at)
x = get_x(at, element_order=element_order)
# print(x)
p.append(np.hstack((x, v, e)))
except Exception:
traceback.print_exc()
Expand Down Expand Up @@ -503,7 +514,6 @@ def calculate_hull_ND(p):
pn = np.delete(pn, i, axis=1)
print(f"Convex hull lower dimensional - removing dimension {i}")
remove_dim.append(i)
# print(pn)

hull = ConvexHull(pn, qhull_options="QG0")
print("done calculating hull")
Expand Down Expand Up @@ -547,9 +557,6 @@ def get_e_distance_to_hull_3D(
return get_e_distance_to_hull(hull, at, energy_name=energy_name)

for _ct, visible_facet in enumerate(hull.simplices[hull.good]):
# print(sp[:-1])
# print(*hull.points[visible_facet][:, :-1])

if point_in_triangle_ND(sp[:-1], *hull.points[visible_facet][:, :-1]):
n_3 = hull.points[visible_facet]
e = sp[-1]
Expand Down
Loading

0 comments on commit e6eb066

Please sign in to comment.