From 0cb766dbe6e297c63d94b3395cb26bb804d5d2ba Mon Sep 17 00:00:00 2001 From: svdenhau Date: Tue, 31 Oct 2023 18:40:01 +0100 Subject: [PATCH] added PySCF support --- .github/workflows/run_pytest.yaml | 2 +- Dockerfile | 7 +- psiflow/reference/__init__.py | 2 +- psiflow/reference/_nwchem.py | 2 +- psiflow/reference/_pyscf.py | 261 ++++++++++++++++++++++++++++++ psiflow/utils.py | 2 +- tests/test_reference.py | 145 ++++++++++++++--- 7 files changed, 393 insertions(+), 28 deletions(-) create mode 100644 psiflow/reference/_pyscf.py diff --git a/.github/workflows/run_pytest.yaml b/.github/workflows/run_pytest.yaml index 0986ca6..04e323d 100644 --- a/.github/workflows/run_pytest.yaml +++ b/.github/workflows/run_pytest.yaml @@ -24,7 +24,6 @@ jobs: py-plumed cp2k spglib=2.0.* - nwchem pip -c conda-forge init-shell: bash @@ -41,6 +40,7 @@ jobs: pip install git+https://github.com/mir-group/nequip.git@develop --no-deps pip install git+https://github.com/mir-group/allegro --no-deps pip install git+https://github.com/svandenhaute/openmm-ml.git@triclinic + pip install pyscf pip install 'psiflow[test] @ git+https://github.com/molmod/psiflow.git' cd ${{ runner.temp }} && git clone https://github.com/molmod/psiflow cd psiflow diff --git a/Dockerfile b/Dockerfile index 2edb1ee..8c3937a 100644 --- a/Dockerfile +++ b/Dockerfile @@ -45,9 +45,9 @@ RUN micromamba install --yes --name base --channel conda-forge \ micromamba clean --all --yes RUN CONDA_OVERRIDE_CUDA="11.8" micromamba install -n base --yes -c conda-forge \ - python=3.9 pip ndcctools=7.6.1 \ + python=3.9 pip \ openmm-plumed openmm-torch pytorch=1.13.1=cuda* \ - nwchem py-plumed && \ + py-plumed && \ micromamba clean -af --yes ARG MAMBA_DOCKERFILE_ACTIVATE=1 # (otherwise python will not be found) @@ -60,9 +60,10 @@ RUN pip install git+https://github.com/acesuit/MACE.git@55f7411 && \ pip install git+https://github.com/mir-group/nequip.git@develop --no-deps && \ pip install git+https://github.com/mir-group/allegro --no-deps && \ pip install git+https://github.com/svandenhaute/openmm-ml.git@triclinic +RUN pip install pyscf ARG GIT_COMMIT_SHA -RUN pip install git+https://github.com/molmod/psiflow +RUN pip install 'psiflow[parsl] @ git+https://github.com/molmod/psiflow' RUN pip cache purge ENV OMPI_MCA_plm_rsh_agent= diff --git a/psiflow/reference/__init__.py b/psiflow/reference/__init__.py index d66cd1a..0282d19 100644 --- a/psiflow/reference/__init__.py +++ b/psiflow/reference/__init__.py @@ -1,4 +1,4 @@ from ._cp2k import CP2KReference # noqa: F401 from ._emt import EMTReference # noqa: F401 -from ._nwchem import NWChemReference # noqa: F401 +from ._pyscf import PySCFReference # noqa: F401 from .base import BaseReference # noqa: F401 diff --git a/psiflow/reference/_nwchem.py b/psiflow/reference/_nwchem.py index 19507c7..8f53be0 100644 --- a/psiflow/reference/_nwchem.py +++ b/psiflow/reference/_nwchem.py @@ -133,7 +133,7 @@ def nwchem_singlepoint_pre( command_cd, command_write, command_mkdir, - "export OMP_NUM_THREADS=2;", + "export OMP_NUM_THREADS=1;", "timeout -k 5 {}s".format(max(walltime - 20, 0)), nwchem_command + " nwchem.nwi || true", ] diff --git a/psiflow/reference/_pyscf.py b/psiflow/reference/_pyscf.py new file mode 100644 index 0000000..7c0be46 --- /dev/null +++ b/psiflow/reference/_pyscf.py @@ -0,0 +1,261 @@ +from __future__ import annotations # necessary for type-guarding class methods + +import logging + +import numpy as np +import parsl +from ase import Atoms +from ase.data import atomic_numbers +from parsl.app.app import bash_app, join_app, python_app +from parsl.data_provider.files import File + +import psiflow +from psiflow.data import FlowAtoms, NullState +from psiflow.reference.base import BaseReference +from psiflow.utils import copy_app_future + +logger = logging.getLogger(__name__) # logging per module + + +def atoms_to_molecule(ase_atoms, basis, spin): + from pyscf import gto + from pyscf.pbc import gto as pbcgto + + atom_symbols = ase_atoms.get_chemical_symbols() + atom_coords = ase_atoms.get_positions() + atom_spec = [ + (symbol, tuple(coord)) for symbol, coord in zip(atom_symbols, atom_coords) + ] + + if ase_atoms.get_pbc().any(): # Periodic boundary conditions + cell_params = ase_atoms.get_cell() + pyscf_cell = pbcgto.Cell() + pyscf_cell.atom = atom_spec + pyscf_cell.basis = basis + pyscf_cell.spin = spin + pyscf_cell.a = cell_params + pyscf_cell.build() + return pyscf_cell + else: # Non-periodic (molecular) + pyscf_mol = gto.Mole() + pyscf_mol.atom = atom_spec + pyscf_mol.basis = basis + pyscf_mol.spin = spin + pyscf_mol.verbose = 5 + pyscf_mol.build() + return pyscf_mol + + +def serialize_atoms(atoms): + atoms_str = "dict(symbols={}, positions={}, cell={}, pbc={})".format( + atoms.get_chemical_symbols(), + atoms.get_positions().tolist(), + atoms.get_cell().tolist(), + atoms.get_pbc().tolist(), + ) + return atoms_str + # atoms_dict = { + # 'symbols': atoms.get_chemical_symbols(), + # 'positions': atoms.get_positions().tolist(), # Convert numpy array to list + # 'cell': atoms.get_cell().tolist(), + # 'pbc': atoms.get_pbc().tolist(), + # } + # return repr(atoms_dict) + + +def deserialize_atoms(atoms_dict): + return Atoms( + symbols=atoms_dict["symbols"], + positions=np.array(atoms_dict["positions"]), # Convert list back to numpy array + cell=np.array(atoms_dict["cell"]), + pbc=atoms_dict["pbc"], + ) + + +def generate_script(state, routine, basis, spin): + # print 'energy' and 'forces' variables + routine = routine.strip() + routine += """ +print('total energy = {}'.format(energy * Ha)) +print('total forces = ') +for force in forces: + print(*(force * Ha / Bohr)) +""" + lines = routine.split("\n") # indent entire routine + for i in range(len(lines)): + lines[i] = " " + lines[i] + routine = "\n".join(lines) + + script = """ +from ase.units import Ha, Bohr +from psiflow.reference._pyscf import deserialize_atoms, atoms_to_molecule + +def main(molecule): +{} + + +""".format( + routine + ) + script += """ +if __name__ == '__main__': + atoms_dict = {} + atoms = deserialize_atoms(atoms_dict) + molecule = atoms_to_molecule( + atoms, + basis='{}', + spin={}, + ) + main(molecule) +""".format( + serialize_atoms(state).strip(), + basis, + spin, + ) + return script + + +def parse_energy_forces(stdout): + energy = None + forces_str = None + lines = stdout.split("\n") + for i, line in enumerate(lines[::-1]): # start from back! + if energy is None and "total energy = " in line: + energy = float(line.split("total energy = ")[1]) + if forces_str is None and "total forces =" in line: + forces_str = "\n".join(lines[-i:]) + assert energy is not None + assert forces_str is not None + rows = forces_str.strip().split("\n") + nrows = len(rows) + ncols = len(rows[0].split()) + assert ncols == 3 + forces = np.fromstring("\n".join(rows), sep=" ", dtype=float) + return energy, np.reshape(forces, (nrows, ncols)) + + +def pyscf_singlepoint_pre( + atoms: FlowAtoms, + omp_num_threads: int, + stdout: str = "", + stderr: str = "", + walltime: int = 0, + **parameters, +) -> str: + from psiflow.reference._pyscf import generate_script + + script = generate_script(atoms, **parameters) + command_tmp = 'mytmpdir=$(mktemp -d 2>/dev/null || mktemp -d -t "mytmpdir");' + command_cd = "cd $mytmpdir;" + command_write = 'echo "{}" > generated.py;'.format(script) + command_list = [ + command_tmp, + command_cd, + command_write, + "export OMP_NUM_THREADS={};".format(omp_num_threads), + "timeout -s 9 {}s python generated.py || true".format(max(walltime - 2, 0)), + ] + return " ".join(command_list) + + +def pyscf_singlepoint_post( + atoms: FlowAtoms, + inputs: list[File] = [], +) -> FlowAtoms: + from psiflow.reference._pyscf import parse_energy_forces + + atoms.reference_stdout = inputs[0] + atoms.reference_stderr = inputs[1] + with open(atoms.reference_stdout, "r") as f: + content = f.read() + try: + energy, forces = parse_energy_forces(content) + assert forces.shape == atoms.positions.shape + atoms.info["energy"] = energy + atoms.arrays["forces"] = forces + atoms.reference_status = True + except Exception: + atoms.reference_status = False + return atoms + + +class PySCFReference(BaseReference): + required_files = [] + + def __init__(self, routine, basis, spin): + assert ( + "energy = " in routine + ), "define the total energy (in Ha) in your pyscf routine" + assert ( + "forces = " in routine + ), "define the forces (in Ha/Bohr) in your pyscf routine" + assert "pyscf" in routine, "put all necessary imports inside the routine!" + self.routine = routine + self.basis = basis + self.spin = spin + super().__init__() + + def get_single_atom_references(self, element): + number = atomic_numbers[element] + references = [] + for spin in range(15): + config = {"spin": spin} + mult = spin + 1 + if number % 2 == 0 and mult % 2 == 0: + continue + if mult == 1 and number % 2 == 1: + continue + if mult - 1 > number: + continue + parameters = self.parameters + parameters["spin"] = spin + reference = self.__class__(**parameters) + references.append((config, reference)) + return references + + @property + def parameters(self): + return { + "routine": self.routine, + "basis": self.basis, + "spin": self.spin, + } + + @classmethod + def create_apps(cls): + context = psiflow.context() + definition = context[cls] + label = definition.name() + ncores = definition.cores_per_worker + walltime = definition.max_walltime + + singlepoint_pre = bash_app( + pyscf_singlepoint_pre, + executors=[label], + ) + singlepoint_post = python_app( + pyscf_singlepoint_post, + executors=["default_threads"], + ) + + @join_app + def singlepoint_wrapped(atoms, parameters, file_names, inputs=[]): + assert len(file_names) == 0 + if atoms == NullState: + return copy_app_future(NullState) + else: + pre = singlepoint_pre( + atoms, + omp_num_threads=ncores, + stdout=parsl.AUTO_LOGNAME, + stderr=parsl.AUTO_LOGNAME, + walltime=60 * walltime, # killed after walltime - 10s + **parameters, + ) + return singlepoint_post( + atoms=atoms, + inputs=[pre.stdout, pre.stderr, pre], # wait for bash app + ) + + context.register_app(cls, "evaluate_single", singlepoint_wrapped) + super(PySCFReference, cls).create_apps() diff --git a/psiflow/utils.py b/psiflow/utils.py index 2345acc..de3b7c9 100644 --- a/psiflow/utils.py +++ b/psiflow/utils.py @@ -38,8 +38,8 @@ def set_logger( # hacky "psiflow.models._mace", "psiflow.models._nequip", "psiflow.reference._cp2k", - "psiflow.reference._nwchem", "psiflow.reference._emt", + "psiflow.reference._pyscf", "psiflow.walkers.base", "psiflow.walkers.bias", "psiflow.walkers.dynamic", diff --git a/tests/test_reference.py b/tests/test_reference.py index 31744d5..b9d09e9 100644 --- a/tests/test_reference.py +++ b/tests/test_reference.py @@ -4,14 +4,16 @@ import numpy as np import pytest import requests -from ase.units import Pascal +from ase import Atoms +from ase.units import Bohr, Ha, Pascal from parsl.dataflow.futures import AppFuture from pymatgen.io.cp2k.inputs import Cp2kInput import psiflow from psiflow.data import Dataset, FlowAtoms, NullState -from psiflow.reference import CP2KReference, EMTReference, NWChemReference +from psiflow.reference import CP2KReference, EMTReference, PySCFReference from psiflow.reference._cp2k import insert_filepaths_in_input +from psiflow.reference._pyscf import generate_script, parse_energy_forces @pytest.fixture @@ -407,31 +409,111 @@ def test_cp2k_atomic_energies(cp2k_reference, dataset): assert abs(energy.result() - (-13.6)) < 1 # reasonably close to exact value +# @pytest.fixture +# def nwchem_reference(context): +# calculator_kwargs = { +# "basis": {"H": "cc-pvqz"}, +# "dft": { +# "xc": "pbe96", +# "mult": 1, +# "convergence": { +# "energy": 1e-6, +# "density": 1e-6, +# "gradient": 1e-6, +# }, +# "disp": {"vdw": 3}, +# }, +# } +# return NWChemReference(**calculator_kwargs) +# +# +# def test_nwchem_success(nwchem_reference): +# atoms = FlowAtoms( # simple H2 at ~optimized interatomic distance +# numbers=np.ones(2), +# positions=np.array([[0, 0, 0], [0.74, 0, 0]]), +# ) +# dataset = Dataset([atoms]) +# evaluated = nwchem_reference.evaluate(dataset[0]) +# assert isinstance(evaluated, AppFuture) +# assert evaluated.result().reference_status +# assert Path(evaluated.result().reference_stdout).is_file() +# assert Path(evaluated.result().reference_stderr).is_file() +# assert "energy" in evaluated.result().info.keys() +# assert "stress" not in evaluated.result().info.keys() +# assert "forces" in evaluated.result().arrays.keys() +# assert evaluated.result().arrays["forces"][0, 0] < 0 +# assert evaluated.result().arrays["forces"][1, 0] > 0 +# +# nwchem_reference.evaluate(dataset) +# assert nwchem_reference.compute_atomic_energy("H").result() < 0 + + @pytest.fixture -def nwchem_reference(context): - calculator_kwargs = { - "basis": {"H": "cc-pvqz"}, - "dft": { - "xc": "pbe96", - "mult": 1, - "convergence": { - "energy": 1e-6, - "density": 1e-6, - "gradient": 1e-6, - }, - "disp": {"vdw": 3}, - }, - } - return NWChemReference(**calculator_kwargs) +def pyscf_reference(context): + routine = """ +from pyscf import dft +mf = dft.RKS(molecule) +mf.xc = 'pbe,pbe' -def test_nwchem_success(nwchem_reference): +energy = mf.kernel() +forces = -mf.nuc_grad_method().kernel() +""" + basis = "cc-pvtz" + spin = 0 + return PySCFReference(routine, basis, spin) + + +def test_pyscf_generate_script(pyscf_reference): + atoms = Atoms("H2", positions=[[0, 0, 0], [0, 0, 0.75]], pbc=False) + _ = generate_script( + atoms, + pyscf_reference.routine, + pyscf_reference.basis, + pyscf_reference.spin, + ) + + +def test_pyscf_extract_energy_forces(): + stdout = """ +total energy = as;ldkfj +tion, you can put the setting "B3LYP_WITH_VWN5 = True" in pyscf_conf.py + warnings.warn('Since PySCF-2.3, B3LYP (and B3P86) are changed to the VWN-RPA variant, ' +converged SCF energy = -1.16614771756639 +total forces = ;s';dlfkj +--------------- RKS gradients --------------- + x y z +0 H -0.0000000000 0.0000000000 0.0005538080 +1 H -0.0000000000 0.0000000000 -0.0005538080 +---------------------------------------------- +total energy = -31.73249570413387 +total forces = +8.504463377857879e-16 -6.70273006321553e-16 -0.02847795118636264 +1.9587146782865252e-16 -2.135019926691156e-15 0.028477951186359783 +""" + energy, forces = parse_energy_forces(stdout) + assert np.allclose(energy, -1.16614771756639 * Ha) + forces_ = ( + (-1.0) + * np.array( + [ + [-0.0000000000, 0.0000000000, 0.0005538080], + [-0.0000000000, 0.0000000000, -0.0005538080], + ] + ) + * Ha + / Bohr + ) + assert np.allclose(forces, forces_) + + +def test_pyscf_success(pyscf_reference): atoms = FlowAtoms( # simple H2 at ~optimized interatomic distance numbers=np.ones(2), positions=np.array([[0, 0, 0], [0.74, 0, 0]]), ) dataset = Dataset([atoms]) - evaluated = nwchem_reference.evaluate(dataset[0]) + evaluated = pyscf_reference.evaluate(dataset[0]) assert isinstance(evaluated, AppFuture) assert evaluated.result().reference_status assert Path(evaluated.result().reference_stdout).is_file() @@ -442,5 +524,26 @@ def test_nwchem_success(nwchem_reference): assert evaluated.result().arrays["forces"][0, 0] < 0 assert evaluated.result().arrays["forces"][1, 0] > 0 - nwchem_reference.evaluate(dataset) - assert nwchem_reference.compute_atomic_energy("H").result() < 0 + pyscf_reference.evaluate(dataset) + assert pyscf_reference.compute_atomic_energy("H").result() < 0 + + +def test_pyscf_timeout(context): + routine = """ +from pyscf import scf, cc + +mf = scf.HF(molecule).run() +mycc = cc.CCSD(mf) +mycc.kernel() +energy = mycc.e_tot +forces = -mycc.nuc_grad_method().kernel() +""" + basis = "cc-pvqz" + spin = 0 + reference = PySCFReference(routine, basis, spin) + atoms = FlowAtoms( + numbers=np.ones(4), + positions=np.array([[0, 0, 0], [0.74, 0, 0], [0, 3, 0], [0.74, 3, 0]]), + ) + reference.evaluate(atoms).result() + assert not atoms.reference_status