From 77a831161a004945e0be87fcb327b126446ba39b Mon Sep 17 00:00:00 2001
From: "Lori A. Burns" <lori.burns@gmail.com>
Date: Mon, 27 Jan 2025 17:54:33 -0500
Subject: [PATCH] point charges for psi4, nwc, cfour

---
 qcengine/programs/cfour/harvester.py    |  11 +-
 qcengine/programs/cfour/keywords.py     |  35 ++++-
 qcengine/programs/cfour/runner.py       |   4 +-
 qcengine/programs/nwchem/keywords.py    |  17 ++-
 qcengine/programs/nwchem/runner.py      |  17 ++-
 qcengine/programs/tests/test_charges.py | 192 ++++++++++++++++++++++++
 6 files changed, 262 insertions(+), 14 deletions(-)
 create mode 100644 qcengine/programs/tests/test_charges.py

diff --git a/qcengine/programs/cfour/harvester.py b/qcengine/programs/cfour/harvester.py
index 8e436e386..88e184e64 100644
--- a/qcengine/programs/cfour/harvester.py
+++ b/qcengine/programs/cfour/harvester.py
@@ -1185,7 +1185,7 @@ def harvest(in_mol: Molecule, method: str, c4out, **largs):
     qcvars, out_mol, outGrad, version, module, error = harvest_output(c4out)
 
     if largs.get("GRD"):
-        grdMol, grdGrad = harvest_GRD(largs["GRD"])
+        grdMol, grdGrad = harvest_GRD(largs["GRD"], len(in_mol.symbols))
     else:
         grdMol, grdGrad = None, None
 
@@ -1348,7 +1348,7 @@ def harvest(in_mol: Molecule, method: str, c4out, **largs):
     return qcvars, return_hess, return_grad, return_mol, version, module, error
 
 
-def harvest_GRD(grd):
+def harvest_GRD(grd, expected_natom):
     """Parses the contents *grd* of the Cfour GRD file into the gradient
     array and coordinate information. The coordinate info is converted
     into a rather dinky Molecule (no charge, multiplicity, or fragment),
@@ -1360,8 +1360,13 @@ def harvest_GRD(grd):
     Nat = int(grd[0].split()[0])
     molxyz = f"{Nat} bohr\n\n"
 
+    # Mostly, the GRD file contains the atomic coordinates followed by the gradient.
+    #   When extern_pot is present, the file contains the atomic coordinates followed
+    #   by the point charges followed by the atomic gradient followed by the point
+    #   charge gradient. Hence the mixed indices: Nat and expected_natom.
+
     grad = []
-    for at in range(Nat):
+    for at in range(expected_natom):
         mline = grd[at + 1].split()
 
         # "@Xe" is potentially dangerous bypass for ghosts
diff --git a/qcengine/programs/cfour/keywords.py b/qcengine/programs/cfour/keywords.py
index fc60c24e7..c75299b01 100644
--- a/qcengine/programs/cfour/keywords.py
+++ b/qcengine/programs/cfour/keywords.py
@@ -9,15 +9,46 @@ def format_keywords(keywords: Dict[str, Any]) -> str:
 
     """
     text = []
+    percents = {}
 
     keywords = {k.upper(): v for k, v in keywords.items()}
     for key, val in sorted(keywords.items()):
-        text.append("=".join(format_keyword(key, val)))
+        if key.startswith("%"):
+            percents[key] = keywords.pop(key)
+        else:
+            text.append("=".join(format_keyword(key, val)))
 
     text = "\n".join(text)
     text = "\n\n*CFOUR(" + text + ")\n\n"
 
-    return text
+    # not yet tested with multiple % sections
+    percent_text = []
+    for key, val in percents.items():
+        percent_text.append("\n".join(format_percent(key, val)))
+    percent_text = "\n".join(percent_text)
+
+    return text, percent_text
+
+
+def format_percent(keyword: str, val: Any) -> Tuple[str, str]:
+    keyword = keyword.lower()
+
+    if isinstance(val, list):
+        if type(val[0]).__name__ == "list":
+            if type(val[0][0]).__name__ == "list":
+                raise InputError("Option has level of array nesting inconsistent with Cfour.")
+            else:
+                # for %extern_pot*
+                text = "\n".join([" ".join([str(inner_v) for inner_v in outer_v]) for outer_v in val])
+        else:
+            # option is plain 1D array
+            text = " ".join([str(v) for v in val])
+
+    # No Transform
+    else:
+        text = str(val).upper()
+
+    return keyword, text
 
 
 def format_keyword(keyword: str, val: Any) -> Tuple[str, str]:
diff --git a/qcengine/programs/cfour/runner.py b/qcengine/programs/cfour/runner.py
index 6fe67e2b6..7c1cffea8 100644
--- a/qcengine/programs/cfour/runner.py
+++ b/qcengine/programs/cfour/runner.py
@@ -129,11 +129,11 @@ def build_input(
             bascmd = "\n".join(text)
 
         # Handle conversion from schema (flat key/value) keywords into local format
-        optcmd = format_keywords(opts)
+        optcmd, percentcmd = format_keywords(opts)
 
         xcfour = which("xcfour")
         genbas = Path(xcfour).parent.parent / "basis" / "GENBAS"
-        cfourrec["infiles"]["ZMAT"] = molcmd + optcmd + bascmd
+        cfourrec["infiles"]["ZMAT"] = molcmd + optcmd + bascmd + percentcmd
         cfourrec["infiles"]["GENBAS"] = genbas.read_text()
         cfourrec["command"] = [xcfour]
 
diff --git a/qcengine/programs/nwchem/keywords.py b/qcengine/programs/nwchem/keywords.py
index baaac3d50..6b8eaa6fe 100644
--- a/qcengine/programs/nwchem/keywords.py
+++ b/qcengine/programs/nwchem/keywords.py
@@ -21,7 +21,16 @@ def format_keyword(keyword: str, val: Any, lop_off: bool = True, preserve_case:
         return key, f"{val} double"
 
     elif isinstance(val, list):
-        text = " ".join([str(v) for v in val])
+        if type(val[0]).__name__ == "list":
+            if type(val[0][0]).__name__ == "list":
+                raise InputError("Option has level of array nesting inconsistent with NWChem.")
+            else:
+                # for bq__data
+                text = "\n".join([" ".join([str(inner_v) for inner_v in outer_v]) for outer_v in val])
+        else:
+            # option is plain 1D array
+            text = " ".join([str(v) for v in val])
+
     elif isinstance(val, dict):
         text = []
         for k, v in val.items():
@@ -32,6 +41,10 @@ def format_keyword(keyword: str, val: Any, lop_off: bool = True, preserve_case:
     else:
         text = str(val)
 
+    if key == "data" or (lop_off and key[7:] == "data"):
+        # data indicates "anonymous" kw to fill in block
+        return (text,)
+
     if lop_off:
         return key[7:], text
     else:
@@ -84,6 +97,8 @@ def rec_dd():
                     [word in line for word in ["spherical", "cartesian", "print", "noprint", "rel"]]
                 ):
                     group_level_lines.append(line)
+                elif group.lower() == "bq" and any([word in line for word in ["units", "namespace"]]):
+                    group_level_lines.append(line)
                 else:
                     lines.append(line)
         if group == "aaaglobal":
diff --git a/qcengine/programs/nwchem/runner.py b/qcengine/programs/nwchem/runner.py
index 65a08aa05..ce20eb2b7 100644
--- a/qcengine/programs/nwchem/runner.py
+++ b/qcengine/programs/nwchem/runner.py
@@ -177,12 +177,17 @@ def build_input(
         molcmd, moldata = input_model.molecule.to_string(dtype="nwchem", units="Bohr", return_data=True)
         opts.update(moldata["keywords"])
 
-        if opts.pop("geometry__noautoz", False):
-            molcmd = re.sub(r"geometry ([^\n]*)", r"geometry \1 noautoz", molcmd)
-        # someday when minimum >=py38 `if val := opts.pop("geometry__autosym", False):`
-        if opts.get("geometry__autosym", False):
-            val = opts.pop("geometry__autosym")
-            molcmd = re.sub(r"geometry ([^\n]*)", rf"geometry \1 autosym {val}", molcmd)
+        # * top line kwds for geometry block handled here b/c block not processed through keywords.py
+        for word in ["autoz", "noautoz", "center", "nocenter", "autosym", "noautosym"]:
+            if f"geometry__{word}" in opts:
+                val = opts.pop(f"geometry__{word}")
+                if word == "autosym" and val not in [True, False]:
+                    replace = f"{word} {val}"
+                elif val is True:
+                    replace = word
+                elif val is False:
+                    replace = word[2:] if word.startswith("no") else f"no{word}"
+                molcmd = re.sub(r"geometry ([^\n]*)", rf"geometry \1 {replace}", molcmd)
 
         # Handle calc type and quantum chemical method
         mdccmd, mdcopts = muster_modelchem(input_model.model.method, input_model.driver, opts.pop("qc_module", False))
diff --git a/qcengine/programs/tests/test_charges.py b/qcengine/programs/tests/test_charges.py
new file mode 100644
index 000000000..d79e297db
--- /dev/null
+++ b/qcengine/programs/tests/test_charges.py
@@ -0,0 +1,192 @@
+import pprint
+import re
+
+import numpy as np
+import pytest
+import qcelemental as qcel
+from qcelemental.testing import compare_values
+
+import qcengine as qcng
+from qcengine.testing import using
+
+# point charge coords in psi4 and cfour must be in bohr. nwchem can be ang or bohr.
+h2o_extern_bohr_qxyz = [
+    [-0.834, [3.11659683, 0.0, -4.45223936]],
+    [0.417, [1.02944157, 0.0, -7.18088642]],
+    [0.417, [1.02944157, 0.0, -1.72359229]],
+]
+h2o_extern_bohr_xyzq = [[*pt[1], pt[0]] for pt in h2o_extern_bohr_qxyz]
+h2o_extern_ang_xyzq = np.array(h2o_extern_bohr_xyzq)
+# h2o_extern_ang_xyzq = [1.649, 0.0, -2.356, 0.545, 0.0, -3.800, 0.545, 0.0, -0.912]
+h2o_extern_ang_xyzq[:, [0, 1, 2]] *= qcel.constants.bohr2angstroms
+h2o_extern_ang_xyzq = h2o_extern_ang_xyzq.tolist()
+h2o_extern_bohr_Nxyzq = [[len(h2o_extern_bohr_xyzq)], *h2o_extern_bohr_xyzq]
+
+ne2_extern_bohr_qxyz = [[1.05, [0.0, 0.94486306, 0.94486306]]]
+ne2_extern_bohr_xyzq = [[*pt[1], pt[0]] for pt in ne2_extern_bohr_qxyz]
+ne2_extern_ang_xyzq = np.array(ne2_extern_bohr_xyzq)
+ne2_extern_ang_xyzq[:, [0, 1, 2]] *= qcel.constants.bohr2angstroms
+ne2_extern_ang_xyzq = ne2_extern_ang_xyzq.tolist()
+ne2_extern_bohr_Nxyzq = [[len(ne2_extern_bohr_xyzq)], *ne2_extern_bohr_xyzq]
+
+
+@pytest.mark.parametrize("driver", ["energy", "gradient"])
+@pytest.mark.parametrize(
+    "variation,program,basis,keywords",
+    [
+        pytest.param("h2o_plain_df", "psi4", "6-31G*", {}, marks=using("psi4")),
+        pytest.param("h2o_plain_conv", "psi4", "6-31G*", {"scf_type": "direct"}, marks=using("psi4")),
+        pytest.param(
+            "h2o_ee_df",
+            "psi4",
+            "6-31G*",
+            {"function_kwargs": {"external_potentials": h2o_extern_bohr_qxyz}},
+            marks=using("psi4"),
+        ),
+        pytest.param(
+            "h2o_ee_conv",
+            "psi4",
+            "6-31G*",
+            {"scf_type": "direct", "function_kwargs": {"external_potentials": h2o_extern_bohr_qxyz}},
+            marks=using("psi4"),
+        ),
+        pytest.param("h2o_plain_conv", "nwchem", "6-31G*", {}, marks=using("nwchem")),
+        pytest.param(
+            "h2o_ee_conv",
+            "nwchem",
+            "6-31G*",
+            {
+                "bq__units": "au",
+                "bq__data": h2o_extern_bohr_xyzq,
+                "geometry__nocenter": True,
+                "geometry__autosym": False,
+            },
+            marks=using("nwchem"),
+        ),
+        pytest.param(
+            "h2o_ee_conv",
+            "nwchem",
+            "6-31G*",
+            {"bq__data": h2o_extern_ang_xyzq, "geometry__nocenter": True, "geometry__autosym": False},
+            marks=using("nwchem"),
+        ),
+        pytest.param("h2o_plain_conv", "cfour", "6-31g*", {"spherical": 0, "scf_conv": 12}, marks=using("cfour")),
+        pytest.param(
+            "h2o_ee_conv",
+            "cfour",
+            "6-31g*",
+            {"spherical": 0, "scf_conv": 12, "extern_pot": True, "%extern_pot*": h2o_extern_bohr_Nxyzq},
+            marks=using("cfour"),
+        ),
+        # with ghost atom
+        pytest.param("ne2_plain_conv", "psi4", "6-31G", {"scf_type": "direct"}, marks=using("psi4")),
+        pytest.param(
+            "ne2_ee_conv",
+            "psi4",
+            "6-31G",
+            {"scf_type": "direct", "function_kwargs": {"external_potentials": ne2_extern_bohr_qxyz}},
+            marks=using("psi4"),
+        ),
+        pytest.param("ne2_plain_conv", "nwchem", "6-31G", {}, marks=using("nwchem")),
+        pytest.param(
+            "ne2_ee_conv",
+            "nwchem",
+            "6-31G",
+            {
+                "bq__units": "au",
+                "bq__data": ne2_extern_bohr_xyzq,
+                "geometry__nocenter": True,
+                "geometry__autosym": False,
+            },
+            marks=using("nwchem"),
+        ),
+        pytest.param(
+            "ne2_ee_conv",
+            "nwchem",
+            "6-31G",
+            {"bq__data": ne2_extern_ang_xyzq, "geometry__nocenter": True, "geometry__autosym": False},
+            marks=using("nwchem"),
+        ),
+        pytest.param("ne2_plain_conv", "cfour", "6-31g", {"spherical": 0, "scf_conv": 12}, marks=using("cfour")),
+        pytest.param(
+            "ne2_ee_conv",
+            "cfour",
+            "6-31g",
+            {"spherical": 0, "scf_conv": 12, "extern_pot": True, "%extern_pot*": ne2_extern_bohr_Nxyzq},
+            marks=using("cfour"),
+        ),
+    ],
+)
+def test_simple_external_charges(driver, variation, program, basis, keywords, request):
+
+    h2o_bohr = [-1.47172438, 0.0, 2.14046066, -1.25984639, 1.44393784, 3.22442268, -1.25984639, -1.44393784, 3.22442079]
+    mol_ang = [-0.778803, 0.000000, 1.132683, -0.666682, 0.764099, 1.706291, -0.666682, -0.764099, 1.706290]
+    ne2_bohr = [0.0, 0.0, 1.8897261254578281, 0.0, 0.0, 0.0]
+    ne2_ang = [0.0, 0.0, 1.0, 0.0, 0.0, 0.0]
+
+    water = qcel.models.Molecule(geometry=h2o_bohr, symbols=["O", "H", "H"], fix_com=True, fix_orientation=True)
+    ne2 = qcel.models.Molecule(
+        geometry=ne2_bohr, symbols=["Ne", "Ne"], real=[False, True], fix_com=True, fix_orientation=True
+    )
+    mol = water if variation.startswith("h2o_") else ne2
+
+    atin = {"molecule": mol, "driver": driver, "model": {"method": "hf", "basis": basis}, "keywords": keywords}
+
+    # fmt: off
+    ans = {
+        # plain are values w/o embedded point charges
+        "h2o_plain_df": {  # copied from psi4/extern1 test
+            "energy": -76.010274923509,
+            "gradient": [
+        -3.61281240e-03, -4.93441226e-07, -1.84830343e-02,
+         1.80644240e-03,  1.27352333e-02,  9.24171070e-03,
+         1.80637000e-03, -1.27347399e-02,  9.24132361e-03],
+        },
+        "h2o_plain_conv": {
+            "energy": -76.01030124,
+            "gradient": [
+        -0.00360958806,  -0.000000493573, -0.018466538001,
+         0.001804830243,  0.012733248901,  0.009233462596,
+         0.001804757818, -0.012732755328,  0.009233075406],
+        },
+        "h2o_ee_df": {  # copied from psi4/extern1 test
+            "energy": -76.0194112285529968,
+            "gradient": [
+        -6.82055778e-03, -4.91935494e-07, -1.37359563e-02,
+         1.97327564e-03,  1.21536580e-02,  8.95300956e-03,
+         1.97320511e-03, -1.21531644e-02,  8.95261922e-03],
+        },
+        "h2o_ee_conv": {
+            "energy": -76.01943687,
+            "gradient": [
+        -0.00681662153,  -0.000000492037, -0.01372019165,
+         0.001971465483,  0.012151813875,  0.008945014046,
+         0.001971394934, -0.012151320165,  0.008944623631],
+        },
+        "ne2_plain_conv": {
+            "energy": -128.4754448306251,
+            "gradient": [
+         0.0, 0.0,  1.61194524e-04,
+         0.0, 0.0, -1.61194524e-04],
+        },
+        "ne2_ee_conv": {
+            "energy": -128.2723042630023,
+            "gradient": [
+         0.0, -2.28636935e-02,  3.64658269e-02,
+         0.0,  6.15278512e-01,  6.01676379e-01],
+        },
+    }
+    # fmt: on
+    thisans = {
+        "energy": ans[variation]["energy"],
+        "gradient": ans[variation]["gradient"],
+    }
+
+    atres = qcng.compute(atin, program, raise_error=True, return_dict=True)
+    assert atres["success"] is True
+    pprint.pprint(atres, width=200)
+
+    atol = 2.0e-6
+
+    assert compare_values(thisans["energy"], atres["properties"]["return_energy"], atol=atol, label="ene")
+    assert compare_values(thisans[driver], atres["return_result"], atol=atol, label="return")