Merge remote-tracking branch 'upstream/main' into versioningit

openforcefield · Dec 17, 2024 · 8bad98c · 8bad98c
2 parents b9661ef + 1b6a2a3
commit 8bad98c
Show file tree

Hide file tree

Showing 24 changed files with 1,509 additions and 34 deletions.
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -15,6 +15,7 @@ repos:
   rev: v0.7.4
   hooks:
     - id: ruff
+      args: ['--fix']
     - id: ruff-format
 - repo: https://github.com/asottile/add-trailing-comma
   rev: v3.1.0

diff --git a/LICENSE-3RD-PARTY b/LICENSE-3RD-PARTY
@@ -1,4 +1,5 @@
 ================================= openff-nagl =================================
+
 MIT License
 
 Copyright (c) 2022 Lily Wang
@@ -83,3 +84,27 @@ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 SOFTWARE.
+
+============================ openff-strike-team ===============================
+
+MIT License
+
+Copyright (c) 2024 Lily Wang
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/run_torsion_comparisons.py b/run_torsion_comparisons.py
@@ -0,0 +1,89 @@
+import pathlib
+from multiprocessing import freeze_support
+
+from matplotlib import pyplot
+
+from yammbs.torsion import TorsionStore
+from yammbs.torsion.inputs import QCArchiveTorsionDataset
+
+
+def main():
+    force_fields = [
+        "openff-1.0.0",
+        # "openff-1.1.0",
+        # "openff-1.3.0",
+        # "openff-2.0.0",
+        # "openff-2.1.0",
+        "openff-2.2.1",
+    ]
+
+    if not pathlib.Path("torsiondrive-data.json").exists():
+        from openff.qcsubmit.results import TorsionDriveResultCollection
+
+        collection = TorsionDriveResultCollection.parse_file("filtered-supp-td.json")
+
+        with open("torsiondrive-data.json", "w") as f:
+            f.write(QCArchiveTorsionDataset.from_qcsubmit_collection(collection).model_dump_json())
+
+    dataset = QCArchiveTorsionDataset.model_validate_json(open("torsiondrive-data.json").read())
+
+    if pathlib.Path("torsion-example.sqlite").exists():
+        store = TorsionStore("torsion-example.sqlite")
+    else:
+        store = TorsionStore.from_torsion_dataset(
+            dataset,
+            database_name="torsion-example.sqlite",
+        )
+
+    for force_field in force_fields:
+        store.optimize_mm(force_field=force_field, n_processes=24)
+
+    with open("minimized.json", "w") as f:
+        f.write(store.get_outputs().model_dump_json())
+
+    with open("metrics.json", "w") as f:
+        f.write(store.get_metrics().model_dump_json())
+
+    fig, axes = pyplot.subplots(5, 4, figsize=(20, 20))
+
+    for molecule_id, axis in zip(store.get_molecule_ids(), axes.flatten()):
+        _qm = store.get_qm_energies_by_molecule_id(molecule_id)
+
+        _qm = dict(sorted(_qm.items()))
+
+        qm_minimum_index = min(_qm, key=_qm.get)
+
+        # Make a new dict to avoid in-place modification while iterating
+        qm = {key: _qm[key] - _qm[qm_minimum_index] for key in _qm}
+
+        axis.plot(
+            qm.keys(),
+            qm.values(),
+            "k.-",
+            label=f"QM {molecule_id}",
+        )
+
+        for force_field in force_fields:
+            mm = dict(sorted(store.get_mm_energies_by_molecule_id(molecule_id, force_field=force_field).items()))
+            if len(mm) == 0:
+                continue
+
+            axis.plot(
+                mm.keys(),
+                [val - mm[qm_minimum_index] for val in mm.values()],
+                "o--",
+                label=force_field,
+            )
+
+        axis.legend(loc=0)
+        axis.grid(axis="both")
+
+    fig.savefig("random.png")
+
+
+if __name__ == "__main__":
+    # This setup is necessary for reasons that confused me - both setting it up in the __main__ block and calling
+    # freeze_support(). This is probably not necessary after MoleculeStore.optimize_mm() is called, so you can load up
+    # the same database for later analysis once the MM conformers are stored
+    freeze_support()
+    main()
diff --git a/yammbs/_minimize.py b/yammbs/_minimize.py
@@ -30,7 +30,8 @@ def _shorthand_to_full_force_field_name(
     if make_unconstrained:
         # Split on '-' immediately followed by a number;
         # cannot split on '-' because of i.e. 'de-force-1.0.0'
-        prefix, version = re.split(r"-[0-9]", shorthand, maxsplit=1)
+        prefix, version, _ = re.split(r"-([\d.]+)", shorthand, maxsplit=1)
+
         return f"{prefix}_unconstrained-{version}.offxml"
     else:
         return shorthand + ".offxml"
@@ -50,7 +51,7 @@ def _lazy_load_force_field(force_field_name: str) -> ForceField:
     if not force_field_name.endswith(".offxml"):
         force_field_name = _shorthand_to_full_force_field_name(
             force_field_name,
-            make_unconstrained=False,
+            make_unconstrained=True,
         )
 
     return ForceField(

diff --git a/yammbs/_molecule.py b/yammbs/_molecule.py
@@ -1,5 +1,6 @@
 """Molecule conversion utilities"""
 
+from functools import lru_cache
 from typing import TYPE_CHECKING
 
 from openff.toolkit import Molecule, Quantity
@@ -39,3 +40,12 @@ def _molecule_with_conformer_from_smiles(
     molecule.add_conformer(Quantity(conformer, "angstrom"))
 
     return molecule
+
+
+@lru_cache
+def _smiles_to_inchi_key(smiles: str) -> str:
+    from openff.toolkit import Molecule
+
+    return Molecule.from_mapped_smiles(smiles, allow_undefined_stereo=True).to_inchi(
+        fixed_hydrogens=True,
+    )
diff --git a/yammbs/_store.py b/yammbs/_store.py
@@ -2,7 +2,7 @@
 import pathlib
 from collections import defaultdict
 from contextlib import contextmanager
-from typing import Generator, Iterable, TypeVar
+from typing import Generator, Iterable
 
 import numpy
 from numpy.typing import NDArray
@@ -18,7 +18,7 @@
     DBMoleculeRecord,
     DBQMConformerRecord,
 )
-from yammbs._molecule import _molecule_with_conformer_from_smiles
+from yammbs._molecule import _molecule_with_conformer_from_smiles, _smiles_to_inchi_key
 from yammbs._session import DBSessionManager
 from yammbs._types import Pathlike
 from yammbs.analysis import (
@@ -42,8 +42,6 @@
 
 LOGGER = logging.getLogger(__name__)
 
-MS = TypeVar("MS", bound="MoleculeStore")
-
 
 class MoleculeStore:
     def __len__(self):
@@ -133,7 +131,7 @@ def _set_provenance(
             self.general_provenance = db.get_general_provenance()
             self.software_provenance = db.get_software_provenance()
 
-    def store(
+    def store_molecule_record(
         self,
         records: MoleculeRecord | Iterable[MoleculeRecord],
     ):
@@ -151,6 +149,8 @@ def store(
             for record in records:
                 db.store_molecule_record(record)
 
+    store = store_molecule_record
+
     def store_qcarchive(
         self,
         records: QMConformerRecord | Iterable[QMConformerRecord],
@@ -422,7 +422,7 @@ def from_qm_dataset(
         for qm_molecule in dataset.qm_molecules:
             molecule_record = MoleculeRecord(
                 mapped_smiles=qm_molecule.mapped_smiles,
-                inchi_key=smiles_to_inchi_key(qm_molecule.mapped_smiles),
+                inchi_key=_smiles_to_inchi_key(qm_molecule.mapped_smiles),
             )
 
             store.store(molecule_record)
@@ -561,7 +561,10 @@ def optimize_mm(
     ):
         from yammbs._minimize import _minimize_blob
 
-        inchi_key_qm_conformer_mapping = self._map_inchi_keys_to_qm_conformers(
+        inchi_key_qm_conformer_mapping: dict[
+            str,
+            list[NDArray],
+        ] = self._map_inchi_keys_to_qm_conformers(
             force_field=force_field,
         )
 
@@ -822,7 +825,7 @@ def get_tfd(
                         ),
                     )
                 except Exception as e:
-                    logging.warning(f"Molecule {inchi_key} failed with {e!s}")
+                    LOGGER.warning(f"Molecule {inchi_key} failed with {e!s}")
 
         return tfds
 
@@ -929,11 +932,3 @@ def smirks_in_smiles(smirks: str, smiles: str) -> bool:
                 smiles=self.get_smiles_by_molecule_id(id),
             )
         ]
-
-
-def smiles_to_inchi_key(smiles: str) -> str:
-    from openff.toolkit import Molecule
-
-    return Molecule.from_mapped_smiles(smiles, allow_undefined_stereo=True).to_inchi(
-        fixed_hydrogens=True,
-    )
diff --git a/yammbs/_tests/conftest.py b/yammbs/_tests/conftest.py
@@ -17,6 +17,7 @@
 
 from yammbs import MoleculeStore
 from yammbs.inputs import QCArchiveDataset
+from yammbs.torsion.inputs import QCArchiveTorsionDataset, QCArchiveTorsionProfile
 
 
 @pytest.fixture
@@ -116,3 +117,27 @@ def conformers(allicin):
     other_allicin.generate_conformers(n_conformers=10)
 
     return other_allicin.conformers
+
+
+@pytest.fixture
+def torsion_dataset():
+    return QCArchiveTorsionDataset.model_validate_json(
+        open(
+            get_data_file_path(
+                "_tests/data/yammbs/torsiondrive-data.json",
+                "yammbs",
+            ),
+        ).read(),
+    )
+
+
+@pytest.fixture
+def single_torsion_dataset(torsion_dataset):
+    """`torsion_dataset` with only one (QM) torsion profile."""
+
+    # TODO: The dict round-trip should not be necessary
+    return QCArchiveTorsionDataset(
+        tag=torsion_dataset.tag,
+        version=torsion_dataset.version,
+        qm_torsions=[QCArchiveTorsionProfile(**torsion_dataset.qm_torsions[-1].dict())],
+    )