Skip to content

Commit

Permalink
Merge remote-tracking branch 'upstream/main' into versioningit
Browse files Browse the repository at this point in the history
  • Loading branch information
mattwthompson committed Dec 17, 2024
2 parents b9661ef + 1b6a2a3 commit 8bad98c
Show file tree
Hide file tree
Showing 24 changed files with 1,509 additions and 34 deletions.
1 change: 1 addition & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ repos:
rev: v0.7.4
hooks:
- id: ruff
args: ['--fix']
- id: ruff-format
- repo: https://github.com/asottile/add-trailing-comma
rev: v3.1.0
Expand Down
25 changes: 25 additions & 0 deletions LICENSE-3RD-PARTY
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
================================= openff-nagl =================================

MIT License

Copyright (c) 2022 Lily Wang
Expand Down Expand Up @@ -83,3 +84,27 @@ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

============================ openff-strike-team ===============================

MIT License

Copyright (c) 2024 Lily Wang

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
89 changes: 89 additions & 0 deletions run_torsion_comparisons.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
import pathlib
from multiprocessing import freeze_support

from matplotlib import pyplot

from yammbs.torsion import TorsionStore
from yammbs.torsion.inputs import QCArchiveTorsionDataset


def main():
force_fields = [
"openff-1.0.0",
# "openff-1.1.0",
# "openff-1.3.0",
# "openff-2.0.0",
# "openff-2.1.0",
"openff-2.2.1",
]

if not pathlib.Path("torsiondrive-data.json").exists():
from openff.qcsubmit.results import TorsionDriveResultCollection

collection = TorsionDriveResultCollection.parse_file("filtered-supp-td.json")

with open("torsiondrive-data.json", "w") as f:
f.write(QCArchiveTorsionDataset.from_qcsubmit_collection(collection).model_dump_json())

dataset = QCArchiveTorsionDataset.model_validate_json(open("torsiondrive-data.json").read())

if pathlib.Path("torsion-example.sqlite").exists():
store = TorsionStore("torsion-example.sqlite")
else:
store = TorsionStore.from_torsion_dataset(
dataset,
database_name="torsion-example.sqlite",
)

for force_field in force_fields:
store.optimize_mm(force_field=force_field, n_processes=24)

with open("minimized.json", "w") as f:
f.write(store.get_outputs().model_dump_json())

with open("metrics.json", "w") as f:
f.write(store.get_metrics().model_dump_json())

fig, axes = pyplot.subplots(5, 4, figsize=(20, 20))

for molecule_id, axis in zip(store.get_molecule_ids(), axes.flatten()):
_qm = store.get_qm_energies_by_molecule_id(molecule_id)

_qm = dict(sorted(_qm.items()))

qm_minimum_index = min(_qm, key=_qm.get)

# Make a new dict to avoid in-place modification while iterating
qm = {key: _qm[key] - _qm[qm_minimum_index] for key in _qm}

axis.plot(
qm.keys(),
qm.values(),
"k.-",
label=f"QM {molecule_id}",
)

for force_field in force_fields:
mm = dict(sorted(store.get_mm_energies_by_molecule_id(molecule_id, force_field=force_field).items()))
if len(mm) == 0:
continue

axis.plot(
mm.keys(),
[val - mm[qm_minimum_index] for val in mm.values()],
"o--",
label=force_field,
)

axis.legend(loc=0)
axis.grid(axis="both")

fig.savefig("random.png")


if __name__ == "__main__":
# This setup is necessary for reasons that confused me - both setting it up in the __main__ block and calling
# freeze_support(). This is probably not necessary after MoleculeStore.optimize_mm() is called, so you can load up
# the same database for later analysis once the MM conformers are stored
freeze_support()
main()
5 changes: 3 additions & 2 deletions yammbs/_minimize.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,8 @@ def _shorthand_to_full_force_field_name(
if make_unconstrained:
# Split on '-' immediately followed by a number;
# cannot split on '-' because of i.e. 'de-force-1.0.0'
prefix, version = re.split(r"-[0-9]", shorthand, maxsplit=1)
prefix, version, _ = re.split(r"-([\d.]+)", shorthand, maxsplit=1)

return f"{prefix}_unconstrained-{version}.offxml"
else:
return shorthand + ".offxml"
Expand All @@ -50,7 +51,7 @@ def _lazy_load_force_field(force_field_name: str) -> ForceField:
if not force_field_name.endswith(".offxml"):
force_field_name = _shorthand_to_full_force_field_name(
force_field_name,
make_unconstrained=False,
make_unconstrained=True,
)

return ForceField(
Expand Down
10 changes: 10 additions & 0 deletions yammbs/_molecule.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
"""Molecule conversion utilities"""

from functools import lru_cache
from typing import TYPE_CHECKING

from openff.toolkit import Molecule, Quantity
Expand Down Expand Up @@ -39,3 +40,12 @@ def _molecule_with_conformer_from_smiles(
molecule.add_conformer(Quantity(conformer, "angstrom"))

return molecule


@lru_cache
def _smiles_to_inchi_key(smiles: str) -> str:
from openff.toolkit import Molecule

return Molecule.from_mapped_smiles(smiles, allow_undefined_stereo=True).to_inchi(
fixed_hydrogens=True,
)
27 changes: 11 additions & 16 deletions yammbs/_store.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import pathlib
from collections import defaultdict
from contextlib import contextmanager
from typing import Generator, Iterable, TypeVar
from typing import Generator, Iterable

import numpy
from numpy.typing import NDArray
Expand All @@ -18,7 +18,7 @@
DBMoleculeRecord,
DBQMConformerRecord,
)
from yammbs._molecule import _molecule_with_conformer_from_smiles
from yammbs._molecule import _molecule_with_conformer_from_smiles, _smiles_to_inchi_key
from yammbs._session import DBSessionManager
from yammbs._types import Pathlike
from yammbs.analysis import (
Expand All @@ -42,8 +42,6 @@

LOGGER = logging.getLogger(__name__)

MS = TypeVar("MS", bound="MoleculeStore")


class MoleculeStore:
def __len__(self):
Expand Down Expand Up @@ -133,7 +131,7 @@ def _set_provenance(
self.general_provenance = db.get_general_provenance()
self.software_provenance = db.get_software_provenance()

def store(
def store_molecule_record(
self,
records: MoleculeRecord | Iterable[MoleculeRecord],
):
Expand All @@ -151,6 +149,8 @@ def store(
for record in records:
db.store_molecule_record(record)

store = store_molecule_record

def store_qcarchive(
self,
records: QMConformerRecord | Iterable[QMConformerRecord],
Expand Down Expand Up @@ -422,7 +422,7 @@ def from_qm_dataset(
for qm_molecule in dataset.qm_molecules:
molecule_record = MoleculeRecord(
mapped_smiles=qm_molecule.mapped_smiles,
inchi_key=smiles_to_inchi_key(qm_molecule.mapped_smiles),
inchi_key=_smiles_to_inchi_key(qm_molecule.mapped_smiles),
)

store.store(molecule_record)
Expand Down Expand Up @@ -561,7 +561,10 @@ def optimize_mm(
):
from yammbs._minimize import _minimize_blob

inchi_key_qm_conformer_mapping = self._map_inchi_keys_to_qm_conformers(
inchi_key_qm_conformer_mapping: dict[
str,
list[NDArray],
] = self._map_inchi_keys_to_qm_conformers(
force_field=force_field,
)

Expand Down Expand Up @@ -822,7 +825,7 @@ def get_tfd(
),
)
except Exception as e:
logging.warning(f"Molecule {inchi_key} failed with {e!s}")
LOGGER.warning(f"Molecule {inchi_key} failed with {e!s}")

return tfds

Expand Down Expand Up @@ -929,11 +932,3 @@ def smirks_in_smiles(smirks: str, smiles: str) -> bool:
smiles=self.get_smiles_by_molecule_id(id),
)
]


def smiles_to_inchi_key(smiles: str) -> str:
from openff.toolkit import Molecule

return Molecule.from_mapped_smiles(smiles, allow_undefined_stereo=True).to_inchi(
fixed_hydrogens=True,
)
25 changes: 25 additions & 0 deletions yammbs/_tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@

from yammbs import MoleculeStore
from yammbs.inputs import QCArchiveDataset
from yammbs.torsion.inputs import QCArchiveTorsionDataset, QCArchiveTorsionProfile


@pytest.fixture
Expand Down Expand Up @@ -116,3 +117,27 @@ def conformers(allicin):
other_allicin.generate_conformers(n_conformers=10)

return other_allicin.conformers


@pytest.fixture
def torsion_dataset():
return QCArchiveTorsionDataset.model_validate_json(
open(
get_data_file_path(
"_tests/data/yammbs/torsiondrive-data.json",
"yammbs",
),
).read(),
)


@pytest.fixture
def single_torsion_dataset(torsion_dataset):
"""`torsion_dataset` with only one (QM) torsion profile."""

# TODO: The dict round-trip should not be necessary
return QCArchiveTorsionDataset(
tag=torsion_dataset.tag,
version=torsion_dataset.version,
qm_torsions=[QCArchiveTorsionProfile(**torsion_dataset.qm_torsions[-1].dict())],
)
Loading

0 comments on commit 8bad98c

Please sign in to comment.