diff --git a/LICENSE-3RD-PARTY b/LICENSE-3RD-PARTY index 993584f..13fcf6c 100644 --- a/LICENSE-3RD-PARTY +++ b/LICENSE-3RD-PARTY @@ -20,3 +20,42 @@ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +================================= ForceBalance ================================ + + BSD 3-clause (aka BSD 2.0) License + +Copyright 2011-2015 Stanford University and the Authors +Copyright 2015-2018 Regents of the University of California and the Authors + +Developed by: Lee-Ping Wang + University of California, Davis + http://www.ucdavis.edu + +Contributors: Yudong Qiu, Keri A. McKiernan, Jeffrey R. Wagner, Hyesu Jang, +Simon Boothroyd, Arthur Vigil, Erik G. Brandt, Johnny Israeli, John Stoppelman + +Redistribution and use in source and binary forms, with or without modification, +are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, +this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright notice, +this list of conditions and the following disclaimer in the documentation +and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its contributors +may be used to endorse or promote products derived from this software +without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, +INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. diff --git a/devtools/conda-envs/dev.yaml b/devtools/conda-envs/dev.yaml index 1c1f229..96ccf50 100644 --- a/devtools/conda-envs/dev.yaml +++ b/devtools/conda-envs/dev.yaml @@ -10,6 +10,7 @@ dependencies: - openmmforcefields - smirnoff-plugins =2023.08.0 # espaloma =0.3 + - geometric =1 - ipython - ipdb diff --git a/ibstore/_base/array.py b/ibstore/_base/array.py index 8bc70d1..fdc1e6c 100644 --- a/ibstore/_base/array.py +++ b/ibstore/_base/array.py @@ -21,4 +21,9 @@ def validate_type(cls, val): dtype = getattr(cls, "__dtype__", Any) if dtype is Any: dtype = None + + # If Quantity, manually strip units to avoid downcast warning + if hasattr(val, "magnitude"): + val = val.magnitude + return numpy.asanyarray(val, dtype=dtype) diff --git a/ibstore/_db.py b/ibstore/_db.py index 033fc25..94ebaae 100644 --- a/ibstore/_db.py +++ b/ibstore/_db.py @@ -2,8 +2,7 @@ from typing import Dict, List from sqlalchemy import Column, Float, ForeignKey, Integer, PickleType, String -from sqlalchemy.ext.declarative import declarative_base -from sqlalchemy.orm import relationship +from sqlalchemy.orm import declarative_base, relationship from ibstore.models import MMConformerRecord, QMConformerRecord diff --git a/ibstore/_forcebalance.py b/ibstore/_forcebalance.py new file mode 100644 index 0000000..7ed578f --- /dev/null +++ b/ibstore/_forcebalance.py @@ -0,0 +1,51 @@ +"""Code vendored from ForceBalance.""" +import numpy + + +def periodic_diff(a, b, v_periodic): + """convenient function for computing the minimum difference in periodic coordinates + Parameters + ---------- + a: np.ndarray or float + Reference values in a numpy array + b: np.ndarray or float + Target values in a numpy arrary + v_periodic: float > 0 + Value of the periodic boundary + + Returns + ------- + diff: np.ndarray + The array of same shape containing the difference between a and b + All return values are in range [-v_periodic/2, v_periodic/2), + "( )" means exclusive, "[ ]" means inclusive + + Examples + ------- + periodic_diff(0.0, 2.1, 2.0) => -0.1 + periodic_diff(0.0, 1.9, 2.0) => 0.1 + periodic_diff(0.0, 1.0, 2.0) => -1.0 + periodic_diff(1.0, 0.0, 2.0) => -1.0 + periodic_diff(1.0, 0.1, 2.0) => -0.9 + periodic_diff(1.0, 10.1, 2.0) => 0.9 + periodic_diff(1.0, 9.9, 2.0) => -0.9 + """ + assert v_periodic > 0 + h = 0.5 * v_periodic + return (a - b + h) % v_periodic - h + + +def compute_rmsd(ref, tar, v_periodic=None): + """ + Compute the RMSD between two arrays, supporting periodic difference + """ + assert len(ref) == len(tar), "array length must match" + n = len(ref) + if n == 0: + return 0.0 + if v_periodic is not None: + diff = periodic_diff(ref, tar, v_periodic) + else: + diff = ref - tar + rmsd = numpy.sqrt(numpy.sum(diff**2) / n) + return rmsd diff --git a/ibstore/_molecule.py b/ibstore/_molecule.py new file mode 100644 index 0000000..30c2d60 --- /dev/null +++ b/ibstore/_molecule.py @@ -0,0 +1,29 @@ +"""Molecule conversion utilities""" +from typing import TYPE_CHECKING + +from openff.toolkit import Molecule + +from ibstore._base.array import Array + +if TYPE_CHECKING: + from geometric.molecule import Molecule as GeometricMolecule + + +def _to_geometric_molecule( + molecule: Molecule, + coordinates: Array, +) -> "GeometricMolecule": + from geometric.molecule import Molecule as GeometricMolecule + + geometric_molecule = GeometricMolecule() + + geometric_molecule.Data = { + "resname": ["UNK"] * molecule.n_atoms, + "resid": [0] * molecule.n_atoms, + "elem": [atom.symbol for atom in molecule.atoms], + "bonds": [(bond.atom1_index, bond.atom2_index) for bond in molecule.bonds], + "name": molecule.name, + "xyzs": [coordinates], + } + + return geometric_molecule diff --git a/ibstore/_store.py b/ibstore/_store.py index 537995b..1afb143 100644 --- a/ibstore/_store.py +++ b/ibstore/_store.py @@ -20,11 +20,14 @@ from ibstore._types import Pathlike from ibstore.analysis import ( DDE, + ICRMSD, RMSD, TFD, DDECollection, + ICRMSDCollection, RMSDCollection, TFDCollection, + get_internal_coordinate_rmsds, get_rmsd, get_tfd, ) @@ -542,6 +545,41 @@ def get_rmsd( return rmsds + def get_internal_coordinate_rmsd( + self, + force_field: str, + ) -> ICRMSDCollection: + self.optimize_mm(force_field=force_field) + + icrmsds = ICRMSDCollection() + + for inchi_key in self.get_inchi_keys(): + molecule = Molecule.from_inchi(inchi_key, allow_undefined_stereo=True) + molecule_id = self.get_molecule_id_by_inchi_key(inchi_key) + + qcarchive_ids = self.get_qcarchive_ids_by_molecule_id(molecule_id) + + qm_conformers = self.get_qm_conformers_by_molecule_id(molecule_id) + mm_conformers = self.get_mm_conformers_by_molecule_id( + molecule_id, + force_field, + ) + + for qm, mm, id in zip( + qm_conformers, + mm_conformers, + qcarchive_ids, + ): + icrmsds.append( + ICRMSD( + qcarchive_id=id, + icrmsd=get_internal_coordinate_rmsds(molecule, qm, mm), + force_field=force_field, + ), + ) + + return icrmsds + def get_tfd( self, force_field: str, diff --git a/ibstore/_tests/conftest.py b/ibstore/_tests/conftest.py index c6530b2..cc644a5 100644 --- a/ibstore/_tests/conftest.py +++ b/ibstore/_tests/conftest.py @@ -1,10 +1,36 @@ import pytest +from openff.interchange._tests import MoleculeWithConformer from openff.qcsubmit.results import OptimizationResultCollection +from openff.toolkit import Molecule from openff.utilities.utilities import get_data_file_path from ibstore._store import MoleculeStore +@pytest.fixture() +def water(): + return MoleculeWithConformer.from_smiles("O") + + +@pytest.fixture() +def hydrogen_peroxide(): + return MoleculeWithConformer.from_smiles("OO") + + +@pytest.fixture() +def formaldehyde(): + return MoleculeWithConformer.from_smiles("C=O") + + +@pytest.fixture() +def ligand(): + """Return a ligand that can have many viable conformers.""" + molecule = Molecule.from_smiles("C[C@@H](C(c1ccccc1)c2ccccc2)Nc3c4cc(c(cc4ncn3)F)F") + molecule.generate_conformers(n_conformers=100) + + return molecule + + @pytest.fixture() def small_collection() -> OptimizationResultCollection: return OptimizationResultCollection.parse_file( diff --git a/ibstore/_tests/unit_tests/test_analysis.py b/ibstore/_tests/unit_tests/test_analysis.py new file mode 100644 index 0000000..b289e0c --- /dev/null +++ b/ibstore/_tests/unit_tests/test_analysis.py @@ -0,0 +1,58 @@ +""" +def get_internal_coordinate_rmsds( + molecule: Molecule, + reference: Array, + target: Array, + _types: tuple[str] = ("Bond", "Angle", "Dihedral", "Improper"), +) -> dict[str, float]: +""" +from ibstore.analysis import get_internal_coordinate_rmsds + + +class TestInternalCoordinateRMSD: + def test_rmsds_between_conformers(self, ligand): + assert ligand.n_conformers + + rmsds = get_internal_coordinate_rmsds( + molecule=ligand, + reference=ligand.conformers[0], + target=ligand.conformers[-1], + ) + + assert all( + [rmsds[key] > 0.0 for key in ["Bond", "Angle", "Dihedral", "Improper"]], + ) + + def test_matching_conformers_zero_rmsd(self, ligand): + rmsds = get_internal_coordinate_rmsds( + molecule=ligand, + reference=ligand.conformers[0], + target=ligand.conformers[0], + ) + + assert all( + [rmsds[key] == 0.0 for key in ["Bond", "Angle", "Dihedral", "Improper"]], + ) + + def test_no_torsions(self, water): + rmsds = get_internal_coordinate_rmsds( + molecule=water, + reference=water.conformers[0], + target=water.conformers[0], + ) + + assert rmsds["Bond"] == 0.0 + assert rmsds["Angle"] == 0.0 + + assert "Dihedral" not in rmsds + assert "Improper" not in rmsds + + def test_no_impropers(self, hydrogen_peroxide): + rmsds = get_internal_coordinate_rmsds( + molecule=hydrogen_peroxide, + reference=hydrogen_peroxide.conformers[0], + target=hydrogen_peroxide.conformers[0], + ) + + assert "Dihedral" in rmsds + assert "Improper" not in rmsds diff --git a/ibstore/_tests/unit_tests/test_molecule.py b/ibstore/_tests/unit_tests/test_molecule.py new file mode 100644 index 0000000..cf35cbb --- /dev/null +++ b/ibstore/_tests/unit_tests/test_molecule.py @@ -0,0 +1,20 @@ +import numpy +from openff.toolkit import Molecule +from openff.units import unit + +from ibstore._molecule import _to_geometric_molecule + + +def test_to_geometric_molecule(): + molecule = Molecule.from_smiles("C1([H])CCCCN1") + molecule.generate_conformers(n_conformers=1) + + geometric_molecule = _to_geometric_molecule(molecule, molecule.conformers[0].m) + + assert molecule.n_atoms == len(geometric_molecule.Data["elem"]) + assert molecule.n_bonds == len(geometric_molecule.Data["bonds"]) + + assert numpy.allclose( + molecule.conformers[0].m_as(unit.angstrom), + geometric_molecule.Data["xyzs"][0], + ) diff --git a/ibstore/analysis.py b/ibstore/analysis.py index e17db1b..8af4e4e 100644 --- a/ibstore/analysis.py +++ b/ibstore/analysis.py @@ -1,6 +1,7 @@ import numpy import pandas from openff.toolkit import Molecule +from openff.units import Quantity, unit from ibstore._base.array import Array from ibstore._base.base import ImmutableModel @@ -42,6 +43,16 @@ def to_csv(self, path: str): self.to_dataframe().to_csv(path) +class ICRMSD(ImmutableModel): + qcarchive_id: int + force_field: str + icrmsd: dict[str, float] + + +class ICRMSDCollection(list): + pass + + class TFD(ImmutableModel): qcarchive_id: int force_field: str @@ -86,6 +97,82 @@ def get_rmsd( ) +def get_internal_coordinate_rmsds( + molecule: Molecule, + reference: Array, + target: Array, + _types: tuple[str] = ("Bond", "Angle", "Dihedral", "Improper"), +) -> dict[str, float]: + """Get internal coordinate RMSDs for one conformer of one molecule.""" + from geometric.internal import ( + Angle, + Dihedral, + Distance, + OutOfPlane, + PrimitiveInternalCoordinates, + ) + + from ibstore._forcebalance import compute_rmsd as forcebalance_rmsd + from ibstore._molecule import _to_geometric_molecule + + if isinstance(reference, Quantity): + reference = reference.m_as(unit.angstrom) + + if isinstance(target, Quantity): + target = target.m_as(unit.angstrom) + + _generator = PrimitiveInternalCoordinates( + _to_geometric_molecule(molecule=molecule, coordinates=target), + ) + + types = { + _type: { + "Bond": Distance, + "Angle": Angle, + "Dihedral": Dihedral, + "Improper": OutOfPlane, + }.get(_type) + for _type in _types + } + + internal_coordinates = { + label: [ + ( + internal_coordinate.value(reference), + internal_coordinate.value(target), + ) + for internal_coordinate in _generator.Internals + if isinstance(internal_coordinate, internal_coordinate_class) + ] + for label, internal_coordinate_class in types.items() + } + + internal_coordinate_rmsd = dict() + + for _type, _values in internal_coordinates.items(): + if len(_values) == 0: + continue + + qm_values, mm_values = zip(*_values) + + qm_values = numpy.array(qm_values) + mm_values = numpy.array(mm_values) + + # Converting from radians to degrees + if _type in ["Angle", "Dihedral", "Improper"]: + rmsd = forcebalance_rmsd( + qm_values * 180 / numpy.pi, + mm_values * 180 / numpy.pi, + 360, + ) + else: + rmsd = forcebalance_rmsd(qm_values, mm_values) + + internal_coordinate_rmsd[_type] = rmsd + + return internal_coordinate_rmsd + + def _get_rmsd( reference: Array, target: Array,