Skip to content

Commit

Permalink
add register_multiple_confs (#31)
Browse files Browse the repository at this point in the history
Co-authored-by: Greg Landrum <[email protected]>
  • Loading branch information
greglandrum and Greg Landrum authored Dec 1, 2023
1 parent c723227 commit 1bbb1c5
Show file tree
Hide file tree
Showing 4 changed files with 250 additions and 104 deletions.
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -202,6 +202,8 @@ Note that once a database is created in `registerConformers` mode, it probably s
Just as molecular hashes are used to recognize when two molecules are the same, lwreg uses a hashing scheme to detect when two conformers are the same. The algorithm for this is simple:
The atomic positions are converted into strings (rounding the floating point values to a fixed, but configurable, number of digits), the resulting position strings are sorted, and they are then combined into a single string, which is the final hash.

When registering a multi-conformer molecule, it is most efficient to call `register_multiple_conformers()`, which does the work of standardizing the molecule and calculating the molecule hash only once.

# Data layout

## The base tables
Expand Down
2 changes: 1 addition & 1 deletion lwreg/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,4 +6,4 @@
# The contents are covered by the terms of the MIT license
# which is included in the file LICENSE,

from .utils import initdb, register, query, retrieve, bulk_register, RegistrationFailureReasons
from .utils import initdb, register, query, retrieve, bulk_register, register_multiple_conformers, RegistrationFailureReasons
93 changes: 68 additions & 25 deletions lwreg/test_lwreg.py
Original file line number Diff line number Diff line change
Expand Up @@ -518,13 +518,15 @@ def testBulkConformers(self):
random.shuffle(aorder)
nmol = Chem.RenumberAtoms(self._mol1, aorder)
expected = {
'sqlite3':((1, 1), (1, 2), (1, 1), (2, 3)),
'postgresql':((1, 1), (1, 2), (1, 1), (4, 4)),
'sqlite3': ((1, 1), (1, 2), (1, 1), (2, 3)),
'postgresql': ((1, 1), (1, 2), (1, 1), (4, 4)),
}
self.assertEqual(
utils.bulk_register(mols=(self._mol1, self._mol2, nmol, self._mol3),
utils.bulk_register(mols=(self._mol1, self._mol2, nmol,
self._mol3),
failOnDuplicate=False,
config=self._config), expected[self._config['dbtype']])
config=self._config),
expected[self._config['dbtype']])

def testNoConformers(self):
utils.initdb(config=self._config, confirm=True)
Expand All @@ -544,50 +546,91 @@ def testNoConformers(self):
with self.assertRaises(self.integrityError):
utils.register(mol=m, config=self._config)

def testMultiConfMolecule(self):
    ''' registering all conformers of a multi-conformer molecule in one call '''
    utils.initdb(config=self._config, confirm=True)

    mol = Chem.Mol(self._mol1)
    cids = rdDistGeom.EmbedMultipleConfs(mol, 10, randomSeed=0xf00d)
    self.assertEqual(len(cids), 10)
    # add a duplicate conformer to ensure that is handled correctly
    mol.AddConformer(mol.GetConformer())

    # each element of the result is unpacked below as a two-item pair;
    # the first item is the molecule registry number
    rres = utils.register_multiple_conformers(mol=mol,
                                              fail_on_duplicate=False,
                                              config=self._config)
    # one result per submitted conformer (10 embedded + 1 duplicate), but
    # the duplicate must resolve to an already-returned pair
    self.assertEqual(len(rres), 11)
    self.assertEqual(len(set(rres)), 10)
    # every conformer must be attached to the same molecule
    self.assertEqual(len({mrn for mrn, _ in rres}), 1)

    # make sure we can add more conformers:
    mol2 = Chem.Mol(mol)
    # NOTE(review): presumably the embedding replaces the conformers copied
    # from mol; the 10 results asserted below are consistent with that
    cids = rdDistGeom.EmbedMultipleConfs(mol2, 10, randomSeed=0xd00f)
    self.assertEqual(len(cids), 10)
    rres = utils.register_multiple_conformers(mol=mol2,
                                              fail_on_duplicate=True,
                                              config=self._config)
    self.assertEqual(len(rres), 10)
    self.assertEqual(len(set(rres)), 10)
    self.assertEqual(len({mrn for mrn, _ in rres}), 1)

    # make sure we can still fail on duplicate conformers:
    # start from a fresh database so that the only duplicate is the one
    # carried by mol itself
    utils.initdb(config=self._config, confirm=True)
    with self.assertRaises(self.integrityError):
        utils.register_multiple_conformers(mol=mol,
                                           fail_on_duplicate=True,
                                           config=self._config)

def testConformerQuery(self):
''' querying using a molecule which has conformers '''
utils.initdb(config=self._config, confirm=True)
regids = utils.bulk_register(mols=(self._mol1, self._mol3),
config=self._config)
self.assertEqual(sorted(utils.query(mol=self._mol1, config=self._config)),[regids[0]])
config=self._config)
self.assertEqual(
sorted(utils.query(mol=self._mol1, config=self._config)),
[regids[0]])
# matches topology, but not conformer
self.assertEqual(sorted(utils.query(mol=self._mol2, config=self._config)),[])
self.assertEqual(sorted(utils.query(mol=self._mol3, config=self._config)),[regids[1]])
self.assertEqual(
sorted(utils.query(mol=self._mol2, config=self._config)), [])
self.assertEqual(
sorted(utils.query(mol=self._mol3, config=self._config)),
[regids[1]])

# query with no conformer
qm = Chem.Mol(self._mol1)
qm.RemoveAllConformers()
self.assertEqual(sorted(utils.query(mol=qm, config=self._config)),[regids[0][0]])

self.assertEqual(sorted(utils.query(mol=qm, config=self._config)),
[regids[0][0]])

def testConformerRetrieve(self):
''' querying using a molecule which has conformers '''
utils.initdb(config=self._config, confirm=True)
regids = utils.bulk_register(mols=(self._mol1, self._mol2, self._mol3),
config=self._config)
res = utils.retrieve(ids=(regids[0],regids[2]), config=self._config)
self.assertEqual(res[0][0:2],(regids[0][0],regids[0][1]))
config=self._config)

res = utils.retrieve(ids=(regids[0], regids[2]), config=self._config)
self.assertEqual(res[0][0:2], (regids[0][0], regids[0][1]))
self.assertTrue('M END' in res[0][2])
self.assertEqual(res[1][0:2],(regids[2][0],regids[2][1]))
self.assertEqual(res[1][0:2], (regids[2][0], regids[2][1]))
self.assertTrue('M END' in res[1][2])



def testConformerQueryById(self):
utils.initdb(config=self._config, confirm=True)
regids = utils.bulk_register(mols=(self._mol1, self._mol2, self._mol3),
config=self._config)
mrns,cids = zip(*regids)
self.assertEqual(sorted(utils.query(ids=mrns[0:1], config=self._config)), [(1, 1),
(1, 2)])
config=self._config)
mrns, cids = zip(*regids)
self.assertEqual(
sorted(utils.query(ids=mrns[0:1], config=self._config)), [(1, 1),
(1, 2)])
expected = {
'sqlite3':[(1, 1), (1, 2), (2, 3)],
'postgresql':[(1, 1), (1, 2), (3, 3)],
'sqlite3': [(1, 1), (1, 2), (2, 3)],
'postgresql': [(1, 1), (1, 2), (3, 3)],
}
self.assertEqual(sorted(utils.query(ids=mrns, config=self._config)), expected[self._config['dbtype']])
self.assertEqual(sorted(utils.query(ids=tuple(reversed(mrns)), config=self._config)), expected[self._config['dbtype']])
self.assertEqual(sorted(utils.query(ids=mrns, config=self._config)),
expected[self._config['dbtype']])
self.assertEqual(
sorted(utils.query(ids=tuple(reversed(mrns)),
config=self._config)),
expected[self._config['dbtype']])
with self.assertRaises(ValueError):
cnf = copy.deepcopy(self._config)
cnf['registerConformers'] = False
Expand Down
Loading

0 comments on commit 1bbb1c5

Please sign in to comment.