# test_rdkit_atom_count.py
# Unit tests for the element-counting helpers in rdkit_atom_count.
import unittest
from unittest import TestCase
import pandas as pd
import numpy as np
import rdkit_atom_count as ac
import rdkit.Chem as Chem
from rdkit_atom_count import Elements
import pandas_utils as pu
def count_atom(smiles, element: Elements):
    """Count the atoms of ``element`` in the molecule described by ``smiles``.

    The SMILES string is first converted by :func:`mol`; the resulting object
    and the element's ``atomic_number`` are then handed to
    ``ac.count_element``, whose result is returned unchanged.
    """
    molecule = mol(smiles)
    atomic_number = element.atomic_number
    return ac.count_element(molecule, atomic_number)
def mol(smiles):
    """Parse ``smiles`` into an RDKit molecule with explicit hydrogens.

    Args:
        smiles: SMILES string to parse; may be a null value.

    Returns:
        The molecule returned by ``Chem.AddHs`` for the parsed SMILES, or
        ``np.nan`` when either the input or the parsed molecule is null
        according to ``pu.notnull``.
    """
    # np.nan is the canonical spelling; the np.NAN alias was removed in
    # NumPy 2.0 and raises AttributeError there.
    if not pu.notnull(smiles):
        return np.nan

    parsed = Chem.MolFromSmiles(smiles)
    # Parsing may yield a null result (presumably for SMILES RDKit cannot
    # read - confirm against pu.notnull); do not pass that on to AddHs.
    if not pu.notnull(parsed):
        return np.nan

    return Chem.AddHs(parsed)
class Test(TestCase):
    """Tests for ``count_element`` and ``count_element_atoms_df``."""

    # Phenol: the tests below expect 6 C, 1 O and 6 H atoms (hydrogens are
    # made explicit by ``mol`` before counting).
    PHENOL = "C1=CC=C(C=C1)O"

    @staticmethod
    def _column(element: Elements) -> str:
        """Return the data frame column name holding the count of ``element``."""
        return f"at_n_{element.symbol}"

    def test_count_element(self):
        """Single-molecule counts for phenol."""
        self.check(6, Elements.C, self.PHENOL)
        self.check(1, Elements.O, self.PHENOL)
        self.check(6, Elements.H, self.PHENOL)

    def test_count_element_empty(self):
        """An empty SMILES string yields a count of zero for every element."""
        empty = ""
        self.check(0, Elements.C, empty)
        self.check(0, Elements.O, empty)
        self.check(0, Elements.H, empty)

    def test_count_element_df(self):
        """Without an element filter, C, O and H count columns are filled."""
        df = pd.DataFrame({"mol": [mol(self.PHENOL)]})
        df = ac.count_element_atoms_df(df, df["mol"])

        self.assertEqual(6, df.at[0, self._column(Elements.C)])
        self.assertEqual(1, df.at[0, self._column(Elements.O)])
        self.assertEqual(6, df.at[0, self._column(Elements.H)])

    def test_count_element_df_limit_elements(self):
        """Restricting the element list to C adds only the C column."""
        df = pd.DataFrame({"mol": [mol(self.PHENOL)]})
        # only search C
        df = ac.count_element_atoms_df(df, df["mol"], [Elements.C])

        self.assertEqual(6, df.at[0, self._column(Elements.C)])
        # assertIn/assertNotIn report the offending column and the actual
        # columns on failure, unlike assertTrue/assertFalse on an `in` test.
        self.assertIn(self._column(Elements.C), df.columns)
        self.assertNotIn(self._column(Elements.H), df.columns)
        self.assertNotIn(self._column(Elements.O), df.columns)

    def check(self, expected, element: Elements, smiles):
        """Assert that ``smiles`` contains ``expected`` atoms of ``element``."""
        self.assertEqual(expected, count_atom(smiles, element))
# Run the tests in this module when the file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()