Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Handling of radicals in kekulization (fix #120) #125

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions selfies/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,13 @@
"O": (2, 4), "S": (2, 4), "Se": (2, 4), "Te": (2, 4)
}

# Number of valence (outer-shell) electrons of the neutral atom, keyed by
# element symbol.
VALENCE_ELECTRONS = {
    # group 13
    "B": 3,
    "Al": 3,
    # group 14
    "C": 4,
    "Si": 4,
    # group 15
    "N": 5,
    "P": 5,
    "As": 5,
    # group 16
    "O": 6,
    "S": 6,
    "Se": 6,
    "Te": 6,
}

AROMATIC_SUBSET = set(e.lower() for e in AROMATIC_VALENCES)

# =============================================================================
Expand Down
31 changes: 23 additions & 8 deletions selfies/mol_graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from dataclasses import dataclass, field

from selfies.bond_constraints import get_bonding_capacity
from selfies.constants import AROMATIC_VALENCES
from selfies.constants import AROMATIC_VALENCES, VALENCE_ELECTRONS
from selfies.utils.matching_utils import find_perfect_matching


Expand Down Expand Up @@ -254,7 +254,7 @@ def kekulize(self) -> bool:

ds = self._delocal_subgraph
kept_nodes = set(itertools.filterfalse(self._prune_from_ds, ds))

# relabel kept DS nodes to be 0, 1, 2, ...
label_to_node = list(sorted(kept_nodes))
node_to_label = {v: i for i, v in enumerate(label_to_node)}
Expand All @@ -265,7 +265,7 @@ def kekulize(self) -> bool:
label = node_to_label[node]
for adj in filter(lambda v: v in kept_nodes, ds[node]):
pruned_ds[label].append(node_to_label[adj])

matching = find_perfect_matching(pruned_ds)
if matching is None:
return False
Expand All @@ -288,18 +288,33 @@ def _prune_from_ds(self, node):
adj_nodes = self._delocal_subgraph[node]
if not adj_nodes:
return True # aromatic atom with no aromatic bonds

atom = self._atoms[node]
valences = AROMATIC_VALENCES[atom.element]

# each bond in DS has order 1.5 - we treat them as single bonds
used_electrons = int(self._bond_counts[node] - 0.5 * len(adj_nodes))

if atom.h_count is None: # account for implicit Hs
assert atom.charge == 0
return any(used_electrons == v for v in valences)
else:
valence = valences[-1] - atom.charge
used_electrons += atom.h_count
free_electrons = valence - used_electrons
return not ((free_electrons >= 0) and (free_electrons % 2 != 0))

# count the total number of bound electrons of each atom
bound_electrons = (max(0, atom.charge) + atom.h_count
+ int(self._bond_counts[node])
+ int(2 * (self._bond_counts[node] % 1)))

# calculate the number of unpaired electrons of each atom
radical_electrons = (max(0, VALENCE_ELECTRONS[atom.element]
- bound_electrons) % 2)

# unpaired electrons do not contribute to the aromatic system
free_electrons = valence - used_electrons - radical_electrons

if any(used_electrons == v - atom.charge for v in valences):
return True
else:
return not ((free_electrons >= 0) and (free_electrons % 2 != 0))
22 changes: 22 additions & 0 deletions tests/test_specific_cases.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,12 @@ def decode_eq(selfies, smiles):
return s == smiles


def roundtrip_eq(smiles_in, smiles_out):
    """Return whether encoding then decoding ``smiles_in`` gives ``smiles_out``.

    The input is passed through ``sf.encoder`` and the resulting string
    through ``sf.decoder``; the decoded string is compared for exact
    equality with ``smiles_out``.
    """
    return sf.decoder(sf.encoder(smiles_in)) == smiles_out


def test_branch_and_ring_at_state_X0():
"""Tests SELFIES with branches and rings at state X0 (i.e. at the
very beginning of a SELFIES). These symbols should be skipped.
Expand Down Expand Up @@ -330,6 +336,7 @@ def test_old_symbols():
except Exception:
assert False


def test_large_selfies_decoding():
"""Test that we can decode extremely large SELFIES strings (used to cause a RecursionError)
"""
Expand All @@ -339,8 +346,23 @@ def test_large_selfies_decoding():

assert decode_eq(large_selfies, expected_smiles)


def test_radical_kekulization():
    """Tests kekulization of aromatic systems with radicals and charges.

    Each case pairs an input SMILES with the SMILES expected back after
    an encoder/decoder round trip.
    """
    cases = (
        ("c1ccc[c]c1", "C1=CC=C[CH0]=C1"),
        ("c1[c]n1(C)", "C1=[CH0]N1C"),
        ("c1[C][n+]1(C)", "C=1[CH0][N+1]=1C"),
        ("c1nnn[n-]1", "C1=NN=N[N-1]1"),
        ("c1ccn[c-](C)[n+]1=O", "C1=CC=N[C-1](C)[N+1]1=O"),
        ("c1ccs[n+]1c2ccccc2", "C=1C=CS[N+1]=1C2=CC=CC=C2"),
        ("c1ccs[nH+]1", "C=1C=CS[NH1+1]=1"),
    )

    for smiles_in, smiles_out in cases:
        assert roundtrip_eq(smiles_in, smiles_out)


def test_novel_charged_symbols():
    """Test decoding of updated constraints for charged atoms (update in 2.2.0).

    Each case pairs a SELFIES string with the SMILES it is expected to
    decode to.
    """
    cases = (
        ("[N][#C+1][#NH1][#C@H1]", "N#[C+1]"),
        ("[O+1][=P+1][#P-1][#C@@]", "[O+1]=[P+1]=[P-1]#[C@@]"),
        ("[=C-1][#S+1][#B]", "[C-1]#[S+1]=B"),
    )

    for selfies, smiles in cases:
        assert decode_eq(selfies, smiles)

Loading