-
Notifications
You must be signed in to change notification settings - Fork 3
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Created new module specific to SMIDGE string parsing and validation
- Loading branch information
Showing
1 changed file
with
77 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,77 @@ | ||
'''Utilities for parsing, validating, and translating SMILES-like Monomer Interconnectivity and Degree Graph Encoding (SMIDGE) string''' | ||
|
||
import re | ||
from rdkit import Chem | ||
|
||
from typing import ClassVar, Union, Optional | ||
from dataclasses import dataclass, field | ||
from polymerist.rdutils.smileslib.primitives import BONDTYPE_BY_BOND_SMARTS, BOND_SMARTS_BY_BONDTYPE, BOND_PRIMITIVES_FOR_REGEX | ||
|
||
|
||
# PROCESSING BOND TOKENS IN SMIDGE STRINGS | ||
BOND_TOKEN_RE = re.compile( | ||
r'(?P<incoming_flavor>\d*)' \ | ||
f'(?P<bondtype>{BOND_PRIMITIVES_FOR_REGEX})' \ | ||
r'(?P<outgoing_flavor>\d*)' | ||
) | ||
|
||
@dataclass | ||
class MonomerGraphBondInfo: | ||
'''Encapsulated information about an intermonomer bond in a monomer graph''' | ||
DEFAULT_BONDTYPE : ClassVar[Chem.BondType] = Chem.BondType.UNSPECIFIED | ||
DEFAULT_FLAVOR : ClassVar[int] = 0 | ||
|
||
incoming_flavor : int = field(default=DEFAULT_FLAVOR) | ||
bondtype : Chem.BondType = field(default=DEFAULT_BONDTYPE) | ||
outgoing_flavor : int = field(default=DEFAULT_FLAVOR) | ||
|
||
def __post_init__(self) -> None: | ||
'''Perform appropriate type conversions and apply defaults''' | ||
if self.incoming_flavor is None: | ||
self.incoming_flavor = self.DEFAULT_FLAVOR | ||
|
||
if self.bondtype is None: | ||
self.bondtype = self.DEFAULT_BONDTYPE | ||
|
||
if self.outgoing_flavor is None: | ||
self.outgoing_flavor = self.DEFAULT_FLAVOR | ||
|
||
@property | ||
def bond_str(self) -> str: | ||
'''SMARTS representation of the current bondtype (defaults to the default symbol if None or invalid bondtype is provided)''' | ||
return BOND_SMARTS_BY_BONDTYPE.get(self.bondtype, BOND_SMARTS_BY_BONDTYPE.get(self.DEFAULT_BONDTYPE)) | ||
|
||
def __str__(self) -> str: | ||
return f'{self.incoming_flavor or ""}{self.bond_str}{self.outgoing_flavor or ""}' | ||
|
||
|
||
@staticmethod | ||
def parse_str_dict(str_dict : dict[str, str]) -> dict[str, Optional[Union[str, Chem.BondType]]]: | ||
'''Parse string-valued dict into dict of correct types and NoneType defaults for empty keys''' | ||
# 0) create copy of dict to avoid any in-place modification | ||
imb_info = {k : v for k, v in str_dict.items()} | ||
|
||
# 1) process bond type | ||
imb_info['bondtype'] = BONDTYPE_BY_BOND_SMARTS.get(imb_info.get('bondtype')) # set to None either if no bondtype is provided OR the provided type is not a registered primitive | ||
|
||
# 2) process port flavors | ||
for flavor_attr in ('incoming_flavor', 'outgoing_flavor'): | ||
if not (flavor_str := imb_info.get(flavor_attr)): # raised when the flavor_str is either not present or empty | ||
imb_info[flavor_attr] = None | ||
elif isinstance(flavor_str, str) and flavor_str.isdigit(): | ||
imb_info[flavor_attr] = int(flavor_str) # convert parsed strings to ints where possible | ||
|
||
return imb_info | ||
|
||
@classmethod | ||
def from_dict(cls, str_dict : dict[str, str]) -> 'MonomerGraphBondInfo': | ||
'''Initialize from dictionary of values, after sanitizing''' | ||
return cls(**cls.parse_str_dict(str_dict)) | ||
|
||
@classmethod | ||
def from_match(cls, match : Optional[re.Match]) -> 'MonomerGraphBondInfo': | ||
'''Initialize from groupdict of regex Match''' | ||
if match is None: | ||
# TODO: add logged warning | ||
return cls() | ||
return cls.from_dict(match.groupdict()) |