Skip to content

Commit

Permalink
fix: improve support for newer dssp versions #403 (#404)
Browse files Browse the repository at this point in the history
* fix: improve support for newer dssp versions #403

* docs: rename old arg to newer arg in documentation notebooks #389

* feat: add decorators for checking python/external dependencies

* feat: add decorators to mol submodule

* feat: decorate pymol

* feat: decorate meshes

* fix: undo dssp tuple unpacking

---------

Co-authored-by: Arian Jamasb <[email protected]>
  • Loading branch information
a-r-j and Arian Jamasb authored Aug 4, 2024
1 parent f1bef66 commit 2ebd211
Show file tree
Hide file tree
Showing 10 changed files with 147 additions and 28 deletions.
9 changes: 5 additions & 4 deletions graphein/molecule/atoms.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,16 +15,17 @@

from typing import Dict, List

from loguru import logger

from graphein.utils.dependencies import import_message

try:
import rdkit.Chem as Chem
except ImportError:
import_message(
"graphein.molecule.atoms", "rdkit", "rdkit", True, extras=True
except (ImportError, ModuleNotFoundError):
logger.warning(
import_message(__name__, "rdkit", "rdkit", True, extras=True)
)


BASE_ATOMS: List[str] = [
"C",
"H",
Expand Down
4 changes: 4 additions & 0 deletions graphein/molecule/chembl.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,10 @@
# Code Repository: https://github.com/a-r-j/graphein
from bioservices import ChEMBL

from graphein.utils.dependencies import requires_python_libs


@requires_python_libs("bioservices")
def get_smiles_from_chembl(chembl_id: str) -> str:
"""Retrieves a SMILE string from a ChEMBL ID.
Expand All @@ -27,6 +30,7 @@ def get_smiles_from_chembl(chembl_id: str) -> str:
return data["molecule_structures"]["canonical_smiles"]


@requires_python_libs("bioservices")
def get_chembl_id_from_smiles(smiles: str) -> str:
"""Retrieves a ChEMBL ID from a SMILE string.
Expand Down
9 changes: 7 additions & 2 deletions graphein/molecule/graphs.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
from loguru import logger as log
from tqdm.contrib.concurrent import process_map, thread_map

from graphein.utils.dependencies import import_message
from graphein.utils.dependencies import import_message, requires_python_libs
from graphein.utils.utils import (
annotate_edge_metadata,
annotate_graph_metadata,
Expand All @@ -36,9 +36,11 @@
from rdkit import Chem
from rdkit.Chem import AllChem
except ImportError:
import_message("graphein.molecule.graphs", "rdkit", "rdkit", True)
msg = import_message("graphein.molecule.graphs", "rdkit", "rdkit", True)
log.warning(msg)


@requires_python_libs("rdkit")
def initialise_graph_with_metadata(
name: str,
rdmol: rdkit.Mol,
Expand All @@ -60,6 +62,7 @@ def initialise_graph_with_metadata(
)


@requires_python_libs("rdkit")
def add_nodes_to_graph(
G: nx.Graph,
verbose: bool = False,
Expand Down Expand Up @@ -92,6 +95,7 @@ def add_nodes_to_graph(
return G


@requires_python_libs("rdkit")
def generate_3d(
mol: Union[nx.Graph, Chem.Mol], recompute_graph: bool = False
) -> Union[nx.Graph, rdkit.Chem.rdchem.Mol]:
Expand Down Expand Up @@ -130,6 +134,7 @@ def generate_3d(
return rdmol


@requires_python_libs("rdkit")
def construct_graph(
config: Optional[MoleculeGraphConfig] = None,
mol: Optional[rdkit.Mol] = None,
Expand Down
22 changes: 21 additions & 1 deletion graphein/molecule/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
from scipy.sparse import csr_matrix
from scipy.sparse.csgraph import minimum_spanning_tree

from graphein.utils.dependencies import import_message
from graphein.utils.dependencies import import_message, requires_python_libs

try:
import rdkit
Expand Down Expand Up @@ -55,6 +55,7 @@
MAX_NCAND: int = 2000


@requires_python_libs("rdkit")
def get_center(
mol: Union[nx.Graph, Chem.Mol], weights: Optional[np.ndarray] = None
) -> np.ndarray:
Expand All @@ -76,6 +77,7 @@ def get_center(
return np.array(ComputeCentroid(mol.GetConformer(0), weights=weights))


@requires_python_libs("rdkit")
def get_shape_moments(mol: Union[nx.Graph, Chem.Mol]) -> Tuple[float, float]:
"""Calculate principal moments of inertia as defined in https://pubs.acs.org/doi/10.1021/ci025599w
Expand All @@ -94,6 +96,7 @@ def get_shape_moments(mol: Union[nx.Graph, Chem.Mol]) -> Tuple[float, float]:
return npr1, npr2


@requires_python_libs("rdkit")
def count_fragments(mol: Union[nx.Graph, Chem.Mol]) -> int:
"""Counts the number of the disconnected fragments in a molecule.
Expand All @@ -107,6 +110,7 @@ def count_fragments(mol: Union[nx.Graph, Chem.Mol]) -> int:
return len(Chem.GetMolFrags(mol, asMols=True))


@requires_python_libs("rdkit")
def get_max_ring_size(mol: Union[nx.Graph, Chem.Mol]) -> int:
"""
Get the size of the largest ring in a molecule.
Expand All @@ -124,6 +128,7 @@ def get_max_ring_size(mol: Union[nx.Graph, Chem.Mol]) -> int:
return 0 if len(atom_rings) == 0 else max(len(x) for x in ri.AtomRings())


@requires_python_libs("rdkit")
def label_rdmol_atoms(
mol: Union[nx.Graph, Chem.Mol], labels: List[Any]
) -> Union[nx.Graph, Chem.Mol]:
Expand All @@ -146,6 +151,7 @@ def label_rdmol_atoms(
return mol


@requires_python_libs("rdkit")
def tag_rdmol_atoms(
mol, atoms_to_tag, tag: str = "x"
) -> Union[nx.Graph, Chem.Mol]:
Expand All @@ -156,6 +162,7 @@ def tag_rdmol_atoms(
return mol


@requires_python_libs("rdkit")
def get_mol(smiles: str) -> rdkit.Chem.rdchem.Mol:
"""
Function for getting rdmol from smiles. Applies kekulization.
Expand All @@ -172,6 +179,7 @@ def get_mol(smiles: str) -> rdkit.Chem.rdchem.Mol:
return mol


@requires_python_libs("rdkit")
def get_smiles(mol: Union[nx.Graph, rdkit.Chem.rdchem.Mol]) -> str:
"""
Function for getting smiles from rdmol. Applies kekulization.
Expand All @@ -186,6 +194,7 @@ def get_smiles(mol: Union[nx.Graph, rdkit.Chem.rdchem.Mol]) -> str:
return Chem.MolToSmiles(mol, kekuleSmiles=True)


@requires_python_libs("rdkit")
def sanitize(mol: rdkit.Chem.rdchem.Mol) -> rdkit.Chem.rdchem.Mol:
"""
Function for sanitizing a rdmol
Expand All @@ -203,6 +212,7 @@ def sanitize(mol: rdkit.Chem.rdchem.Mol) -> rdkit.Chem.rdchem.Mol:
return mol


@requires_python_libs("rdkit")
def copy_edit_mol(mol: rdkit.Chem.rdchem.Mol) -> rdkit.Chem.rdchem.Mol:
"""
Function for copying a rdmol
Expand All @@ -224,6 +234,7 @@ def copy_edit_mol(mol: rdkit.Chem.rdchem.Mol) -> rdkit.Chem.rdchem.Mol:
return new_mol


@requires_python_libs("rdkit")
def get_clique_mol(mol: rdkit.Chem.rdchem.Atom, atoms: List[int]):
"""
Function for getting clique rdmol
Expand All @@ -242,6 +253,7 @@ def get_clique_mol(mol: rdkit.Chem.rdchem.Atom, atoms: List[int]):
return new_mol


@requires_python_libs("rdkit")
def copy_rdmol_atom(atom: rdkit.Chem.rdchem.Atom) -> rdkit.Chem.rdchem.Atom:
"""
Function for copying an atom
Expand All @@ -257,6 +269,7 @@ def copy_rdmol_atom(atom: rdkit.Chem.rdchem.Atom) -> rdkit.Chem.rdchem.Atom:
return new_atom


@requires_python_libs("rdkit")
def get_morgan_fp(
mol: Union[nx.Graph, rdkit.Chem.rdchem.Mol],
radius: int = 2,
Expand All @@ -281,6 +294,7 @@ def get_morgan_fp(
)


@requires_python_libs("rdkit")
def get_morgan_fp_np(
mol: Union[nx.Graph, rdkit.Chem.rdchem.Mol],
radius: int = 2,
Expand All @@ -307,18 +321,21 @@ def get_morgan_fp_np(
return arr


@requires_python_libs("rdkit")
def compute_fragments(mol: Union[nx.Graph, Chem.Mol]) -> List[Chem.Mol]:
    """Split a molecule into its disconnected fragments.

    :param mol: Either an RDKit molecule, or a graph that stores one under
        ``mol.graph["rdmol"]``.
    :type mol: Union[nx.Graph, Chem.Mol]
    :return: The fragments produced by ``Chem.GetMolFrags`` with
        ``asMols=True``, collected into a list.
    :rtype: List[Chem.Mol]
    """
    # Graph inputs carry the underlying RDKit molecule in their metadata.
    rdmol = mol.graph["rdmol"] if isinstance(mol, nx.Graph) else mol
    fragments = Chem.GetMolFrags(rdmol, asMols=True)
    return list(fragments)


@requires_python_libs("rdkit")
def get_mol_weight(mol: Union[nx.Graph, Chem.Mol]) -> float:
    """Compute the average molecular weight of a molecule.

    The previous implementation was an unfinished stub that returned the
    molecule itself, contradicting the ``float`` return annotation.

    :param mol: Either an RDKit molecule, or a graph that stores one under
        ``mol.graph["rdmol"]``.
    :type mol: Union[nx.Graph, Chem.Mol]
    :return: Average molecular weight as reported by
        ``rdkit.Chem.Descriptors.MolWt``.
    :rtype: float
    """
    # Imported locally so this block does not depend on which rdkit
    # submodules the module-level (optional) import brought into scope.
    from rdkit.Chem import Descriptors

    # Graph inputs carry the underlying RDKit molecule in their metadata.
    if isinstance(mol, nx.Graph):
        mol = mol.graph["rdmol"]
    return Descriptors.MolWt(mol)


@requires_python_libs("rdkit")
def get_qed_score(
mol: Union[nx.Graph, rdkit.Chem.rdchem.Mol]
) -> Union[float, None]:
Expand Down Expand Up @@ -364,6 +381,7 @@ def simplify_smile(smile: str) -> str:
return "".join(stripped_smile)


@requires_python_libs("selfies")
def smile_to_selfies(smile: str) -> str:
"""Encodes a SMILES string into a Selfies string.
Expand All @@ -375,6 +393,7 @@ def smile_to_selfies(smile: str) -> str:
return sf.encoder(smile)


@requires_python_libs("selfies")
def selfies_to_smile(selfie: str) -> str:
"""Decodes a selfies string into a SMILES string.
Expand All @@ -386,6 +405,7 @@ def selfies_to_smile(selfie: str) -> str:
return sf.decoder(selfie)


@requires_python_libs("rdkit")
def tree_decomp(mol: rdkit.Chem.rdchem.Mol) -> Tuple[List]:
"""
Function for decomposing rdmol to a tree
Expand Down
40 changes: 26 additions & 14 deletions graphein/protein/features/nodes/dssp.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,11 @@
import networkx as nx
import pandas as pd
from Bio.PDB.DSSP import dssp_dict_from_pdb_file, residue_max_acc
from loguru import logger

from graphein.protein.resi_atoms import STANDARD_AMINO_ACID_MAPPING_1_TO_3
from graphein.protein.utils import save_pdb_df_to_pdb
from graphein.utils.dependencies import is_tool
from graphein.utils.dependencies import is_tool, requires_external_dependencies

DSSP_COLS = [
"chain",
Expand Down Expand Up @@ -70,6 +71,7 @@ def parse_dssp_df(dssp: Dict[str, Any]) -> pd.DataFrame:
return pd.DataFrame.from_records(appender, columns=DSSP_COLS)


@requires_external_dependencies("mkdssp")
def add_dssp_df(
G: nx.Graph,
dssp_config: Optional[DSSPConfig],
Expand All @@ -79,12 +81,13 @@ def add_dssp_df(
:param G: Input protein graph
:type G: nx.Graph
:param dssp_config: DSSPConfig object. Specifies which executable to run. Located in graphein.protein.config
:param dssp_config: DSSPConfig object. Specifies which executable to run.
Located in :obj:`graphein.protein.config`.
:type dssp_config: DSSPConfig, optional
:return: Protein graph with DSSP dataframe added
:rtype: nx.Graph
"""

# if dssp_config is None:
config = G.graph["config"]
pdb_code = G.graph["pdb_code"]
path = G.graph["path"]
Expand All @@ -107,14 +110,14 @@ def add_dssp_df(
if os.path.isfile(config.pdb_dir / (pdb_code + ".pdb")):
pdb_file = config.pdb_dir / (pdb_code + ".pdb")

# get dssp version string
dssp_version = re.search(
r"version ([\d\.]+)", os.popen(f"{executable} --version").read()
).group(
1
) # e.g. "4.0.4"
# Check for existence of pdb file. If not, reconstructs it from the raw df.
if pdb_file:
# get dssp version string
dssp_version = re.search(
r"version ([\d\.]+)", os.popen(f"{executable} --version").read()
).group(
1
) # e.g. "4.0.4"
dssp_dict = dssp_dict_from_pdb_file(
pdb_file, DSSP=executable, dssp_version=dssp_version
)
Expand All @@ -124,17 +127,26 @@ def add_dssp_df(
G.graph["raw_pdb_df"], tmpdirname + f"/{pdb_name}.pdb"
)
dssp_dict = dssp_dict_from_pdb_file(
tmpdirname + f"/{pdb_name}.pdb", DSSP=executable
tmpdirname + f"/{pdb_name}.pdb",
DSSP=executable,
dssp_version=dssp_version,
)

if len(dssp_dict[0]) == 0:
raise ValueError(
"DSSP could not be calculated. Check DSSP version "
f"({dssp_version}) orthat the input PDB file is valid."
)

if config.verbose:
print(f"Using DSSP executable '{executable}'")
logger.debug(f"Using DSSP executable '{executable}'")

dssp_dict = parse_dssp_df(dssp_dict)
# Convert 1 letter aa code to 3 letter
dssp_dict["aa"] = dssp_dict["aa"].map(STANDARD_AMINO_ACID_MAPPING_1_TO_3)

# Resolve UNKs NOTE: the original didn't work if HETATM residues exist in DSSP output
# Resolve UNKs
# NOTE: the original didn't work if HETATM residues exist in DSSP output
_raw_pdb_df = G.graph["raw_pdb_df"].copy().drop_duplicates("node_id")
_dssp_df_unk = dssp_dict.loc[dssp_dict["aa"] == "UNK"][
["chain", "resnum", "icode"]
Expand Down Expand Up @@ -177,7 +189,7 @@ def add_dssp_df(
dssp_dict.set_index("node_id", inplace=True)

if config.verbose:
print(dssp_dict)
logger.debug(dssp_dict)

# Assign DSSP Dict
G.graph["dssp_df"] = dssp_dict
Expand Down Expand Up @@ -241,7 +253,7 @@ def add_dssp_feature(G: nx.Graph, feature: str) -> nx.Graph:
nx.set_node_attributes(G, dict(dssp_df[feature]), feature)

if config.verbose:
print("Added " + feature + " features to graph nodes")
logger.debug("Added " + feature + " features to graph nodes")

return G

Expand Down
Loading

0 comments on commit 2ebd211

Please sign in to comment.