diff --git a/graphein/molecule/atoms.py b/graphein/molecule/atoms.py index 47202d533..39d45b70d 100644 --- a/graphein/molecule/atoms.py +++ b/graphein/molecule/atoms.py @@ -15,16 +15,17 @@ from typing import Dict, List +from loguru import logger + from graphein.utils.dependencies import import_message try: import rdkit.Chem as Chem -except ImportError: - import_message( - "graphein.molecule.atoms", "rdkit", "rdkit", True, extras=True +except (ImportError, ModuleNotFoundError): + logger.warning( + import_message(__name__, "rdkit", "rdkit", True, extras=True) ) - BASE_ATOMS: List[str] = [ "C", "H", diff --git a/graphein/molecule/chembl.py b/graphein/molecule/chembl.py index c5efb0cfa..aa681040c 100644 --- a/graphein/molecule/chembl.py +++ b/graphein/molecule/chembl.py @@ -12,7 +12,10 @@ # Code Repository: https://github.com/a-r-j/graphein from bioservices import ChEMBL +from graphein.utils.dependencies import requires_python_libs + +@requires_python_libs("bioservices") def get_smiles_from_chembl(chembl_id: str) -> str: """Retrieves a SMILE string from a ChEMBL ID. @@ -27,6 +30,7 @@ def get_smiles_from_chembl(chembl_id: str) -> str: return data["molecule_structures"]["canonical_smiles"] +@requires_python_libs("bioservices") def get_chembl_id_from_smiles(smiles: str) -> str: """Retrieves a ChEMBL ID from a SMILE string. 
diff --git a/graphein/molecule/graphs.py b/graphein/molecule/graphs.py index 7f61e2f78..5c41edbd0 100644 --- a/graphein/molecule/graphs.py +++ b/graphein/molecule/graphs.py @@ -17,7 +17,7 @@ from loguru import logger as log from tqdm.contrib.concurrent import process_map, thread_map -from graphein.utils.dependencies import import_message +from graphein.utils.dependencies import import_message, requires_python_libs from graphein.utils.utils import ( annotate_edge_metadata, annotate_graph_metadata, @@ -36,9 +36,11 @@ from rdkit import Chem from rdkit.Chem import AllChem except ImportError: - import_message("graphein.molecule.graphs", "rdkit", "rdkit", True) + msg = import_message("graphein.molecule.graphs", "rdkit", "rdkit", True) + log.warning(msg) +@requires_python_libs("rdkit") def initialise_graph_with_metadata( name: str, rdmol: rdkit.Mol, @@ -60,6 +62,7 @@ def initialise_graph_with_metadata( ) +@requires_python_libs("rdkit") def add_nodes_to_graph( G: nx.Graph, verbose: bool = False, @@ -92,6 +95,7 @@ def add_nodes_to_graph( return G +@requires_python_libs("rdkit") def generate_3d( mol: Union[nx.Graph, Chem.Mol], recompute_graph: bool = False ) -> Union[nx.Graph, rdkit.Chem.rdchem.Mol]: @@ -130,6 +134,7 @@ def generate_3d( return rdmol +@requires_python_libs("rdkit") def construct_graph( config: Optional[MoleculeGraphConfig] = None, mol: Optional[rdkit.Mol] = None, diff --git a/graphein/molecule/utils.py b/graphein/molecule/utils.py index dfb38e2d4..4be3ed9e0 100644 --- a/graphein/molecule/utils.py +++ b/graphein/molecule/utils.py @@ -22,7 +22,7 @@ from scipy.sparse import csr_matrix from scipy.sparse.csgraph import minimum_spanning_tree -from graphein.utils.dependencies import import_message +from graphein.utils.dependencies import import_message, requires_python_libs try: import rdkit @@ -55,6 +55,7 @@ MAX_NCAND: int = 2000 +@requires_python_libs("rdkit") def get_center( mol: Union[nx.Graph, Chem.Mol], weights: Optional[np.ndarray] = None ) -> np.ndarray: @@ 
-76,6 +77,7 @@ def get_center( return np.array(ComputeCentroid(mol.GetConformer(0), weights=weights)) +@requires_python_libs("rdkit") def get_shape_moments(mol: Union[nx.Graph, Chem.Mol]) -> Tuple[float, float]: """Calculate principal moments of inertia as defined in https://pubs.acs.org/doi/10.1021/ci025599w @@ -94,6 +96,7 @@ def get_shape_moments(mol: Union[nx.Graph, Chem.Mol]) -> Tuple[float, float]: return npr1, npr2 +@requires_python_libs("rdkit") def count_fragments(mol: Union[nx.Graph, Chem.Mol]) -> int: """Counts the number of the disconnected fragments in a molecule. @@ -107,6 +110,7 @@ def count_fragments(mol: Union[nx.Graph, Chem.Mol]) -> int: return len(Chem.GetMolFrags(mol, asMols=True)) +@requires_python_libs("rdkit") def get_max_ring_size(mol: Union[nx.Graph, Chem.Mol]) -> int: """ Get the size of the largest ring in a molecule. @@ -124,6 +128,7 @@ def get_max_ring_size(mol: Union[nx.Graph, Chem.Mol]) -> int: return 0 if len(atom_rings) == 0 else max(len(x) for x in ri.AtomRings()) +@requires_python_libs("rdkit") def label_rdmol_atoms( mol: Union[nx.Graph, Chem.Mol], labels: List[Any] ) -> Union[nx.Graph, Chem.Mol]: @@ -146,6 +151,7 @@ def label_rdmol_atoms( return mol +@requires_python_libs("rdkit") def tag_rdmol_atoms( mol, atoms_to_tag, tag: str = "x" ) -> Union[nx.Graph, Chem.Mol]: @@ -156,6 +162,7 @@ def tag_rdmol_atoms( return mol +@requires_python_libs("rdkit") def get_mol(smiles: str) -> rdkit.Chem.rdchem.Mol: """ Function for getting rdmol from smiles. Applies kekulization. @@ -172,6 +179,7 @@ def get_mol(smiles: str) -> rdkit.Chem.rdchem.Mol: return mol +@requires_python_libs("rdkit") def get_smiles(mol: Union[nx.Graph, rdkit.Chem.rdchem.Mol]) -> str: """ Function for getting smiles from rdmol. Applies kekulization. 
@@ -186,6 +194,7 @@ def get_smiles(mol: Union[nx.Graph, rdkit.Chem.rdchem.Mol]) -> str: return Chem.MolToSmiles(mol, kekuleSmiles=True) +@requires_python_libs("rdkit") def sanitize(mol: rdkit.Chem.rdchem.Mol) -> rdkit.Chem.rdchem.Mol: """ Function for sanitizing a rdmol @@ -203,6 +212,7 @@ def sanitize(mol: rdkit.Chem.rdchem.Mol) -> rdkit.Chem.rdchem.Mol: return mol +@requires_python_libs("rdkit") def copy_edit_mol(mol: rdkit.Chem.rdchem.Mol) -> rdkit.Chem.rdchem.Mol: """ Function for copying a rdmol @@ -224,6 +234,7 @@ def copy_edit_mol(mol: rdkit.Chem.rdchem.Mol) -> rdkit.Chem.rdchem.Mol: return new_mol +@requires_python_libs("rdkit") def get_clique_mol(mol: rdkit.Chem.rdchem.Atom, atoms: List[int]): """ Function for getting clique rdmol @@ -242,6 +253,7 @@ def get_clique_mol(mol: rdkit.Chem.rdchem.Atom, atoms: List[int]): return new_mol +@requires_python_libs("rdkit") def copy_rdmol_atom(atom: rdkit.Chem.rdchem.Atom) -> rdkit.Chem.rdchem.Atom: """ Function for copying an atom @@ -257,6 +269,7 @@ def copy_rdmol_atom(atom: rdkit.Chem.rdchem.Atom) -> rdkit.Chem.rdchem.Atom: return new_atom +@requires_python_libs("rdkit") def get_morgan_fp( mol: Union[nx.Graph, rdkit.Chem.rdchem.Mol], radius: int = 2, @@ -281,6 +294,7 @@ def get_morgan_fp( ) +@requires_python_libs("rdkit") def get_morgan_fp_np( mol: Union[nx.Graph, rdkit.Chem.rdchem.Mol], radius: int = 2, @@ -307,18 +321,21 @@ def get_morgan_fp_np( return arr +@requires_python_libs("rdkit") def compute_fragments(mol: Union[nx.Graph, Chem.Mol]) -> List[Chem.Mol]: if isinstance(mol, nx.Graph): mol = mol.graph["rdmol"] return list(Chem.GetMolFrags(mol, asMols=True)) +@requires_python_libs("rdkit") def get_mol_weight(mol: Union[nx.Graph, Chem.Mol]) -> float: if isinstance(mol, nx.Graph): mol = mol.graph["rdmol"] return mol # TDOO +@requires_python_libs("rdkit") def get_qed_score( mol: Union[nx.Graph, rdkit.Chem.rdchem.Mol] ) -> Union[float, None]: @@ -364,6 +381,7 @@ def simplify_smile(smile: str) -> str: return 
"".join(stripped_smile) +@requires_python_libs("selfies") def smile_to_selfies(smile: str) -> str: """Encodes a SMILES string into a Selfies string. @@ -375,6 +393,7 @@ def smile_to_selfies(smile: str) -> str: return sf.encoder(smile) +@requires_python_libs("selfies") def selfies_to_smile(selfie: str) -> str: """Decodes a selfies string into a SMILES string. @@ -386,6 +405,7 @@ def selfies_to_smile(selfie: str) -> str: return sf.decoder(selfie) +@requires_python_libs("rdkit") def tree_decomp(mol: rdkit.Chem.rdchem.Mol) -> Tuple[List]: """ Function for decomposing rdmol to a tree diff --git a/graphein/protein/features/nodes/dssp.py b/graphein/protein/features/nodes/dssp.py index eb06c16ca..d6cb088ac 100644 --- a/graphein/protein/features/nodes/dssp.py +++ b/graphein/protein/features/nodes/dssp.py @@ -15,10 +15,11 @@ import networkx as nx import pandas as pd from Bio.PDB.DSSP import dssp_dict_from_pdb_file, residue_max_acc +from loguru import logger from graphein.protein.resi_atoms import STANDARD_AMINO_ACID_MAPPING_1_TO_3 from graphein.protein.utils import save_pdb_df_to_pdb -from graphein.utils.dependencies import is_tool +from graphein.utils.dependencies import is_tool, requires_external_dependencies DSSP_COLS = [ "chain", @@ -70,6 +71,7 @@ def parse_dssp_df(dssp: Dict[str, Any]) -> pd.DataFrame: return pd.DataFrame.from_records(appender, columns=DSSP_COLS) +@requires_external_dependencies("mkdssp") def add_dssp_df( G: nx.Graph, dssp_config: Optional[DSSPConfig], @@ -79,12 +81,13 @@ def add_dssp_df( :param G: Input protein graph :param G: nx.Graph - :param dssp_config: DSSPConfig object. Specifies which executable to run. Located in graphein.protein.config + :param dssp_config: DSSPConfig object. Specifies which executable to run. + Located in :obj:`graphein.protein.config`. 
:type dssp_config: DSSPConfig, optional :return: Protein graph with DSSP dataframe added :rtype: nx.Graph """ - + # if dssp_config is None: config = G.graph["config"] pdb_code = G.graph["pdb_code"] path = G.graph["path"] @@ -107,14 +110,14 @@ def add_dssp_df( if os.path.isfile(config.pdb_dir / (pdb_code + ".pdb")): pdb_file = config.pdb_dir / (pdb_code + ".pdb") + # get dssp version string + dssp_version = re.search( + r"version ([\d\.]+)", os.popen(f"{executable} --version").read() + ).group( + 1 + ) # e.g. "4.0.4" # Check for existence of pdb file. If not, reconstructs it from the raw df. if pdb_file: - # get dssp version string - dssp_version = re.search( - r"version ([\d\.]+)", os.popen(f"{executable} --version").read() - ).group( - 1 - ) # e.g. "4.0.4" dssp_dict = dssp_dict_from_pdb_file( pdb_file, DSSP=executable, dssp_version=dssp_version ) @@ -124,17 +127,26 @@ def add_dssp_df( G.graph["raw_pdb_df"], tmpdirname + f"/{pdb_name}.pdb" ) dssp_dict = dssp_dict_from_pdb_file( - tmpdirname + f"/{pdb_name}.pdb", DSSP=executable + tmpdirname + f"/{pdb_name}.pdb", + DSSP=executable, + dssp_version=dssp_version, ) + if len(dssp_dict[0]) == 0: + raise ValueError( + "DSSP could not be calculated. Check DSSP version " + f"({dssp_version}) or that the input PDB file is valid." 
+ ) + if config.verbose: - print(f"Using DSSP executable '{executable}'") + logger.debug(f"Using DSSP executable '{executable}'") dssp_dict = parse_dssp_df(dssp_dict) # Convert 1 letter aa code to 3 letter dssp_dict["aa"] = dssp_dict["aa"].map(STANDARD_AMINO_ACID_MAPPING_1_TO_3) - # Resolve UNKs NOTE: the original didn't work if HETATM residues exist in DSSP output + # Resolve UNKs + # NOTE: the original didn't work if HETATM residues exist in DSSP output _raw_pdb_df = G.graph["raw_pdb_df"].copy().drop_duplicates("node_id") _dssp_df_unk = dssp_dict.loc[dssp_dict["aa"] == "UNK"][ ["chain", "resnum", "icode"] @@ -177,7 +189,7 @@ def add_dssp_df( dssp_dict.set_index("node_id", inplace=True) if config.verbose: - print(dssp_dict) + logger.debug(dssp_dict) # Assign DSSP Dict G.graph["dssp_df"] = dssp_dict @@ -241,7 +253,7 @@ def add_dssp_feature(G: nx.Graph, feature: str) -> nx.Graph: nx.set_node_attributes(G, dict(dssp_df[feature]), feature) if config.verbose: - print("Added " + feature + " features to graph nodes") + logger.debug("Added " + feature + " features to graph nodes") return G diff --git a/graphein/protein/meshes.py b/graphein/protein/meshes.py index 1f8fc7c48..18360e06f 100644 --- a/graphein/protein/meshes.py +++ b/graphein/protein/meshes.py @@ -15,7 +15,11 @@ from loguru import logger as log from graphein.protein.config import ProteinMeshConfig -from graphein.utils.dependencies import import_message +from graphein.utils.dependencies import ( + import_message, + requires_external_dependencies, + requires_python_libs, +) from graphein.utils.pymol import MolViewer try: @@ -61,6 +65,7 @@ def configure_pymol_session( pymol.start([config.pymol_command_line_options]) +@requires_external_dependencies("pymol") def get_obj_file( pdb_file: Optional[str] = None, pdb_code: Optional[str] = None, @@ -84,8 +89,6 @@ def get_obj_file( """ pymol = MolViewer() - check_for_pymol_installation() - # Check inputs if not pdb_code and not pdb_file: raise ValueError("Please pass 
either a pdb_file or pdb_code argument") @@ -136,6 +139,7 @@ def parse_pymol_commands(config: ProteinMeshConfig) -> List[str]: return config.pymol_commands +@requires_external_dependencies("pymol") def run_pymol_commands(commands: List[str]) -> None: """ Runs Pymol Commands. @@ -150,6 +154,7 @@ def run_pymol_commands(commands: List[str]) -> None: pymol.do(c) +@requires_python_libs("pytorch3d") def create_mesh( pdb_file: Optional[str] = None, pdb_code: Optional[str] = None, @@ -186,6 +191,7 @@ def create_mesh( return verts, faces, aux +@requires_python_libs("torch") def normalize_and_center_mesh_vertices( verts: torch.FloatTensor, ) -> torch.FloatTensor: @@ -207,6 +213,7 @@ def normalize_and_center_mesh_vertices( return verts +@requires_python_libs("torch", "pytorch3d") def convert_verts_and_face_to_mesh( verts: torch.FloatTensor, faces: NamedTuple ) -> Meshes: diff --git a/graphein/utils/dependencies.py b/graphein/utils/dependencies.py index 44fe37a66..932346768 100644 --- a/graphein/utils/dependencies.py +++ b/graphein/utils/dependencies.py @@ -1,5 +1,6 @@ import os import sys +from functools import wraps from shutil import which from typing import Optional @@ -84,3 +85,70 @@ def is_tool(name: str, error: bool = False) -> bool: if not found and error: raise MissingDependencyError(name) return found + + +# Decorator for checking if a function has the required dependencies +def requires_external_dependencies(*deps): + """ + A decorator to check if all required dependencies are installed before + calling the decorated function. If a dependency is missing, it raises + a MissingDependencyError. + + :param deps: A list of dependencies (as strings) to check for. 
+ """ + + def decorator(func): + @wraps(func) + def wrapper(*args, **kwargs): + missing_deps = [dep for dep in deps if not is_tool(dep)] + if missing_deps: + missing = ", ".join(missing_deps) + raise MissingDependencyError( + f"Missing dependencies: {missing}" + ) + return func(*args, **kwargs) + + return wrapper + + return decorator + + +_lib_check_cache = {} + + +def requires_python_libs(*libs): + """ + A decorator to check if all required Python library dependencies are installed + before calling the decorated function. If a library is missing, it raises + an ImportError with details about the missing libraries. Caches check results + to avoid repeated imports. + + :param libs: A list of library names (as strings) to check for. + """ + + def decorator(func): + @wraps(func) + def wrapper(*args, **kwargs): + missing_libs = [] + for lib in libs: + # Check if the library check result is cached + if lib in _lib_check_cache: + if not _lib_check_cache[lib]: + missing_libs.append(lib) + else: + try: + __import__(lib) + _lib_check_cache[lib] = True + except ImportError: + _lib_check_cache[lib] = False + missing_libs.append(lib) + if missing_libs: + missing = ", ".join(missing_libs) + raise ImportError( + f"Missing Python library dependencies: {missing}" + ) + return func(*args, **kwargs) + + return wrapper + + return decorator diff --git a/graphein/utils/pymol/core.py b/graphein/utils/pymol/core.py index 3da11bdd5..da27d3036 100644 --- a/graphein/utils/pymol/core.py +++ b/graphein/utils/pymol/core.py @@ -5,6 +5,7 @@ import tempfile import time +from ..dependencies import requires_external_dependencies from .compat import Server HOST = os.environ.get("PYMOL_RPCHOST", "localhost") @@ -29,6 +30,7 @@ def __getattr__(self, key): def _process_is_running(self): return self._process is not None and self._process.poll() is None + @requires_external_dependencies("pymol") def start(self, args=("-Q",), exe="pymol"): """Start the PyMOL RPC server and connect to it Start simple GUI 
(-xi), suppress all output (-Q): diff --git a/notebooks/ppi_graph.ipynb b/notebooks/ppi_graph.ipynb index b4caa9fb7..3e9153266 100644 --- a/notebooks/ppi_graph.ipynb +++ b/notebooks/ppi_graph.ipynb @@ -1990,11 +1990,11 @@ "for n, d in g.nodes(data=True):\n", " try:\n", " fp = download_alphafold_structure(d['uniprot_ids'][0])[0]\n", - " pg = construct_graph(pg_config, pdb_path=fp)\n", - " \n", + " pg = construct_graph(pg_config, path=fp)\n", + "\n", " # Add protein graph as node feature\n", " d['protein_graph'] = pg\n", - " \n", + "\n", " # Plot\n", " ax = plot_protein_structure_graph(pg, label_node_ids=False, colour_nodes_by=\"residue_name\")\n", " ax.set_title(d[\"uniprot_ids\"][0])\n", diff --git a/notebooks/residue_graphs.ipynb b/notebooks/residue_graphs.ipynb index ff4f34864..656ba39ea 100644 --- a/notebooks/residue_graphs.ipynb +++ b/notebooks/residue_graphs.ipynb @@ -273,7 +273,7 @@ "If you wish to use a local `.pdb` file, you can run:\n", "\n", "```python\n", - "g = construct_graph(config=config, pdb_path=\"../graphein/examples/pdbs/3eiy.pdb\")\n", + "g = construct_graph(config=config, path=\"../graphein/examples/pdbs/3eiy.pdb\")\n", "```" ] },