Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add support for DSSP version and add rsa node features to residues with insertion codes #355

Merged
merged 30 commits into from
Mar 31, 2024
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
Show all changes
30 commits
Select commit Hold shift + click to select a range
4173006
Add support for DSSP version and insertions in
biochunan Nov 2, 2023
8849abe
Add test for DSSP DataFrame and surface subgraph with insertion codes
biochunan Nov 3, 2023
400ae8f
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Nov 3, 2023
a7c3817
add test data containing insertion code
Nov 6, 2023
76f76f8
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Nov 6, 2023
996b35c
change path to use test_data directory for structure file
Nov 6, 2023
09d7480
Merge branch 'chunan' of https://github.com/biochunan/graphein into p…
Nov 6, 2023
be209d2
fix filename typo
a-r-j Nov 6, 2023
2404377
Add conditional skip if DSSP not installed
Nov 6, 2023
c90f7c9
revert dssp executable path
Nov 6, 2023
dbc026d
Merge branch 'master' into pr/biochunan/355
Nov 8, 2023
e5a55f3
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Nov 8, 2023
e78c3cf
change DSSP executable name
a-r-j Nov 8, 2023
fedc146
use a fixture for example graph in DSSP test
a-r-j Nov 8, 2023
578a362
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Nov 8, 2023
f090b6e
the original version won't work if dssp output contains HETATM residue…
biochunan Feb 29, 2024
8728c7d
match chain, resnum, and insertion, the original won't work in case o…
biochunan Feb 29, 2024
843e789
Merge remote-tracking branch 'upstream/master'
biochunan Mar 4, 2024
605efbc
Merge branch 'master' into chunan
biochunan Mar 4, 2024
66efd00
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Mar 4, 2024
25cf8ee
clarified the comment
biochunan Mar 7, 2024
4e89e42
the original only worked with 4.0.4, now supports 3.0.0
biochunan Mar 15, 2024
6474cca
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Mar 15, 2024
a4df540
import re
biochunan Mar 15, 2024
15d45b9
Merge branch 'chunan' of github.com:biochunan/graphein into chunan
biochunan Mar 15, 2024
a8362e4
default value of numeric_only is deprecated
biochunan Mar 15, 2024
4969ea7
fix test fixture
a-r-j Mar 25, 2024
39f83e2
bump changelog
a-r-j Mar 31, 2024
f39f152
Merge branch 'master' into chunan
a-r-j Mar 31, 2024
26cfdbb
Merge branch 'master' into chunan
a-r-j Mar 31, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 15 additions & 1 deletion graphein/protein/features/nodes/dssp.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,13 @@ def add_dssp_df(

# Check for existence of pdb file. If not, reconstructs it from the raw df.
if pdb_file:
dssp_dict = dssp_dict_from_pdb_file(pdb_file, DSSP=executable)
# get dssp version string
dssp_version = (
os.popen(f"{executable} --version").read().strip().split()[-1]
) # e.g. "4.0.4"
dssp_dict = dssp_dict_from_pdb_file(
pdb_file, DSSP=executable, dssp_version=dssp_version
)
else:
with tempfile.TemporaryDirectory() as tmpdirname:
save_pdb_df_to_pdb(
Expand Down Expand Up @@ -143,6 +149,14 @@ def add_dssp_df(
+ ":"
+ dssp_dict["resnum"].astype(str)
)
if G.graph["config"].insertions:
dssp_dict["node_id"] = (
dssp_dict["node_id"] + ":" + dssp_dict["icode"].apply(str)
)
# Replace trailing : for non insertions
dssp_dict["node_id"] = dssp_dict["node_id"].str.replace(
r":\s*$", "", regex=True
)

dssp_dict.set_index("node_id", inplace=True)

Expand Down
65 changes: 65 additions & 0 deletions tests/protein/nodes/features/test_dssp.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
# basic
from functools import partial
from pathlib import Path

# test
import pytest

from graphein.protein.config import DSSPConfig, ProteinGraphConfig
from graphein.protein.edges import distance as D
from graphein.protein.features.nodes import rsa

# graphein
from graphein.protein.graphs import construct_graph
from graphein.protein.subgraphs import extract_surface_subgraph
from graphein.protein.utils import ProteinGraphConfigurationError

# ---------- test inputs ----------
# Structure file (named for its CRYST1 record and insertion codes), the DSSP
# executable handed to DSSPConfig, and the RSA cut-off used when extracting
# the surface subgraph.
RSA_THRESHOLD = 0.2
dssp_exe = "/usr/bin/mkdssp"
pdb_path = "example_pdb_with_cryst1_insertions.pdb"

# ---------- edge construction ----------
# Distance-threshold edges with fixed cut-offs, pre-bound so the function can
# be listed alongside the other edge constructors.
_distance_threshold_edges = partial(
    D.add_distance_threshold,
    threshold=4.5,
    long_interaction_threshold=4,
)

# NOTE: peptide-bond edges (add_peptide_bonds) are deliberately not included.
_edge_functions = [
    D.add_distance_to_edges,
    D.add_hydrogen_bond_interactions,
    D.add_ionic_interactions,
    D.add_backbone_carbonyl_carbonyl_interactions,
    D.add_salt_bridges,
    _distance_threshold_edges,
]

# ---------- graph config ----------
# Overrides applied on top of the ProteinGraphConfig defaults.
params_to_change = dict(
    granularity="centroids",  # one of "atom", "CA", "centroids"
    insertions=True,
    edge_construction_functions=_edge_functions,
    dssp_config=DSSPConfig(executable=dssp_exe),
    graph_metadata_functions=[rsa],
)
config = ProteinGraphConfig(**params_to_change)

# ---------- construct graph ----------
# Built once at import time and shared by every test in this module.
g = construct_graph(path=pdb_path, config=config, verbose=False)


# ---------- test: dssp DataFrame ----------
def test_assert_nonempty_dssp_df():
    """Fail if the DSSP DataFrame stored on the graph is empty.

    Per the original author's note, an empty DataFrame is what results when
    the DSSP version is not provided to ``dssp.add_dssp_df``.
    """
    dssp_df = g.graph["dssp_df"]
    if not dssp_df.empty:
        return
    pytest.fail("DSSP dataframe is empty")


# ---------- test: surface subgraph nodes with insertion code ----------
def test_extract_surface_subgraph_insertion_node():
    """Check that surface-subgraph extraction works with insertion codes.

    Per the original author's note, when insertion codes are not added to the
    DSSP node ids, ``extract_surface_subgraph`` raises
    ``ProteinGraphConfigurationError`` ("RSA not defined for all nodes",
    e.g. for node ``H:TYR:52:A``). The test fails with that error's message
    if the exception is raised.
    """
    try:
        # Only the absence of an exception matters; the subgraph itself is
        # not inspected, so the return value is discarded.
        extract_surface_subgraph(g, RSA_THRESHOLD)
    except ProteinGraphConfigurationError as e:
        # The f-prefix is required so the caught error's text is actually
        # interpolated into the failure message.
        pytest.fail(
            f"extract_surface_subgraph raised ProteinGraphConfigurationError:\n{e}"
        )