From cae7aa45ccaa8ebe2b8f39e2523ba31cf9ab87cc Mon Sep 17 00:00:00 2001 From: kierandidi Date: Sun, 14 Apr 2024 17:12:37 +0100 Subject: [PATCH 1/3] exposed fill_value to protein_to_pyg function --- graphein/protein/tensor/io.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/graphein/protein/tensor/io.py b/graphein/protein/tensor/io.py index eeaa93e4..abc87119 100644 --- a/graphein/protein/tensor/io.py +++ b/graphein/protein/tensor/io.py @@ -108,6 +108,7 @@ def protein_to_pyg( atom_types: List[str] = PROTEIN_ATOMS, remove_nonstandard: bool = True, store_het: bool = False, + fill_value_coords: float = 1e-5 ) -> Data: """ Parses a protein (from either: a PDB code, PDB file or a UniProt ID @@ -237,7 +238,7 @@ def protein_to_pyg( df["residue_id"] = df.residue_id + ":" + df.insertion out = Data( - coords=protein_df_to_tensor(df, atoms_to_keep=atom_types), + coords=protein_df_to_tensor(df, atoms_to_keep=atom_types, fill_value=fill_value_coords), residues=get_sequence( df, chains=chain_selection, From 2458b68840e3cb58e5b60eb12379d4f04df0b704 Mon Sep 17 00:00:00 2001 From: kierandidi Date: Sun, 14 Apr 2024 17:15:29 +0100 Subject: [PATCH 2/3] added to CHANGELOG --- CHANGELOG.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f867abc7..9929c5bf 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,7 +6,8 @@ * Fix bug where the `deprotonate` argument is not wired up to `graphein.protein.graphs.construct_graphs`. [#375](https://github.com/a-r-j/graphein/pull/375) #### Misc -* Updated Foldcomp datasets with improved setup function and updated database choices such as ESMAtlas [#382](https://github.com/a-r-j/graphein/pull/382) +* Exposed the `fill_value` option of `protein_df_to_tensor` through a new `fill_value_coords` argument of the `protein_to_pyg` function. [#385](https://github.com/a-r-j/graphein/pull/385) +* Updated Foldcomp datasets with improved setup function and updated database choices such as ESMAtlas. 
[#382](https://github.com/a-r-j/graphein/pull/382) * Resolve issue with notebook version and `pluggy` in Dockerfile. [#372](https://github.com/a-r-j/graphein/pull/372) * Remove `typing_extension` as dependency since we now primarily support Python >=3.8 and `Literal` is included in `typing` there. From 739cafb22b9c27b4cc33de8ed41ba3f2ff05b597 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sun, 14 Apr 2024 16:16:53 +0000 Subject: [PATCH 3/3] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- graphein/protein/tensor/io.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/graphein/protein/tensor/io.py b/graphein/protein/tensor/io.py index abc87119..cc074ecd 100644 --- a/graphein/protein/tensor/io.py +++ b/graphein/protein/tensor/io.py @@ -108,7 +108,7 @@ def protein_to_pyg( atom_types: List[str] = PROTEIN_ATOMS, remove_nonstandard: bool = True, store_het: bool = False, - fill_value_coords: float = 1e-5 + fill_value_coords: float = 1e-5, ) -> Data: """ Parses a protein (from either: a PDB code, PDB file or a UniProt ID @@ -238,7 +238,9 @@ def protein_to_pyg( df["residue_id"] = df.residue_id + ":" + df.insertion out = Data( - coords=protein_df_to_tensor(df, atoms_to_keep=atom_types, fill_value=fill_value_coords), + coords=protein_df_to_tensor( + df, atoms_to_keep=atom_types, fill_value=fill_value_coords + ), residues=get_sequence( df, chains=chain_selection,