From c6bd3d624be1a165e2949019879da76bd0a48c64 Mon Sep 17 00:00:00 2001 From: Arian Jamasb Date: Mon, 8 Jul 2024 18:11:46 +0100 Subject: [PATCH 01/21] switch pytest to unittest --- biopandas/mmtf/tests/test_read_mmtf.py | 4 ++-- biopandas/mmtf/tests/test_write_mmtf.py | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/biopandas/mmtf/tests/test_read_mmtf.py b/biopandas/mmtf/tests/test_read_mmtf.py index 9694822..78889d5 100644 --- a/biopandas/mmtf/tests/test_read_mmtf.py +++ b/biopandas/mmtf/tests/test_read_mmtf.py @@ -4,7 +4,7 @@ # Project Website: http://rasbt.github.io/biopandas/ # Code Repository: https://github.com/rasbt/biopandas -import pytest +import unittest import os from urllib.error import HTTPError, URLError from urllib.request import urlopen @@ -42,7 +42,7 @@ #"charge", ] -@pytest.mark.skip(reason="PDB No longer serves MMTF files.") +@unittest.skip(reason="PDB No longer serves MMTF files.") def test_fetch_pdb(): """Test fetch_pdb""" ppdb = PandasMmtf() diff --git a/biopandas/mmtf/tests/test_write_mmtf.py b/biopandas/mmtf/tests/test_write_mmtf.py index e934ce1..ce7bab2 100644 --- a/biopandas/mmtf/tests/test_write_mmtf.py +++ b/biopandas/mmtf/tests/test_write_mmtf.py @@ -1,12 +1,12 @@ import os -import pytest +import unittest import pandas as pd from pandas.testing import assert_frame_equal from biopandas.mmtf.pandas_mmtf import PandasMmtf, write_mmtf -@pytest.mark.skip(reason="PDB No longer serves MMTF files.") +@unittest.skip(reason="PDB No longer serves MMTF files.") def test_write_mmtf_bp(): PDB_CODES = ["4hhb", "3eiy", "1t48", "1ehz", "4ggb", "1bxa", "1cbn", "1rcf"] for pdb in PDB_CODES: @@ -22,7 +22,7 @@ def test_write_mmtf_bp(): os.remove("test.mmtf") -@pytest.mark.skip(reason="PDB No longer serves MMTF files.") +@unittest.skip(reason="PDB No longer serves MMTF files.") def test_write_mmtf(): PDB_CODES = ["4hhb", "3eiy", "1t48", "1ehz", "4ggb", "1bxa", "1cbn", "1rcf"] for pdb in PDB_CODES: From 5a4bee3af40ff0169616c0274753a9d72d1ca457 Mon Sep 17 00:00:00 2001 From: Arian Jamasb Date: Mon, 8 Jul 2024 18:27:52 +0100 Subject: [PATCH 02/21] switch testing over to pytest from nose/unittest --- .appveyor.yml | 4 ++-- biopandas/mmcif/tests/test_read_mmcif.py | 6 ++---- biopandas/mmcif/tests/test_rmsd.py | 8 ++++---- biopandas/mmtf/tests/test_read_mmtf.py | 5 ----- biopandas/mmtf/tests/test_rmsd.py | 9 +++++---- biopandas/pdb/tests/test_gyradius.py | 10 ++++------ biopandas/pdb/tests/test_impute.py | 3 --- biopandas/pdb/tests/test_read_pdb.py | 7 +++---- biopandas/pdb/tests/test_rmsd.py | 9 ++++----- ci/.travis_install.sh | 6 +++--- ci/.travis_test.sh | 4 ++-- docs/CONTRIBUTING.md | 21 ++++++++++----------- setup.py | 2 +- 13 files changed, 40 insertions(+), 54 deletions(-) diff --git a/.appveyor.yml b/.appveyor.yml index 8efc418..e300d19 100644 --- a/.appveyor.yml +++ b/.appveyor.yml @@ -14,8 +14,8 @@ install: - conda config --set always_yes yes --set changeps1 no - conda update -q conda - conda info -a - - conda create -q -n test-environment --channel=conda-forge mmtf-python numpy scipy pandas nose looseversion python=%PYTHON_VERSION% + - conda create -q -n test-environment --channel=conda-forge mmtf-python numpy scipy pandas pytest looseversion python=%PYTHON_VERSION% - activate test-environment test_script: - - nosetests -s -v + - pytest -s -v diff --git a/biopandas/mmcif/tests/test_read_mmcif.py b/biopandas/mmcif/tests/test_read_mmcif.py index 1d8b38d..b325206 100644 --- a/biopandas/mmcif/tests/test_read_mmcif.py +++ b/biopandas/mmcif/tests/test_read_mmcif.py @@ -6,16 +6,14 @@ import os +import pytest from urllib.error import HTTPError -from urllib.request import urlopen from pathlib import Path -import numpy as np import pandas as pd from biopandas.mmcif import PandasMmcif from biopandas.pdb import PandasPdb from biopandas.testutils import assert_raises -from nose.tools import raises from pandas.testing import assert_frame_equal TESTDATA_FILENAME = os.path.join(os.path.dirname(__file__), "data", "3eiy.cif") @@ -282,7 +280,7 @@ def test_read_pdb_with_pathlib(): # assert ppdb.code == "4eiy", ppdb.code -@raises(AttributeError) +@pytest.mark.xfail(AttributeError) def test_get_exceptions(): ppdb = PandasMmcif() ppdb.read_mmcif(TESTDATA_FILENAME) diff --git a/biopandas/mmcif/tests/test_rmsd.py b/biopandas/mmcif/tests/test_rmsd.py index cb8d0ac..122c1f9 100644 --- a/biopandas/mmcif/tests/test_rmsd.py +++ b/biopandas/mmcif/tests/test_rmsd.py @@ -5,9 +5,9 @@ # Code Repository: https://github.com/rasbt/biopandas import os +import pytest from biopandas.mmcif import PandasMmcif -from nose.tools import raises TESTDATA_1t48 = os.path.join(os.path.dirname(__file__), "data", "1t48.cif") TESTDATA_1t49 = os.path.join(os.path.dirname(__file__), "data", "1t49.cif") @@ -32,17 +32,17 @@ def test_equal(): assert r == 0.000, r -@raises(AttributeError) +@pytest.mark.xfail(AttributeError) def test_wrong_arg(): PandasMmcif.rmsd(p1t48.df["ATOM"].loc[1:, :], p1t48.df["ATOM"], s="bla") -@raises(AttributeError) +@pytest.mark.xfail(AttributeError) def test_incompatible(): PandasMmcif.rmsd(p1t48.df["ATOM"].loc[1:, :], p1t48.df["ATOM"], s=None) -@raises(AttributeError) +@pytest.mark.xfail(AttributeError) def test_invalid_query(): PandasMmcif.rmsd(p1t48.df["ATOM"].loc[1:, :], p1t48.df["ATOM"], s="bla") diff --git a/biopandas/mmtf/tests/test_read_mmtf.py b/biopandas/mmtf/tests/test_read_mmtf.py index 78889d5..cc3c1f9 100644 --- a/biopandas/mmtf/tests/test_read_mmtf.py +++ b/biopandas/mmtf/tests/test_read_mmtf.py @@ -6,16 +6,11 @@ import unittest import os -from urllib.error import HTTPError, URLError -from urllib.request import urlopen -import numpy as np import pandas as pd -from nose.tools import raises from biopandas.mmtf import PandasMmtf from biopandas.pdb import PandasPdb -from biopandas.testutils import assert_raises MMTF_TESTDATA_FILENAME = os.path.join(os.path.dirname(__file__), "data", "3eiy.mmtf") MMTF_TESTDATA_FILENAME_GZ = os.path.join(os.path.dirname(__file__), "data", "3eiy.mmtf.gz") diff --git a/biopandas/mmtf/tests/test_rmsd.py b/biopandas/mmtf/tests/test_rmsd.py index b9292b5..561ba85 100644 --- a/biopandas/mmtf/tests/test_rmsd.py +++ b/biopandas/mmtf/tests/test_rmsd.py @@ -5,9 +5,10 @@ # Code Repository: https://github.com/rasbt/biopandas import os +import pytest from biopandas.mmtf import PandasMmtf -from nose.tools import raises + TESTDATA_1t48 = os.path.join(os.path.dirname(__file__), "data", "1t48.mmtf") TESTDATA_1t49 = os.path.join(os.path.dirname(__file__), "data", "1t49.mmtf") @@ -35,17 +36,17 @@ def test_equal(): assert r == 0.000, r -@raises(AttributeError) +@pytest.mark.xfail(AttributeError) def test_wrong_arg(): PandasMmtf.rmsd(p1t48.df["ATOM"].loc[1:, :], p1t48.df["ATOM"], s="bla") -@raises(AttributeError) +@pytest.mark.xfail(AttributeError) def test_incompatible(): PandasMmtf.rmsd(p1t48.df["ATOM"].loc[1:, :], p1t48.df["ATOM"], s=None) -@raises(AttributeError) +@pytest.mark.xfail(AttributeError) def test_invalid_query(): PandasMmtf.rmsd(p1t48.df["ATOM"].loc[1:, :], p1t48.df["ATOM"], s="bla") diff --git a/biopandas/pdb/tests/test_gyradius.py b/biopandas/pdb/tests/test_gyradius.py index 3098010..9781f75 100644 --- a/biopandas/pdb/tests/test_gyradius.py +++ b/biopandas/pdb/tests/test_gyradius.py @@ -6,9 +6,7 @@ from biopandas.pdb import PandasPdb import os -import pandas as pd -from nose.tools import raises -import warnings +import pytest TESTDATA_1t48 = os.path.join(os.path.dirname(__file__), "data", "1t48_995.pdb") @@ -40,12 +38,12 @@ def test_atom_and_hetatm(): assert rg == expected_rg, f"Expected {expected_rg}, got {rg} instead" -@raises(KeyError) +@pytest.mark.xfail(KeyError) def test_wrong_record_name(): p1t48.gyradius(("Wrong",)) -@raises(TypeError) +@pytest.mark.xfail(TypeError) def test_wrong_arg_type(): p1t48.gyradius(5) @@ -62,7 +60,7 @@ def test_negative_decimals(): assert rg == expected_rg, f"Expected {expected_rg}, got {rg} instead" -@raises(TypeError) +@pytest.mark.xfail(TypeError) def test_wrong_decimals_arg(): p1t48.gyradius(decimals='five') diff --git a/biopandas/pdb/tests/test_impute.py b/biopandas/pdb/tests/test_impute.py index 729ce2b..225f5c5 100644 --- a/biopandas/pdb/tests/test_impute.py +++ b/biopandas/pdb/tests/test_impute.py @@ -7,9 +7,6 @@ from biopandas.pdb import PandasPdb import os -import numpy as np -import pandas as pd -from nose.tools import raises TESTDATA_FILENAME = os.path.join( os.path.dirname(__file__), "data", "3eiy_stripped_no_ele.pdb" diff --git a/biopandas/pdb/tests/test_read_pdb.py b/biopandas/pdb/tests/test_read_pdb.py index 68cc9eb..912140a 100644 --- a/biopandas/pdb/tests/test_read_pdb.py +++ b/biopandas/pdb/tests/test_read_pdb.py @@ -6,14 +6,13 @@ import os -from urllib.error import HTTPError, URLError -from urllib.request import urlopen +import pytest +from urllib.error import HTTPError import numpy as np import pandas as pd from biopandas.pdb import PandasPdb from biopandas.testutils import assert_raises -from nose.tools import raises TESTDATA_FILENAME = os.path.join(os.path.dirname(__file__), "data", "3eiy.pdb") TESTDATA_FILENAME2 = os.path.join( @@ -269,7 +268,7 @@ def test_anisou_input_handling(): assert ppdb.code == "4eiy", ppdb.code -@raises(AttributeError) +@pytest.mark.xfail(AttributeError) def test_get_exceptions(): ppdb = PandasPdb() ppdb.read_pdb(TESTDATA_FILENAME) diff --git a/biopandas/pdb/tests/test_rmsd.py b/biopandas/pdb/tests/test_rmsd.py index a5f2ea6..f8ca140 100644 --- a/biopandas/pdb/tests/test_rmsd.py +++ b/biopandas/pdb/tests/test_rmsd.py @@ -6,8 +6,7 @@ from biopandas.pdb import PandasPdb import os -from nose.tools import raises - +import pytest TESTDATA_1t48 = os.path.join(os.path.dirname(__file__), "data", "1t48_995.pdb") TESTDATA_1t49 = os.path.join(os.path.dirname(__file__), "data", "1t49_995.pdb") @@ -32,17 +31,17 @@ def test_equal(): assert r == 0.000, r -@raises(AttributeError) +@pytest.mark.xfail(AttributeError) def test_wrong_arg(): PandasPdb.rmsd(p1t48.df["ATOM"].loc[1:, :], p1t48.df["ATOM"], s="bla") -@raises(AttributeError) +@pytest.mark.xfail(AttributeError) def test_incompatible(): PandasPdb.rmsd(p1t48.df["ATOM"].loc[1:, :], p1t48.df["ATOM"], s=None) -@raises(AttributeError) +@pytest.mark.xfail(AttributeError) def test_invalid_query(): PandasPdb.rmsd(p1t48.df["ATOM"].loc[1:, :], p1t48.df["ATOM"], s="bla") diff --git a/ci/.travis_install.sh b/ci/.travis_install.sh index c7547b5..f26ae3b 100755 --- a/ci/.travis_install.sh +++ b/ci/.travis_install.sh @@ -32,12 +32,12 @@ conda info -a # Configure the conda environment and put it in the path using the # provided versions if [[ "$LATEST" == "true" ]]; then - conda create -n testenv --yes python=$PYTHON_VERSION pip nose \ + conda create -n testenv --yes python=$PYTHON_VERSION pip pytest \ numpy scipy pandas else - conda create -n testenv --yes python=$PYTHON_VERSION pip nose \ + conda create -n testenv --yes python=$PYTHON_VERSION pip pytest \ numpy=$NUMPY_VERSION scipy=$SCIPY_VERSION \ - pandas=$PANDAS_VERSION + pandas=$PANDAS_VERSION fi conda init bash diff --git a/ci/.travis_test.sh b/ci/.travis_test.sh index de7e4c2..c2003bd 100755 --- a/ci/.travis_test.sh +++ b/ci/.travis_test.sh @@ -14,8 +14,8 @@ python -c "import scipy; print('scipy %s' % scipy.__version__)" python -c "import pandas; print('pandas %s' % pandas.__version__)" if [[ "$COVERAGE" == "true" ]]; then - nosetests -s -v --with-coverage --cover-package=biopandas + pytest -s -v --with-coverage --cover-package=biopandas else - nosetests -s -v biopandas + pytest -s -v biopandas fi #make test-doc test-sphinxext diff --git a/docs/CONTRIBUTING.md b/docs/CONTRIBUTING.md index c6d3c10..2f0ca4b 100755 --- a/docs/CONTRIBUTING.md +++ b/docs/CONTRIBUTING.md @@ -6,22 +6,21 @@ I would be very happy about any kind of contributions that help to improve and e If this is your first contribution, please review the [Code of Conduct](CODE_OF_CONDUCT.md). - ### Quick Contributor Checklist This is a quick checklist about the different steps of a typical contribution to biopandas and other open source projects. Consider copying this list to a local text file (or the issue tracker) and checking off items as you go. -1. [ ] Open a new "issue" on GitHub to discuss the new feature / bug fix +1. [ ] Open a new "issue" on GitHub to discuss the new feature / bug fix 2. [ ] Fork the biopandas repository from GitHub (if not already done earlier) -3. [ ] Create and checkout a new topic branch -4. [ ] Implement new feature or apply the bug-fix -5. [ ] Add appropriate unit test functions -6. [ ] Run `nosetests -sv` and make sure that all unit tests pass -7. [ ] Check/improve the test coverage by running `nosetests --with-coverage` -8. [ ] Add a note about the change to the `./docs/sources/CHANGELOG.md` file -9. [ ] Modify documentation in the appropriate location under `biopandas/docs/sources/` +3. [ ] Create and checkout a new topic branch +4. [ ] Implement new feature or apply the bug-fix +5. [ ] Add appropriate unit test functions +6. [ ] Run `pytest -sv` and make sure that all unit tests pass +7. [ ] Check/improve the test coverage by running `pytest --with-coverage` +8. [ ] Add a note about the change to the `./docs/sources/CHANGELOG.md` file +9. [ ] Modify documentation in the appropriate location under `biopandas/docs/sources/` 10. [ ] Push the topic branch to the server and create a pull request 11. [ ] Check the Travis-CI build passed at [https://travis-ci.org/rasbt/biopandas](https://travis-ci.org/rasbt/biopandas) 12. [ ] Check/improve the unit test coverage at [https://coveralls.io/github/rasbt/biopandas](https://coveralls.io/github/rasbt/biopandas) @@ -154,11 +153,11 @@ $ git checkout Adding/modifying the unit tests and check if they pass: ```bash -$ nosetests -sv +$ pytest -sv ``` ```bash -$ nosetests --with-coverage +$ pytest --with-coverage ``` #### 4. Documenting the changes diff --git a/setup.py b/setup.py index bc528ba..6751ea3 100644 --- a/setup.py +++ b/setup.py @@ -34,7 +34,7 @@ }, include_package_data=True, install_requires=install_reqs, - extras_require={'test': ['pytest', 'pytest-cov','flake8', 'nose'],}, + extras_require={'test': ['pytest', 'pytest-cov','flake8'],}, license='BSD 3-Clause', platforms='any', classifiers=[ From 77cc989ef65485d8a83e6c6aa0d9355b7e4e509c Mon Sep 17 00:00:00 2001 From: Arian Jamasb Date: Mon, 8 Jul 2024 18:30:53 +0100 Subject: [PATCH 03/21] fix pytest expected failures --- biopandas/mmcif/tests/test_read_mmcif.py | 2 +- biopandas/mmcif/tests/test_rmsd.py | 6 +++--- biopandas/mmtf/tests/test_rmsd.py | 6 +++--- biopandas/pdb/tests/test_gyradius.py | 6 +++--- biopandas/pdb/tests/test_rmsd.py | 6 +++--- 5 files changed, 13 insertions(+), 13 deletions(-) diff --git a/biopandas/mmcif/tests/test_read_mmcif.py b/biopandas/mmcif/tests/test_read_mmcif.py index b325206..7189702 100644 --- a/biopandas/mmcif/tests/test_read_mmcif.py +++ b/biopandas/mmcif/tests/test_read_mmcif.py @@ -280,7 +280,7 @@ def test_read_pdb_with_pathlib(): # assert ppdb.code == "4eiy", ppdb.code -@pytest.mark.xfail(AttributeError) +@pytest.mark.xfail(raises=AttributeError) def test_get_exceptions(): ppdb = PandasMmcif() ppdb.read_mmcif(TESTDATA_FILENAME) diff --git a/biopandas/mmcif/tests/test_rmsd.py b/biopandas/mmcif/tests/test_rmsd.py index 122c1f9..5507059 100644 --- a/biopandas/mmcif/tests/test_rmsd.py +++ b/biopandas/mmcif/tests/test_rmsd.py @@ -32,17 +32,17 @@ def test_equal(): assert r == 0.000, r -@pytest.mark.xfail(AttributeError) +@pytest.mark.xfail(raises=AttributeError) def test_wrong_arg(): PandasMmcif.rmsd(p1t48.df["ATOM"].loc[1:, :], p1t48.df["ATOM"], s="bla") -@pytest.mark.xfail(AttributeError) +@pytest.mark.xfail(raises=AttributeError) def test_incompatible(): PandasMmcif.rmsd(p1t48.df["ATOM"].loc[1:, :], p1t48.df["ATOM"], s=None) -@pytest.mark.xfail(AttributeError) +@pytest.mark.xfail(raises=AttributeError) def test_invalid_query(): PandasMmcif.rmsd(p1t48.df["ATOM"].loc[1:, :], p1t48.df["ATOM"], s="bla") diff --git a/biopandas/mmtf/tests/test_rmsd.py b/biopandas/mmtf/tests/test_rmsd.py index 561ba85..1d02e06 100644 --- a/biopandas/mmtf/tests/test_rmsd.py +++ b/biopandas/mmtf/tests/test_rmsd.py @@ -36,17 +36,17 @@ def test_equal(): assert r == 0.000, r -@pytest.mark.xfail(AttributeError) +@pytest.mark.xfail(raises=AttributeError) def test_wrong_arg(): PandasMmtf.rmsd(p1t48.df["ATOM"].loc[1:, :], p1t48.df["ATOM"], s="bla") -@pytest.mark.xfail(AttributeError) +@pytest.mark.xfail(raises=AttributeError) def test_incompatible(): PandasMmtf.rmsd(p1t48.df["ATOM"].loc[1:, :], p1t48.df["ATOM"], s=None) -@pytest.mark.xfail(AttributeError) +@pytest.mark.xfail(raises=AttributeError) def test_invalid_query(): PandasMmtf.rmsd(p1t48.df["ATOM"].loc[1:, :], p1t48.df["ATOM"], s="bla") diff --git a/biopandas/pdb/tests/test_gyradius.py b/biopandas/pdb/tests/test_gyradius.py index 9781f75..40f7227 100644 --- a/biopandas/pdb/tests/test_gyradius.py +++ b/biopandas/pdb/tests/test_gyradius.py @@ -38,12 +38,12 @@ def test_atom_and_hetatm(): assert rg == expected_rg, f"Expected {expected_rg}, got {rg} instead" -@pytest.mark.xfail(KeyError) +@pytest.mark.xfail(raises=KeyError) def test_wrong_record_name(): p1t48.gyradius(("Wrong",)) -@pytest.mark.xfail(TypeError) +@pytest.mark.xfail(raises=TypeError) def test_wrong_arg_type(): p1t48.gyradius(5) @@ -60,7 +60,7 @@ def test_negative_decimals(): assert rg == expected_rg, f"Expected {expected_rg}, got {rg} instead" -@pytest.mark.xfail(TypeError) +@pytest.mark.xfail(raises=TypeError) def test_wrong_decimals_arg(): p1t48.gyradius(decimals='five') diff --git a/biopandas/pdb/tests/test_rmsd.py b/biopandas/pdb/tests/test_rmsd.py index f8ca140..dc7a707 100644 --- a/biopandas/pdb/tests/test_rmsd.py +++ b/biopandas/pdb/tests/test_rmsd.py @@ -31,17 +31,17 @@ def test_equal(): assert r == 0.000, r -@pytest.mark.xfail(AttributeError) +@pytest.mark.xfail(raises=AttributeError) def test_wrong_arg(): PandasPdb.rmsd(p1t48.df["ATOM"].loc[1:, :], p1t48.df["ATOM"], s="bla") -@pytest.mark.xfail(AttributeError) +@pytest.mark.xfail(raises=AttributeError) def test_incompatible(): PandasPdb.rmsd(p1t48.df["ATOM"].loc[1:, :], p1t48.df["ATOM"], s=None) -@pytest.mark.xfail(AttributeError) +@pytest.mark.xfail(raises=AttributeError) def test_invalid_query(): PandasPdb.rmsd(p1t48.df["ATOM"].loc[1:, :], p1t48.df["ATOM"], s="bla") From f4b15d46f5585ebd269028a61d24f314cae67ba0 Mon Sep 17 00:00:00 2001 From: Arian Jamasb Date: Mon, 8 Jul 2024 18:33:09 +0100 Subject: [PATCH 04/21] fix lingering expected fail --- biopandas/pdb/tests/test_read_pdb.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/biopandas/pdb/tests/test_read_pdb.py b/biopandas/pdb/tests/test_read_pdb.py index 912140a..e6bd8fa 100644 --- a/biopandas/pdb/tests/test_read_pdb.py +++ b/biopandas/pdb/tests/test_read_pdb.py @@ -268,7 +268,7 @@ def test_anisou_input_handling(): assert ppdb.code == "4eiy", ppdb.code -@pytest.mark.xfail(AttributeError) +@pytest.mark.xfail(raises=AttributeError) def test_get_exceptions(): ppdb = PandasPdb() ppdb.read_pdb(TESTDATA_FILENAME) From 1114ff599d22c20bc99c262a41612930a1d11099 Mon Sep 17 00:00:00 2001 From: Arian Jamasb Date: Mon, 8 Jul 2024 18:40:03 +0100 Subject: [PATCH 05/21] linting --- biopandas/constants.py | 1213 +++++++++++++++++++++---- biopandas/pdb/engines.py | 196 +++- biopandas/pdb/tests/test_amino3to1.py | 19 +- biopandas/pdb/tests/test_assign_df.py | 4 +- biopandas/pdb/tests/test_distance.py | 25 +- biopandas/pdb/tests/test_gyradius.py | 5 +- biopandas/pdb/tests/test_impute.py | 3 +- biopandas/pdb/tests/test_read_pdb.py | 9 +- biopandas/pdb/tests/test_rmsd.py | 23 +- biopandas/pdb/tests/test_write_pdb.py | 39 +- 10 files changed, 1290 insertions(+), 246 deletions(-) diff --git a/biopandas/constants.py b/biopandas/constants.py index 36e8259..153938f 100644 --- a/biopandas/constants.py +++ b/biopandas/constants.py @@ -1,178 +1,1043 @@ from typing import Dict -ATOMIC_MASSES: Dict[str, float] = {"C": 12.0107, "O": 15.9994, "N": 14.0067, "S": 32.065} +ATOMIC_MASSES: Dict[str, float] = { + "C": 12.0107, + "O": 15.9994, + "N": 14.0067, + "S": 32.065, +} protein_letters_3to1_extended: Dict[str, str] = { - "A5N": "N", "A8E": "V", "A9D": "S", "AA3": "A", "AA4": "A", "AAR": "R", - "ABA": "A", "ACL": "R", "AEA": "C", "AEI": "D", "AFA": "N", "AGM": "R", - "AGQ": "Y", "AGT": "C", "AHB": "N", "AHL": "R", "AHO": "A", "AHP": "A", - "AIB": "A", "AKL": "D", "AKZ": "D", "ALA": "A", "ALC": "A", "ALM": "A", - "ALN": "A", "ALO": "T", "ALS": "A", "ALT": "A", "ALV": "A", "ALY": "K", - "AME": "M", "AN6": "L", "AN8": "A", "API": "K", "APK": "K", "AR2": "R", - "AR4": "E", "AR7": "R", "ARG": "R", "ARM": "R", "ARO": "R", "AS7": "N", - "ASA": "D", "ASB": "D", "ASI": "D", "ASK": "D", "ASL": "D", "ASN": "N", - "ASP": "D", "ASQ": "D", "AYA": "A", "AZH": "A", "AZK": "K", "AZS": "S", - "AZY": "Y", "AVJ": "H", "A30": "Y", "A3U": "F", "ECC": "Q", "ECX": "C", - "EFC": "C", "EHP": "F", "ELY": "K", "EME": "E", "EPM": "M", "EPQ": "Q", - "ESB": "Y", "ESC": "M", "EXY": "L", "EXA": "K", "E0Y": "P", "E9V": "H", - "E9M": "W", "EJA": "C", "EUP": "T", "EZY": "G", "E9C": "Y", "EW6": "S", - "EXL": "W", "I2M": "I", "I4G": "G", "I58": "K", "IAM": "A", "IAR": "R", - "ICY": "C", "IEL": "K", "IGL": "G", "IIL": "I", "ILE": "I", "ILG": "E", - "ILM": "I", "ILX": "I", "ILY": "K", "IML": "I", "IOR": "R", "IPG": "G", - "IT1": "K", "IYR": "Y", "IZO": "M", "IC0": "G", "M0H": "C", "M2L": "K", - "M2S": "M", "M30": "G", "M3L": "K", "M3R": "K", "MA ": "A", "MAA": "A", - "MAI": "R", "MBQ": "Y", "MC1": "S", "MCL": "K", "MCS": "C", "MD3": "C", - "MD5": "C", "MD6": "G", "MDF": "Y", "ME0": "M", "MEA": "F", "MEG": "E", - "MEN": "N", "MEQ": "Q", "MET": "M", "MEU": "G", "MFN": "E", "MGG": "R", - "MGN": "Q", "MGY": "G", "MH1": "H", "MH6": "S", "MHL": "L", "MHO": "M", - "MHS": "H", "MHU": "F", "MIR": "S", "MIS": "S", "MK8": "L", "ML3": "K", - "MLE": "L", "MLL": "L", "MLY": "K", "MLZ": "K", "MME": "M", "MMO": "R", - "MNL": "L", "MNV": "V", "MP8": "P", "MPQ": "G", "MSA": "G", "MSE": "M", - "MSL": "M", "MSO": "M", "MT2": "M", "MTY": "Y", "MVA": "V", "MYK": "K", - "MYN": "R", "QCS": "C", "QIL": "I", "QMM": "Q", "QPA": "C", "QPH": "F", - "Q3P": "K", "QVA": "C", "QX7": "A", "Q2E": "W", "Q75": "M", "Q78": "F", - "QM8": "L", "QMB": "A", "QNQ": "C", "QNT": "C", "QNW": "C", "QO2": "C", - "QO5": "C", "QO8": "C", "QQ8": "Q", "U2X": "Y", "U3X": "F", "UF0": "S", - "UGY": "G", "UM1": "A", "UM2": "A", "UMA": "A", "UQK": "A", "UX8": "W", - "UXQ": "F", "YCM": "C", "YOF": "Y", "YPR": "P", "YPZ": "Y", "YTH": "T", - "Y1V": "L", "Y57": "K", "YHA": "K", "200": "F", "23F": "F", "23P": "A", - "26B": "T", "28X": "T", "2AG": "A", "2CO": "C", "2FM": "M", "2GX": "F", - "2HF": "H", "2JG": "S", "2KK": "K", "2KP": "K", "2LT": "Y", "2LU": "L", - "2ML": "L", "2MR": "R", "2MT": "P", "2OR": "R", "2P0": "P", "2QZ": "T", - "2R3": "Y", "2RA": "A", "2RX": "S", "2SO": "H", "2TY": "Y", "2VA": "V", - "2XA": "C", "2ZC": "S", "6CL": "K", "6CW": "W", "6GL": "A", "6HN": "K", - "60F": "C", "66D": "I", "6CV": "A", "6M6": "C", "6V1": "C", "6WK": "C", - "6Y9": "P", "6DN": "K", "DA2": "R", "DAB": "A", "DAH": "F", "DBS": "S", - "DBU": "T", "DBY": "Y", "DBZ": "A", "DC2": "C", "DDE": "H", "DDZ": "A", - "DI7": "Y", "DHA": "S", "DHN": "V", "DIR": "R", "DLS": "K", "DM0": "K", - "DMH": "N", "DMK": "D", "DNL": "K", "DNP": "A", "DNS": "K", "DNW": "A", - "DOH": "D", "DON": "L", "DP1": "R", "DPL": "P", "DPP": "A", "DPQ": "Y", - "DYS": "C", "D2T": "D", "DYA": "D", "DJD": "F", "DYJ": "P", "DV9": "E", - "H14": "F", "H1D": "M", "H5M": "P", "HAC": "A", "HAR": "R", "HBN": "H", - "HCM": "C", "HGY": "G", "HHI": "H", "HIA": "H", "HIC": "H", "HIP": "H", - "HIQ": "H", "HIS": "H", "HL2": "L", "HLU": "L", "HMR": "R", "HNC": "C", - "HOX": "F", "HPC": "F", "HPE": "F", "HPH": "F", "HPQ": "F", "HQA": "A", - "HR7": "R", "HRG": "R", "HRP": "W", "HS8": "H", "HS9": "H", "HSE": "S", - "HSK": "H", "HSL": "S", "HSO": "H", "HT7": "W", "HTI": "C", "HTR": "W", - "HV5": "A", "HVA": "V", "HY3": "P", "HYI": "M", "HYP": "P", "HZP": "P", - "HIX": "A", "HSV": "H", "HLY": "K", "HOO": "H", "H7V": "A", "L5P": "K", - "LRK": "K", "L3O": "L", "LA2": "K", "LAA": "D", "LAL": "A", "LBY": "K", - "LCK": "K", "LCX": "K", "LDH": "K", "LE1": "V", "LED": "L", "LEF": "L", - "LEH": "L", "LEM": "L", "LEN": "L", "LET": "K", "LEU": "L", "LEX": "L", - "LGY": "K", "LLO": "K", "LLP": "K", "LLY": "K", "LLZ": "K", "LME": "E", - "LMF": "K", "LMQ": "Q", "LNE": "L", "LNM": "L", "LP6": "K", "LPD": "P", - "LPG": "G", "LPS": "S", "LSO": "K", "LTR": "W", "LVG": "G", "LVN": "V", - "LWY": "P", "LYF": "K", "LYK": "K", "LYM": "K", "LYN": "K", "LYO": "K", - "LYP": "K", "LYR": "K", "LYS": "K", "LYU": "K", "LYX": "K", "LYZ": "K", - "LAY": "L", "LWI": "F", "LBZ": "K", "P1L": "C", "P2Q": "Y", "P2Y": "P", - "P3Q": "Y", "PAQ": "Y", "PAS": "D", "PAT": "W", "PBB": "C", "PBF": "F", - "PCA": "Q", "PCC": "P", "PCS": "F", "PE1": "K", "PEC": "C", "PF5": "F", - "PFF": "F", "PG1": "S", "PGY": "G", "PHA": "F", "PHD": "D", "PHE": "F", - "PHI": "F", "PHL": "F", "PHM": "F", "PKR": "P", "PLJ": "P", "PM3": "F", - "POM": "P", "PPN": "F", "PR3": "C", "PR4": "P", "PR7": "P", "PR9": "P", - "PRJ": "P", "PRK": "K", "PRO": "P", "PRS": "P", "PRV": "G", "PSA": "F", - "PSH": "H", "PTH": "Y", "PTM": "Y", "PTR": "Y", "PVH": "H", "PXU": "P", - "PYA": "A", "PYH": "K", "PYX": "C", "PH6": "P", "P9S": "C", "P5U": "S", - "POK": "R", "T0I": "Y", "T11": "F", "TAV": "D", "TBG": "V", "TBM": "T", - "TCQ": "Y", "TCR": "W", "TEF": "F", "TFQ": "F", "TH5": "T", "TH6": "T", - "THC": "T", "THR": "T", "THZ": "R", "TIH": "A", "TIS": "S", "TLY": "K", - "TMB": "T", "TMD": "T", "TNB": "C", "TNR": "S", "TNY": "T", "TOQ": "W", - "TOX": "W", "TPJ": "P", "TPK": "P", "TPL": "W", "TPO": "T", "TPQ": "Y", - "TQI": "W", "TQQ": "W", "TQZ": "C", "TRF": "W", "TRG": "K", "TRN": "W", - "TRO": "W", "TRP": "W", "TRQ": "W", "TRW": "W", "TRX": "W", "TRY": "W", - "TS9": "I", "TSY": "C", "TTQ": "W", "TTS": "Y", "TXY": "Y", "TY1": "Y", - "TY2": "Y", "TY3": "Y", "TY5": "Y", "TY8": "Y", "TY9": "Y", "TYB": "Y", - "TYC": "Y", "TYE": "Y", "TYI": "Y", "TYJ": "Y", "TYN": "Y", "TYO": "Y", - "TYQ": "Y", "TYR": "Y", "TYS": "Y", "TYT": "Y", "TYW": "Y", "TYY": "Y", - "T8L": "T", "T9E": "T", "TNQ": "W", "TSQ": "F", "TGH": "W", "X2W": "E", - "XCN": "C", "XPR": "P", "XSN": "N", "XW1": "A", "XX1": "K", "XYC": "A", - "XA6": "F", "11Q": "P", "11W": "E", "12L": "P", "12X": "P", "12Y": "P", - "143": "C", "1AC": "A", "1L1": "A", "1OP": "Y", "1PA": "F", "1PI": "A", - "1TQ": "W", "1TY": "Y", "1X6": "S", "56A": "H", "5AB": "A", "5CS": "C", - "5CW": "W", "5HP": "E", "5OH": "A", "5PG": "G", "51T": "Y", "54C": "W", - "5CR": "F", "5CT": "K", "5FQ": "A", "5GM": "I", "5JP": "S", "5T3": "K", - "5MW": "K", "5OW": "K", "5R5": "S", "5VV": "N", "5XU": "A", "55I": "F", - "999": "D", "9DN": "N", "9NE": "E", "9NF": "F", "9NR": "R", "9NV": "V", - "9E7": "K", "9KP": "K", "9WV": "A", "9TR": "K", "9TU": "K", "9TX": "K", - "9U0": "K", "9IJ": "F", "B1F": "F", "B27": "T", "B2A": "A", "B2F": "F", - "B2I": "I", "B2V": "V", "B3A": "A", "B3D": "D", "B3E": "E", "B3K": "K", - "B3U": "H", "B3X": "N", "B3Y": "Y", "BB6": "C", "BB7": "C", "BB8": "F", - "BB9": "C", "BBC": "C", "BCS": "C", "BCX": "C", "BFD": "D", "BG1": "S", - "BH2": "D", "BHD": "D", "BIF": "F", "BIU": "I", "BL2": "L", "BLE": "L", - "BLY": "K", "BMT": "T", "BNN": "F", "BOR": "R", "BP5": "A", "BPE": "C", - "BSE": "S", "BTA": "L", "BTC": "C", "BTK": "K", "BTR": "W", "BUC": "C", - "BUG": "V", "BYR": "Y", "BWV": "R", "BWB": "S", "BXT": "S", "F2F": "F", - "F2Y": "Y", "FAK": "K", "FB5": "A", "FB6": "A", "FC0": "F", "FCL": "F", - "FDL": "K", "FFM": "C", "FGL": "G", "FGP": "S", "FH7": "K", "FHL": "K", - "FHO": "K", "FIO": "R", "FLA": "A", "FLE": "L", "FLT": "Y", "FME": "M", - "FOE": "C", "FP9": "P", "FPK": "P", "FT6": "W", "FTR": "W", "FTY": "Y", - "FVA": "V", "FZN": "K", "FY3": "Y", "F7W": "W", "FY2": "Y", "FQA": "K", - "F7Q": "Y", "FF9": "K", "FL6": "D", "JJJ": "C", "JJK": "C", "JJL": "C", - "JLP": "K", "J3D": "C", "J9Y": "R", "J8W": "S", "JKH": "P", "N10": "S", - "N7P": "P", "NA8": "A", "NAL": "A", "NAM": "A", "NBQ": "Y", "NC1": "S", - "NCB": "A", "NEM": "H", "NEP": "H", "NFA": "F", "NIY": "Y", "NLB": "L", - "NLE": "L", "NLN": "L", "NLO": "L", "NLP": "L", "NLQ": "Q", "NLY": "G", - "NMC": "G", "NMM": "R", "NNH": "R", "NOT": "L", "NPH": "C", "NPI": "A", - "NTR": "Y", "NTY": "Y", "NVA": "V", "NWD": "A", "NYB": "C", "NYS": "C", - "NZH": "H", "N80": "P", "NZC": "T", "NLW": "L", "N0A": "F", "N9P": "A", - "N65": "K", "R1A": "C", "R4K": "W", "RE0": "W", "RE3": "W", "RGL": "R", - "RGP": "E", "RT0": "P", "RVX": "S", "RZ4": "S", "RPI": "R", "RVJ": "A", - "VAD": "V", "VAF": "V", "VAH": "V", "VAI": "V", "VAL": "V", "VB1": "K", - "VH0": "P", "VR0": "R", "V44": "C", "V61": "F", "VPV": "K", "V5N": "H", - "V7T": "K", "Z01": "A", "Z3E": "T", "Z70": "H", "ZBZ": "C", "ZCL": "F", - "ZU0": "T", "ZYJ": "P", "ZYK": "P", "ZZD": "C", "ZZJ": "A", "ZIQ": "W", - "ZPO": "P", "ZDJ": "Y", "ZT1": "K", "30V": "C", "31Q": "C", "33S": "F", - "33W": "A", "34E": "V", "3AH": "H", "3BY": "P", "3CF": "F", "3CT": "Y", - "3GA": "A", "3GL": "E", "3MD": "D", "3MY": "Y", "3NF": "Y", "3O3": "E", - "3PX": "P", "3QN": "K", "3TT": "P", "3XH": "G", "3YM": "Y", "3WS": "A", - "3WX": "P", "3X9": "C", "3ZH": "H", "7JA": "I", "73C": "S", "73N": "R", - "73O": "Y", "73P": "K", "74P": "K", "7N8": "F", "7O5": "A", "7XC": "F", - "7ID": "D", "7OZ": "A", "C1S": "C", "C1T": "C", "C1X": "K", "C22": "A", - "C3Y": "C", "C4R": "C", "C5C": "C", "C6C": "C", "CAF": "C", "CAS": "C", - "CAY": "C", "CCS": "C", "CEA": "C", "CGA": "E", "CGU": "E", "CGV": "C", - "CHP": "G", "CIR": "R", "CLE": "L", "CLG": "K", "CLH": "K", "CME": "C", - "CMH": "C", "CML": "C", "CMT": "C", "CR5": "G", "CS0": "C", "CS1": "C", - "CS3": "C", "CS4": "C", "CSA": "C", "CSB": "C", "CSD": "C", "CSE": "C", - "CSJ": "C", "CSO": "C", "CSP": "C", "CSR": "C", "CSS": "C", "CSU": "C", - "CSW": "C", "CSX": "C", "CSZ": "C", "CTE": "W", "CTH": "T", "CWD": "A", - "CWR": "S", "CXM": "M", "CY0": "C", "CY1": "C", "CY3": "C", "CY4": "C", - "CYA": "C", "CYD": "C", "CYF": "C", "CYG": "C", "CYJ": "K", "CYM": "C", - "CYQ": "C", "CYR": "C", "CYS": "C", "CYW": "C", "CZ2": "C", "CZZ": "C", - "CG6": "C", "C1J": "R", "C4G": "R", "C67": "R", "C6D": "R", "CE7": "N", - "CZS": "A", "G01": "E", "G8M": "E", "GAU": "E", "GEE": "G", "GFT": "S", - "GHC": "E", "GHG": "Q", "GHW": "E", "GL3": "G", "GLH": "Q", "GLJ": "E", - "GLK": "E", "GLN": "Q", "GLQ": "E", "GLU": "E", "GLY": "G", "GLZ": "G", - "GMA": "E", "GME": "E", "GNC": "Q", "GPL": "K", "GSC": "G", "GSU": "E", - "GT9": "C", "GVL": "S", "G3M": "R", "G5G": "L", "G1X": "Y", "G8X": "P", - "K1R": "C", "KBE": "K", "KCX": "K", "KFP": "K", "KGC": "K", "KNB": "A", - "KOR": "M", "KPI": "K", "KPY": "K", "KST": "K", "KYN": "W", "KYQ": "K", - "KCR": "K", "KPF": "K", "K5L": "S", "KEO": "K", "KHB": "K", "KKD": "D", - "K5H": "C", "K7K": "S", "OAR": "R", "OAS": "S", "OBS": "K", "OCS": "C", - "OCY": "C", "OHI": "H", "OHS": "D", "OLD": "H", "OLT": "T", "OLZ": "S", - "OMH": "S", "OMT": "M", "OMX": "Y", "OMY": "Y", "ONH": "A", "ORN": "A", - "ORQ": "R", "OSE": "S", "OTH": "T", "OXX": "D", "OYL": "H", "O7A": "T", - "O7D": "W", "O7G": "V", "O2E": "S", "O6H": "W", "OZW": "F", "S12": "S", - "S1H": "S", "S2C": "C", "S2P": "A", "SAC": "S", "SAH": "C", "SAR": "G", - "SBG": "S", "SBL": "S", "SCH": "C", "SCS": "C", "SCY": "C", "SD4": "N", - "SDB": "S", "SDP": "S", "SEB": "S", "SEE": "S", "SEG": "A", "SEL": "S", - "SEM": "S", "SEN": "S", "SEP": "S", "SER": "S", "SET": "S", "SGB": "S", - "SHC": "C", "SHP": "G", "SHR": "K", "SIB": "C", "SLL": "K", "SLZ": "K", - "SMC": "C", "SME": "M", "SMF": "F", "SNC": "C", "SNN": "N", "SOY": "S", - "SRZ": "S", "STY": "Y", "SUN": "S", "SVA": "S", "SVV": "S", "SVW": "S", - "SVX": "S", "SVY": "S", "SVZ": "S", "SXE": "S", "SKH": "K", "SNM": "S", - "SNK": "H", "SWW": "S", "WFP": "F", "WLU": "L", "WPA": "F", "WRP": "W", - "WVL": "V", "02K": "A", "02L": "N", "02O": "A", "02Y": "A", "033": "V", - "037": "P", "03Y": "C", "04U": "P", "04V": "P", "05N": "P", "07O": "C", - "0A0": "D", "0A1": "Y", "0A2": "K", "0A8": "C", "0A9": "F", "0AA": "V", - "0AB": "V", "0AC": "G", "0AF": "W", "0AG": "L", "0AH": "S", "0AK": "D", - "0AR": "R", "0BN": "F", "0CS": "A", "0E5": "T", "0EA": "Y", "0FL": "A", - "0LF": "P", "0NC": "A", "0PR": "Y", "0QL": "C", "0TD": "D", "0UO": "W", - "0WZ": "Y", "0X9": "R", "0Y8": "P", "4AF": "F", "4AR": "R", "4AW": "W", - "4BF": "F", "4CF": "F", "4CY": "M", "4DP": "W", "4FB": "P", "4FW": "W", - "4HL": "Y", "4HT": "W", "4IN": "W", "4MM": "M", "4PH": "F", "4U7": "A", - "41H": "F", "41Q": "N", "42Y": "S", "432": "S", "45F": "P", "4AK": "K", - "4D4": "R", "4GJ": "C", "4KY": "P", "4L0": "P", "4LZ": "Y", "4N7": "P", - "4N8": "P", "4N9": "P", "4OG": "W", "4OU": "F", "4OV": "S", "4OZ": "S", - "4PQ": "W", "4SJ": "F", "4WQ": "A", "4HH": "S", "4HJ": "S", "4J4": "C", - "4J5": "R", "4II": "F", "4VI": "R", "823": "N", "8SP": "S", "8AY": "A", -} \ No newline at end of file + "A5N": "N", + "A8E": "V", + "A9D": "S", + "AA3": "A", + "AA4": "A", + "AAR": "R", + "ABA": "A", + "ACL": "R", + "AEA": "C", + "AEI": "D", + "AFA": "N", + "AGM": "R", + "AGQ": "Y", + "AGT": "C", + "AHB": "N", + "AHL": "R", + "AHO": "A", + "AHP": "A", + "AIB": "A", + "AKL": "D", + "AKZ": "D", + "ALA": "A", + "ALC": "A", + "ALM": "A", + "ALN": "A", + "ALO": "T", + "ALS": "A", + "ALT": "A", + "ALV": "A", + "ALY": "K", + "AME": "M", + "AN6": "L", + "AN8": "A", + "API": "K", + "APK": "K", + "AR2": "R", + "AR4": "E", + "AR7": "R", + "ARG": "R", + "ARM": "R", + "ARO": "R", + "AS7": "N", + "ASA": "D", + "ASB": "D", + "ASI": "D", + "ASK": "D", + "ASL": "D", + "ASN": "N", + "ASP": "D", + "ASQ": "D", + "AYA": "A", + "AZH": "A", + "AZK": "K", + "AZS": "S", + "AZY": "Y", + "AVJ": "H", + "A30": "Y", + "A3U": "F", + "ECC": "Q", + "ECX": "C", + "EFC": "C", + "EHP": "F", + "ELY": "K", + "EME": "E", + "EPM": "M", + "EPQ": "Q", + "ESB": "Y", + "ESC": "M", + "EXY": "L", + "EXA": "K", + "E0Y": "P", + "E9V": "H", + "E9M": "W", + "EJA": "C", + "EUP": "T", + "EZY": "G", + "E9C": "Y", + "EW6": "S", + "EXL": "W", + "I2M": "I", + "I4G": "G", + "I58": "K", + "IAM": "A", + "IAR": "R", + "ICY": "C", + "IEL": "K", + "IGL": "G", + "IIL": "I", + "ILE": "I", + "ILG": "E", + "ILM": "I", + "ILX": "I", + "ILY": "K", + "IML": "I", + "IOR": "R", + "IPG": "G", + "IT1": "K", + "IYR": "Y", + "IZO": "M", + "IC0": "G", + "M0H": "C", + "M2L": "K", + "M2S": "M", + "M30": "G", + "M3L": "K", + "M3R": "K", + "MA ": "A", + "MAA": "A", + "MAI": "R", + "MBQ": "Y", + "MC1": "S", + "MCL": "K", + "MCS": "C", + "MD3": "C", + "MD5": "C", + "MD6": "G", + "MDF": "Y", + "ME0": "M", + "MEA": "F", + "MEG": "E", + "MEN": "N", + "MEQ": "Q", + "MET": "M", + "MEU": "G", + "MFN": "E", + "MGG": "R", + "MGN": "Q", + "MGY": "G", + "MH1": "H", + "MH6": "S", + "MHL": "L", + "MHO": "M", + "MHS": "H", + "MHU": "F", + "MIR": "S", + "MIS": "S", + "MK8": "L", + "ML3": "K", + "MLE": "L", + "MLL": "L", + "MLY": "K", + "MLZ": "K", + "MME": "M", + "MMO": "R", + "MNL": "L", + "MNV": "V", + "MP8": "P", + "MPQ": "G", + "MSA": "G", + "MSE": "M", + "MSL": "M", + "MSO": "M", + "MT2": "M", + "MTY": "Y", + "MVA": "V", + "MYK": "K", + "MYN": "R", + "QCS": "C", + "QIL": "I", + "QMM": "Q", + "QPA": "C", + "QPH": "F", + "Q3P": "K", + "QVA": "C", + "QX7": "A", + "Q2E": "W", + "Q75": "M", + "Q78": "F", + "QM8": "L", + "QMB": "A", + "QNQ": "C", + "QNT": "C", + "QNW": "C", + "QO2": "C", + "QO5": "C", + "QO8": "C", + "QQ8": "Q", + "U2X": "Y", + "U3X": "F", + "UF0": "S", + "UGY": "G", + "UM1": "A", + "UM2": "A", + "UMA": "A", + "UQK": "A", + "UX8": "W", + "UXQ": "F", + "YCM": "C", + "YOF": "Y", + "YPR": "P", + "YPZ": "Y", + "YTH": "T", + "Y1V": "L", + "Y57": "K", + "YHA": "K", + "200": "F", + "23F": "F", + "23P": "A", + "26B": "T", + "28X": "T", + "2AG": "A", + "2CO": "C", + "2FM": "M", + "2GX": "F", + "2HF": "H", + "2JG": "S", + "2KK": "K", + "2KP": "K", + "2LT": "Y", + "2LU": "L", + "2ML": "L", + "2MR": "R", + "2MT": "P", + "2OR": "R", + "2P0": "P", + "2QZ": "T", + "2R3": "Y", + "2RA": "A", + "2RX": "S", + "2SO": "H", + "2TY": "Y", + "2VA": "V", + "2XA": "C", + "2ZC": "S", + "6CL": "K", + "6CW": "W", + "6GL": "A", + "6HN": "K", + "60F": "C", + "66D": "I", + "6CV": "A", + "6M6": "C", + "6V1": "C", + "6WK": "C", + "6Y9": "P", + "6DN": "K", + "DA2": "R", + "DAB": "A", + "DAH": "F", + "DBS": "S", + "DBU": "T", + "DBY": "Y", + "DBZ": "A", + "DC2": "C", + "DDE": "H", + "DDZ": "A", + "DI7": "Y", + "DHA": "S", + "DHN": "V", + "DIR": "R", + "DLS": "K", + "DM0": "K", + "DMH": "N", + "DMK": "D", + "DNL": "K", + "DNP": "A", + "DNS": "K", + "DNW": "A", + "DOH": "D", + "DON": "L", + "DP1": "R", + "DPL": "P", + "DPP": "A", + "DPQ": "Y", + "DYS": "C", + "D2T": "D", + "DYA": "D", + "DJD": "F", + "DYJ": "P", + "DV9": "E", + "H14": "F", + "H1D": "M", + "H5M": "P", + "HAC": "A", + "HAR": "R", + "HBN": "H", + "HCM": "C", + "HGY": "G", + "HHI": "H", + "HIA": "H", + "HIC": "H", + "HIP": "H", + "HIQ": "H", + "HIS": "H", + "HL2": "L", + "HLU": "L", + "HMR": "R", + "HNC": "C", + "HOX": "F", + "HPC": "F", + "HPE": "F", + "HPH": "F", + "HPQ": "F", + "HQA": "A", + "HR7": "R", + "HRG": "R", + "HRP": "W", + "HS8": "H", + "HS9": "H", + "HSE": "S", + "HSK": "H", + "HSL": "S", + "HSO": "H", + "HT7": "W", + "HTI": "C", + "HTR": "W", + "HV5": "A", + "HVA": "V", + "HY3": "P", + "HYI": "M", + "HYP": "P", + "HZP": "P", + "HIX": "A", + "HSV": "H", + "HLY": "K", + "HOO": "H", + "H7V": "A", + "L5P": "K", + "LRK": "K", + "L3O": "L", + "LA2": "K", + "LAA": "D", + "LAL": "A", + "LBY": "K", + "LCK": "K", + "LCX": "K", + "LDH": "K", + "LE1": "V", + "LED": "L", + "LEF": "L", + "LEH": "L", + "LEM": "L", + "LEN": "L", + "LET": "K", + "LEU": "L", + "LEX": "L", + "LGY": "K", + "LLO": "K", + "LLP": "K", + "LLY": "K", + "LLZ": "K", + "LME": "E", + "LMF": "K", + "LMQ": "Q", + "LNE": "L", + "LNM": "L", + "LP6": "K", + "LPD": "P", + "LPG": "G", + "LPS": "S", + "LSO": "K", + "LTR": "W", + "LVG": "G", + "LVN": "V", + "LWY": "P", + "LYF": "K", + "LYK": "K", + "LYM": "K", + "LYN": "K", + "LYO": "K", + "LYP": "K", + "LYR": "K", + "LYS": "K", + "LYU": "K", + "LYX": "K", + "LYZ": "K", + "LAY": "L", + "LWI": "F", + "LBZ": "K", + "P1L": "C", + "P2Q": "Y", + "P2Y": "P", + "P3Q": "Y", + "PAQ": "Y", + "PAS": "D", + "PAT": "W", + "PBB": "C", + "PBF": "F", + "PCA": "Q", + "PCC": "P", + "PCS": "F", + "PE1": "K", + "PEC": "C", + "PF5": "F", + "PFF": "F", + "PG1": "S", + "PGY": "G", + "PHA": "F", + "PHD": "D", + "PHE": "F", + "PHI": "F", + "PHL": "F", + "PHM": "F", + "PKR": "P", + "PLJ": "P", + "PM3": "F", + "POM": "P", + "PPN": "F", + "PR3": "C", + "PR4": "P", + "PR7": "P", + "PR9": "P", + "PRJ": "P", + "PRK": "K", + "PRO": "P", + "PRS": "P", + "PRV": "G", + "PSA": "F", + "PSH": "H", + "PTH": "Y", + "PTM": "Y", + "PTR": "Y", + "PVH": "H", + "PXU": "P", + "PYA": "A", + "PYH": "K", + "PYX": "C", + "PH6": "P", + "P9S": "C", + "P5U": "S", + "POK": "R", + "T0I": "Y", + "T11": "F", + "TAV": "D", + "TBG": "V", + "TBM": "T", + "TCQ": "Y", + "TCR": "W", + "TEF": "F", + "TFQ": "F", + "TH5": "T", + "TH6": "T", + "THC": "T", + "THR": "T", + "THZ": "R", + "TIH": "A", + "TIS": "S", + "TLY": "K", + "TMB": "T", + "TMD": "T", + "TNB": "C", + "TNR": "S", + "TNY": "T", + "TOQ": "W", + "TOX": "W", + "TPJ": "P", + "TPK": "P", + "TPL": "W", + "TPO": "T", + "TPQ": "Y", + "TQI": "W", + "TQQ": "W", + "TQZ": "C", + "TRF": "W", + "TRG": "K", + "TRN": "W", + "TRO": "W", + "TRP": "W", + "TRQ": "W", + "TRW": "W", + "TRX": "W", + "TRY": "W", + "TS9": "I", + "TSY": "C", + "TTQ": "W", + "TTS": "Y", + "TXY": "Y", + "TY1": "Y", + "TY2": "Y", + "TY3": "Y", + "TY5": "Y", + "TY8": "Y", + "TY9": "Y", + "TYB": "Y", + "TYC": "Y", + "TYE": "Y", + "TYI": "Y", + "TYJ": "Y", + "TYN": "Y", + "TYO": "Y", + "TYQ": "Y", + "TYR": "Y", + "TYS": "Y", + "TYT": "Y", + "TYW": "Y", + "TYY": "Y", + "T8L": "T", + "T9E": "T", + "TNQ": "W", + "TSQ": "F", + "TGH": "W", + "X2W": "E", + "XCN": "C", + "XPR": "P", + "XSN": "N", + "XW1": "A", + "XX1": "K", + "XYC": "A", + "XA6": "F", + "11Q": "P", + "11W": "E", + "12L": "P", + "12X": "P", + "12Y": "P", + "143": "C", + "1AC": "A", + "1L1": "A", + "1OP": "Y", + "1PA": "F", + "1PI": "A", + "1TQ": "W", + "1TY": "Y", + "1X6": "S", + "56A": "H", + "5AB": "A", + "5CS": "C", + "5CW": "W", + "5HP": "E", + "5OH": "A", + "5PG": "G", + "51T": "Y", + "54C": "W", + "5CR": "F", + "5CT": "K", + "5FQ": "A", + "5GM": "I", + "5JP": "S", + "5T3": "K", + "5MW": "K", + "5OW": "K", + "5R5": "S", + "5VV": "N", + "5XU": "A", + "55I": "F", + "999": "D", + "9DN": "N", + "9NE": "E", + "9NF": "F", + "9NR": "R", + "9NV": "V", + "9E7": "K", + "9KP": "K", + "9WV": "A", + "9TR": "K", + "9TU": "K", + "9TX": "K", + "9U0": "K", + "9IJ": "F", + "B1F": "F", + "B27": "T", + "B2A": "A", + "B2F": "F", + "B2I": "I", + "B2V": "V", + "B3A": "A", + "B3D": "D", + "B3E": "E", + "B3K": "K", + "B3U": "H", + "B3X": "N", + "B3Y": "Y", + "BB6": "C", + "BB7": "C", + "BB8": "F", + "BB9": "C", + "BBC": "C", + "BCS": "C", + "BCX": "C", + "BFD": "D", + "BG1": "S", + "BH2": "D", + "BHD": "D", + "BIF": "F", + "BIU": "I", + "BL2": "L", + "BLE": "L", + "BLY": "K", + "BMT": "T", + "BNN": "F", + "BOR": "R", + "BP5": "A", + "BPE": "C", + "BSE": "S", + "BTA": "L", + "BTC": "C", + "BTK": "K", + "BTR": "W", + "BUC": "C", + "BUG": "V", + "BYR": "Y", + "BWV": "R", + "BWB": "S", + "BXT": "S", + "F2F": "F", + "F2Y": "Y", + "FAK": "K", + "FB5": "A", + "FB6": "A", + "FC0": "F", + "FCL": "F", + "FDL": "K", + "FFM": "C", + "FGL": "G", + "FGP": "S", + "FH7": "K", + "FHL": "K", + "FHO": "K", + "FIO": "R", + "FLA": "A", + "FLE": "L", + "FLT": "Y", + "FME": "M", + "FOE": "C", + "FP9": "P", + "FPK": "P", + "FT6": "W", + "FTR": "W", + "FTY": "Y", + "FVA": "V", + "FZN": "K", + "FY3": "Y", + "F7W": "W", + "FY2": "Y", + "FQA": "K", + "F7Q": "Y", + "FF9": "K", + "FL6": "D", + "JJJ": "C", + "JJK": "C", + "JJL": "C", + "JLP": "K", + "J3D": "C", + "J9Y": "R", + "J8W": "S", + "JKH": "P", + "N10": "S", + "N7P": "P", + "NA8": "A", + "NAL": "A", + "NAM": "A", + "NBQ": "Y", + "NC1": "S", + "NCB": "A", + "NEM": "H", + "NEP": "H", + "NFA": "F", + "NIY": "Y", + "NLB": "L", + "NLE": "L", + "NLN": "L", + "NLO": "L", + "NLP": "L", + "NLQ": "Q", + "NLY": "G", + "NMC": "G", + "NMM": "R", + "NNH": "R", + "NOT": "L", + "NPH": "C", + "NPI": "A", + "NTR": "Y", + "NTY": "Y", + "NVA": "V", + "NWD": "A", + "NYB": "C", + "NYS": "C", + "NZH": "H", + "N80": "P", + "NZC": "T", + "NLW": "L", + "N0A": "F", + "N9P": "A", + "N65": "K", + "R1A": "C", + "R4K": "W", + "RE0": "W", + "RE3": "W", + "RGL": "R", + "RGP": "E", + "RT0": "P", + "RVX": "S", + "RZ4": "S", + "RPI": "R", + "RVJ": "A", + "VAD": "V", + "VAF": "V", + "VAH": "V", + "VAI": "V", + "VAL": "V", + "VB1": "K", + "VH0": "P", + "VR0": "R", + "V44": "C", + "V61": "F", + "VPV": "K", + "V5N": "H", + "V7T": "K", + "Z01": "A", + "Z3E": "T", + "Z70": "H", + "ZBZ": "C", + "ZCL": "F", + "ZU0": "T", + "ZYJ": "P", + "ZYK": "P", + "ZZD": "C", + "ZZJ": "A", + "ZIQ": "W", + "ZPO": "P", + "ZDJ": "Y", + "ZT1": "K", + "30V": "C", + "31Q": "C", + "33S": "F", + "33W": "A", + "34E": "V", + "3AH": "H", + "3BY": "P", + "3CF": "F", + "3CT": "Y", + "3GA": "A", + "3GL": "E", + "3MD": "D", + "3MY": "Y", + "3NF": "Y", + "3O3": "E", + "3PX": "P", + "3QN": "K", + "3TT": "P", + "3XH": "G", + "3YM": "Y", + "3WS": "A", + "3WX": "P", + "3X9": "C", + "3ZH": "H", + "7JA": "I", + "73C": "S", + "73N": "R", + "73O": "Y", + "73P": "K", + "74P": "K", + "7N8": "F", + "7O5": "A", + "7XC": "F", + "7ID": "D", + "7OZ": "A", + "C1S": "C", + "C1T": "C", + "C1X": "K", + "C22": "A", + "C3Y": "C", + "C4R": "C", + "C5C": "C", + "C6C": "C", + "CAF": "C", + "CAS": "C", + "CAY": "C", + "CCS": "C", + "CEA": "C", + "CGA": "E", + "CGU": "E", + "CGV": "C", + "CHP": "G", + "CIR": "R", + "CLE": "L", + "CLG": "K", + "CLH": "K", + "CME": "C", + "CMH": "C", + "CML": "C", + "CMT": "C", + "CR5": "G", + "CS0": "C", + "CS1": "C", + "CS3": "C", + "CS4": "C", + "CSA": "C", + "CSB": "C", + "CSD": "C", + "CSE": "C", + "CSJ": "C", + "CSO": "C", + "CSP": "C", + "CSR": "C", + "CSS": "C", + "CSU": "C", + "CSW": "C", + "CSX": "C", + "CSZ": "C", + "CTE": "W", + "CTH": "T", + "CWD": "A", + "CWR": "S", + "CXM": "M", + "CY0": "C", + "CY1": "C", + "CY3": "C", + "CY4": "C", + "CYA": "C", + "CYD": "C", + "CYF": "C", + "CYG": "C", + "CYJ": "K", + "CYM": "C", + "CYQ": "C", + "CYR": "C", + "CYS": "C", + "CYW": "C", + "CZ2": "C", + "CZZ": "C", + "CG6": "C", + "C1J": "R", + "C4G": "R", + "C67": "R", + "C6D": "R", + "CE7": "N", + "CZS": "A", + "G01": "E", + "G8M": "E", + "GAU": "E", + "GEE": "G", + "GFT": "S", + "GHC": "E", + "GHG": "Q", + "GHW": "E", + "GL3": "G", + "GLH": "Q", + "GLJ": "E", + "GLK": "E", + "GLN": "Q", + "GLQ": "E", + "GLU": "E", + "GLY": "G", + "GLZ": "G", + "GMA": "E", + "GME": "E", + "GNC": "Q", + "GPL": "K", + "GSC": "G", + "GSU": "E", + "GT9": "C", + "GVL": "S", + "G3M": "R", + "G5G": "L", + "G1X": "Y", + "G8X": "P", + "K1R": "C", + "KBE": "K", + "KCX": "K", + "KFP": "K", + "KGC": "K", + "KNB": "A", + "KOR": "M", + "KPI": "K", + "KPY": "K", + "KST": "K", + "KYN": "W", + "KYQ": "K", + "KCR": "K", + "KPF": "K", + "K5L": "S", + "KEO": "K", + "KHB": "K", + "KKD": "D", + "K5H": "C", + "K7K": "S", + "OAR": "R", + "OAS": "S", + "OBS": "K", + "OCS": "C", + "OCY": "C", + "OHI": "H", + "OHS": "D", + "OLD": "H", + "OLT": "T", + "OLZ": "S", + "OMH": "S", + "OMT": "M", + "OMX": "Y", + "OMY": "Y", + "ONH": "A", + "ORN": "A", + "ORQ": "R", + "OSE": "S", + "OTH": "T", + "OXX": "D", + "OYL": "H", + "O7A": "T", + "O7D": "W", + "O7G": "V", + "O2E": "S", + "O6H": "W", + "OZW": "F", + "S12": "S", + "S1H": "S", + "S2C": "C", + "S2P": "A", + "SAC": "S", + "SAH": "C", + "SAR": "G", + "SBG": "S", + "SBL": "S", + "SCH": "C", + "SCS": "C", + "SCY": "C", + "SD4": "N", + "SDB": "S", + "SDP": "S", + "SEB": "S", + "SEE": "S", + "SEG": "A", + "SEL": "S", + "SEM": "S", + "SEN": "S", + "SEP": "S", + "SER": "S", + "SET": "S", + "SGB": "S", + "SHC": "C", + "SHP": "G", + "SHR": "K", + "SIB": "C", + "SLL": "K", + "SLZ": "K", + "SMC": "C", + "SME": "M", + "SMF": "F", + "SNC": "C", + "SNN": "N", + "SOY": "S", + "SRZ": "S", + "STY": "Y", + "SUN": "S", + "SVA": "S", + "SVV": "S", + "SVW": "S", + "SVX": "S", + "SVY": "S", + "SVZ": "S", + "SXE": "S", + "SKH": "K", + "SNM": "S", + "SNK": "H", + "SWW": "S", + "WFP": "F", + "WLU": "L", + "WPA": "F", + "WRP": "W", + "WVL": "V", + "02K": "A", + "02L": "N", + "02O": "A", + "02Y": "A", + "033": "V", + "037": "P", + "03Y": "C", + "04U": "P", + "04V": "P", + "05N": "P", + "07O": "C", + "0A0": "D", + "0A1": "Y", + "0A2": "K", + "0A8": "C", + "0A9": "F", + "0AA": "V", + "0AB": "V", + "0AC": "G", + "0AF": "W", + "0AG": "L", + "0AH": "S", + "0AK": "D", + "0AR": "R", + "0BN": "F", + "0CS": "A", + "0E5": "T", + "0EA": "Y", + "0FL": "A", + "0LF": "P", + "0NC": "A", + "0PR": "Y", + "0QL": "C", + "0TD": "D", + "0UO": "W", + "0WZ": "Y", + "0X9": "R", + "0Y8": "P", + "4AF": "F", + "4AR": "R", + "4AW": "W", + "4BF": "F", + "4CF": "F", + "4CY": "M", + "4DP": "W", + "4FB": "P", + "4FW": "W", + "4HL": "Y", + "4HT": "W", + "4IN": "W", + "4MM": "M", + "4PH": "F", + "4U7": "A", + "41H": "F", + "41Q": "N", + "42Y": "S", + "432": "S", + "45F": "P", + "4AK": "K", + "4D4": "R", + "4GJ": "C", + "4KY": "P", + "4L0": "P", + "4LZ": "Y", + "4N7": "P", + "4N8": "P", + "4N9": "P", + "4OG": "W", + "4OU": "F", + "4OV": "S", + "4OZ": "S", + "4PQ": "W", + "4SJ": "F", + "4WQ": "A", + "4HH": "S", + "4HJ": "S", + "4J4": "C", + "4J5": "R", + "4II": "F", + "4VI": "R", + "823": "N", + "8SP": "S", + "8AY": "A", +} diff --git a/biopandas/pdb/engines.py b/biopandas/pdb/engines.py index 6f14b8e..886b459 100644 --- a/biopandas/pdb/engines.py +++ b/biopandas/pdb/engines.py @@ -62,32 +62,72 @@ } pdb_atomdict = [ - {"id": "record_name", "line": [0, 6], "type": str, "strf": lambda x: "%-6s" % x}, + { + "id": "record_name", + "line": [0, 6], + "type": str, + "strf": lambda x: "%-6s" % x, + }, { "id": "atom_number", "line": [6, 11], "type": int, "strf": lambda x: "%+5s" % str(x), }, - {"id": "blank_1", "line": [11, 12], "type": str, "strf": lambda x: "%-1s" % x}, + { + "id": "blank_1", + "line": [11, 12], + "type": str, + "strf": lambda x: "%-1s" % x, + }, { "id": "atom_name", "line": [12, 16], "type": str, "strf": lambda x: " %-3s" % x if len(x) < 4 else "%-4s" % x, }, - {"id": "alt_loc", "line": [16, 17], "type": str, "strf": lambda x: "%-1s" % x}, - {"id": "residue_name", "line": [17, 20], "type": str, "strf": lambda x: "%+3s" % x}, - {"id": "blank_2", "line": [20, 21], "type": str, "strf": lambda x: "%-1s" % x}, - {"id": "chain_id", "line": [21, 22], "type": str, "strf": lambda x: "%-1s" % x}, + { + "id": "alt_loc", + "line": [16, 17], + "type": str, + "strf": lambda x: "%-1s" % x, + }, + { + "id": "residue_name", + "line": [17, 20], + "type": str, + "strf": lambda x: "%+3s" % x, + }, + { + "id": "blank_2", + "line": [20, 21], + "type": str, + "strf": lambda x: "%-1s" % x, + }, + { + "id": "chain_id", + "line": [21, 22], + "type": str, + "strf": lambda x: "%-1s" % x, + }, { "id": "residue_number", "line": [22, 26], "type": int, "strf": lambda x: "%+4s" % str(x), }, - {"id": "insertion", "line": [26, 27], "type": str, "strf": lambda x: "%-1s" % x}, - {"id": "blank_3", "line": [27, 30], "type": str, "strf": lambda x: "%-3s" % x}, + { + "id": "insertion", + "line": [26, 27], + "type": str, + "strf": lambda x: "%-1s" % x, + }, + { + "id": "blank_3", + "line": [27, 30], + "type": str, + "strf": lambda x: "%-3s" % x, + }, { "id": "x_coord", "line": [30, 38], @@ -116,10 +156,24 @@ "id": "b_factor", "line": [60, 66], "type": float, - "strf": lambda x: ("%+6.2f" % x).replace("+", " ") if len(str(int(x))) < 3 else ("%+6.2f" % x).replace("+", ""), + "strf": lambda x: ( + ("%+6.2f" % x).replace("+", " ") + if len(str(int(x))) < 3 + else ("%+6.2f" % x).replace("+", "") + ), + }, + { + "id": "blank_4", + "line": [66, 72], + "type": str, + "strf": lambda x: "%-7s" % x, + }, + { + "id": "segment_id", + "line": [72, 76], + "type": str, + "strf": lambda x: "%-3s" % x, }, - {"id": "blank_4", "line": [66, 72], "type": str, "strf": lambda x: "%-7s" % x}, - {"id": "segment_id", "line": [72, 76], "type": str, "strf": lambda x: "%-3s" % x}, { "id": "element_symbol", "line": [76, 78], @@ -130,45 +184,122 @@ "id": "charge", "line": [78, 80], "type": float, - "strf": lambda x: (("%+2.1f" % x).replace("+", " ") if pd.notnull(x) else ""), + "strf": lambda x: ( + ("%+2.1f" % x).replace("+", " ") if pd.notnull(x) else "" + ), }, ] pdb_anisoudict = [ - {"id": "record_name", "line": [0, 6], "type": str, "strf": lambda x: "%-6s" % x}, + { + "id": "record_name", + "line": [0, 6], + "type": str, + "strf": lambda x: "%-6s" % x, + }, { "id": "atom_number", "line": [6, 11], "type": int, "strf": lambda x: "%+5s" % str(x), }, - {"id": "blank_1", "line": [11, 12], "type": str, "strf": lambda x: "%-1s" % x}, + { + "id": "blank_1", + "line": [11, 12], + "type": str, + "strf": lambda x: "%-1s" % x, + }, { "id": "atom_name", "line": [12, 16], "type": str, "strf": lambda x: (" %-3s" % x if len(x) < 4 else "%-4s" % x), }, - {"id": "alt_loc", "line": [16, 17], "type": str, "strf": lambda x: "%-1s" % x}, - {"id": "residue_name", "line": [17, 20], "type": str, "strf": lambda x: "%+3s" % x}, - {"id": "blank_2", "line": [20, 21], "type": str, "strf": lambda x: "%-1s" % x}, - {"id": "chain_id", "line": [21, 22], "type": str, "strf": lambda x: "%-1s" % x}, + { + "id": "alt_loc", + "line": [16, 17], + "type": str, + "strf": lambda x: "%-1s" % x, + }, + { + "id": "residue_name", + "line": [17, 20], + "type": str, + "strf": lambda x: "%+3s" % x, + }, + { + "id": "blank_2", + "line": [20, 21], + "type": str, + "strf": lambda x: "%-1s" % x, + }, + { + "id": "chain_id", + "line": [21, 22], + "type": str, + "strf": lambda x: "%-1s" % x, + }, { "id": "residue_number", "line": [22, 26], "type": int, "strf": lambda x: "%+4s" % str(x), }, - {"id": "insertion", "line": [26, 27], "type": str, "strf": lambda x: "%-1s" % x}, - {"id": "blank_3", "line": [27, 28], "type": str, "strf": lambda x: "%-1s" % x}, - {"id": "U(1,1)", "line": [28, 35], "type": int, "strf": lambda x: "%+7s" % str(x)}, - {"id": "U(2,2)", "line": [35, 42], "type": int, "strf": lambda x: "%+7s" % str(x)}, - {"id": "U(3,3)", "line": [42, 49], "type": int, "strf": lambda x: "%+7s" % str(x)}, - {"id": "U(1,2)", "line": [49, 56], "type": int, "strf": lambda x: "%+7s" % str(x)}, - {"id": "U(1,3)", "line": [56, 63], "type": int, "strf": lambda x: "%+7s" % str(x)}, - {"id": "U(2,3)", "line": [63, 70], "type": int, "strf": lambda x: "%+7s" % str(x)}, - {"id": "blank_4", "line": [70, 76], "type": str, "strf": lambda x: "%+6s" % x}, + { + "id": "insertion", + "line": [26, 27], + "type": str, + "strf": lambda x: "%-1s" % x, + }, + { + "id": "blank_3", + "line": [27, 28], + "type": str, + "strf": lambda x: "%-1s" % x, + }, + { + "id": "U(1,1)", + "line": [28, 35], + "type": int, + "strf": lambda x: "%+7s" % str(x), + }, + { + "id": "U(2,2)", + "line": [35, 42], + "type": int, + "strf": lambda x: "%+7s" % str(x), + }, + { + "id": "U(3,3)", + "line": [42, 49], + "type": int, + "strf": lambda x: "%+7s" % str(x), + }, + { + "id": "U(1,2)", + "line": [49, 56], + "type": int, + "strf": lambda x: "%+7s" % str(x), + }, + { + "id": "U(1,3)", + "line": [56, 63], + "type": int, + "strf": lambda x: "%+7s" % str(x), + }, + { + "id": "U(2,3)", + "line": [63, 70], + "type": int, + "strf": lambda x: "%+7s" % str(x), + }, + { + "id": "blank_4", + "line": [70, 76], + "type": str, + "strf": lambda x: "%+6s" % x, + }, { "id": "element_symbol", "line": [76, 78], @@ -179,7 +310,9 @@ "id": "charge", "line": [78, 80], "type": float, - "strf": lambda x: (("%+2.1f" % x).replace("+", " ") if pd.notnull(x) else ""), + "strf": lambda x: ( + ("%+2.1f" % x).replace("+", " ") if pd.notnull(x) else "" + ), }, ] @@ -190,7 +323,12 @@ "type": str, "strf": lambda x: "%s%s" % (x, " " * (6 - len(x))), }, - {"id": "entry", "line": [6, -2], "type": str, "strf": lambda x: x.rstrip()}, + { + "id": "entry", + "line": [6, -2], + "type": str, + "strf": lambda x: x.rstrip(), + }, ] pdb_records = { diff --git a/biopandas/pdb/tests/test_amino3to1.py b/biopandas/pdb/tests/test_amino3to1.py index d8ac816..cabc9e4 100644 --- a/biopandas/pdb/tests/test_amino3to1.py +++ b/biopandas/pdb/tests/test_amino3to1.py @@ -4,13 +4,16 @@ # Project Website: http://rasbt.github.io/biopandas/ # Code Repository: https://github.com/rasbt/biopandas +import os + import numpy as np from biopandas.pdb import PandasPdb -import os def test_defaults(): - TESTDATA_1t48 = os.path.join(os.path.dirname(__file__), "data", "1t48_995.pdb") + TESTDATA_1t48 = os.path.join( + os.path.dirname(__file__), "data", "1t48_995.pdb" + ) p1t48 = PandasPdb() p1t48.read_pdb(TESTDATA_1t48) expect_res = [ @@ -146,7 +149,9 @@ def test_defaults(): def test_sameindex(): - TESTDATA_1t48 = os.path.join(os.path.dirname(__file__), "data", "1t48_995.pdb") + TESTDATA_1t48 = os.path.join( + os.path.dirname(__file__), "data", "1t48_995.pdb" + ) p1t48 = PandasPdb() p1t48.read_pdb(TESTDATA_1t48) print(p1t48) @@ -482,8 +487,12 @@ def test_multichain(): expect_chain = ["A" for _ in range(88)] + ["B" for _ in range(94)] got_chain = list(transl["chain_id"].values) - got_res_a = list(transl.loc[transl["chain_id"] == "A", "residue_name"].values) - got_res_b = list(transl.loc[transl["chain_id"] == "B", "residue_name"].values) + got_res_a = list( + transl.loc[transl["chain_id"] == "A", "residue_name"].values + ) + got_res_b = list( + transl.loc[transl["chain_id"] == "B", "residue_name"].values + ) assert expect_chain == got_chain assert expect_res_a == got_res_a diff --git a/biopandas/pdb/tests/test_assign_df.py b/biopandas/pdb/tests/test_assign_df.py index 700f638..f339609 100644 --- a/biopandas/pdb/tests/test_assign_df.py +++ b/biopandas/pdb/tests/test_assign_df.py @@ -4,10 +4,10 @@ # Project Website: http://rasbt.github.io/biopandas/ # Code Repository: https://github.com/rasbt/biopandas -from biopandas.pdb import PandasPdb -from biopandas.testutils import assert_raises import os +from biopandas.pdb import PandasPdb +from biopandas.testutils import assert_raises TESTDATA_FILENAME = os.path.join(os.path.dirname(__file__), "data", "3eiy.pdb") diff --git a/biopandas/pdb/tests/test_distance.py b/biopandas/pdb/tests/test_distance.py index 2c6e458..aec7f2e 100644 --- a/biopandas/pdb/tests/test_distance.py +++ b/biopandas/pdb/tests/test_distance.py @@ -4,44 +4,55 @@ # Project Website: http://rasbt.github.io/biopandas/ # Code Repository: https://github.com/rasbt/biopandas +import os + import pandas as pd from biopandas.pdb import PandasPdb -import os def test_equal(): - TESTDATA_1t48 = os.path.join(os.path.dirname(__file__), "data", "1t48_995.pdb") + TESTDATA_1t48 = os.path.join( + os.path.dirname(__file__), "data", "1t48_995.pdb" + ) p1t48 = PandasPdb() p1t48.read_pdb(TESTDATA_1t48) dist = p1t48.distance(xyz=(70.785, 15.477, 23.359), records=("ATOM",)) expect = pd.Series( - [2.533259, 1.520502, 0.000000, 1.257597, 1.252510], index=[12, 13, 14, 15, 16] + [2.533259, 1.520502, 0.000000, 1.257597, 1.252510], + index=[12, 13, 14, 15, 16], ) assert dist[dist < 3].all() == expect.all() def test_deprecated_str_arg(): - TESTDATA_1t48 = os.path.join(os.path.dirname(__file__), "data", "1t48_995.pdb") + TESTDATA_1t48 = os.path.join( + os.path.dirname(__file__), "data", "1t48_995.pdb" + ) p1t48 = PandasPdb() p1t48.read_pdb(TESTDATA_1t48) dist = p1t48.distance(xyz=(70.785, 15.477, 23.359), records="ATOM") expect = pd.Series( - [2.533259, 1.520502, 0.000000, 1.257597, 1.252510], index=[12, 13, 14, 15, 16] + [2.533259, 1.520502, 0.000000, 1.257597, 1.252510], + index=[12, 13, 14, 15, 16], ) assert dist[dist < 3].all() == expect.all() def test_use_external_df(): - TESTDATA_1t48 = os.path.join(os.path.dirname(__file__), "data", "1t48_995.pdb") + TESTDATA_1t48 = os.path.join( + os.path.dirname(__file__), "data", "1t48_995.pdb" + ) p1t48 = PandasPdb() p1t48.read_pdb(TESTDATA_1t48) new_df = p1t48.df["ATOM"].iloc[:-1, :].copy() dist = PandasPdb.distance_df(df=new_df, xyz=(70.785, 15.477, 23.359)) - expect = pd.Series([2.533259, 1.520502, 0.000000, 1.257597], index=[12, 13, 14, 15]) + expect = pd.Series( + [2.533259, 1.520502, 0.000000, 1.257597], index=[12, 13, 14, 15] + ) assert dist[dist < 3].all() == expect.all() diff --git a/biopandas/pdb/tests/test_gyradius.py b/biopandas/pdb/tests/test_gyradius.py index 40f7227..ef4f4c8 100644 --- a/biopandas/pdb/tests/test_gyradius.py +++ b/biopandas/pdb/tests/test_gyradius.py @@ -4,9 +4,10 @@ # Project Website: http://rasbt.github.io/biopandas/ # Code Repository: https://github.com/rasbt/biopandas -from biopandas.pdb import PandasPdb import os + import pytest +from biopandas.pdb import PandasPdb TESTDATA_1t48 = os.path.join(os.path.dirname(__file__), "data", "1t48_995.pdb") @@ -62,7 +63,7 @@ def test_negative_decimals(): @pytest.mark.xfail(raises=TypeError) def test_wrong_decimals_arg(): - p1t48.gyradius(decimals='five') + p1t48.gyradius(decimals="five") def test_both_args(): diff --git a/biopandas/pdb/tests/test_impute.py b/biopandas/pdb/tests/test_impute.py index 225f5c5..6035450 100644 --- a/biopandas/pdb/tests/test_impute.py +++ b/biopandas/pdb/tests/test_impute.py @@ -5,9 +5,10 @@ # Code Repository: https://github.com/rasbt/biopandas -from biopandas.pdb import PandasPdb import os +from biopandas.pdb import PandasPdb + TESTDATA_FILENAME = os.path.join( os.path.dirname(__file__), "data", "3eiy_stripped_no_ele.pdb" ) diff --git a/biopandas/pdb/tests/test_read_pdb.py b/biopandas/pdb/tests/test_read_pdb.py index e6bd8fa..c41792f 100644 --- a/biopandas/pdb/tests/test_read_pdb.py +++ b/biopandas/pdb/tests/test_read_pdb.py @@ -6,11 +6,11 @@ import os -import pytest from urllib.error import HTTPError import numpy as np import pandas as pd +import pytest from biopandas.pdb import PandasPdb from biopandas.testutils import assert_raises @@ -18,7 +18,9 @@ TESTDATA_FILENAME2 = os.path.join( os.path.dirname(__file__), "data", "4eiy_anisouchunk.pdb" ) -TESTDATA_FILENAME_GZ = os.path.join(os.path.dirname(__file__), "data", "3eiy.pdb.gz") +TESTDATA_FILENAME_GZ = os.path.join( + os.path.dirname(__file__), "data", "3eiy.pdb.gz" +) TESTDATA_FILENAME_AF2_V4 = os.path.join( os.path.dirname(__file__), "data", "AF-Q5VSL9-F1-model_v4.pdb" ) @@ -101,7 +103,8 @@ def test__read_pdb_raises(): Test if ValueError is raised for wrong file formats.""" expect = ( - "Wrong file format; allowed file formats are " ".pdb, .pdb.gz, .ent, .ent.gz" + "Wrong file format; allowed file formats are " + ".pdb, .pdb.gz, .ent, .ent.gz" ) def run_code_1(): diff --git a/biopandas/pdb/tests/test_rmsd.py b/biopandas/pdb/tests/test_rmsd.py index dc7a707..88c142b 100644 --- a/biopandas/pdb/tests/test_rmsd.py +++ b/biopandas/pdb/tests/test_rmsd.py @@ -4,16 +4,23 @@ # Project Website: http://rasbt.github.io/biopandas/ # Code Repository: https://github.com/rasbt/biopandas -from biopandas.pdb import PandasPdb import os + import pytest +from biopandas.pdb import PandasPdb TESTDATA_1t48 = os.path.join(os.path.dirname(__file__), "data", "1t48_995.pdb") TESTDATA_1t49 = os.path.join(os.path.dirname(__file__), "data", "1t49_995.pdb") -TESTDATA_lig1 = os.path.join(os.path.dirname(__file__), "data", "lig_conf_1.pdb") -TESTDATA_lig2 = os.path.join(os.path.dirname(__file__), "data", "lig_conf_2.pdb") +TESTDATA_lig1 = os.path.join( + os.path.dirname(__file__), "data", "lig_conf_1.pdb" +) +TESTDATA_lig2 = os.path.join( + os.path.dirname(__file__), "data", "lig_conf_2.pdb" +) -TESTDATA_rna = os.path.join(os.path.dirname(__file__), "data", "1ehz-rna_short.pdb") +TESTDATA_rna = os.path.join( + os.path.dirname(__file__), "data", "1ehz-rna_short.pdb" +) p1t48 = PandasPdb() p1t48.read_pdb(TESTDATA_1t48) @@ -47,7 +54,9 @@ def test_invalid_query(): def test_protein(): - r = PandasPdb.rmsd(p1t48.df["ATOM"], p1t49.df["ATOM"], s="c-alpha", invert=False) + r = PandasPdb.rmsd( + p1t48.df["ATOM"], p1t49.df["ATOM"], s="c-alpha", invert=False + ) assert r == 0.4785, r @@ -61,7 +70,9 @@ def test_rna_and_nonmatching_indices(): def test_ligand(): - r = PandasPdb.rmsd(pl1.df["HETATM"], pl2.df["HETATM"], s="hydrogen", invert=True) + r = PandasPdb.rmsd( + pl1.df["HETATM"], pl2.df["HETATM"], s="hydrogen", invert=True + ) assert r == 1.9959, r diff --git a/biopandas/pdb/tests/test_write_pdb.py b/biopandas/pdb/tests/test_write_pdb.py index 22734ec..352e51b 100644 --- a/biopandas/pdb/tests/test_write_pdb.py +++ b/biopandas/pdb/tests/test_write_pdb.py @@ -4,11 +4,11 @@ # Project Website: http://rasbt.github.io/biopandas/ # Code Repository: https://github.com/rasbt/biopandas -from biopandas.pdb import PandasPdb -import warnings -import pandas as pd import os +import warnings +import pandas as pd +from biopandas.pdb import PandasPdb TESTDATA_FILENAME = os.path.join(os.path.dirname(__file__), "data", "3eiy.pdb") TESTDATA_FILENAME2 = os.path.join( @@ -45,7 +45,9 @@ def test_defaults(): def test_nonexpected_column(): ppdb = PandasPdb() ppdb.read_pdb(TESTDATA_FILENAME) - ppdb.df["HETATM"]["test"] = pd.Series("test", index=ppdb.df["HETATM"].index) + ppdb.df["HETATM"]["test"] = pd.Series( + "test", index=ppdb.df["HETATM"].index + ) with warnings.catch_warnings(record=True) as w: ppdb.to_pdb(path=OUTFILE, records=["HETATM"]) with open(OUTFILE, "r") as f: @@ -80,12 +82,12 @@ def test_add_remark(): """Test adding a REMARK entry.""" # Add remark code = 3 - remark1 = 'THIS IS A HIGHLY IMPORTANT FREE-TEXT REMARK WHICH IS EXACTLY 80 CHARACTERS LONG.' - remark2 = '' - remark3 = 'THIS IS A NEXT MULTI-LINE INDENTED REMARK\n FOLLOWING THE BLANK REMARK.' + remark1 = "THIS IS A HIGHLY IMPORTANT FREE-TEXT REMARK WHICH IS EXACTLY 80 CHARACTERS LONG." + remark2 = "" + remark3 = "THIS IS A NEXT MULTI-LINE INDENTED REMARK\n FOLLOWING THE BLANK REMARK." ppdb = PandasPdb() ppdb.read_pdb(TESTDATA_FILENAME) - n_atoms = len(ppdb.df['ATOM']) + n_atoms = len(ppdb.df["ATOM"]) ppdb.add_remark(code, remark1) ppdb.add_remark(code, remark2) ppdb.add_remark(code, remark3, 5) @@ -110,18 +112,18 @@ def test_add_remark(): ppdb = PandasPdb() ppdb.read_pdb(OUTFILE) os.remove(OUTFILE) - assert len(ppdb.df['ATOM']) == n_atoms + assert len(ppdb.df["ATOM"]) == n_atoms def test_introduce_remark(): """Test introducing a REMARK entry to the file with no remarks.""" # Add remark code = 3 - remark = 'THIS IS A HIGHLY IMPORTANT FREE-TEXT REMARK WHICH IS EXACTLY 80 CHARACTERS LONG.' + remark = "THIS IS A HIGHLY IMPORTANT FREE-TEXT REMARK WHICH IS EXACTLY 80 CHARACTERS LONG." indent = 1 ppdb = PandasPdb() ppdb.read_pdb(TESTDATA_FILENAME3) - n_atoms = len(ppdb.df['ATOM']) + n_atoms = len(ppdb.df["ATOM"]) ppdb.add_remark(code, remark, indent) ppdb.to_pdb(path=OUTFILE) @@ -138,16 +140,19 @@ def test_introduce_remark(): ppdb = PandasPdb() ppdb.read_pdb(OUTFILE) os.remove(OUTFILE) - assert len(ppdb.df['ATOM']) == n_atoms + assert len(ppdb.df["ATOM"]) == n_atoms + - def test_b_factor_shift(): """Test b_factor shifting one white space when saving the fetched pdb.""" ppdb = PandasPdb() ppdb.fetch_pdb("2e28") ppdb.to_pdb(path=OUTFILE, records=None) - tmp_df = ppdb.read_pdb(path=OUTFILE).df['ATOM'] + tmp_df = ppdb.read_pdb(path=OUTFILE).df["ATOM"] os.remove(OUTFILE) - assert tmp_df[tmp_df["element_symbol"].isnull() | (tmp_df["element_symbol"] == '')].empty - assert not tmp_df[tmp_df["blank_4"].isnull() | (tmp_df["blank_4"] == '')].empty - + assert tmp_df[ + tmp_df["element_symbol"].isnull() | (tmp_df["element_symbol"] == "") + ].empty + assert not tmp_df[ + tmp_df["blank_4"].isnull() | (tmp_df["blank_4"] == "") + ].empty From 21a608050e1914a0353c521450de57a718a89341 Mon Sep 17 00:00:00 2001 From: Arian Jamasb Date: Mon, 8 Jul 2024 18:42:29 +0100 Subject: [PATCH 06/21] linting --- biopandas/mmtf/pandas_mmtf.py | 161 +++++++++++++------ biopandas/mmtf/tests/test_amino3to1.py | 24 ++- biopandas/mmtf/tests/test_assign_df.py | 4 +- biopandas/mmtf/tests/test_distance.py | 22 ++- biopandas/mmtf/tests/test_multiple_models.py | 17 +- biopandas/mmtf/tests/test_read_mmtf.py | 51 +++--- biopandas/mmtf/tests/test_rmsd.py | 7 +- biopandas/mmtf/tests/test_write_mmtf.py | 45 +++++- 8 files changed, 231 insertions(+), 100 deletions(-) diff --git a/biopandas/mmtf/pandas_mmtf.py b/biopandas/mmtf/pandas_mmtf.py index 5897895..9912946 100644 --- a/biopandas/mmtf/pandas_mmtf.py +++ b/biopandas/mmtf/pandas_mmtf.py @@ -1,9 +1,10 @@ """Class for working with MMTF files.""" + from __future__ import annotations -import os import copy import gzip +import os import warnings from string import ascii_uppercase from typing import Any, Dict, List, Union @@ -11,10 +12,10 @@ import numpy as np import pandas as pd +from biopandas.constants import protein_letters_3to1_extended from looseversion import LooseVersion -from mmtf import MMTFDecoder, MMTFEncoder, fetch, parse, parse_gzip -from biopandas.constants import protein_letters_3to1_extended +from mmtf import MMTFDecoder, MMTFEncoder, fetch, parse, parse_gzip from ..pdb.engines import amino3to1dict, pdb_df_columns, pdb_records @@ -90,9 +91,9 @@ def impute_element(self, records=("ATOM", "HETATM"), inplace=False): t[d] = self.df[d].copy() for sec in records: - t[sec]["element_symbol"] = t[sec][["atom_name", "element_symbol"]].apply( - lambda x: x[0][1] if len(x[1]) == 3 else x[0][0], axis=1 - ) + t[sec]["element_symbol"] = t[sec][ + ["atom_name", "element_symbol"] + ].apply(lambda x: x[0][1] if len(x[1]) == 3 else x[0][0], axis=1) return t @staticmethod @@ -235,14 +236,18 @@ def amino3to1(self, record="ATOM", residue_col="residue_name", fillna="?"): cmp = "placeholder" indices = [] - residue_number_insertion = tmp["residue_number"].astype(str) + tmp["insertion"] + residue_number_insertion = ( + tmp["residue_number"].astype(str) + tmp["insertion"] + ) for num, ind in zip(residue_number_insertion, np.arange(tmp.shape[0])): if num != cmp: indices.append(ind) cmp = num - transl = tmp.iloc[indices][residue_col].map(amino3to1dict).fillna(fillna) + transl = ( + tmp.iloc[indices][residue_col].map(amino3to1dict).fillna(fillna) + ) return pd.concat((tmp.iloc[indices]["chain_id"], transl), axis=1) @@ -282,7 +287,9 @@ def distance(self, xyz=(0.00, 0.00, 0.00), records=("ATOM", "HETATM")): return np.sqrt( np.sum( - df[["x_coord", "y_coord", "z_coord"]].subtract(xyz, axis=1) ** 2, axis=1 + df[["x_coord", "y_coord", "z_coord"]].subtract(xyz, axis=1) + ** 2, + axis=1, ) ) @@ -308,7 +315,9 @@ def distance_df(df, xyz=(0.00, 0.00, 0.00)): """ return np.sqrt( np.sum( - df[["x_coord", "y_coord", "z_coord"]].subtract(xyz, axis=1) ** 2, axis=1 + df[["x_coord", "y_coord", "z_coord"]].subtract(xyz, axis=1) + ** 2, + axis=1, ) ) @@ -354,7 +363,9 @@ def to_pdb(self, path, records=None, gz=False, append_newline=True): if c in {"x_coord", "y_coord", "z_coord"}: for idx in range(dfs[r][c].values.shape[0]): if len(dfs[r][c].values[idx]) > 8: - dfs[r][c].values[idx] = str(dfs[r][c].values[idx]).strip() + dfs[r][c].values[idx] = str( + dfs[r][c].values[idx] + ).strip() if c in {"line_idx", "OUT"}: pass elif r in {"ATOM", "HETATM"} and c not in pdb_df_columns: @@ -398,7 +409,7 @@ def parse_sse(self): """Parse secondary structure elements""" raise NotImplementedError - def to_mmtf(self, path, records = ("ATOM", "HETATM")): + def to_mmtf(self, path, records=("ATOM", "HETATM")): """Write record DataFrames to an MMTF file. Parameters @@ -413,7 +424,6 @@ def to_mmtf(self, path, records = ("ATOM", "HETATM")): df = pd.concat(objs=[self.df[i] for i in records]) return write_mmtf(df, path) - def get_model(self, model_index: int) -> PandasMmtf: """Returns a new PandasPDB object with the dataframes subset to the given model index. @@ -432,7 +442,9 @@ def get_model(self, model_index: int) -> PandasMmtf: df = copy.deepcopy(self) if "ATOM" in df.df.keys(): - df.df["ATOM"] = df.df["ATOM"].loc[df.df["ATOM"]["model_id"] == model_index] + df.df["ATOM"] = df.df["ATOM"].loc[ + df.df["ATOM"]["model_id"] == model_index + ] if "HETATM" in df.df.keys(): df.df["HETATM"] = df.df["HETATM"].loc[ df.df["HETATM"]["model_id"] == model_index @@ -462,20 +474,28 @@ def get_models(self, model_indices: List[int]) -> PandasMmtf: if "ATOM" in df.df.keys(): df.df["ATOM"] = df.df["ATOM"].loc[ - [x in model_indices for x in df.df["ATOM"]["model_id"].tolist()] + [ + x in model_indices + for x in df.df["ATOM"]["model_id"].tolist() + ] ] if "HETATM" in df.df.keys(): df.df["HETATM"] = df.df["HETATM"].loc[ - [x in model_indices for x in df.df["HETATM"]["model_id"].tolist()] + [ + x in model_indices + for x in df.df["HETATM"]["model_id"].tolist() + ] ] if "ANISOU" in df.df.keys(): df.df["ANISOU"] = df.df["ANISOU"].loc[ - [x in model_indices for x in df.df["ANISOU"]["model_id"].tolist()] + [ + x in model_indices + for x in df.df["ANISOU"]["model_id"].tolist() + ] ] return df - def fetch_mmtf(pdb_code: str) -> pd.DataFrame: """Returns a dataframe from a PDB code. @@ -496,7 +516,11 @@ def parse_mmtf(file_path: str) -> pd.DataFrame: :return: Dataframe of protein structure. :rtype: pd.DataFrame """ - df = parse_gzip(file_path) if file_path.endswith(".gz") else parse(file_path) + df = ( + parse_gzip(file_path) + if file_path.endswith(".gz") + else parse(file_path) + ) return mmtf_to_df(df) @@ -528,7 +552,9 @@ def mmtf_to_df(mmtf_obj: MMTFDecoder) -> pd.DataFrame: else: chain_indices[i] = chain_indices[i] + chain_indices[i - 1] model_indices = mmtf_obj.chains_per_model - model_indices = [sum(model_indices[:i+1]) for i in range(len(model_indices))] + model_indices = [ + sum(model_indices[: i + 1]) for i in range(len(model_indices)) + ] ch_idx = 0 entity_types = {} @@ -540,23 +566,25 @@ def mmtf_to_df(mmtf_obj: MMTFDecoder) -> pd.DataFrame: ch_idx = next(ch_idx_iter) for idx, i in enumerate(mmtf_obj.group_type_list): res = mmtf_obj.group_list[i] - #record = "HETATM" if res["chemCompType"] == "NON-POLYMER" else "ATOM" - #record = ( + # record = "HETATM" if res["chemCompType"] == "NON-POLYMER" else "ATOM" + # record = ( # "ATOM" # if res["chemCompType"] in ["L-PEPTIDE LINKING", "PEPTIDE LINKING"] # else "HETATM" - #) + # ) if idx == chain_indices[ch_idx]: - #ch_idx += 1 + # ch_idx += 1 ch_idx = next(ch_idx_iter) record = "ATOM" if entity_types[ch_idx] == "polymer" else "HETATM" for _ in res["atomNameList"]: data["residue_name"].append(res["groupName"]) data["residue_number"].append(mmtf_obj.group_id_list[idx]) - #data["chain_id"].append([mmtf_obj.chain_name_list[ch_idx]]) + # data["chain_id"].append([mmtf_obj.chain_name_list[ch_idx]]) data["chain_id"].append([mmtf_obj.chain_name_list[ch_idx]]) - data["model_id"].append(int(np.argwhere(np.array(model_indices)>ch_idx)[0]) + 1) + data["model_id"].append( + int(np.argwhere(np.array(model_indices) > ch_idx)[0]) + 1 + ) data["record_name"].append(record) data["insertion"].append(mmtf_obj.ins_code_list[idx]) data["atom_name"].append(res["atomNameList"]) @@ -576,16 +604,19 @@ def mmtf_to_df(mmtf_obj: MMTFDecoder) -> pd.DataFrame: "record_name", "insertion", "atom_number", - "model_id" + "model_id", ]: continue data[k] = [i for sublist in v for i in sublist] - df = pd.DataFrame.from_dict(data).sort_values(by=["model_id", "atom_number"]) + df = pd.DataFrame.from_dict(data).sort_values( + by=["model_id", "atom_number"] + ) df.alt_loc = df.alt_loc.str.replace("\x00", "") df.insertion = df.insertion.str.replace("\x00", "") return df + def _seq1(seq, charmap: Dict[str, str], undef_code="X"): # sourcery skip: dict-assign-update-to-union """Convert protein sequence from three-letter to one-letter code. @@ -629,8 +660,6 @@ def _seq1(seq, charmap: Dict[str, str], undef_code="X"): return "".join(onecode.get(aa.upper(), undef_code) for aa in seqlist) - - def write_mmtf(df: pd.DataFrame, file_path: str): """Writes a biopandas dataframe to an MMTF file. @@ -668,7 +697,17 @@ def write_mmtf(df: pd.DataFrame, file_path: str): experimental_methods=None, ) - node_ids = df.model_id.astype(str) + ":" + df.chain_id + ":" + df.residue_name + ":" + df.residue_number.astype(str) + ":" + df.insertion.astype(str) + node_ids = ( + df.model_id.astype(str) + + ":" + + df.chain_id + + ":" + + df.residue_name + + ":" + + df.residue_number.astype(str) + + ":" + + df.insertion.astype(str) + ) df["residue_id"] = node_ids # Tracks values to replace them at the end chains_per_model = [] @@ -684,10 +723,10 @@ def write_mmtf(df: pd.DataFrame, file_path: str): count_models += 1 # Set the model info encoder.set_model_info( - #model_id=model_idx, # According to mmtf-python this is meaningless - model_id=model_idx, # According to mmtf-python this is meaningless - chain_count=0 # Set to 0 here and changed later - ) + # model_id=model_idx, # According to mmtf-python this is meaningless + model_id=model_idx, # According to mmtf-python this is meaningless + chain_count=0, # Set to 0 here and changed later + ) # Iterate over chains in model for chain_id in chains: seqs = [] @@ -725,51 +764,73 @@ def write_mmtf(df: pd.DataFrame, file_path: str): # structure object so we treat each molecule as a separate # entity if residue_type != prev_res_type or ( - residue_type == "HETATM" and resname != prev_resname - ): + residue_type == "HETATM" and resname != prev_resname + ): encoder.set_entity_info( chain_indices=[count_chains], - sequence="", # Set to empty here and changed later + sequence="", # Set to empty here and changed later description="", entity_type=entity_type, ) encoder.set_chain_info( chain_id=chain_id, - chain_name="\x00" if len(chain_id.strip()) == 0 else chain_id, - num_groups=0, # Set to 0 here and changed later + chain_name=( + "\x00" if len(chain_id.strip()) == 0 else chain_id + ), + num_groups=0, # Set to 0 here and changed later ) if count_chains > 0: - groups_per_chain.append(count_groups - sum(groups_per_chain) -1) + groups_per_chain.append( + count_groups - sum(groups_per_chain) - 1 + ) if not first_chain: seqs.append(seq) first_chain = False count_chains += 1 - seq="" + seq = "" if entity_type == "polymer": - seq += _seq1(residue_df.residue_name.unique()[0], charmap=protein_letters_3to1_extended) + seq += _seq1( + residue_df.residue_name.unique()[0], + charmap=protein_letters_3to1_extended, + ) prev_res_type = residue_type prev_resname = resname - group_type = "NON-POLYMER" if residue_type == "HETATM" else "L-PEPTIDE LINKING" + group_type = ( + "NON-POLYMER" + if residue_type == "HETATM" + else "L-PEPTIDE LINKING" + ) encoder.set_group_info( group_name=residue_df.residue_name.unique()[0], group_number=int(residue_df.residue_number.unique()[0]), - insertion_code="\x00" if residue_df.insertion.unique()[0] == "" else residue_df.insertion.unique()[0], - group_type=group_type, # Hack to ensure we can re-parse. + insertion_code=( + "\x00" + if residue_df.insertion.unique()[0] == "" + else residue_df.insertion.unique()[0] + ), + group_type=group_type, # Hack to ensure we can re-parse. atom_count=len(residue_df), bond_count=0, - single_letter_code=_seq1(df.residue_name.unique()[0], charmap=protein_letters_3to1_extended), - sequence_index=len(seq) - 1 if entity_type == "polymer" else -1, - secondary_structure_type=-1 + single_letter_code=_seq1( + df.residue_name.unique()[0], + charmap=protein_letters_3to1_extended, + ), + sequence_index=( + len(seq) - 1 if entity_type == "polymer" else -1 + ), + secondary_structure_type=-1, ) for row in residue_df.itertuples(): count_atoms += 1 encoder.set_atom_info( atom_name=row.atom_name, serial_number=row.atom_number, - alternative_location_id="\x00" if row.alt_loc == "" else row.alt_loc, + alternative_location_id=( + "\x00" if row.alt_loc == "" else row.alt_loc + ), x=row.x_coord, y=row.y_coord, z=row.z_coord, diff --git a/biopandas/mmtf/tests/test_amino3to1.py b/biopandas/mmtf/tests/test_amino3to1.py index e4ccdb9..081265f 100644 --- a/biopandas/mmtf/tests/test_amino3to1.py +++ b/biopandas/mmtf/tests/test_amino3to1.py @@ -11,7 +11,9 @@ def test_defaults(): - TESTDATA_1t48 = os.path.join(os.path.dirname(__file__), "data", "1t48.mmtf") + TESTDATA_1t48 = os.path.join( + os.path.dirname(__file__), "data", "1t48.mmtf" + ) p1t48 = PandasMmtf() p1t48.read_mmtf(TESTDATA_1t48) expect_res = [ @@ -319,7 +321,9 @@ def test_defaults(): def test_sameindex(): - TESTDATA_1t48 = os.path.join(os.path.dirname(__file__), "data", "1t48.mmtf") + TESTDATA_1t48 = os.path.join( + os.path.dirname(__file__), "data", "1t48.mmtf" + ) p1t48 = PandasMmtf() p1t48.read_mmtf(TESTDATA_1t48) p1t48.df["ATOM"].index = np.zeros(p1t48.df["ATOM"].shape[0], dtype=int) @@ -628,7 +632,9 @@ def test_sameindex(): def test_multichain(): - TESTDATA_5mtn = os.path.join(os.path.dirname(__file__), "data", "5mtn.mmtf") + TESTDATA_5mtn = os.path.join( + os.path.dirname(__file__), "data", "5mtn.mmtf" + ) mtn = PandasMmtf() mtn.read_mmtf(TESTDATA_5mtn) expect_res_a = [ @@ -823,8 +829,12 @@ def test_multichain(): expect_chain = ["A" for _ in range(88)] + ["B" for _ in range(94)] got_chain = list(transl["chain_id"].values) - got_res_a = list(transl.loc[transl["chain_id"] == "A", "residue_name"].values) - got_res_b = list(transl.loc[transl["chain_id"] == "B", "residue_name"].values) + got_res_a = list( + transl.loc[transl["chain_id"] == "A", "residue_name"].values + ) + got_res_b = list( + transl.loc[transl["chain_id"] == "B", "residue_name"].values + ) assert expect_chain == got_chain assert expect_res_a == got_res_a @@ -832,7 +842,9 @@ def test_multichain(): def test_pdb_with_insertion_codes(): - PDB_2D7T_PATH = os.path.join(os.path.dirname(__file__), "data", "2d7t.mmtf") + PDB_2D7T_PATH = os.path.join( + os.path.dirname(__file__), "data", "2d7t.mmtf" + ) ppdb = PandasMmtf().read_mmtf(PDB_2D7T_PATH) sequence = ppdb.amino3to1() diff --git a/biopandas/mmtf/tests/test_assign_df.py b/biopandas/mmtf/tests/test_assign_df.py index 66bd936..f9dd1bd 100644 --- a/biopandas/mmtf/tests/test_assign_df.py +++ b/biopandas/mmtf/tests/test_assign_df.py @@ -9,7 +9,9 @@ from biopandas.mmtf import PandasMmtf from biopandas.testutils import assert_raises -TESTDATA_FILENAME = os.path.join(os.path.dirname(__file__), "data", "3eiy.mmtf") +TESTDATA_FILENAME = os.path.join( + os.path.dirname(__file__), "data", "3eiy.mmtf" +) def test_overwrite_df(): diff --git a/biopandas/mmtf/tests/test_distance.py b/biopandas/mmtf/tests/test_distance.py index d876fcc..96291e3 100644 --- a/biopandas/mmtf/tests/test_distance.py +++ b/biopandas/mmtf/tests/test_distance.py @@ -11,38 +11,48 @@ def test_equal(): - TESTDATA_1t48 = os.path.join(os.path.dirname(__file__), "data", "1t48.mmtf") + TESTDATA_1t48 = os.path.join( + os.path.dirname(__file__), "data", "1t48.mmtf" + ) p1t48 = PandasMmtf() p1t48.read_mmtf(TESTDATA_1t48) dist = p1t48.distance(xyz=(70.785, 15.477, 23.359), records=("ATOM",)) expect = pd.Series( - [2.533259, 1.520502, 0.000000, 1.257597, 1.252510], index=[12, 13, 14, 15, 16] + [2.533259, 1.520502, 0.000000, 1.257597, 1.252510], + index=[12, 13, 14, 15, 16], ) assert dist[dist < 3].all() == expect.all() def test_deprecated_str_arg(): - TESTDATA_1t48 = os.path.join(os.path.dirname(__file__), "data", "1t48.mmtf") + TESTDATA_1t48 = os.path.join( + os.path.dirname(__file__), "data", "1t48.mmtf" + ) p1t48 = PandasMmtf() p1t48.read_mmtf(TESTDATA_1t48) dist = p1t48.distance(xyz=(70.785, 15.477, 23.359), records="ATOM") expect = pd.Series( - [2.533259, 1.520502, 0.000000, 1.257597, 1.252510], index=[12, 13, 14, 15, 16] + [2.533259, 1.520502, 0.000000, 1.257597, 1.252510], + index=[12, 13, 14, 15, 16], ) assert dist[dist < 3].all() == expect.all() def test_use_external_df(): - TESTDATA_1t48 = os.path.join(os.path.dirname(__file__), "data", "1t48.mmtf") + TESTDATA_1t48 = os.path.join( + os.path.dirname(__file__), "data", "1t48.mmtf" + ) p1t48 = PandasMmtf() p1t48.read_mmtf(TESTDATA_1t48) new_df = p1t48.df["ATOM"].iloc[:-1, :].copy() dist = PandasMmtf.distance_df(df=new_df, xyz=(70.785, 15.477, 23.359)) - expect = pd.Series([2.533259, 1.520502, 0.000000, 1.257597], index=[12, 13, 14, 15]) + expect = pd.Series( + [2.533259, 1.520502, 0.000000, 1.257597], index=[12, 13, 14, 15] + ) assert dist[dist < 3].all() == expect.all() diff --git a/biopandas/mmtf/tests/test_multiple_models.py b/biopandas/mmtf/tests/test_multiple_models.py index 5a77e9e..30be1cc 100644 --- a/biopandas/mmtf/tests/test_multiple_models.py +++ b/biopandas/mmtf/tests/test_multiple_models.py @@ -6,11 +6,12 @@ # Code Repository: https://github.com/rasbt/biopandas import os -from pandas.testing import assert_frame_equal - from biopandas.mmtf import PandasMmtf +from pandas.testing import assert_frame_equal -TESTDATA_FILENAME = os.path.join(os.path.dirname(__file__), "data", "2jyf.mmtf") +TESTDATA_FILENAME = os.path.join( + os.path.dirname(__file__), "data", "2jyf.mmtf" +) def test_label_models(): @@ -37,8 +38,14 @@ def test_get_models(): written = PandasMmtf().read_mmtf("test.mmtf") # Note: No way to preserve model ID as far as I can tell - assert_frame_equal(df.df["ATOM"].drop("model_id", axis=1).reset_index(drop=True), written.df["ATOM"].drop("model_id", axis=1).reset_index(drop=True)) - assert_frame_equal(df.df["HETATM"].drop("model_id", axis=1).reset_index(drop=True), written.df["HETATM"].drop("model_id", axis=1).reset_index(drop=True)) + assert_frame_equal( + df.df["ATOM"].drop("model_id", axis=1).reset_index(drop=True), + written.df["ATOM"].drop("model_id", axis=1).reset_index(drop=True), + ) + assert_frame_equal( + df.df["HETATM"].drop("model_id", axis=1).reset_index(drop=True), + written.df["HETATM"].drop("model_id", axis=1).reset_index(drop=True), + ) # Clean os.remove("test.mmtf") diff --git a/biopandas/mmtf/tests/test_read_mmtf.py b/biopandas/mmtf/tests/test_read_mmtf.py index cc3c1f9..045ff1d 100644 --- a/biopandas/mmtf/tests/test_read_mmtf.py +++ b/biopandas/mmtf/tests/test_read_mmtf.py @@ -4,39 +4,53 @@ # Project Website: http://rasbt.github.io/biopandas/ # Code Repository: https://github.com/rasbt/biopandas -import unittest import os +import unittest import pandas as pd - from biopandas.mmtf import PandasMmtf from biopandas.pdb import PandasPdb -MMTF_TESTDATA_FILENAME = os.path.join(os.path.dirname(__file__), "data", "3eiy.mmtf") -MMTF_TESTDATA_FILENAME_GZ = os.path.join(os.path.dirname(__file__), "data", "3eiy.mmtf.gz") - -PDB_TESTDATA_FILENAME = os.path.join(os.path.dirname(__file__), "..", "..", "pdb", "tests", "data", "3eiy.pdb") -PDB_TESTDATA_FILENAME_GZ = os.path.join(os.path.dirname(__file__), "..", "..", "pdb", "tests", "data", "3eiy.pdb.gz") +MMTF_TESTDATA_FILENAME = os.path.join( + os.path.dirname(__file__), "data", "3eiy.mmtf" +) +MMTF_TESTDATA_FILENAME_GZ = os.path.join( + os.path.dirname(__file__), "data", "3eiy.mmtf.gz" +) + +PDB_TESTDATA_FILENAME = os.path.join( + os.path.dirname(__file__), "..", "..", "pdb", "tests", "data", "3eiy.pdb" +) +PDB_TESTDATA_FILENAME_GZ = os.path.join( + os.path.dirname(__file__), + "..", + "..", + "pdb", + "tests", + "data", + "3eiy.pdb.gz", +) ATOM_DF_COLUMNS = [ "record_name", "atom_number", "atom_name", - #"alt_loc", + # "alt_loc", "residue_name", "chain_id", "residue_number", - #"insertion", + # "insertion", "x_coord", "y_coord", "z_coord", "occupancy", "b_factor", "element_symbol", - #"charge", + # "charge", ] + @unittest.skip(reason="PDB No longer serves MMTF files.") def test_fetch_pdb(): """Test fetch_pdb""" @@ -56,14 +70,14 @@ def test__read_mmtf(): pd.testing.assert_frame_equal( pmmtf.df["ATOM"][ATOM_DF_COLUMNS].reset_index(drop=True), ppdb.df["ATOM"][ATOM_DF_COLUMNS].reset_index(drop=True), - ) + ) ATOM_DF_COLUMNS.remove("atom_number") ATOM_DF_COLUMNS.remove("element_symbol") pd.testing.assert_frame_equal( pmmtf.df["HETATM"][ATOM_DF_COLUMNS].reset_index(drop=True), ppdb.df["HETATM"][ATOM_DF_COLUMNS].reset_index(drop=True), - ) + ) def test__read_mmtf_gz(): @@ -73,15 +87,14 @@ def test__read_mmtf_gz(): pmmtf.read_mmtf(MMTF_TESTDATA_FILENAME_GZ) ppdb = ppdb.read_pdb(PDB_TESTDATA_FILENAME_GZ) - - pmmtf.df["ATOM"].alt_loc.replace('\x00', "", inplace=True) - pmmtf.df["HETATM"].alt_loc.replace('\x00', "", inplace=True) + pmmtf.df["ATOM"].alt_loc.replace("\x00", "", inplace=True) + pmmtf.df["HETATM"].alt_loc.replace("\x00", "", inplace=True) pd.testing.assert_frame_equal( pmmtf.df["ATOM"][ATOM_DF_COLUMNS].reset_index(drop=True), ppdb.df["ATOM"][ATOM_DF_COLUMNS].reset_index(drop=True), - ) - #pd.testing.assert_frame_equal( + ) + # pd.testing.assert_frame_equal( # pmmtf.df["HETATM"][ATOM_DF_COLUMNS].reset_index(drop=True), # ppdb.df["HETATM"][ATOM_DF_COLUMNS].reset_index(drop=True), # ) @@ -92,7 +105,3 @@ def test_read_mmtf(): ppdb = PandasMmtf() ppdb.read_mmtf(MMTF_TESTDATA_FILENAME) assert ppdb.mmtf_path == MMTF_TESTDATA_FILENAME - - - - diff --git a/biopandas/mmtf/tests/test_rmsd.py b/biopandas/mmtf/tests/test_rmsd.py index 1d02e06..1131009 100644 --- a/biopandas/mmtf/tests/test_rmsd.py +++ b/biopandas/mmtf/tests/test_rmsd.py @@ -5,11 +5,10 @@ # Code Repository: https://github.com/rasbt/biopandas import os -import pytest +import pytest from biopandas.mmtf import PandasMmtf - TESTDATA_1t48 = os.path.join(os.path.dirname(__file__), "data", "1t48.mmtf") TESTDATA_1t49 = os.path.join(os.path.dirname(__file__), "data", "1t49.mmtf") # TESTDATA_lig1 = os.path.join(os.path.dirname(__file__), "data", "lig_conf_1.pdb") @@ -52,7 +51,9 @@ def test_invalid_query(): def test_protein(): - r = PandasMmtf.rmsd(p1t48.df["ATOM"], p1t49.df["ATOM"], s="c-alpha", invert=False) + r = PandasMmtf.rmsd( + p1t48.df["ATOM"], p1t49.df["ATOM"], s="c-alpha", invert=False + ) assert r == 0.4785, r diff --git a/biopandas/mmtf/tests/test_write_mmtf.py b/biopandas/mmtf/tests/test_write_mmtf.py index ce7bab2..69452e8 100644 --- a/biopandas/mmtf/tests/test_write_mmtf.py +++ b/biopandas/mmtf/tests/test_write_mmtf.py @@ -2,13 +2,22 @@ import unittest import pandas as pd +from biopandas.mmtf.pandas_mmtf import PandasMmtf, write_mmtf from pandas.testing import assert_frame_equal -from biopandas.mmtf.pandas_mmtf import PandasMmtf, write_mmtf @unittest.skip(reason="PDB No longer serves MMTF files.") def test_write_mmtf_bp(): - PDB_CODES = ["4hhb", "3eiy", "1t48", "1ehz", "4ggb", "1bxa", "1cbn", "1rcf"] + PDB_CODES = [ + "4hhb", + "3eiy", + "1t48", + "1ehz", + "4ggb", + "1bxa", + "1cbn", + "1rcf", + ] for pdb in PDB_CODES: print(pdb) pm1 = PandasMmtf().fetch_mmtf(pdb) @@ -16,15 +25,30 @@ def test_write_mmtf_bp(): assert os.path.exists("test.mmtf") pm2 = PandasMmtf().read_mmtf("test.mmtf") - assert_frame_equal(pm1.df["ATOM"].reset_index(drop=True), pm2.df["ATOM"].reset_index(drop=True)) - assert_frame_equal(pm1.df["HETATM"].reset_index(drop=True), pm2.df["HETATM"].reset_index(drop=True)) + assert_frame_equal( + pm1.df["ATOM"].reset_index(drop=True), + pm2.df["ATOM"].reset_index(drop=True), + ) + assert_frame_equal( + pm1.df["HETATM"].reset_index(drop=True), + pm2.df["HETATM"].reset_index(drop=True), + ) os.remove("test.mmtf") @unittest.skip(reason="PDB No longer serves MMTF files.") def test_write_mmtf(): - PDB_CODES = ["4hhb", "3eiy", "1t48", "1ehz", "4ggb", "1bxa", "1cbn", "1rcf"] + PDB_CODES = [ + "4hhb", + "3eiy", + "1t48", + "1ehz", + "4ggb", + "1bxa", + "1cbn", + "1rcf", + ] for pdb in PDB_CODES: print(pdb) pm1 = PandasMmtf().fetch_mmtf(pdb) @@ -32,8 +56,13 @@ def test_write_mmtf(): assert os.path.exists("test.mmtf") pm2 = PandasMmtf().read_mmtf("test.mmtf") - assert_frame_equal(pm1.df["ATOM"].reset_index(drop=True), pm2.df["ATOM"].reset_index(drop=True)) - assert_frame_equal(pm1.df["HETATM"].reset_index(drop=True), pm2.df["HETATM"].reset_index(drop=True)) + assert_frame_equal( + pm1.df["ATOM"].reset_index(drop=True), + pm2.df["ATOM"].reset_index(drop=True), + ) + assert_frame_equal( + pm1.df["HETATM"].reset_index(drop=True), + pm2.df["HETATM"].reset_index(drop=True), + ) os.remove("test.mmtf") - From e3234f364d065b6440556181ce39bff0e68f0d87 Mon Sep 17 00:00:00 2001 From: Arian Jamasb Date: Mon, 8 Jul 2024 18:42:44 +0100 Subject: [PATCH 07/21] linting --- biopandas/mol2/__init__.py | 2 +- biopandas/mol2/pandas_mol2.py | 17 ++++++++++++----- biopandas/mol2/tests/test_mol2_io.py | 13 ++++++++++--- biopandas/mol2/tests/test_pandas_mol2.py | 6 +++++- 4 files changed, 28 insertions(+), 10 deletions(-) diff --git a/biopandas/mol2/__init__.py b/biopandas/mol2/__init__.py index 79358e2..38453f0 100644 --- a/biopandas/mol2/__init__.py +++ b/biopandas/mol2/__init__.py @@ -9,7 +9,7 @@ files in pandas DataFrames. """ -from .pandas_mol2 import PandasMol2 from .mol2_io import split_multimol2 +from .pandas_mol2 import PandasMol2 __all__ = ["PandasMol2", "split_multimol2"] diff --git a/biopandas/mol2/pandas_mol2.py b/biopandas/mol2/pandas_mol2.py index cbb893c..556a9fc 100644 --- a/biopandas/mol2/pandas_mol2.py +++ b/biopandas/mol2/pandas_mol2.py @@ -1,14 +1,15 @@ """ Class for working with Tripos MOL2 files""" + # BioPandas # Author: Sebastian Raschka # License: BSD 3 clause # Project Website: http://rasbt.github.io/biopandas/ # Code Repository: https://github.com/rasbt/biopandas -import pandas as pd import numpy as np -from .mol2_io import split_multimol2 +import pandas as pd +from .mol2_io import split_multimol2 COLUMN_NAMES = ( "atom_id", @@ -167,7 +168,9 @@ def read_mol2_from_list(self, mol2_lines, mol2_code, columns=None): def _construct_df(self, mol2_lines, col_names, col_types): """Construct DataFrames from list of PDB lines.""" return self._atomsection_to_pandas( - self._get_atomsection(mol2_lines), col_names=col_names, col_types=col_types + self._get_atomsection(mol2_lines), + col_names=col_names, + col_types=col_types, ) @staticmethod @@ -195,7 +198,9 @@ def _get_atomsection(mol2_lst): @staticmethod def _atomsection_to_pandas(mol2_atom_lst, col_names, col_types): - df = pd.DataFrame([lst.split() for lst in mol2_atom_lst], columns=col_names) + df = pd.DataFrame( + [lst.split() for lst in mol2_atom_lst], columns=col_names + ) for i in range(df.shape[1]): df[col_names[i]] = df[col_names[i]].astype(col_types[i]) @@ -281,4 +286,6 @@ def distance_df(df, xyz=(0.00, 0.00, 0.00)): """ - return np.sqrt(np.sum(df[["x", "y", "z"]].subtract(xyz, axis=1) ** 2, axis=1)) + return np.sqrt( + np.sum(df[["x", "y", "z"]].subtract(xyz, axis=1) ** 2, axis=1) + ) diff --git a/biopandas/mol2/tests/test_mol2_io.py b/biopandas/mol2/tests/test_mol2_io.py index dfc3e93..b6c4c98 100644 --- a/biopandas/mol2/tests/test_mol2_io.py +++ b/biopandas/mol2/tests/test_mol2_io.py @@ -5,6 +5,7 @@ # Code Repository: https://github.com/rasbt/biopandas import os + from biopandas.mol2.mol2_io import split_multimol2 from biopandas.testutils import assert_raises @@ -13,7 +14,9 @@ def test_split_multimol2(): all_mol2 = [] - for i in split_multimol2(os.path.join(this_dir, "data", "40_mol2_files.mol2")): + for i in split_multimol2( + os.path.join(this_dir, "data", "40_mol2_files.mol2") + ): all_mol2.append(i[0]) assert all_mol2[1] == "ZINC04084113" assert len(all_mol2) == 40 @@ -21,7 +24,9 @@ def test_split_multimol2(): def test_split_multimol2_wrong_format(): - expect = "Wrong file format;" "allowed file formats are .mol2 and .mol2.gz." + expect = ( + "Wrong file format;" "allowed file formats are .mol2 and .mol2.gz." + ) def run_code(): next(split_multimol2("40_mol2_files.pdb")) @@ -31,7 +36,9 @@ def run_code(): def test_split_multimol2_gz(): all_mol2 = [] - for i in split_multimol2(os.path.join(this_dir, "data", "40_mol2_files.mol2.gz")): + for i in split_multimol2( + os.path.join(this_dir, "data", "40_mol2_files.mol2.gz") + ): all_mol2.append(i[0]) assert all_mol2[1].decode() == "ZINC04084113" assert len(all_mol2) == 40 diff --git a/biopandas/mol2/tests/test_pandas_mol2.py b/biopandas/mol2/tests/test_pandas_mol2.py index 6cb4dba..ba88dd0 100644 --- a/biopandas/mol2/tests/test_pandas_mol2.py +++ b/biopandas/mol2/tests/test_pandas_mol2.py @@ -1,4 +1,5 @@ """ Utility function for reading Tripos MOL2 files from files""" + # BioPandas # Author: Sebastian Raschka # License: BSD 3 clause @@ -6,6 +7,7 @@ # Code Repository: https://github.com/rasbt/biopandas import os + from biopandas.mol2 import PandasMol2 from biopandas.mol2.mol2_io import split_multimol2 from biopandas.testutils import assert_raises @@ -48,7 +50,9 @@ def test_read_mol2_from_list(): data_path = os.path.join(this_dir, "data", "40_mol2_files.mol2") mol2 = next(split_multimol2(data_path)) - pdmol = PandasMol2().read_mol2_from_list(mol2_lines=mol2[1], mol2_code=mol2[0]) + pdmol = PandasMol2().read_mol2_from_list( + mol2_lines=mol2[1], mol2_code=mol2[0] + ) assert pdmol.df.shape == (65, 9) assert pdmol.code == "ZINC38611810" From 561344d0f3d63eb902a825d63af694ed5dec1f08 Mon Sep 17 00:00:00 2001 From: Arian Jamasb Date: Mon, 8 Jul 2024 18:43:16 +0100 Subject: [PATCH 08/21] linting --- biopandas/mmcif/mmcif_parser.py | 21 +++- biopandas/mmcif/pandas_mmcif.py | 144 +++++++++++++++-------- biopandas/mmcif/tests/test_amino3to1.py | 8 +- biopandas/mmcif/tests/test_distance.py | 10 +- biopandas/mmcif/tests/test_read_mmcif.py | 21 ++-- biopandas/mmcif/tests/test_rmsd.py | 6 +- 6 files changed, 145 insertions(+), 65 deletions(-) diff --git a/biopandas/mmcif/mmcif_parser.py b/biopandas/mmcif/mmcif_parser.py index 96d0a31..91556bb 100644 --- a/biopandas/mmcif/mmcif_parser.py +++ b/biopandas/mmcif/mmcif_parser.py @@ -22,19 +22,28 @@ def __init__(self, parser_obj): self.names_defined = False def add_name(self, name): - cat_name = type(name) == str and partition_string(name, ".") or ["", "", ""] + cat_name = ( + type(name) == str and partition_string(name, ".") or ["", "", ""] + ) if cat_name[1]: if cat_name[0] not in self.parser_obj.current_target[-2]: self.parser_obj.current_target[-2][cat_name[0]] = {} - if cat_name[2] not in self.parser_obj.current_target[-2][cat_name[0]]: - self.parser_obj.current_target[-2][cat_name[0]][cat_name[2]] = [] + if ( + cat_name[2] + not in self.parser_obj.current_target[-2][cat_name[0]] + ): + self.parser_obj.current_target[-2][cat_name[0]][ + cat_name[2] + ] = [] self.ref_list.append( self.parser_obj.current_target[-2][cat_name[0]][cat_name[2]] ) else: if cat_name[0] not in self.parser_obj.current_target[-2]: self.parser_obj.current_target[-2][cat_name[0]] = [] - self.ref_list.append(self.parser_obj.current_target[-2][cat_name[0]]) + self.ref_list.append( + self.parser_obj.current_target[-2][cat_name[0]] + ) self.length = len(self.ref_list) def push_value(self, value): @@ -289,7 +298,9 @@ def __dump_str__(inp): return str(inp) if re.search(__CIF_STR_NL_CHECK__, inp) is not None: return "\n;%s\n;" % inp - return "'%s'" % inp if re.search(__CIF_STR_CHECK__, inp) is not None else inp + return ( + "'%s'" % inp if re.search(__CIF_STR_CHECK__, inp) is not None else inp + ) def __pad_string__(inp, flength): diff --git a/biopandas/mmcif/pandas_mmcif.py b/biopandas/mmcif/pandas_mmcif.py index 167b79e..e00c1f5 100644 --- a/biopandas/mmcif/pandas_mmcif.py +++ b/biopandas/mmcif/pandas_mmcif.py @@ -1,4 +1,5 @@ """Class for working with MMCIF files.""" + # BioPandas # Authors: Arian Jamasb , # Authors: Sebastian Raschka @@ -69,56 +70,76 @@ def read_mmcif(self, path): self.code = self.data["entry"]["id"][0].lower() return self - def fetch_mmcif(self, pdb_code: Optional[str] = None, uniprot_id: Optional[str] = None, source: str = "pdb"): + def fetch_mmcif( + self, + pdb_code: Optional[str] = None, + uniprot_id: Optional[str] = None, + source: str = "pdb", + ): """Fetches mmCIF file contents from the Protein Databank at rcsb.org or AlphaFold database at https://alphafold.ebi.ac.uk/. -. + . - Parameters - ---------- - pdb_code : str, optional - A 4-letter PDB code, e.g., `"3eiy"` to retrieve structures from the PDB. Defaults to `None`. + Parameters + ---------- + pdb_code : str, optional + A 4-letter PDB code, e.g., `"3eiy"` to retrieve structures from the PDB. Defaults to `None`. - uniprot_id : str, optional - A UniProt Identifier, e.g., `"Q5VSL9"` to retrieve structures from the AF2 database. Defaults to `None`. + uniprot_id : str, optional + A UniProt Identifier, e.g., `"Q5VSL9"` to retrieve structures from the AF2 database. Defaults to `None`. - source : str - The source to retrieve the structure from - (`"pdb"`, `"alphafold2-v3"` or `"alphafold2-v4"`). Defaults to `"pdb"`. + source : str + The source to retrieve the structure from + (`"pdb"`, `"alphafold2-v3"` or `"alphafold2-v4"`). Defaults to `"pdb"`. - Returns - --------- - self + Returns + --------- + self """ # Sanitize input invalid_input_identifier_1 = pdb_code is None and uniprot_id is None - invalid_input_identifier_2 = pdb_code is not None and uniprot_id is not None - invalid_input_combination_1 = uniprot_id is not None and source == "pdb" + invalid_input_identifier_2 = ( + pdb_code is not None and uniprot_id is not None + ) + invalid_input_combination_1 = ( + uniprot_id is not None and source == "pdb" + ) invalid_input_combination_2 = pdb_code is not None and source in { - "alphafold2-v3", "alphafold2-v4"} + "alphafold2-v3", + "alphafold2-v4", + } if invalid_input_identifier_1 or invalid_input_identifier_2: raise ValueError( - "Please provide either a PDB code or a UniProt ID.") + "Please provide either a PDB code or a UniProt ID." + ) if invalid_input_combination_1: raise ValueError( - "Please use a 'pdb_code' instead of 'uniprot_id' for source='pdb'.") + "Please use a 'pdb_code' instead of 'uniprot_id' for source='pdb'." + ) elif invalid_input_combination_2: raise ValueError( - f"Please use a 'uniprot_id' instead of 'pdb_code' for source={source}.") + f"Please use a 'uniprot_id' instead of 'pdb_code' for source={source}." + ) if source == "pdb": self.mmcif_path, self.mmcif_text = self._fetch_mmcif(pdb_code) elif source == "alphafold2-v3": af2_version = 3 - self.mmcif_path, self.mmcif_text = self._fetch_af2(uniprot_id, af2_version) + self.mmcif_path, self.mmcif_text = self._fetch_af2( + uniprot_id, af2_version + ) elif source == "alphafold2-v4": af2_version = 4 - self.mmcif_path, self.mmcif_text = self._fetch_af2(uniprot_id, af2_version) + self.mmcif_path, self.mmcif_text = self._fetch_af2( + uniprot_id, af2_version + ) else: - raise ValueError(f"Invalid source: {source}." - " Please use one of 'pdb', 'alphafold2-v3' or 'alphafold2-v4'.") + raise ValueError( + f"Invalid source: {source}." + " Please use one of 'pdb', 'alphafold2-v3' or 'alphafold2-v4'." + ) self._df = self._construct_df(text=self.mmcif_text) return self @@ -129,7 +150,8 @@ def _construct_df(self, text: str): self.data = data df: Dict[str, pd.DataFrame] = {} full_df = pd.DataFrame.from_dict( - data["atom_site"], orient="index").transpose() + data["atom_site"], orient="index" + ).transpose() full_df = full_df.astype(mmcif_col_types, errors="ignore") df["ATOM"] = pd.DataFrame(full_df[full_df.group_PDB == "ATOM"]) df["HETATM"] = pd.DataFrame(full_df[full_df.group_PDB == "HETATM"]) @@ -148,8 +170,9 @@ def _fetch_mmcif(pdb_code): response = urlopen(url) txt = response.read() txt = ( - txt.decode( - "utf-8") if sys.version_info[0] >= 3 else txt.encode("ascii") + txt.decode("utf-8") + if sys.version_info[0] >= 3 + else txt.encode("ascii") ) except HTTPError as e: print(f"HTTP Error {e.code}") @@ -166,11 +189,15 @@ def _fetch_af2(uniprot_id: str, af2_version: int = 3): try: response = urlopen(url) txt = response.read() - txt = txt.decode('utf-8') if sys.version_info[0] >= 3 else txt.encode('ascii') + txt = ( + txt.decode("utf-8") + if sys.version_info[0] >= 3 + else txt.encode("ascii") + ) except HTTPError as e: - print(f'HTTP Error {e.code}') + print(f"HTTP Error {e.code}") except URLError as e: - print(f'URL Error {e.args}') + print(f"URL Error {e.args}") return url, txt @staticmethod @@ -184,7 +211,8 @@ def _read_mmcif(path): openf = gzip.open else: allowed_formats = ", ".join( - (".cif", ".cif.gz", ".mmcif", ".mmcif.gz")) + (".cif", ".cif.gz", ".mmcif", ".mmcif.gz") + ) raise ValueError( f"Wrong file format; allowed file formats are {allowed_formats}" ) @@ -194,8 +222,9 @@ def _read_mmcif(path): if path.endswith(".gz"): txt = ( - txt.decode( - "utf-8") if sys.version_info[0] >= 3 else txt.encode("ascii") + txt.decode("utf-8") + if sys.version_info[0] >= 3 + else txt.encode("ascii") ) return path, txt @@ -271,14 +300,19 @@ def _get_mainchain( def _get_hydrogen(df, invert): """Return only hydrogen atom entries from a DataFrame""" return ( - df[(df["type_symbol"] != "H")] if invert else df[( - df["type_symbol"] == "H")] + df[(df["type_symbol"] != "H")] + if invert + else df[(df["type_symbol"] == "H")] ) @staticmethod def _get_heavy(df, invert): """Return only heavy atom entries from a DataFrame""" - return df[df["type_symbol"] == "H"] if invert else df[df["type_symbol"] != "H"] + return ( + df[df["type_symbol"] == "H"] + if invert + else df[df["type_symbol"] != "H"] + ) @staticmethod def _get_calpha(df, invert, atom_col: str = "auth_atom_id"): @@ -288,7 +322,11 @@ def _get_calpha(df, invert, atom_col: str = "auth_atom_id"): @staticmethod def _get_carbon(df, invert): """Return carbon atom entries from a DataFrame""" - return df[df["type_symbol"] != "C"] if invert else df[df["type_symbol"] == "C"] + return ( + df[df["type_symbol"] != "C"] + if invert + else df[df["type_symbol"] == "C"] + ) def amino3to1( self, @@ -339,8 +377,9 @@ def amino3to1( indices.append(ind) cmp = num - transl = tmp.iloc[indices][residue_col].map( - amino3to1dict).fillna(fillna) + transl = ( + tmp.iloc[indices][residue_col].map(amino3to1dict).fillna(fillna) + ) return pd.concat((tmp.iloc[indices][chain_col], transl), axis=1) @@ -425,7 +464,9 @@ def distance(self, xyz=(0.00, 0.00, 0.00), records=("ATOM", "HETATM")): return np.sqrt( np.sum( - df[["Cartn_x", "Cartn_y", "Cartn_z"]].subtract(xyz, axis=1) ** 2, axis=1 + df[["Cartn_x", "Cartn_y", "Cartn_z"]].subtract(xyz, axis=1) + ** 2, + axis=1, ) ) @@ -451,7 +492,9 @@ def distance_df(df, xyz=(0.00, 0.00, 0.00)): """ return np.sqrt( np.sum( - df[["Cartn_x", "Cartn_y", "Cartn_z"]].subtract(xyz, axis=1) ** 2, axis=1 + df[["Cartn_x", "Cartn_y", "Cartn_z"]].subtract(xyz, axis=1) + ** 2, + axis=1, ) ) @@ -485,7 +528,11 @@ def read_mmcif_from_list(self, mmcif_lines): self.code = self.data["entry"]["id"][0].lower() return self - def convert_to_pandas_pdb(self, offset_chains: bool = True, records: List[str] = ["ATOM", "HETATM"]) -> PandasPdb: + def convert_to_pandas_pdb( + self, + offset_chains: bool = True, + records: List[str] = ["ATOM", "HETATM"], + ) -> PandasPdb: """Returns a PandasPdb object with the same data as the PandasMmcif object. @@ -525,10 +572,15 @@ def convert_to_pandas_pdb(self, offset_chains: bool = True, records: List[str] = # Update atom numbers if offset_chains: - offsets = pandaspdb.df["ATOM"]["chain_id"].astype( - "category").cat.codes - pandaspdb.df["ATOM"]["atom_number"] = pandaspdb.df["ATOM"]["atom_number"] + offsets + offsets = ( + pandaspdb.df["ATOM"]["chain_id"].astype("category").cat.codes + ) + pandaspdb.df["ATOM"]["atom_number"] = ( + pandaspdb.df["ATOM"]["atom_number"] + offsets + ) hetatom_offset = offsets.max() + 1 - pandaspdb.df["HETATM"]["atom_number"] = pandaspdb.df["HETATM"]["atom_number"] + hetatom_offset + pandaspdb.df["HETATM"]["atom_number"] = ( + pandaspdb.df["HETATM"]["atom_number"] + hetatom_offset + ) return pandaspdb diff --git a/biopandas/mmcif/tests/test_amino3to1.py b/biopandas/mmcif/tests/test_amino3to1.py index a03c364..83a671c 100644 --- a/biopandas/mmcif/tests/test_amino3to1.py +++ b/biopandas/mmcif/tests/test_amino3to1.py @@ -805,8 +805,12 @@ def test_multichain(): expect_chain = ["A" for _ in range(88)] + ["B" for _ in range(94)] got_chain = list(transl["auth_asym_id"].values) - got_res_a = list(transl.loc[transl["auth_asym_id"] == "A", "auth_comp_id"].values) - got_res_b = list(transl.loc[transl["auth_asym_id"] == "B", "auth_comp_id"].values) + got_res_a = list( + transl.loc[transl["auth_asym_id"] == "A", "auth_comp_id"].values + ) + got_res_b = list( + transl.loc[transl["auth_asym_id"] == "B", "auth_comp_id"].values + ) assert expect_chain == got_chain assert expect_res_a == got_res_a diff --git a/biopandas/mmcif/tests/test_distance.py b/biopandas/mmcif/tests/test_distance.py index f827d01..e7cd116 100644 --- a/biopandas/mmcif/tests/test_distance.py +++ b/biopandas/mmcif/tests/test_distance.py @@ -18,7 +18,8 @@ def test_equal(): dist = p1t48.distance(xyz=(70.785, 15.477, 23.359), records=("ATOM",)) expect = pd.Series( - [2.533259, 1.520502, 0.000000, 1.257597, 1.252510], index=[12, 13, 14, 15, 16] + [2.533259, 1.520502, 0.000000, 1.257597, 1.252510], + index=[12, 13, 14, 15, 16], ) assert dist[dist < 3].all() == expect.all() @@ -31,7 +32,8 @@ def test_deprecated_str_arg(): dist = p1t48.distance(xyz=(70.785, 15.477, 23.359), records="ATOM") expect = pd.Series( - [2.533259, 1.520502, 0.000000, 1.257597, 1.252510], index=[12, 13, 14, 15, 16] + [2.533259, 1.520502, 0.000000, 1.257597, 1.252510], + index=[12, 13, 14, 15, 16], ) assert dist[dist < 3].all() == expect.all() @@ -44,5 +46,7 @@ def test_use_external_df(): new_df = p1t48.df["ATOM"].iloc[:-1, :].copy() dist = PandasMmcif.distance_df(df=new_df, xyz=(70.785, 15.477, 23.359)) - expect = pd.Series([2.533259, 1.520502, 0.000000, 1.257597], index=[12, 13, 14, 15]) + expect = pd.Series( + [2.533259, 1.520502, 0.000000, 1.257597], index=[12, 13, 14, 15] + ) assert dist[dist < 3].all() == expect.all() diff --git a/biopandas/mmcif/tests/test_read_mmcif.py b/biopandas/mmcif/tests/test_read_mmcif.py index 7189702..983e848 100644 --- a/biopandas/mmcif/tests/test_read_mmcif.py +++ b/biopandas/mmcif/tests/test_read_mmcif.py @@ -6,11 +6,11 @@ import os -import pytest -from urllib.error import HTTPError from pathlib import Path +from urllib.error import HTTPError import pandas as pd +import pytest from biopandas.mmcif import PandasMmcif from biopandas.pdb import PandasPdb from biopandas.testutils import assert_raises @@ -22,8 +22,12 @@ # TESTDATA_FILENAME2 = os.path.join( # os.path.dirname(__file__), "data", "4eiy_anisouchunk.cif" # ) -TESTDATA_FILENAME2 = os.path.join(os.path.dirname(__file__), "data", "4eiy.cif") -TESTDATA_FILENAME_GZ = os.path.join(os.path.dirname(__file__), "data", "3eiy.cif.gz") +TESTDATA_FILENAME2 = os.path.join( + os.path.dirname(__file__), "data", "4eiy.cif" +) +TESTDATA_FILENAME_GZ = os.path.join( + os.path.dirname(__file__), "data", "3eiy.cif.gz" +) TESTDATA_FILENAME_AF2_V4 = os.path.join( os.path.dirname(__file__), "data", "AF-Q5VSL9-F1-model_v4.cif" @@ -90,7 +94,6 @@ af2_test_struct_v3 = f.read() - def test__read_pdb(): """Test private _read_pdb""" ppdb = PandasMmcif() @@ -334,7 +337,9 @@ def test_mmcif_pdb_conversion(): ) assert_frame_equal( pdb.df["HETATM"].drop(columns=["line_idx"]), - mmcif_pdb.df["HETATM"].drop(columns=["line_idx"]).reset_index(drop=True), + mmcif_pdb.df["HETATM"] + .drop(columns=["line_idx"]) + .reset_index(drop=True), ) # single chain test @@ -348,5 +353,7 @@ def test_mmcif_pdb_conversion(): ) assert_frame_equal( pdb.df["HETATM"].drop(columns=["line_idx"]), - mmcif_pdb.df["HETATM"].drop(columns=["line_idx"]).reset_index(drop=True), + mmcif_pdb.df["HETATM"] + .drop(columns=["line_idx"]) + .reset_index(drop=True), ) diff --git a/biopandas/mmcif/tests/test_rmsd.py b/biopandas/mmcif/tests/test_rmsd.py index 5507059..054f3b2 100644 --- a/biopandas/mmcif/tests/test_rmsd.py +++ b/biopandas/mmcif/tests/test_rmsd.py @@ -5,8 +5,8 @@ # Code Repository: https://github.com/rasbt/biopandas import os -import pytest +import pytest from biopandas.mmcif import PandasMmcif TESTDATA_1t48 = os.path.join(os.path.dirname(__file__), "data", "1t48.cif") @@ -48,7 +48,9 @@ def test_invalid_query(): def test_protein(): - r = PandasMmcif.rmsd(p1t48.df["ATOM"], p1t49.df["ATOM"], s="c-alpha", invert=False) + r = PandasMmcif.rmsd( + p1t48.df["ATOM"], p1t49.df["ATOM"], s="c-alpha", invert=False + ) assert r == 0.4923, r From a3bf27b8d6817ebabd87aeafa86d6a0aa6619fc5 Mon Sep 17 00:00:00 2001 From: Arian Jamasb Date: Mon, 8 Jul 2024 18:45:51 +0100 Subject: [PATCH 09/21] remove unused variables --- biopandas/pdb/tests/test_read_pdb.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/biopandas/pdb/tests/test_read_pdb.py b/biopandas/pdb/tests/test_read_pdb.py index c41792f..4477830 100644 --- a/biopandas/pdb/tests/test_read_pdb.py +++ b/biopandas/pdb/tests/test_read_pdb.py @@ -123,11 +123,11 @@ def test_fetch_pdb(): try: ppdb = PandasPdb() - url, txt = ppdb._fetch_pdb("3eiy") + _, txt = ppdb._fetch_pdb("3eiy") except HTTPError: - url, txt = None, None + _, txt = None, None except ConnectionResetError: - url, txt = None, None + _, txt = None, None if txt: # skip if PDB down txt[:100] == three_eiy[:100] @@ -141,11 +141,11 @@ def test_fetch_af2(): # Check latest release try: ppdb = PandasPdb() - url, txt = ppdb._fetch_af2("Q5VSL9", af2_version=4) + _, txt = ppdb._fetch_af2("Q5VSL9", af2_version=4) except HTTPError: - url, txt = None, None + _, txt = None, None except ConnectionResetError: - url, txt = None, None + _, txt = None, None if txt: # skip if AF2 DB down txt[:100] == af_test_struct_v4[:100] @@ -159,11 +159,11 @@ def test_fetch_af2(): # Check legacy release try: ppdb = PandasPdb() - url, txt = ppdb._fetch_af2("Q5VSL9", af2_version=3) + _, txt = ppdb._fetch_af2("Q5VSL9", af2_version=3) except HTTPError: - url, txt = None, None + _, txt = None, None except ConnectionResetError: - url, txt = None, None + _, txt = None, None if txt: # skip if AF2 DB down txt[:100] == af_test_struct_v3[:100] @@ -178,7 +178,7 @@ def test_fetch_af2(): def test__read_pdb_gz(): """Test public _read_pdb with gzip files""" ppdb = PandasPdb() - path, txt = ppdb._read_pdb(TESTDATA_FILENAME_GZ) + _, txt = ppdb._read_pdb(TESTDATA_FILENAME_GZ) assert txt == three_eiy From 108ead68e8314ababf1d9bf5aa87b46f4ecd75eb Mon Sep 17 00:00:00 2001 From: Arian Jamasb Date: Mon, 8 Jul 2024 18:50:00 +0100 Subject: [PATCH 10/21] fix type comparison --- biopandas/mmtf/pandas_mmtf.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/biopandas/mmtf/pandas_mmtf.py b/biopandas/mmtf/pandas_mmtf.py index 9912946..1ebc0e9 100644 --- a/biopandas/mmtf/pandas_mmtf.py +++ b/biopandas/mmtf/pandas_mmtf.py @@ -2,9 +2,9 @@ from __future__ import annotations +import os import copy import gzip -import os import warnings from string import ascii_uppercase from typing import Any, Dict, List, Union @@ -12,11 +12,11 @@ import numpy as np import pandas as pd -from biopandas.constants import protein_letters_3to1_extended from looseversion import LooseVersion - from mmtf import MMTFDecoder, MMTFEncoder, fetch, parse, parse_gzip +from biopandas.constants import protein_letters_3to1_extended + from ..pdb.engines import amino3to1dict, pdb_df_columns, pdb_records pd_version = LooseVersion(pd.__version__) From 33b5f9f54220d223891299bd200e102633a9472c Mon Sep 17 00:00:00 2001 From: Arian Jamasb Date: Mon, 8 Jul 2024 18:55:09 +0100 Subject: [PATCH 11/21] bump changelog --- docs/CHANGELOG.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md index f4176fc..8eb8d2a 100755 --- a/docs/CHANGELOG.md +++ b/docs/CHANGELOG.md @@ -4,6 +4,10 @@ The CHANGELOG for the current development version is available at [https://github.com/rasbt/biopandas/blob/main/docs/sources/CHANGELOG.md](https://github.com/rasbt/biopandas/blob/main/docs/sources/CHANGELOG.md). +### 0.5.1dev1 (UNRELEASED) + +- Dev: switched testing framework entirely to pytest. Drops nose dependency due to version conflicts with Python 3.12 (`nose`) and 3.8 (`nose`) + ### 0.5.0dev1 (31/7/2023) - Implement add_remark for PandasPdb, (Via [Anton Bushuiev](https://github.com/anton-bushuiev) PR #[129](https://github.com/BioPandas/biopandas/pull/129)) From 45188a8a8e32d5cf84c946c227377ee27d01fcb3 Mon Sep 17 00:00:00 2001 From: Arian Jamasb Date: Mon, 8 Jul 2024 18:56:10 +0100 Subject: [PATCH 12/21] add changelog enforcer test --- .github/workflows/changelog-enforcer.yaml | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) create mode 100644 .github/workflows/changelog-enforcer.yaml diff --git a/.github/workflows/changelog-enforcer.yaml b/.github/workflows/changelog-enforcer.yaml new file mode 100644 index 0000000..f9ba8c4 --- /dev/null +++ b/.github/workflows/changelog-enforcer.yaml @@ -0,0 +1,16 @@ +name: Changelog Enforcer + +on: # yamllint disable-line rule:truthy + pull_request: + types: [opened, synchronize, reopened, ready_for_review, labeled, unlabeled] + +jobs: + + changelog: + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v3 + - uses: dangoslen/changelog-enforcer@v3 + with: + skipLabels: 'skip-changelog' \ No newline at end of file From 25f590806bfc4250936d18d07b1850ea92fb34ab Mon Sep 17 00:00:00 2001 From: Arian Jamasb Date: Mon, 8 Jul 2024 19:05:31 +0100 Subject: [PATCH 13/21] fix type comparison --- biopandas/mmcif/mmcif_parser.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/biopandas/mmcif/mmcif_parser.py b/biopandas/mmcif/mmcif_parser.py index 91556bb..c0dbfcb 100644 --- a/biopandas/mmcif/mmcif_parser.py +++ b/biopandas/mmcif/mmcif_parser.py @@ -23,7 +23,7 @@ def __init__(self, parser_obj): def add_name(self, name): cat_name = ( - type(name) == str and partition_string(name, ".") or ["", "", ""] + isinstance(name, str) and partition_string(name, ".") or ["", "", ""] ) if cat_name[1]: if cat_name[0] not in self.parser_obj.current_target[-2]: @@ -228,7 +228,7 @@ def __cif_float_range__(inp): try: pos = inp.index("-", 1) return (__CIFFloat__(inp[:pos]), __CIFFloat__(inp[pos + 1 :])) - except: + except Exception: return (__CIFFloat__(inp),) @@ -236,7 +236,7 @@ def __cif_int_range__(inp): try: pos = inp.index("-", 1) return (__CIFInt__(inp[:pos]), __CIFInt__(inp[pos + 1 :])) - except: + except Exception: return (__CIFInt__(inp),) @@ -248,12 +248,12 @@ def __load_cif_dic__(dic_file, force=False): if force: throw dic = json.loads(open(jsf).read()) - except: + except Exception: parser = CIFParser() parser.parse(open(dic_file)) json.dump(parser.data, open(jsf_dic, "w")) for k, v in parser.data["data_mmcif_pdbx.dic"].items(): - if type(v) != dict or "item_type" not in v: + if not isinstance(v, dict) or "item_type" not in v: continue name = partition_string(k[6:], ".") if name[0] not in dic: @@ -294,7 +294,7 @@ def __dump_cif__(jso): def __dump_str__(inp): if inp is None: return "?" - if type(inp) is not str: + if not isinstance(inp, str): return str(inp) if re.search(__CIF_STR_NL_CHECK__, inp) is not None: return "\n;%s\n;" % inp @@ -365,7 +365,7 @@ def __dump_part__(jso): def load_cif_data(data, do_clean=True, do_type=True): parser = CIFParser() - if type(data) == str: + if isinstance(data, str): parser.parse_string(data) else: parser.parse(data) # fileobj From e2364de5cab3dba07a3d99121177d77d7a29e519 Mon Sep 17 00:00:00 2001 From: Arian Jamasb Date: Mon, 8 Jul 2024 19:06:51 +0100 Subject: [PATCH 14/21] clean up unused variable --- biopandas/mmcif/tests/test_read_mmcif.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/biopandas/mmcif/tests/test_read_mmcif.py b/biopandas/mmcif/tests/test_read_mmcif.py index 983e848..bfba806 100644 --- a/biopandas/mmcif/tests/test_read_mmcif.py +++ b/biopandas/mmcif/tests/test_read_mmcif.py @@ -97,7 +97,7 @@ def test__read_pdb(): """Test private _read_pdb""" ppdb = PandasMmcif() - path, txt = ppdb._read_mmcif(TESTDATA_FILENAME) + _, txt = ppdb._read_mmcif(TESTDATA_FILENAME) print(txt) assert txt == three_eiy @@ -127,9 +127,9 @@ def test_fetch_pdb(): try: ppdb = PandasMmcif() - url, txt = ppdb._fetch_mmcif("3eiy") + _, txt = ppdb._fetch_mmcif("3eiy") except (HTTPError, ConnectionResetError): - url, txt = None, None + _, txt = None, None if txt: # skip if PDB down txt[:100] == three_eiy[:100] ppdb.fetch_mmcif("3eiy") @@ -142,9 +142,9 @@ def test_fetch_af2(): # Test latest release try: ppdb = PandasMmcif() - url, txt = ppdb._fetch_af2("Q5VSL9", af2_version=4) + _, txt = ppdb._fetch_af2("Q5VSL9", af2_version=4) except (HTTPError, ConnectionResetError): - url, txt = None, None + _, txt = None, None if txt: # skip if AF DB down txt[:100] == af2_test_struct_v4[:100] ppdb.fetch_mmcif(uniprot_id="Q5VSL9", source="alphafold2-v4") @@ -157,9 +157,9 @@ def test_fetch_af2(): # Test legacy release try: ppdb = PandasMmcif() - url, txt = ppdb._fetch_af2("Q5VSL9", af2_version=3) + _, txt = ppdb._fetch_af2("Q5VSL9", af2_version=3) except (HTTPError, ConnectionResetError): - url, txt = None, None + _, txt = None, None if txt: # skip if AF DB down txt[:100] == af2_test_struct_v3[:100] ppdb.fetch_mmcif(uniprot_id="Q5VSL9", source="alphafold2-v3") @@ -173,7 +173,7 @@ def test_fetch_af2(): def test__read_pdb_gz(): """Test public _read_pdb with gzip files""" ppdb = PandasMmcif() - path, txt = ppdb._read_mmcif(TESTDATA_FILENAME_GZ) + _, txt = ppdb._read_mmcif(TESTDATA_FILENAME_GZ) assert txt == three_eiy From 0f5cf0dcaec6bdcad94cf56c2a4f2a5e49a2787e Mon Sep 17 00:00:00 2001 From: Arian Jamasb Date: Mon, 8 Jul 2024 19:08:31 +0100 Subject: [PATCH 15/21] fix whitespace --- biopandas/mmcif/mmcif_parser.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/biopandas/mmcif/mmcif_parser.py b/biopandas/mmcif/mmcif_parser.py index c0dbfcb..bce2dae 100644 --- a/biopandas/mmcif/mmcif_parser.py +++ b/biopandas/mmcif/mmcif_parser.py @@ -227,7 +227,7 @@ def __repr__(self): def __cif_float_range__(inp): try: pos = inp.index("-", 1) - return (__CIFFloat__(inp[:pos]), __CIFFloat__(inp[pos + 1 :])) + return (__CIFFloat__(inp[:pos]), __CIFFloat__(inp[pos + 1:])) except Exception: return (__CIFFloat__(inp),) @@ -235,7 +235,7 @@ def __cif_float_range__(inp): def __cif_int_range__(inp): try: pos = inp.index("-", 1) - return (__CIFInt__(inp[:pos]), __CIFInt__(inp[pos + 1 :])) + return (__CIFInt__(inp[:pos]), __CIFInt__(inp[pos + 1:])) except Exception: return (__CIFInt__(inp),) From 8474381ee415189cad398bb756f0cd0ed2269c65 Mon Sep 17 00:00:00 2001 From: Arian Jamasb Date: Mon, 8 Jul 2024 19:09:20 +0100 Subject: [PATCH 16/21] remove unused mmtf --- biopandas/mmtf/pandas_mmtf.py | 1 - 1 file changed, 1 deletion(-) diff --git a/biopandas/mmtf/pandas_mmtf.py b/biopandas/mmtf/pandas_mmtf.py index 1ebc0e9..a2db5de 100644 --- a/biopandas/mmtf/pandas_mmtf.py +++ b/biopandas/mmtf/pandas_mmtf.py @@ -6,7 +6,6 @@ import copy import gzip import warnings -from string import ascii_uppercase from typing import Any, Dict, List, Union from warnings import warn From cd3582d52066e70b71152ad0f1fc7c127fce5473 Mon Sep 17 00:00:00 2001 From: Arian Jamasb Date: Mon, 8 Jul 2024 19:11:57 +0100 Subject: [PATCH 17/21] remove whitespace --- biopandas/pdb/pandas_pdb.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/biopandas/pdb/pandas_pdb.py b/biopandas/pdb/pandas_pdb.py index fb93182..102d9d5 100644 --- a/biopandas/pdb/pandas_pdb.py +++ b/biopandas/pdb/pandas_pdb.py @@ -247,13 +247,13 @@ def impute_element(self, records=("ATOM", "HETATM"), inplace=False): lambda x: x[0][1] if len(x[1]) == 3 else x[0][0], axis=1 ) return t - + def add_remark(self, code, text='', indent=0): """Add custom REMARK entry. The remark will be inserted to preserve the ordering of REMARK codes, i.e. if the code is `n` it will be added after all remarks with codes less or equal to `n`. If the object does - not store any remarks the remark will be inserted right before the first of ATOM, HETATM or + not store any remarks the remark will be inserted right before the first of ATOM, HETATM or ANISOU records. Parameters @@ -263,9 +263,9 @@ def add_remark(self, code, text='', indent=0): text : str The text of the remark. If the text does not fit into a single line it will be wrapped - into multiple lines of REMARK entries. Likewise, if the text contains new line + into multiple lines of REMARK entries. Likewise, if the text contains new line characters it will be split accordingly. - + indent : int, default: 0 Number of white spaces inserted before the text of the remark. From 0f91baa4c6a906902582770f043ede6138144f75 Mon Sep 17 00:00:00 2001 From: Arian Jamasb Date: Mon, 8 Jul 2024 19:12:38 +0100 Subject: [PATCH 18/21] remove whitespace --- biopandas/pdb/pandas_pdb.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/biopandas/pdb/pandas_pdb.py b/biopandas/pdb/pandas_pdb.py index 102d9d5..2b76db4 100644 --- a/biopandas/pdb/pandas_pdb.py +++ b/biopandas/pdb/pandas_pdb.py @@ -131,7 +131,7 @@ def fetch_pdb(self, pdb_code: Optional[str] = None, uniprot_id: Optional[str] = Defaults to `None`. source : str - The source to retrieve the structure from + The source to retrieve the structure from (`"pdb"`, `"alphafold2-v3"`, `"alphafold2-v4"`(latest)). Defaults to `"pdb"`. From 15340bafb6b773edddb33986f9cba8f95cb71bb4 Mon Sep 17 00:00:00 2001 From: Arian Jamasb Date: Mon, 8 Jul 2024 19:14:03 +0100 Subject: [PATCH 19/21] rename ambiguous variable --- biopandas/pdb/pandas_pdb.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/biopandas/pdb/pandas_pdb.py b/biopandas/pdb/pandas_pdb.py index 2b76db4..f36b11e 100644 --- a/biopandas/pdb/pandas_pdb.py +++ b/biopandas/pdb/pandas_pdb.py @@ -298,7 +298,7 @@ def add_remark(self, code, text='', indent=0): # Wrap remark to fit into 80 characters per line and add indentation wrapper = textwrap.TextWrapper(width=80 - (11 + indent)) - lines = sum([wrapper.wrap(l.strip()) or [' '] for l in text.split('\n')], []) + lines = sum([wrapper.wrap(line.strip()) or [' '] for line in text.split('\n')], []) lines = list(map(lambda x: f'{code:4} ' + indent*' ' + x, lines)) # Shift data frame indices and row indices to create space for the remark From 61ff468ca31ab70a81f5d45a993a977698fabd5a Mon Sep 17 00:00:00 2001 From: Arian Jamasb Date: Mon, 8 Jul 2024 19:14:24 +0100 Subject: [PATCH 20/21] reduce whitespace --- biopandas/pdb/pandas_pdb.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/biopandas/pdb/pandas_pdb.py b/biopandas/pdb/pandas_pdb.py index f36b11e..2d0c03e 100644 --- a/biopandas/pdb/pandas_pdb.py +++ b/biopandas/pdb/pandas_pdb.py @@ -299,7 +299,7 @@ def add_remark(self, code, text='', indent=0): # Wrap remark to fit into 80 characters per line and add indentation wrapper = textwrap.TextWrapper(width=80 - (11 + indent)) lines = sum([wrapper.wrap(line.strip()) or [' '] for line in text.split('\n')], []) - lines = list(map(lambda x: f'{code:4} ' + indent*' ' + x, lines)) + lines = list(map(lambda x: f'{code:4} ' + indent*' ' + x, lines)) # Shift data frame indices and row indices to create space for the remark # Create space in OTHERS From 6e41d7fd00da40516ece337575a88b68382afd89 Mon Sep 17 00:00:00 2001 From: Arian Jamasb Date: Mon, 8 Jul 2024 19:15:35 +0100 Subject: [PATCH 21/21] reduce whitespace --- biopandas/mmtf/pandas_mmtf.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/biopandas/mmtf/pandas_mmtf.py b/biopandas/mmtf/pandas_mmtf.py index a2db5de..c98ec00 100644 --- a/biopandas/mmtf/pandas_mmtf.py +++ b/biopandas/mmtf/pandas_mmtf.py @@ -655,7 +655,7 @@ def _seq1(seq, charmap: Dict[str, str], undef_code="X"): onecode = {k.upper(): v for k, v in charmap.items()} # add the given termination codon code and custom maps onecode.update((k.upper(), v) for k, v in charmap.items()) - seqlist = [seq[3 * i : 3 * (i + 1)] for i in range(len(seq) // 3)] + seqlist = [seq[3 * i:3 * (i + 1)] for i in range(len(seq) // 3)] return "".join(onecode.get(aa.upper(), undef_code) for aa in seqlist)