From c6bd3d624be1a165e2949019879da76bd0a48c64 Mon Sep 17 00:00:00 2001
From: Arian Jamasb <arian.jamasb@roche.com>
Date: Mon, 8 Jul 2024 18:11:46 +0100
Subject: [PATCH 01/21] switch pytest to unittest

---
 biopandas/mmtf/tests/test_read_mmtf.py  | 4 ++--
 biopandas/mmtf/tests/test_write_mmtf.py | 6 +++---
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/biopandas/mmtf/tests/test_read_mmtf.py b/biopandas/mmtf/tests/test_read_mmtf.py
index 9694822..78889d5 100644
--- a/biopandas/mmtf/tests/test_read_mmtf.py
+++ b/biopandas/mmtf/tests/test_read_mmtf.py
@@ -4,7 +4,7 @@
 # Project Website: http://rasbt.github.io/biopandas/
 # Code Repository: https://github.com/rasbt/biopandas
 
-import pytest
+import unittest
 import os
 from urllib.error import HTTPError, URLError
 from urllib.request import urlopen
@@ -42,7 +42,7 @@
     #"charge",
 ]
 
-@pytest.mark.skip(reason="PDB No longer serves MMTF files.")
+@unittest.skip(reason="PDB No longer serves MMTF files.")
 def test_fetch_pdb():
     """Test fetch_pdb"""
     ppdb = PandasMmtf()
diff --git a/biopandas/mmtf/tests/test_write_mmtf.py b/biopandas/mmtf/tests/test_write_mmtf.py
index e934ce1..ce7bab2 100644
--- a/biopandas/mmtf/tests/test_write_mmtf.py
+++ b/biopandas/mmtf/tests/test_write_mmtf.py
@@ -1,12 +1,12 @@
 import os
-import pytest
+import unittest
 
 import pandas as pd
 from pandas.testing import assert_frame_equal
 
 from biopandas.mmtf.pandas_mmtf import PandasMmtf, write_mmtf
 
-@pytest.mark.skip(reason="PDB No longer serves MMTF files.")
+@unittest.skip(reason="PDB No longer serves MMTF files.")
 def test_write_mmtf_bp():
     PDB_CODES = ["4hhb", "3eiy", "1t48", "1ehz", "4ggb", "1bxa", "1cbn", "1rcf"]
     for pdb in PDB_CODES:
@@ -22,7 +22,7 @@ def test_write_mmtf_bp():
     os.remove("test.mmtf")
 
 
-@pytest.mark.skip(reason="PDB No longer serves MMTF files.")
+@unittest.skip(reason="PDB No longer serves MMTF files.")
 def test_write_mmtf():
     PDB_CODES = ["4hhb", "3eiy", "1t48", "1ehz", "4ggb", "1bxa", "1cbn", "1rcf"]
     for pdb in PDB_CODES:

From 5a4bee3af40ff0169616c0274753a9d72d1ca457 Mon Sep 17 00:00:00 2001
From: Arian Jamasb <arian.jamasb@roche.com>
Date: Mon, 8 Jul 2024 18:27:52 +0100
Subject: [PATCH 02/21] switch testing over to pytest from nose/unittest

---
 .appveyor.yml                            |  4 ++--
 biopandas/mmcif/tests/test_read_mmcif.py |  6 ++----
 biopandas/mmcif/tests/test_rmsd.py       |  8 ++++----
 biopandas/mmtf/tests/test_read_mmtf.py   |  5 -----
 biopandas/mmtf/tests/test_rmsd.py        |  9 +++++----
 biopandas/pdb/tests/test_gyradius.py     | 10 ++++------
 biopandas/pdb/tests/test_impute.py       |  3 ---
 biopandas/pdb/tests/test_read_pdb.py     |  7 +++----
 biopandas/pdb/tests/test_rmsd.py         |  9 ++++-----
 ci/.travis_install.sh                    |  6 +++---
 ci/.travis_test.sh                       |  4 ++--
 docs/CONTRIBUTING.md                     | 21 ++++++++++-----------
 setup.py                                 |  2 +-
 13 files changed, 40 insertions(+), 54 deletions(-)

diff --git a/.appveyor.yml b/.appveyor.yml
index 8efc418..e300d19 100644
--- a/.appveyor.yml
+++ b/.appveyor.yml
@@ -14,8 +14,8 @@ install:
   - conda config --set always_yes yes --set changeps1 no
   - conda update -q conda
   - conda info -a
-  - conda create -q -n test-environment --channel=conda-forge mmtf-python numpy scipy pandas nose looseversion python=%PYTHON_VERSION%
+  - conda create -q -n test-environment --channel=conda-forge mmtf-python numpy scipy pandas pytest looseversion python=%PYTHON_VERSION%
   - activate test-environment
 
 test_script:
-  - nosetests -s -v
+  - pytest -s -v
diff --git a/biopandas/mmcif/tests/test_read_mmcif.py b/biopandas/mmcif/tests/test_read_mmcif.py
index 1d8b38d..b325206 100644
--- a/biopandas/mmcif/tests/test_read_mmcif.py
+++ b/biopandas/mmcif/tests/test_read_mmcif.py
@@ -6,16 +6,14 @@
 
 
 import os
+import pytest
 from urllib.error import HTTPError
-from urllib.request import urlopen
 from pathlib import Path
 
-import numpy as np
 import pandas as pd
 from biopandas.mmcif import PandasMmcif
 from biopandas.pdb import PandasPdb
 from biopandas.testutils import assert_raises
-from nose.tools import raises
 from pandas.testing import assert_frame_equal
 
 TESTDATA_FILENAME = os.path.join(os.path.dirname(__file__), "data", "3eiy.cif")
@@ -282,7 +280,7 @@ def test_read_pdb_with_pathlib():
 #    assert ppdb.code == "4eiy", ppdb.code
 
 
-@raises(AttributeError)
+@pytest.mark.xfail(AttributeError)
 def test_get_exceptions():
     ppdb = PandasMmcif()
     ppdb.read_mmcif(TESTDATA_FILENAME)
diff --git a/biopandas/mmcif/tests/test_rmsd.py b/biopandas/mmcif/tests/test_rmsd.py
index cb8d0ac..122c1f9 100644
--- a/biopandas/mmcif/tests/test_rmsd.py
+++ b/biopandas/mmcif/tests/test_rmsd.py
@@ -5,9 +5,9 @@
 # Code Repository: https://github.com/rasbt/biopandas
 
 import os
+import pytest
 
 from biopandas.mmcif import PandasMmcif
-from nose.tools import raises
 
 TESTDATA_1t48 = os.path.join(os.path.dirname(__file__), "data", "1t48.cif")
 TESTDATA_1t49 = os.path.join(os.path.dirname(__file__), "data", "1t49.cif")
@@ -32,17 +32,17 @@ def test_equal():
     assert r == 0.000, r
 
 
-@raises(AttributeError)
+@pytest.mark.xfail(AttributeError)
 def test_wrong_arg():
     PandasMmcif.rmsd(p1t48.df["ATOM"].loc[1:, :], p1t48.df["ATOM"], s="bla")
 
 
-@raises(AttributeError)
+@pytest.mark.xfail(AttributeError)
 def test_incompatible():
     PandasMmcif.rmsd(p1t48.df["ATOM"].loc[1:, :], p1t48.df["ATOM"], s=None)
 
 
-@raises(AttributeError)
+@pytest.mark.xfail(AttributeError)
 def test_invalid_query():
     PandasMmcif.rmsd(p1t48.df["ATOM"].loc[1:, :], p1t48.df["ATOM"], s="bla")
 
diff --git a/biopandas/mmtf/tests/test_read_mmtf.py b/biopandas/mmtf/tests/test_read_mmtf.py
index 78889d5..cc3c1f9 100644
--- a/biopandas/mmtf/tests/test_read_mmtf.py
+++ b/biopandas/mmtf/tests/test_read_mmtf.py
@@ -6,16 +6,11 @@
 
 import unittest
 import os
-from urllib.error import HTTPError, URLError
-from urllib.request import urlopen
 
-import numpy as np
 import pandas as pd
-from nose.tools import raises
 
 from biopandas.mmtf import PandasMmtf
 from biopandas.pdb import PandasPdb
-from biopandas.testutils import assert_raises
 
 MMTF_TESTDATA_FILENAME = os.path.join(os.path.dirname(__file__), "data", "3eiy.mmtf")
 MMTF_TESTDATA_FILENAME_GZ = os.path.join(os.path.dirname(__file__), "data", "3eiy.mmtf.gz")
diff --git a/biopandas/mmtf/tests/test_rmsd.py b/biopandas/mmtf/tests/test_rmsd.py
index b9292b5..561ba85 100644
--- a/biopandas/mmtf/tests/test_rmsd.py
+++ b/biopandas/mmtf/tests/test_rmsd.py
@@ -5,9 +5,10 @@
 # Code Repository: https://github.com/rasbt/biopandas
 
 import os
+import pytest
 
 from biopandas.mmtf import PandasMmtf
-from nose.tools import raises
+
 
 TESTDATA_1t48 = os.path.join(os.path.dirname(__file__), "data", "1t48.mmtf")
 TESTDATA_1t49 = os.path.join(os.path.dirname(__file__), "data", "1t49.mmtf")
@@ -35,17 +36,17 @@ def test_equal():
     assert r == 0.000, r
 
 
-@raises(AttributeError)
+@pytest.mark.xfail(AttributeError)
 def test_wrong_arg():
     PandasMmtf.rmsd(p1t48.df["ATOM"].loc[1:, :], p1t48.df["ATOM"], s="bla")
 
 
-@raises(AttributeError)
+@pytest.mark.xfail(AttributeError)
 def test_incompatible():
     PandasMmtf.rmsd(p1t48.df["ATOM"].loc[1:, :], p1t48.df["ATOM"], s=None)
 
 
-@raises(AttributeError)
+@pytest.mark.xfail(AttributeError)
 def test_invalid_query():
     PandasMmtf.rmsd(p1t48.df["ATOM"].loc[1:, :], p1t48.df["ATOM"], s="bla")
 
diff --git a/biopandas/pdb/tests/test_gyradius.py b/biopandas/pdb/tests/test_gyradius.py
index 3098010..9781f75 100644
--- a/biopandas/pdb/tests/test_gyradius.py
+++ b/biopandas/pdb/tests/test_gyradius.py
@@ -6,9 +6,7 @@
 
 from biopandas.pdb import PandasPdb
 import os
-import pandas as pd
-from nose.tools import raises
-import warnings
+import pytest
 
 TESTDATA_1t48 = os.path.join(os.path.dirname(__file__), "data", "1t48_995.pdb")
 
@@ -40,12 +38,12 @@ def test_atom_and_hetatm():
     assert rg == expected_rg, f"Expected {expected_rg}, got {rg} instead"
 
 
-@raises(KeyError)
+@pytest.mark.xfail(KeyError)
 def test_wrong_record_name():
     p1t48.gyradius(("Wrong",))
 
 
-@raises(TypeError)
+@pytest.mark.xfail(TypeError)
 def test_wrong_arg_type():
     p1t48.gyradius(5)
 
@@ -62,7 +60,7 @@ def test_negative_decimals():
     assert rg == expected_rg, f"Expected {expected_rg}, got {rg} instead"
 
 
-@raises(TypeError)
+@pytest.mark.xfail(TypeError)
 def test_wrong_decimals_arg():
     p1t48.gyradius(decimals='five')
 
diff --git a/biopandas/pdb/tests/test_impute.py b/biopandas/pdb/tests/test_impute.py
index 729ce2b..225f5c5 100644
--- a/biopandas/pdb/tests/test_impute.py
+++ b/biopandas/pdb/tests/test_impute.py
@@ -7,9 +7,6 @@
 
 from biopandas.pdb import PandasPdb
 import os
-import numpy as np
-import pandas as pd
-from nose.tools import raises
 
 TESTDATA_FILENAME = os.path.join(
     os.path.dirname(__file__), "data", "3eiy_stripped_no_ele.pdb"
diff --git a/biopandas/pdb/tests/test_read_pdb.py b/biopandas/pdb/tests/test_read_pdb.py
index 68cc9eb..912140a 100644
--- a/biopandas/pdb/tests/test_read_pdb.py
+++ b/biopandas/pdb/tests/test_read_pdb.py
@@ -6,14 +6,13 @@
 
 
 import os
-from urllib.error import HTTPError, URLError
-from urllib.request import urlopen
+import pytest
+from urllib.error import HTTPError
 
 import numpy as np
 import pandas as pd
 from biopandas.pdb import PandasPdb
 from biopandas.testutils import assert_raises
-from nose.tools import raises
 
 TESTDATA_FILENAME = os.path.join(os.path.dirname(__file__), "data", "3eiy.pdb")
 TESTDATA_FILENAME2 = os.path.join(
@@ -269,7 +268,7 @@ def test_anisou_input_handling():
     assert ppdb.code == "4eiy", ppdb.code
 
 
-@raises(AttributeError)
+@pytest.mark.xfail(AttributeError)
 def test_get_exceptions():
     ppdb = PandasPdb()
     ppdb.read_pdb(TESTDATA_FILENAME)
diff --git a/biopandas/pdb/tests/test_rmsd.py b/biopandas/pdb/tests/test_rmsd.py
index a5f2ea6..f8ca140 100644
--- a/biopandas/pdb/tests/test_rmsd.py
+++ b/biopandas/pdb/tests/test_rmsd.py
@@ -6,8 +6,7 @@
 
 from biopandas.pdb import PandasPdb
 import os
-from nose.tools import raises
-
+import pytest
 
 TESTDATA_1t48 = os.path.join(os.path.dirname(__file__), "data", "1t48_995.pdb")
 TESTDATA_1t49 = os.path.join(os.path.dirname(__file__), "data", "1t49_995.pdb")
@@ -32,17 +31,17 @@ def test_equal():
     assert r == 0.000, r
 
 
-@raises(AttributeError)
+@pytest.mark.xfail(AttributeError)
 def test_wrong_arg():
     PandasPdb.rmsd(p1t48.df["ATOM"].loc[1:, :], p1t48.df["ATOM"], s="bla")
 
 
-@raises(AttributeError)
+@pytest.mark.xfail(AttributeError)
 def test_incompatible():
     PandasPdb.rmsd(p1t48.df["ATOM"].loc[1:, :], p1t48.df["ATOM"], s=None)
 
 
-@raises(AttributeError)
+@pytest.mark.xfail(AttributeError)
 def test_invalid_query():
     PandasPdb.rmsd(p1t48.df["ATOM"].loc[1:, :], p1t48.df["ATOM"], s="bla")
 
diff --git a/ci/.travis_install.sh b/ci/.travis_install.sh
index c7547b5..f26ae3b 100755
--- a/ci/.travis_install.sh
+++ b/ci/.travis_install.sh
@@ -32,12 +32,12 @@ conda info -a
 # Configure the conda environment and put it in the path using the
 # provided versions
 if [[ "$LATEST" == "true" ]]; then
-    conda create -n testenv --yes python=$PYTHON_VERSION pip nose \
+    conda create -n testenv --yes python=$PYTHON_VERSION pip pytest \
         numpy scipy pandas
 else
-    conda create -n testenv --yes python=$PYTHON_VERSION pip nose \
+    conda create -n testenv --yes python=$PYTHON_VERSION pip pytest \
         numpy=$NUMPY_VERSION scipy=$SCIPY_VERSION \
-	    pandas=$PANDAS_VERSION 
+	    pandas=$PANDAS_VERSION
 fi
 
 conda init bash
diff --git a/ci/.travis_test.sh b/ci/.travis_test.sh
index de7e4c2..c2003bd 100755
--- a/ci/.travis_test.sh
+++ b/ci/.travis_test.sh
@@ -14,8 +14,8 @@ python -c "import scipy; print('scipy %s' % scipy.__version__)"
 python -c "import pandas; print('pandas %s' % pandas.__version__)"
 
 if [[ "$COVERAGE" == "true" ]]; then
-    nosetests -s -v --with-coverage --cover-package=biopandas
+    pytest -s -v --with-coverage --cover-package=biopandas
 else
-    nosetests -s -v biopandas
+    pytest -s -v biopandas
 fi
 #make test-doc test-sphinxext
diff --git a/docs/CONTRIBUTING.md b/docs/CONTRIBUTING.md
index c6d3c10..2f0ca4b 100755
--- a/docs/CONTRIBUTING.md
+++ b/docs/CONTRIBUTING.md
@@ -6,22 +6,21 @@ I would be very happy about any kind of contributions that help to improve and e
 
 If this is your first contribution, please review the [Code of Conduct](CODE_OF_CONDUCT.md).
 
-
 ### Quick Contributor Checklist
 
 This is a quick checklist about the different steps of a typical contribution to biopandas and
 other open source projects. Consider copying this list to a local text file (or the issue tracker)
 and checking off items as you go.
 
-1. [ ]  Open a new "issue" on GitHub to discuss the new feature / bug fix  
+1. [ ]  Open a new "issue" on GitHub to discuss the new feature / bug fix
 2. [ ]  Fork the biopandas repository from GitHub (if not already done earlier)
-3. [ ]  Create and checkout a new topic branch   
-4. [ ]  Implement new feature or apply the bug-fix  
-5. [ ]  Add appropriate unit test functions  
-6. [ ]  Run `nosetests -sv` and make sure that all unit tests pass  
-7. [ ]  Check/improve the test coverage by running `nosetests --with-coverage`
-8. [ ]  Add a note about the change to the `./docs/sources/CHANGELOG.md` file  
-9. [ ]  Modify documentation in the appropriate location under `biopandas/docs/sources/`  
+3. [ ]  Create and checkout a new topic branch
+4. [ ]  Implement new feature or apply the bug-fix
+5. [ ]  Add appropriate unit test functions
+6. [ ]  Run `pytest -sv` and make sure that all unit tests pass
+7. [ ]  Check/improve the test coverage by running `pytest --cov=biopandas` (requires the `pytest-cov` plugin)
+8. [ ]  Add a note about the change to the `./docs/sources/CHANGELOG.md` file
+9. [ ]  Modify documentation in the appropriate location under `biopandas/docs/sources/`
 10. [ ]  Push the topic branch to the server and create a pull request
 11. [ ]  Check the Travis-CI build passed at [https://travis-ci.org/rasbt/biopandas](https://travis-ci.org/rasbt/biopandas)
 12. [ ]  Check/improve the unit test coverage at [https://coveralls.io/github/rasbt/biopandas](https://coveralls.io/github/rasbt/biopandas)
@@ -154,11 +153,11 @@ $ git checkout <new_feature>
 Adding/modifying the unit tests and check if they pass:
 
 ```bash
-$ nosetests -sv
+$ pytest -sv
 ```
 
 ```bash
-$ nosetests --with-coverage
+$ pytest --cov=biopandas
 ```
 
 #### 4. Documenting the changes
diff --git a/setup.py b/setup.py
index bc528ba..6751ea3 100644
--- a/setup.py
+++ b/setup.py
@@ -34,7 +34,7 @@
                     },
       include_package_data=True,
       install_requires=install_reqs,
-      extras_require={'test': ['pytest', 'pytest-cov','flake8', 'nose'],},
+      extras_require={'test': ['pytest', 'pytest-cov','flake8'],},
       license='BSD 3-Clause',
       platforms='any',
       classifiers=[

From 77cc989ef65485d8a83e6c6aa0d9355b7e4e509c Mon Sep 17 00:00:00 2001
From: Arian Jamasb <arian.jamasb@roche.com>
Date: Mon, 8 Jul 2024 18:30:53 +0100
Subject: [PATCH 03/21] fix pytest expected failures

---
 biopandas/mmcif/tests/test_read_mmcif.py | 2 +-
 biopandas/mmcif/tests/test_rmsd.py       | 6 +++---
 biopandas/mmtf/tests/test_rmsd.py        | 6 +++---
 biopandas/pdb/tests/test_gyradius.py     | 6 +++---
 biopandas/pdb/tests/test_rmsd.py         | 6 +++---
 5 files changed, 13 insertions(+), 13 deletions(-)

diff --git a/biopandas/mmcif/tests/test_read_mmcif.py b/biopandas/mmcif/tests/test_read_mmcif.py
index b325206..7189702 100644
--- a/biopandas/mmcif/tests/test_read_mmcif.py
+++ b/biopandas/mmcif/tests/test_read_mmcif.py
@@ -280,7 +280,7 @@ def test_read_pdb_with_pathlib():
 #    assert ppdb.code == "4eiy", ppdb.code
 
 
-@pytest.mark.xfail(AttributeError)
+@pytest.mark.xfail(raises=AttributeError)
 def test_get_exceptions():
     ppdb = PandasMmcif()
     ppdb.read_mmcif(TESTDATA_FILENAME)
diff --git a/biopandas/mmcif/tests/test_rmsd.py b/biopandas/mmcif/tests/test_rmsd.py
index 122c1f9..5507059 100644
--- a/biopandas/mmcif/tests/test_rmsd.py
+++ b/biopandas/mmcif/tests/test_rmsd.py
@@ -32,17 +32,17 @@ def test_equal():
     assert r == 0.000, r
 
 
-@pytest.mark.xfail(AttributeError)
+@pytest.mark.xfail(raises=AttributeError)
 def test_wrong_arg():
     PandasMmcif.rmsd(p1t48.df["ATOM"].loc[1:, :], p1t48.df["ATOM"], s="bla")
 
 
-@pytest.mark.xfail(AttributeError)
+@pytest.mark.xfail(raises=AttributeError)
 def test_incompatible():
     PandasMmcif.rmsd(p1t48.df["ATOM"].loc[1:, :], p1t48.df["ATOM"], s=None)
 
 
-@pytest.mark.xfail(AttributeError)
+@pytest.mark.xfail(raises=AttributeError)
 def test_invalid_query():
     PandasMmcif.rmsd(p1t48.df["ATOM"].loc[1:, :], p1t48.df["ATOM"], s="bla")
 
diff --git a/biopandas/mmtf/tests/test_rmsd.py b/biopandas/mmtf/tests/test_rmsd.py
index 561ba85..1d02e06 100644
--- a/biopandas/mmtf/tests/test_rmsd.py
+++ b/biopandas/mmtf/tests/test_rmsd.py
@@ -36,17 +36,17 @@ def test_equal():
     assert r == 0.000, r
 
 
-@pytest.mark.xfail(AttributeError)
+@pytest.mark.xfail(raises=AttributeError)
 def test_wrong_arg():
     PandasMmtf.rmsd(p1t48.df["ATOM"].loc[1:, :], p1t48.df["ATOM"], s="bla")
 
 
-@pytest.mark.xfail(AttributeError)
+@pytest.mark.xfail(raises=AttributeError)
 def test_incompatible():
     PandasMmtf.rmsd(p1t48.df["ATOM"].loc[1:, :], p1t48.df["ATOM"], s=None)
 
 
-@pytest.mark.xfail(AttributeError)
+@pytest.mark.xfail(raises=AttributeError)
 def test_invalid_query():
     PandasMmtf.rmsd(p1t48.df["ATOM"].loc[1:, :], p1t48.df["ATOM"], s="bla")
 
diff --git a/biopandas/pdb/tests/test_gyradius.py b/biopandas/pdb/tests/test_gyradius.py
index 9781f75..40f7227 100644
--- a/biopandas/pdb/tests/test_gyradius.py
+++ b/biopandas/pdb/tests/test_gyradius.py
@@ -38,12 +38,12 @@ def test_atom_and_hetatm():
     assert rg == expected_rg, f"Expected {expected_rg}, got {rg} instead"
 
 
-@pytest.mark.xfail(KeyError)
+@pytest.mark.xfail(raises=KeyError)
 def test_wrong_record_name():
     p1t48.gyradius(("Wrong",))
 
 
-@pytest.mark.xfail(TypeError)
+@pytest.mark.xfail(raises=TypeError)
 def test_wrong_arg_type():
     p1t48.gyradius(5)
 
@@ -60,7 +60,7 @@ def test_negative_decimals():
     assert rg == expected_rg, f"Expected {expected_rg}, got {rg} instead"
 
 
-@pytest.mark.xfail(TypeError)
+@pytest.mark.xfail(raises=TypeError)
 def test_wrong_decimals_arg():
     p1t48.gyradius(decimals='five')
 
diff --git a/biopandas/pdb/tests/test_rmsd.py b/biopandas/pdb/tests/test_rmsd.py
index f8ca140..dc7a707 100644
--- a/biopandas/pdb/tests/test_rmsd.py
+++ b/biopandas/pdb/tests/test_rmsd.py
@@ -31,17 +31,17 @@ def test_equal():
     assert r == 0.000, r
 
 
-@pytest.mark.xfail(AttributeError)
+@pytest.mark.xfail(raises=AttributeError)
 def test_wrong_arg():
     PandasPdb.rmsd(p1t48.df["ATOM"].loc[1:, :], p1t48.df["ATOM"], s="bla")
 
 
-@pytest.mark.xfail(AttributeError)
+@pytest.mark.xfail(raises=AttributeError)
 def test_incompatible():
     PandasPdb.rmsd(p1t48.df["ATOM"].loc[1:, :], p1t48.df["ATOM"], s=None)
 
 
-@pytest.mark.xfail(AttributeError)
+@pytest.mark.xfail(raises=AttributeError)
 def test_invalid_query():
     PandasPdb.rmsd(p1t48.df["ATOM"].loc[1:, :], p1t48.df["ATOM"], s="bla")
 

From f4b15d46f5585ebd269028a61d24f314cae67ba0 Mon Sep 17 00:00:00 2001
From: Arian Jamasb <arian.jamasb@roche.com>
Date: Mon, 8 Jul 2024 18:33:09 +0100
Subject: [PATCH 04/21] fix lingering expected fail

---
 biopandas/pdb/tests/test_read_pdb.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/biopandas/pdb/tests/test_read_pdb.py b/biopandas/pdb/tests/test_read_pdb.py
index 912140a..e6bd8fa 100644
--- a/biopandas/pdb/tests/test_read_pdb.py
+++ b/biopandas/pdb/tests/test_read_pdb.py
@@ -268,7 +268,7 @@ def test_anisou_input_handling():
     assert ppdb.code == "4eiy", ppdb.code
 
 
-@pytest.mark.xfail(AttributeError)
+@pytest.mark.xfail(raises=AttributeError)
 def test_get_exceptions():
     ppdb = PandasPdb()
     ppdb.read_pdb(TESTDATA_FILENAME)

From 1114ff599d22c20bc99c262a41612930a1d11099 Mon Sep 17 00:00:00 2001
From: Arian Jamasb <arian.jamasb@roche.com>
Date: Mon, 8 Jul 2024 18:40:03 +0100
Subject: [PATCH 05/21] linting

---
 biopandas/constants.py                | 1213 +++++++++++++++++++++----
 biopandas/pdb/engines.py              |  196 +++-
 biopandas/pdb/tests/test_amino3to1.py |   19 +-
 biopandas/pdb/tests/test_assign_df.py |    4 +-
 biopandas/pdb/tests/test_distance.py  |   25 +-
 biopandas/pdb/tests/test_gyradius.py  |    5 +-
 biopandas/pdb/tests/test_impute.py    |    3 +-
 biopandas/pdb/tests/test_read_pdb.py  |    9 +-
 biopandas/pdb/tests/test_rmsd.py      |   23 +-
 biopandas/pdb/tests/test_write_pdb.py |   39 +-
 10 files changed, 1290 insertions(+), 246 deletions(-)

diff --git a/biopandas/constants.py b/biopandas/constants.py
index 36e8259..153938f 100644
--- a/biopandas/constants.py
+++ b/biopandas/constants.py
@@ -1,178 +1,1043 @@
 from typing import Dict
 
-ATOMIC_MASSES: Dict[str, float] = {"C": 12.0107, "O": 15.9994, "N": 14.0067, "S": 32.065}
+ATOMIC_MASSES: Dict[str, float] = {
+    "C": 12.0107,
+    "O": 15.9994,
+    "N": 14.0067,
+    "S": 32.065,
+}
 
 protein_letters_3to1_extended: Dict[str, str] = {
-    "A5N": "N", "A8E": "V", "A9D": "S", "AA3": "A", "AA4": "A", "AAR": "R",
-    "ABA": "A", "ACL": "R", "AEA": "C", "AEI": "D", "AFA": "N", "AGM": "R",
-    "AGQ": "Y", "AGT": "C", "AHB": "N", "AHL": "R", "AHO": "A", "AHP": "A",
-    "AIB": "A", "AKL": "D", "AKZ": "D", "ALA": "A", "ALC": "A", "ALM": "A",
-    "ALN": "A", "ALO": "T", "ALS": "A", "ALT": "A", "ALV": "A", "ALY": "K",
-    "AME": "M", "AN6": "L", "AN8": "A", "API": "K", "APK": "K", "AR2": "R",
-    "AR4": "E", "AR7": "R", "ARG": "R", "ARM": "R", "ARO": "R", "AS7": "N",
-    "ASA": "D", "ASB": "D", "ASI": "D", "ASK": "D", "ASL": "D", "ASN": "N",
-    "ASP": "D", "ASQ": "D", "AYA": "A", "AZH": "A", "AZK": "K", "AZS": "S",
-    "AZY": "Y", "AVJ": "H", "A30": "Y", "A3U": "F", "ECC": "Q", "ECX": "C",
-    "EFC": "C", "EHP": "F", "ELY": "K", "EME": "E", "EPM": "M", "EPQ": "Q",
-    "ESB": "Y", "ESC": "M", "EXY": "L", "EXA": "K", "E0Y": "P", "E9V": "H",
-    "E9M": "W", "EJA": "C", "EUP": "T", "EZY": "G", "E9C": "Y", "EW6": "S",
-    "EXL": "W", "I2M": "I", "I4G": "G", "I58": "K", "IAM": "A", "IAR": "R",
-    "ICY": "C", "IEL": "K", "IGL": "G", "IIL": "I", "ILE": "I", "ILG": "E",
-    "ILM": "I", "ILX": "I", "ILY": "K", "IML": "I", "IOR": "R", "IPG": "G",
-    "IT1": "K", "IYR": "Y", "IZO": "M", "IC0": "G", "M0H": "C", "M2L": "K",
-    "M2S": "M", "M30": "G", "M3L": "K", "M3R": "K", "MA ": "A", "MAA": "A",
-    "MAI": "R", "MBQ": "Y", "MC1": "S", "MCL": "K", "MCS": "C", "MD3": "C",
-    "MD5": "C", "MD6": "G", "MDF": "Y", "ME0": "M", "MEA": "F", "MEG": "E",
-    "MEN": "N", "MEQ": "Q", "MET": "M", "MEU": "G", "MFN": "E", "MGG": "R",
-    "MGN": "Q", "MGY": "G", "MH1": "H", "MH6": "S", "MHL": "L", "MHO": "M",
-    "MHS": "H", "MHU": "F", "MIR": "S", "MIS": "S", "MK8": "L", "ML3": "K",
-    "MLE": "L", "MLL": "L", "MLY": "K", "MLZ": "K", "MME": "M", "MMO": "R",
-    "MNL": "L", "MNV": "V", "MP8": "P", "MPQ": "G", "MSA": "G", "MSE": "M",
-    "MSL": "M", "MSO": "M", "MT2": "M", "MTY": "Y", "MVA": "V", "MYK": "K",
-    "MYN": "R", "QCS": "C", "QIL": "I", "QMM": "Q", "QPA": "C", "QPH": "F",
-    "Q3P": "K", "QVA": "C", "QX7": "A", "Q2E": "W", "Q75": "M", "Q78": "F",
-    "QM8": "L", "QMB": "A", "QNQ": "C", "QNT": "C", "QNW": "C", "QO2": "C",
-    "QO5": "C", "QO8": "C", "QQ8": "Q", "U2X": "Y", "U3X": "F", "UF0": "S",
-    "UGY": "G", "UM1": "A", "UM2": "A", "UMA": "A", "UQK": "A", "UX8": "W",
-    "UXQ": "F", "YCM": "C", "YOF": "Y", "YPR": "P", "YPZ": "Y", "YTH": "T",
-    "Y1V": "L", "Y57": "K", "YHA": "K", "200": "F", "23F": "F", "23P": "A",
-    "26B": "T", "28X": "T", "2AG": "A", "2CO": "C", "2FM": "M", "2GX": "F",
-    "2HF": "H", "2JG": "S", "2KK": "K", "2KP": "K", "2LT": "Y", "2LU": "L",
-    "2ML": "L", "2MR": "R", "2MT": "P", "2OR": "R", "2P0": "P", "2QZ": "T",
-    "2R3": "Y", "2RA": "A", "2RX": "S", "2SO": "H", "2TY": "Y", "2VA": "V",
-    "2XA": "C", "2ZC": "S", "6CL": "K", "6CW": "W", "6GL": "A", "6HN": "K",
-    "60F": "C", "66D": "I", "6CV": "A", "6M6": "C", "6V1": "C", "6WK": "C",
-    "6Y9": "P", "6DN": "K", "DA2": "R", "DAB": "A", "DAH": "F", "DBS": "S",
-    "DBU": "T", "DBY": "Y", "DBZ": "A", "DC2": "C", "DDE": "H", "DDZ": "A",
-    "DI7": "Y", "DHA": "S", "DHN": "V", "DIR": "R", "DLS": "K", "DM0": "K",
-    "DMH": "N", "DMK": "D", "DNL": "K", "DNP": "A", "DNS": "K", "DNW": "A",
-    "DOH": "D", "DON": "L", "DP1": "R", "DPL": "P", "DPP": "A", "DPQ": "Y",
-    "DYS": "C", "D2T": "D", "DYA": "D", "DJD": "F", "DYJ": "P", "DV9": "E",
-    "H14": "F", "H1D": "M", "H5M": "P", "HAC": "A", "HAR": "R", "HBN": "H",
-    "HCM": "C", "HGY": "G", "HHI": "H", "HIA": "H", "HIC": "H", "HIP": "H",
-    "HIQ": "H", "HIS": "H", "HL2": "L", "HLU": "L", "HMR": "R", "HNC": "C",
-    "HOX": "F", "HPC": "F", "HPE": "F", "HPH": "F", "HPQ": "F", "HQA": "A",
-    "HR7": "R", "HRG": "R", "HRP": "W", "HS8": "H", "HS9": "H", "HSE": "S",
-    "HSK": "H", "HSL": "S", "HSO": "H", "HT7": "W", "HTI": "C", "HTR": "W",
-    "HV5": "A", "HVA": "V", "HY3": "P", "HYI": "M", "HYP": "P", "HZP": "P",
-    "HIX": "A", "HSV": "H", "HLY": "K", "HOO": "H", "H7V": "A", "L5P": "K",
-    "LRK": "K", "L3O": "L", "LA2": "K", "LAA": "D", "LAL": "A", "LBY": "K",
-    "LCK": "K", "LCX": "K", "LDH": "K", "LE1": "V", "LED": "L", "LEF": "L",
-    "LEH": "L", "LEM": "L", "LEN": "L", "LET": "K", "LEU": "L", "LEX": "L",
-    "LGY": "K", "LLO": "K", "LLP": "K", "LLY": "K", "LLZ": "K", "LME": "E",
-    "LMF": "K", "LMQ": "Q", "LNE": "L", "LNM": "L", "LP6": "K", "LPD": "P",
-    "LPG": "G", "LPS": "S", "LSO": "K", "LTR": "W", "LVG": "G", "LVN": "V",
-    "LWY": "P", "LYF": "K", "LYK": "K", "LYM": "K", "LYN": "K", "LYO": "K",
-    "LYP": "K", "LYR": "K", "LYS": "K", "LYU": "K", "LYX": "K", "LYZ": "K",
-    "LAY": "L", "LWI": "F", "LBZ": "K", "P1L": "C", "P2Q": "Y", "P2Y": "P",
-    "P3Q": "Y", "PAQ": "Y", "PAS": "D", "PAT": "W", "PBB": "C", "PBF": "F",
-    "PCA": "Q", "PCC": "P", "PCS": "F", "PE1": "K", "PEC": "C", "PF5": "F",
-    "PFF": "F", "PG1": "S", "PGY": "G", "PHA": "F", "PHD": "D", "PHE": "F",
-    "PHI": "F", "PHL": "F", "PHM": "F", "PKR": "P", "PLJ": "P", "PM3": "F",
-    "POM": "P", "PPN": "F", "PR3": "C", "PR4": "P", "PR7": "P", "PR9": "P",
-    "PRJ": "P", "PRK": "K", "PRO": "P", "PRS": "P", "PRV": "G", "PSA": "F",
-    "PSH": "H", "PTH": "Y", "PTM": "Y", "PTR": "Y", "PVH": "H", "PXU": "P",
-    "PYA": "A", "PYH": "K", "PYX": "C", "PH6": "P", "P9S": "C", "P5U": "S",
-    "POK": "R", "T0I": "Y", "T11": "F", "TAV": "D", "TBG": "V", "TBM": "T",
-    "TCQ": "Y", "TCR": "W", "TEF": "F", "TFQ": "F", "TH5": "T", "TH6": "T",
-    "THC": "T", "THR": "T", "THZ": "R", "TIH": "A", "TIS": "S", "TLY": "K",
-    "TMB": "T", "TMD": "T", "TNB": "C", "TNR": "S", "TNY": "T", "TOQ": "W",
-    "TOX": "W", "TPJ": "P", "TPK": "P", "TPL": "W", "TPO": "T", "TPQ": "Y",
-    "TQI": "W", "TQQ": "W", "TQZ": "C", "TRF": "W", "TRG": "K", "TRN": "W",
-    "TRO": "W", "TRP": "W", "TRQ": "W", "TRW": "W", "TRX": "W", "TRY": "W",
-    "TS9": "I", "TSY": "C", "TTQ": "W", "TTS": "Y", "TXY": "Y", "TY1": "Y",
-    "TY2": "Y", "TY3": "Y", "TY5": "Y", "TY8": "Y", "TY9": "Y", "TYB": "Y",
-    "TYC": "Y", "TYE": "Y", "TYI": "Y", "TYJ": "Y", "TYN": "Y", "TYO": "Y",
-    "TYQ": "Y", "TYR": "Y", "TYS": "Y", "TYT": "Y", "TYW": "Y", "TYY": "Y",
-    "T8L": "T", "T9E": "T", "TNQ": "W", "TSQ": "F", "TGH": "W", "X2W": "E",
-    "XCN": "C", "XPR": "P", "XSN": "N", "XW1": "A", "XX1": "K", "XYC": "A",
-    "XA6": "F", "11Q": "P", "11W": "E", "12L": "P", "12X": "P", "12Y": "P",
-    "143": "C", "1AC": "A", "1L1": "A", "1OP": "Y", "1PA": "F", "1PI": "A",
-    "1TQ": "W", "1TY": "Y", "1X6": "S", "56A": "H", "5AB": "A", "5CS": "C",
-    "5CW": "W", "5HP": "E", "5OH": "A", "5PG": "G", "51T": "Y", "54C": "W",
-    "5CR": "F", "5CT": "K", "5FQ": "A", "5GM": "I", "5JP": "S", "5T3": "K",
-    "5MW": "K", "5OW": "K", "5R5": "S", "5VV": "N", "5XU": "A", "55I": "F",
-    "999": "D", "9DN": "N", "9NE": "E", "9NF": "F", "9NR": "R", "9NV": "V",
-    "9E7": "K", "9KP": "K", "9WV": "A", "9TR": "K", "9TU": "K", "9TX": "K",
-    "9U0": "K", "9IJ": "F", "B1F": "F", "B27": "T", "B2A": "A", "B2F": "F",
-    "B2I": "I", "B2V": "V", "B3A": "A", "B3D": "D", "B3E": "E", "B3K": "K",
-    "B3U": "H", "B3X": "N", "B3Y": "Y", "BB6": "C", "BB7": "C", "BB8": "F",
-    "BB9": "C", "BBC": "C", "BCS": "C", "BCX": "C", "BFD": "D", "BG1": "S",
-    "BH2": "D", "BHD": "D", "BIF": "F", "BIU": "I", "BL2": "L", "BLE": "L",
-    "BLY": "K", "BMT": "T", "BNN": "F", "BOR": "R", "BP5": "A", "BPE": "C",
-    "BSE": "S", "BTA": "L", "BTC": "C", "BTK": "K", "BTR": "W", "BUC": "C",
-    "BUG": "V", "BYR": "Y", "BWV": "R", "BWB": "S", "BXT": "S", "F2F": "F",
-    "F2Y": "Y", "FAK": "K", "FB5": "A", "FB6": "A", "FC0": "F", "FCL": "F",
-    "FDL": "K", "FFM": "C", "FGL": "G", "FGP": "S", "FH7": "K", "FHL": "K",
-    "FHO": "K", "FIO": "R", "FLA": "A", "FLE": "L", "FLT": "Y", "FME": "M",
-    "FOE": "C", "FP9": "P", "FPK": "P", "FT6": "W", "FTR": "W", "FTY": "Y",
-    "FVA": "V", "FZN": "K", "FY3": "Y", "F7W": "W", "FY2": "Y", "FQA": "K",
-    "F7Q": "Y", "FF9": "K", "FL6": "D", "JJJ": "C", "JJK": "C", "JJL": "C",
-    "JLP": "K", "J3D": "C", "J9Y": "R", "J8W": "S", "JKH": "P", "N10": "S",
-    "N7P": "P", "NA8": "A", "NAL": "A", "NAM": "A", "NBQ": "Y", "NC1": "S",
-    "NCB": "A", "NEM": "H", "NEP": "H", "NFA": "F", "NIY": "Y", "NLB": "L",
-    "NLE": "L", "NLN": "L", "NLO": "L", "NLP": "L", "NLQ": "Q", "NLY": "G",
-    "NMC": "G", "NMM": "R", "NNH": "R", "NOT": "L", "NPH": "C", "NPI": "A",
-    "NTR": "Y", "NTY": "Y", "NVA": "V", "NWD": "A", "NYB": "C", "NYS": "C",
-    "NZH": "H", "N80": "P", "NZC": "T", "NLW": "L", "N0A": "F", "N9P": "A",
-    "N65": "K", "R1A": "C", "R4K": "W", "RE0": "W", "RE3": "W", "RGL": "R",
-    "RGP": "E", "RT0": "P", "RVX": "S", "RZ4": "S", "RPI": "R", "RVJ": "A",
-    "VAD": "V", "VAF": "V", "VAH": "V", "VAI": "V", "VAL": "V", "VB1": "K",
-    "VH0": "P", "VR0": "R", "V44": "C", "V61": "F", "VPV": "K", "V5N": "H",
-    "V7T": "K", "Z01": "A", "Z3E": "T", "Z70": "H", "ZBZ": "C", "ZCL": "F",
-    "ZU0": "T", "ZYJ": "P", "ZYK": "P", "ZZD": "C", "ZZJ": "A", "ZIQ": "W",
-    "ZPO": "P", "ZDJ": "Y", "ZT1": "K", "30V": "C", "31Q": "C", "33S": "F",
-    "33W": "A", "34E": "V", "3AH": "H", "3BY": "P", "3CF": "F", "3CT": "Y",
-    "3GA": "A", "3GL": "E", "3MD": "D", "3MY": "Y", "3NF": "Y", "3O3": "E",
-    "3PX": "P", "3QN": "K", "3TT": "P", "3XH": "G", "3YM": "Y", "3WS": "A",
-    "3WX": "P", "3X9": "C", "3ZH": "H", "7JA": "I", "73C": "S", "73N": "R",
-    "73O": "Y", "73P": "K", "74P": "K", "7N8": "F", "7O5": "A", "7XC": "F",
-    "7ID": "D", "7OZ": "A", "C1S": "C", "C1T": "C", "C1X": "K", "C22": "A",
-    "C3Y": "C", "C4R": "C", "C5C": "C", "C6C": "C", "CAF": "C", "CAS": "C",
-    "CAY": "C", "CCS": "C", "CEA": "C", "CGA": "E", "CGU": "E", "CGV": "C",
-    "CHP": "G", "CIR": "R", "CLE": "L", "CLG": "K", "CLH": "K", "CME": "C",
-    "CMH": "C", "CML": "C", "CMT": "C", "CR5": "G", "CS0": "C", "CS1": "C",
-    "CS3": "C", "CS4": "C", "CSA": "C", "CSB": "C", "CSD": "C", "CSE": "C",
-    "CSJ": "C", "CSO": "C", "CSP": "C", "CSR": "C", "CSS": "C", "CSU": "C",
-    "CSW": "C", "CSX": "C", "CSZ": "C", "CTE": "W", "CTH": "T", "CWD": "A",
-    "CWR": "S", "CXM": "M", "CY0": "C", "CY1": "C", "CY3": "C", "CY4": "C",
-    "CYA": "C", "CYD": "C", "CYF": "C", "CYG": "C", "CYJ": "K", "CYM": "C",
-    "CYQ": "C", "CYR": "C", "CYS": "C", "CYW": "C", "CZ2": "C", "CZZ": "C",
-    "CG6": "C", "C1J": "R", "C4G": "R", "C67": "R", "C6D": "R", "CE7": "N",
-    "CZS": "A", "G01": "E", "G8M": "E", "GAU": "E", "GEE": "G", "GFT": "S",
-    "GHC": "E", "GHG": "Q", "GHW": "E", "GL3": "G", "GLH": "Q", "GLJ": "E",
-    "GLK": "E", "GLN": "Q", "GLQ": "E", "GLU": "E", "GLY": "G", "GLZ": "G",
-    "GMA": "E", "GME": "E", "GNC": "Q", "GPL": "K", "GSC": "G", "GSU": "E",
-    "GT9": "C", "GVL": "S", "G3M": "R", "G5G": "L", "G1X": "Y", "G8X": "P",
-    "K1R": "C", "KBE": "K", "KCX": "K", "KFP": "K", "KGC": "K", "KNB": "A",
-    "KOR": "M", "KPI": "K", "KPY": "K", "KST": "K", "KYN": "W", "KYQ": "K",
-    "KCR": "K", "KPF": "K", "K5L": "S", "KEO": "K", "KHB": "K", "KKD": "D",
-    "K5H": "C", "K7K": "S", "OAR": "R", "OAS": "S", "OBS": "K", "OCS": "C",
-    "OCY": "C", "OHI": "H", "OHS": "D", "OLD": "H", "OLT": "T", "OLZ": "S",
-    "OMH": "S", "OMT": "M", "OMX": "Y", "OMY": "Y", "ONH": "A", "ORN": "A",
-    "ORQ": "R", "OSE": "S", "OTH": "T", "OXX": "D", "OYL": "H", "O7A": "T",
-    "O7D": "W", "O7G": "V", "O2E": "S", "O6H": "W", "OZW": "F", "S12": "S",
-    "S1H": "S", "S2C": "C", "S2P": "A", "SAC": "S", "SAH": "C", "SAR": "G",
-    "SBG": "S", "SBL": "S", "SCH": "C", "SCS": "C", "SCY": "C", "SD4": "N",
-    "SDB": "S", "SDP": "S", "SEB": "S", "SEE": "S", "SEG": "A", "SEL": "S",
-    "SEM": "S", "SEN": "S", "SEP": "S", "SER": "S", "SET": "S", "SGB": "S",
-    "SHC": "C", "SHP": "G", "SHR": "K", "SIB": "C", "SLL": "K", "SLZ": "K",
-    "SMC": "C", "SME": "M", "SMF": "F", "SNC": "C", "SNN": "N", "SOY": "S",
-    "SRZ": "S", "STY": "Y", "SUN": "S", "SVA": "S", "SVV": "S", "SVW": "S",
-    "SVX": "S", "SVY": "S", "SVZ": "S", "SXE": "S", "SKH": "K", "SNM": "S",
-    "SNK": "H", "SWW": "S", "WFP": "F", "WLU": "L", "WPA": "F", "WRP": "W",
-    "WVL": "V", "02K": "A", "02L": "N", "02O": "A", "02Y": "A", "033": "V",
-    "037": "P", "03Y": "C", "04U": "P", "04V": "P", "05N": "P", "07O": "C",
-    "0A0": "D", "0A1": "Y", "0A2": "K", "0A8": "C", "0A9": "F", "0AA": "V",
-    "0AB": "V", "0AC": "G", "0AF": "W", "0AG": "L", "0AH": "S", "0AK": "D",
-    "0AR": "R", "0BN": "F", "0CS": "A", "0E5": "T", "0EA": "Y", "0FL": "A",
-    "0LF": "P", "0NC": "A", "0PR": "Y", "0QL": "C", "0TD": "D", "0UO": "W",
-    "0WZ": "Y", "0X9": "R", "0Y8": "P", "4AF": "F", "4AR": "R", "4AW": "W",
-    "4BF": "F", "4CF": "F", "4CY": "M", "4DP": "W", "4FB": "P", "4FW": "W",
-    "4HL": "Y", "4HT": "W", "4IN": "W", "4MM": "M", "4PH": "F", "4U7": "A",
-    "41H": "F", "41Q": "N", "42Y": "S", "432": "S", "45F": "P", "4AK": "K",
-    "4D4": "R", "4GJ": "C", "4KY": "P", "4L0": "P", "4LZ": "Y", "4N7": "P",
-    "4N8": "P", "4N9": "P", "4OG": "W", "4OU": "F", "4OV": "S", "4OZ": "S",
-    "4PQ": "W", "4SJ": "F", "4WQ": "A", "4HH": "S", "4HJ": "S", "4J4": "C",
-    "4J5": "R", "4II": "F", "4VI": "R", "823": "N", "8SP": "S", "8AY": "A",
-}
\ No newline at end of file
+    "A5N": "N",
+    "A8E": "V",
+    "A9D": "S",
+    "AA3": "A",
+    "AA4": "A",
+    "AAR": "R",
+    "ABA": "A",
+    "ACL": "R",
+    "AEA": "C",
+    "AEI": "D",
+    "AFA": "N",
+    "AGM": "R",
+    "AGQ": "Y",
+    "AGT": "C",
+    "AHB": "N",
+    "AHL": "R",
+    "AHO": "A",
+    "AHP": "A",
+    "AIB": "A",
+    "AKL": "D",
+    "AKZ": "D",
+    "ALA": "A",
+    "ALC": "A",
+    "ALM": "A",
+    "ALN": "A",
+    "ALO": "T",
+    "ALS": "A",
+    "ALT": "A",
+    "ALV": "A",
+    "ALY": "K",
+    "AME": "M",
+    "AN6": "L",
+    "AN8": "A",
+    "API": "K",
+    "APK": "K",
+    "AR2": "R",
+    "AR4": "E",
+    "AR7": "R",
+    "ARG": "R",
+    "ARM": "R",
+    "ARO": "R",
+    "AS7": "N",
+    "ASA": "D",
+    "ASB": "D",
+    "ASI": "D",
+    "ASK": "D",
+    "ASL": "D",
+    "ASN": "N",
+    "ASP": "D",
+    "ASQ": "D",
+    "AYA": "A",
+    "AZH": "A",
+    "AZK": "K",
+    "AZS": "S",
+    "AZY": "Y",
+    "AVJ": "H",
+    "A30": "Y",
+    "A3U": "F",
+    "ECC": "Q",
+    "ECX": "C",
+    "EFC": "C",
+    "EHP": "F",
+    "ELY": "K",
+    "EME": "E",
+    "EPM": "M",
+    "EPQ": "Q",
+    "ESB": "Y",
+    "ESC": "M",
+    "EXY": "L",
+    "EXA": "K",
+    "E0Y": "P",
+    "E9V": "H",
+    "E9M": "W",
+    "EJA": "C",
+    "EUP": "T",
+    "EZY": "G",
+    "E9C": "Y",
+    "EW6": "S",
+    "EXL": "W",
+    "I2M": "I",
+    "I4G": "G",
+    "I58": "K",
+    "IAM": "A",
+    "IAR": "R",
+    "ICY": "C",
+    "IEL": "K",
+    "IGL": "G",
+    "IIL": "I",
+    "ILE": "I",
+    "ILG": "E",
+    "ILM": "I",
+    "ILX": "I",
+    "ILY": "K",
+    "IML": "I",
+    "IOR": "R",
+    "IPG": "G",
+    "IT1": "K",
+    "IYR": "Y",
+    "IZO": "M",
+    "IC0": "G",
+    "M0H": "C",
+    "M2L": "K",
+    "M2S": "M",
+    "M30": "G",
+    "M3L": "K",
+    "M3R": "K",
+    "MA ": "A",
+    "MAA": "A",
+    "MAI": "R",
+    "MBQ": "Y",
+    "MC1": "S",
+    "MCL": "K",
+    "MCS": "C",
+    "MD3": "C",
+    "MD5": "C",
+    "MD6": "G",
+    "MDF": "Y",
+    "ME0": "M",
+    "MEA": "F",
+    "MEG": "E",
+    "MEN": "N",
+    "MEQ": "Q",
+    "MET": "M",
+    "MEU": "G",
+    "MFN": "E",
+    "MGG": "R",
+    "MGN": "Q",
+    "MGY": "G",
+    "MH1": "H",
+    "MH6": "S",
+    "MHL": "L",
+    "MHO": "M",
+    "MHS": "H",
+    "MHU": "F",
+    "MIR": "S",
+    "MIS": "S",
+    "MK8": "L",
+    "ML3": "K",
+    "MLE": "L",
+    "MLL": "L",
+    "MLY": "K",
+    "MLZ": "K",
+    "MME": "M",
+    "MMO": "R",
+    "MNL": "L",
+    "MNV": "V",
+    "MP8": "P",
+    "MPQ": "G",
+    "MSA": "G",
+    "MSE": "M",
+    "MSL": "M",
+    "MSO": "M",
+    "MT2": "M",
+    "MTY": "Y",
+    "MVA": "V",
+    "MYK": "K",
+    "MYN": "R",
+    "QCS": "C",
+    "QIL": "I",
+    "QMM": "Q",
+    "QPA": "C",
+    "QPH": "F",
+    "Q3P": "K",
+    "QVA": "C",
+    "QX7": "A",
+    "Q2E": "W",
+    "Q75": "M",
+    "Q78": "F",
+    "QM8": "L",
+    "QMB": "A",
+    "QNQ": "C",
+    "QNT": "C",
+    "QNW": "C",
+    "QO2": "C",
+    "QO5": "C",
+    "QO8": "C",
+    "QQ8": "Q",
+    "U2X": "Y",
+    "U3X": "F",
+    "UF0": "S",
+    "UGY": "G",
+    "UM1": "A",
+    "UM2": "A",
+    "UMA": "A",
+    "UQK": "A",
+    "UX8": "W",
+    "UXQ": "F",
+    "YCM": "C",
+    "YOF": "Y",
+    "YPR": "P",
+    "YPZ": "Y",
+    "YTH": "T",
+    "Y1V": "L",
+    "Y57": "K",
+    "YHA": "K",
+    "200": "F",
+    "23F": "F",
+    "23P": "A",
+    "26B": "T",
+    "28X": "T",
+    "2AG": "A",
+    "2CO": "C",
+    "2FM": "M",
+    "2GX": "F",
+    "2HF": "H",
+    "2JG": "S",
+    "2KK": "K",
+    "2KP": "K",
+    "2LT": "Y",
+    "2LU": "L",
+    "2ML": "L",
+    "2MR": "R",
+    "2MT": "P",
+    "2OR": "R",
+    "2P0": "P",
+    "2QZ": "T",
+    "2R3": "Y",
+    "2RA": "A",
+    "2RX": "S",
+    "2SO": "H",
+    "2TY": "Y",
+    "2VA": "V",
+    "2XA": "C",
+    "2ZC": "S",
+    "6CL": "K",
+    "6CW": "W",
+    "6GL": "A",
+    "6HN": "K",
+    "60F": "C",
+    "66D": "I",
+    "6CV": "A",
+    "6M6": "C",
+    "6V1": "C",
+    "6WK": "C",
+    "6Y9": "P",
+    "6DN": "K",
+    "DA2": "R",
+    "DAB": "A",
+    "DAH": "F",
+    "DBS": "S",
+    "DBU": "T",
+    "DBY": "Y",
+    "DBZ": "A",
+    "DC2": "C",
+    "DDE": "H",
+    "DDZ": "A",
+    "DI7": "Y",
+    "DHA": "S",
+    "DHN": "V",
+    "DIR": "R",
+    "DLS": "K",
+    "DM0": "K",
+    "DMH": "N",
+    "DMK": "D",
+    "DNL": "K",
+    "DNP": "A",
+    "DNS": "K",
+    "DNW": "A",
+    "DOH": "D",
+    "DON": "L",
+    "DP1": "R",
+    "DPL": "P",
+    "DPP": "A",
+    "DPQ": "Y",
+    "DYS": "C",
+    "D2T": "D",
+    "DYA": "D",
+    "DJD": "F",
+    "DYJ": "P",
+    "DV9": "E",
+    "H14": "F",
+    "H1D": "M",
+    "H5M": "P",
+    "HAC": "A",
+    "HAR": "R",
+    "HBN": "H",
+    "HCM": "C",
+    "HGY": "G",
+    "HHI": "H",
+    "HIA": "H",
+    "HIC": "H",
+    "HIP": "H",
+    "HIQ": "H",
+    "HIS": "H",
+    "HL2": "L",
+    "HLU": "L",
+    "HMR": "R",
+    "HNC": "C",
+    "HOX": "F",
+    "HPC": "F",
+    "HPE": "F",
+    "HPH": "F",
+    "HPQ": "F",
+    "HQA": "A",
+    "HR7": "R",
+    "HRG": "R",
+    "HRP": "W",
+    "HS8": "H",
+    "HS9": "H",
+    "HSE": "S",
+    "HSK": "H",
+    "HSL": "S",
+    "HSO": "H",
+    "HT7": "W",
+    "HTI": "C",
+    "HTR": "W",
+    "HV5": "A",
+    "HVA": "V",
+    "HY3": "P",
+    "HYI": "M",
+    "HYP": "P",
+    "HZP": "P",
+    "HIX": "A",
+    "HSV": "H",
+    "HLY": "K",
+    "HOO": "H",
+    "H7V": "A",
+    "L5P": "K",
+    "LRK": "K",
+    "L3O": "L",
+    "LA2": "K",
+    "LAA": "D",
+    "LAL": "A",
+    "LBY": "K",
+    "LCK": "K",
+    "LCX": "K",
+    "LDH": "K",
+    "LE1": "V",
+    "LED": "L",
+    "LEF": "L",
+    "LEH": "L",
+    "LEM": "L",
+    "LEN": "L",
+    "LET": "K",
+    "LEU": "L",
+    "LEX": "L",
+    "LGY": "K",
+    "LLO": "K",
+    "LLP": "K",
+    "LLY": "K",
+    "LLZ": "K",
+    "LME": "E",
+    "LMF": "K",
+    "LMQ": "Q",
+    "LNE": "L",
+    "LNM": "L",
+    "LP6": "K",
+    "LPD": "P",
+    "LPG": "G",
+    "LPS": "S",
+    "LSO": "K",
+    "LTR": "W",
+    "LVG": "G",
+    "LVN": "V",
+    "LWY": "P",
+    "LYF": "K",
+    "LYK": "K",
+    "LYM": "K",
+    "LYN": "K",
+    "LYO": "K",
+    "LYP": "K",
+    "LYR": "K",
+    "LYS": "K",
+    "LYU": "K",
+    "LYX": "K",
+    "LYZ": "K",
+    "LAY": "L",
+    "LWI": "F",
+    "LBZ": "K",
+    "P1L": "C",
+    "P2Q": "Y",
+    "P2Y": "P",
+    "P3Q": "Y",
+    "PAQ": "Y",
+    "PAS": "D",
+    "PAT": "W",
+    "PBB": "C",
+    "PBF": "F",
+    "PCA": "Q",
+    "PCC": "P",
+    "PCS": "F",
+    "PE1": "K",
+    "PEC": "C",
+    "PF5": "F",
+    "PFF": "F",
+    "PG1": "S",
+    "PGY": "G",
+    "PHA": "F",
+    "PHD": "D",
+    "PHE": "F",
+    "PHI": "F",
+    "PHL": "F",
+    "PHM": "F",
+    "PKR": "P",
+    "PLJ": "P",
+    "PM3": "F",
+    "POM": "P",
+    "PPN": "F",
+    "PR3": "C",
+    "PR4": "P",
+    "PR7": "P",
+    "PR9": "P",
+    "PRJ": "P",
+    "PRK": "K",
+    "PRO": "P",
+    "PRS": "P",
+    "PRV": "G",
+    "PSA": "F",
+    "PSH": "H",
+    "PTH": "Y",
+    "PTM": "Y",
+    "PTR": "Y",
+    "PVH": "H",
+    "PXU": "P",
+    "PYA": "A",
+    "PYH": "K",
+    "PYX": "C",
+    "PH6": "P",
+    "P9S": "C",
+    "P5U": "S",
+    "POK": "R",
+    "T0I": "Y",
+    "T11": "F",
+    "TAV": "D",
+    "TBG": "V",
+    "TBM": "T",
+    "TCQ": "Y",
+    "TCR": "W",
+    "TEF": "F",
+    "TFQ": "F",
+    "TH5": "T",
+    "TH6": "T",
+    "THC": "T",
+    "THR": "T",
+    "THZ": "R",
+    "TIH": "A",
+    "TIS": "S",
+    "TLY": "K",
+    "TMB": "T",
+    "TMD": "T",
+    "TNB": "C",
+    "TNR": "S",
+    "TNY": "T",
+    "TOQ": "W",
+    "TOX": "W",
+    "TPJ": "P",
+    "TPK": "P",
+    "TPL": "W",
+    "TPO": "T",
+    "TPQ": "Y",
+    "TQI": "W",
+    "TQQ": "W",
+    "TQZ": "C",
+    "TRF": "W",
+    "TRG": "K",
+    "TRN": "W",
+    "TRO": "W",
+    "TRP": "W",
+    "TRQ": "W",
+    "TRW": "W",
+    "TRX": "W",
+    "TRY": "W",
+    "TS9": "I",
+    "TSY": "C",
+    "TTQ": "W",
+    "TTS": "Y",
+    "TXY": "Y",
+    "TY1": "Y",
+    "TY2": "Y",
+    "TY3": "Y",
+    "TY5": "Y",
+    "TY8": "Y",
+    "TY9": "Y",
+    "TYB": "Y",
+    "TYC": "Y",
+    "TYE": "Y",
+    "TYI": "Y",
+    "TYJ": "Y",
+    "TYN": "Y",
+    "TYO": "Y",
+    "TYQ": "Y",
+    "TYR": "Y",
+    "TYS": "Y",
+    "TYT": "Y",
+    "TYW": "Y",
+    "TYY": "Y",
+    "T8L": "T",
+    "T9E": "T",
+    "TNQ": "W",
+    "TSQ": "F",
+    "TGH": "W",
+    "X2W": "E",
+    "XCN": "C",
+    "XPR": "P",
+    "XSN": "N",
+    "XW1": "A",
+    "XX1": "K",
+    "XYC": "A",
+    "XA6": "F",
+    "11Q": "P",
+    "11W": "E",
+    "12L": "P",
+    "12X": "P",
+    "12Y": "P",
+    "143": "C",
+    "1AC": "A",
+    "1L1": "A",
+    "1OP": "Y",
+    "1PA": "F",
+    "1PI": "A",
+    "1TQ": "W",
+    "1TY": "Y",
+    "1X6": "S",
+    "56A": "H",
+    "5AB": "A",
+    "5CS": "C",
+    "5CW": "W",
+    "5HP": "E",
+    "5OH": "A",
+    "5PG": "G",
+    "51T": "Y",
+    "54C": "W",
+    "5CR": "F",
+    "5CT": "K",
+    "5FQ": "A",
+    "5GM": "I",
+    "5JP": "S",
+    "5T3": "K",
+    "5MW": "K",
+    "5OW": "K",
+    "5R5": "S",
+    "5VV": "N",
+    "5XU": "A",
+    "55I": "F",
+    "999": "D",
+    "9DN": "N",
+    "9NE": "E",
+    "9NF": "F",
+    "9NR": "R",
+    "9NV": "V",
+    "9E7": "K",
+    "9KP": "K",
+    "9WV": "A",
+    "9TR": "K",
+    "9TU": "K",
+    "9TX": "K",
+    "9U0": "K",
+    "9IJ": "F",
+    "B1F": "F",
+    "B27": "T",
+    "B2A": "A",
+    "B2F": "F",
+    "B2I": "I",
+    "B2V": "V",
+    "B3A": "A",
+    "B3D": "D",
+    "B3E": "E",
+    "B3K": "K",
+    "B3U": "H",
+    "B3X": "N",
+    "B3Y": "Y",
+    "BB6": "C",
+    "BB7": "C",
+    "BB8": "F",
+    "BB9": "C",
+    "BBC": "C",
+    "BCS": "C",
+    "BCX": "C",
+    "BFD": "D",
+    "BG1": "S",
+    "BH2": "D",
+    "BHD": "D",
+    "BIF": "F",
+    "BIU": "I",
+    "BL2": "L",
+    "BLE": "L",
+    "BLY": "K",
+    "BMT": "T",
+    "BNN": "F",
+    "BOR": "R",
+    "BP5": "A",
+    "BPE": "C",
+    "BSE": "S",
+    "BTA": "L",
+    "BTC": "C",
+    "BTK": "K",
+    "BTR": "W",
+    "BUC": "C",
+    "BUG": "V",
+    "BYR": "Y",
+    "BWV": "R",
+    "BWB": "S",
+    "BXT": "S",
+    "F2F": "F",
+    "F2Y": "Y",
+    "FAK": "K",
+    "FB5": "A",
+    "FB6": "A",
+    "FC0": "F",
+    "FCL": "F",
+    "FDL": "K",
+    "FFM": "C",
+    "FGL": "G",
+    "FGP": "S",
+    "FH7": "K",
+    "FHL": "K",
+    "FHO": "K",
+    "FIO": "R",
+    "FLA": "A",
+    "FLE": "L",
+    "FLT": "Y",
+    "FME": "M",
+    "FOE": "C",
+    "FP9": "P",
+    "FPK": "P",
+    "FT6": "W",
+    "FTR": "W",
+    "FTY": "Y",
+    "FVA": "V",
+    "FZN": "K",
+    "FY3": "Y",
+    "F7W": "W",
+    "FY2": "Y",
+    "FQA": "K",
+    "F7Q": "Y",
+    "FF9": "K",
+    "FL6": "D",
+    "JJJ": "C",
+    "JJK": "C",
+    "JJL": "C",
+    "JLP": "K",
+    "J3D": "C",
+    "J9Y": "R",
+    "J8W": "S",
+    "JKH": "P",
+    "N10": "S",
+    "N7P": "P",
+    "NA8": "A",
+    "NAL": "A",
+    "NAM": "A",
+    "NBQ": "Y",
+    "NC1": "S",
+    "NCB": "A",
+    "NEM": "H",
+    "NEP": "H",
+    "NFA": "F",
+    "NIY": "Y",
+    "NLB": "L",
+    "NLE": "L",
+    "NLN": "L",
+    "NLO": "L",
+    "NLP": "L",
+    "NLQ": "Q",
+    "NLY": "G",
+    "NMC": "G",
+    "NMM": "R",
+    "NNH": "R",
+    "NOT": "L",
+    "NPH": "C",
+    "NPI": "A",
+    "NTR": "Y",
+    "NTY": "Y",
+    "NVA": "V",
+    "NWD": "A",
+    "NYB": "C",
+    "NYS": "C",
+    "NZH": "H",
+    "N80": "P",
+    "NZC": "T",
+    "NLW": "L",
+    "N0A": "F",
+    "N9P": "A",
+    "N65": "K",
+    "R1A": "C",
+    "R4K": "W",
+    "RE0": "W",
+    "RE3": "W",
+    "RGL": "R",
+    "RGP": "E",
+    "RT0": "P",
+    "RVX": "S",
+    "RZ4": "S",
+    "RPI": "R",
+    "RVJ": "A",
+    "VAD": "V",
+    "VAF": "V",
+    "VAH": "V",
+    "VAI": "V",
+    "VAL": "V",
+    "VB1": "K",
+    "VH0": "P",
+    "VR0": "R",
+    "V44": "C",
+    "V61": "F",
+    "VPV": "K",
+    "V5N": "H",
+    "V7T": "K",
+    "Z01": "A",
+    "Z3E": "T",
+    "Z70": "H",
+    "ZBZ": "C",
+    "ZCL": "F",
+    "ZU0": "T",
+    "ZYJ": "P",
+    "ZYK": "P",
+    "ZZD": "C",
+    "ZZJ": "A",
+    "ZIQ": "W",
+    "ZPO": "P",
+    "ZDJ": "Y",
+    "ZT1": "K",
+    "30V": "C",
+    "31Q": "C",
+    "33S": "F",
+    "33W": "A",
+    "34E": "V",
+    "3AH": "H",
+    "3BY": "P",
+    "3CF": "F",
+    "3CT": "Y",
+    "3GA": "A",
+    "3GL": "E",
+    "3MD": "D",
+    "3MY": "Y",
+    "3NF": "Y",
+    "3O3": "E",
+    "3PX": "P",
+    "3QN": "K",
+    "3TT": "P",
+    "3XH": "G",
+    "3YM": "Y",
+    "3WS": "A",
+    "3WX": "P",
+    "3X9": "C",
+    "3ZH": "H",
+    "7JA": "I",
+    "73C": "S",
+    "73N": "R",
+    "73O": "Y",
+    "73P": "K",
+    "74P": "K",
+    "7N8": "F",
+    "7O5": "A",
+    "7XC": "F",
+    "7ID": "D",
+    "7OZ": "A",
+    "C1S": "C",
+    "C1T": "C",
+    "C1X": "K",
+    "C22": "A",
+    "C3Y": "C",
+    "C4R": "C",
+    "C5C": "C",
+    "C6C": "C",
+    "CAF": "C",
+    "CAS": "C",
+    "CAY": "C",
+    "CCS": "C",
+    "CEA": "C",
+    "CGA": "E",
+    "CGU": "E",
+    "CGV": "C",
+    "CHP": "G",
+    "CIR": "R",
+    "CLE": "L",
+    "CLG": "K",
+    "CLH": "K",
+    "CME": "C",
+    "CMH": "C",
+    "CML": "C",
+    "CMT": "C",
+    "CR5": "G",
+    "CS0": "C",
+    "CS1": "C",
+    "CS3": "C",
+    "CS4": "C",
+    "CSA": "C",
+    "CSB": "C",
+    "CSD": "C",
+    "CSE": "C",
+    "CSJ": "C",
+    "CSO": "C",
+    "CSP": "C",
+    "CSR": "C",
+    "CSS": "C",
+    "CSU": "C",
+    "CSW": "C",
+    "CSX": "C",
+    "CSZ": "C",
+    "CTE": "W",
+    "CTH": "T",
+    "CWD": "A",
+    "CWR": "S",
+    "CXM": "M",
+    "CY0": "C",
+    "CY1": "C",
+    "CY3": "C",
+    "CY4": "C",
+    "CYA": "C",
+    "CYD": "C",
+    "CYF": "C",
+    "CYG": "C",
+    "CYJ": "K",
+    "CYM": "C",
+    "CYQ": "C",
+    "CYR": "C",
+    "CYS": "C",
+    "CYW": "C",
+    "CZ2": "C",
+    "CZZ": "C",
+    "CG6": "C",
+    "C1J": "R",
+    "C4G": "R",
+    "C67": "R",
+    "C6D": "R",
+    "CE7": "N",
+    "CZS": "A",
+    "G01": "E",
+    "G8M": "E",
+    "GAU": "E",
+    "GEE": "G",
+    "GFT": "S",
+    "GHC": "E",
+    "GHG": "Q",
+    "GHW": "E",
+    "GL3": "G",
+    "GLH": "Q",
+    "GLJ": "E",
+    "GLK": "E",
+    "GLN": "Q",
+    "GLQ": "E",
+    "GLU": "E",
+    "GLY": "G",
+    "GLZ": "G",
+    "GMA": "E",
+    "GME": "E",
+    "GNC": "Q",
+    "GPL": "K",
+    "GSC": "G",
+    "GSU": "E",
+    "GT9": "C",
+    "GVL": "S",
+    "G3M": "R",
+    "G5G": "L",
+    "G1X": "Y",
+    "G8X": "P",
+    "K1R": "C",
+    "KBE": "K",
+    "KCX": "K",
+    "KFP": "K",
+    "KGC": "K",
+    "KNB": "A",
+    "KOR": "M",
+    "KPI": "K",
+    "KPY": "K",
+    "KST": "K",
+    "KYN": "W",
+    "KYQ": "K",
+    "KCR": "K",
+    "KPF": "K",
+    "K5L": "S",
+    "KEO": "K",
+    "KHB": "K",
+    "KKD": "D",
+    "K5H": "C",
+    "K7K": "S",
+    "OAR": "R",
+    "OAS": "S",
+    "OBS": "K",
+    "OCS": "C",
+    "OCY": "C",
+    "OHI": "H",
+    "OHS": "D",
+    "OLD": "H",
+    "OLT": "T",
+    "OLZ": "S",
+    "OMH": "S",
+    "OMT": "M",
+    "OMX": "Y",
+    "OMY": "Y",
+    "ONH": "A",
+    "ORN": "A",
+    "ORQ": "R",
+    "OSE": "S",
+    "OTH": "T",
+    "OXX": "D",
+    "OYL": "H",
+    "O7A": "T",
+    "O7D": "W",
+    "O7G": "V",
+    "O2E": "S",
+    "O6H": "W",
+    "OZW": "F",
+    "S12": "S",
+    "S1H": "S",
+    "S2C": "C",
+    "S2P": "A",
+    "SAC": "S",
+    "SAH": "C",
+    "SAR": "G",
+    "SBG": "S",
+    "SBL": "S",
+    "SCH": "C",
+    "SCS": "C",
+    "SCY": "C",
+    "SD4": "N",
+    "SDB": "S",
+    "SDP": "S",
+    "SEB": "S",
+    "SEE": "S",
+    "SEG": "A",
+    "SEL": "S",
+    "SEM": "S",
+    "SEN": "S",
+    "SEP": "S",
+    "SER": "S",
+    "SET": "S",
+    "SGB": "S",
+    "SHC": "C",
+    "SHP": "G",
+    "SHR": "K",
+    "SIB": "C",
+    "SLL": "K",
+    "SLZ": "K",
+    "SMC": "C",
+    "SME": "M",
+    "SMF": "F",
+    "SNC": "C",
+    "SNN": "N",
+    "SOY": "S",
+    "SRZ": "S",
+    "STY": "Y",
+    "SUN": "S",
+    "SVA": "S",
+    "SVV": "S",
+    "SVW": "S",
+    "SVX": "S",
+    "SVY": "S",
+    "SVZ": "S",
+    "SXE": "S",
+    "SKH": "K",
+    "SNM": "S",
+    "SNK": "H",
+    "SWW": "S",
+    "WFP": "F",
+    "WLU": "L",
+    "WPA": "F",
+    "WRP": "W",
+    "WVL": "V",
+    "02K": "A",
+    "02L": "N",
+    "02O": "A",
+    "02Y": "A",
+    "033": "V",
+    "037": "P",
+    "03Y": "C",
+    "04U": "P",
+    "04V": "P",
+    "05N": "P",
+    "07O": "C",
+    "0A0": "D",
+    "0A1": "Y",
+    "0A2": "K",
+    "0A8": "C",
+    "0A9": "F",
+    "0AA": "V",
+    "0AB": "V",
+    "0AC": "G",
+    "0AF": "W",
+    "0AG": "L",
+    "0AH": "S",
+    "0AK": "D",
+    "0AR": "R",
+    "0BN": "F",
+    "0CS": "A",
+    "0E5": "T",
+    "0EA": "Y",
+    "0FL": "A",
+    "0LF": "P",
+    "0NC": "A",
+    "0PR": "Y",
+    "0QL": "C",
+    "0TD": "D",
+    "0UO": "W",
+    "0WZ": "Y",
+    "0X9": "R",
+    "0Y8": "P",
+    "4AF": "F",
+    "4AR": "R",
+    "4AW": "W",
+    "4BF": "F",
+    "4CF": "F",
+    "4CY": "M",
+    "4DP": "W",
+    "4FB": "P",
+    "4FW": "W",
+    "4HL": "Y",
+    "4HT": "W",
+    "4IN": "W",
+    "4MM": "M",
+    "4PH": "F",
+    "4U7": "A",
+    "41H": "F",
+    "41Q": "N",
+    "42Y": "S",
+    "432": "S",
+    "45F": "P",
+    "4AK": "K",
+    "4D4": "R",
+    "4GJ": "C",
+    "4KY": "P",
+    "4L0": "P",
+    "4LZ": "Y",
+    "4N7": "P",
+    "4N8": "P",
+    "4N9": "P",
+    "4OG": "W",
+    "4OU": "F",
+    "4OV": "S",
+    "4OZ": "S",
+    "4PQ": "W",
+    "4SJ": "F",
+    "4WQ": "A",
+    "4HH": "S",
+    "4HJ": "S",
+    "4J4": "C",
+    "4J5": "R",
+    "4II": "F",
+    "4VI": "R",
+    "823": "N",
+    "8SP": "S",
+    "8AY": "A",
+}
diff --git a/biopandas/pdb/engines.py b/biopandas/pdb/engines.py
index 6f14b8e..886b459 100644
--- a/biopandas/pdb/engines.py
+++ b/biopandas/pdb/engines.py
@@ -62,32 +62,72 @@
 }
 
 pdb_atomdict = [
-    {"id": "record_name", "line": [0, 6], "type": str, "strf": lambda x: "%-6s" % x},
+    {
+        "id": "record_name",
+        "line": [0, 6],
+        "type": str,
+        "strf": lambda x: "%-6s" % x,
+    },
     {
         "id": "atom_number",
         "line": [6, 11],
         "type": int,
         "strf": lambda x: "%+5s" % str(x),
     },
-    {"id": "blank_1", "line": [11, 12], "type": str, "strf": lambda x: "%-1s" % x},
+    {
+        "id": "blank_1",
+        "line": [11, 12],
+        "type": str,
+        "strf": lambda x: "%-1s" % x,
+    },
     {
         "id": "atom_name",
         "line": [12, 16],
         "type": str,
         "strf": lambda x: " %-3s" % x if len(x) < 4 else "%-4s" % x,
     },
-    {"id": "alt_loc", "line": [16, 17], "type": str, "strf": lambda x: "%-1s" % x},
-    {"id": "residue_name", "line": [17, 20], "type": str, "strf": lambda x: "%+3s" % x},
-    {"id": "blank_2", "line": [20, 21], "type": str, "strf": lambda x: "%-1s" % x},
-    {"id": "chain_id", "line": [21, 22], "type": str, "strf": lambda x: "%-1s" % x},
+    {
+        "id": "alt_loc",
+        "line": [16, 17],
+        "type": str,
+        "strf": lambda x: "%-1s" % x,
+    },
+    {
+        "id": "residue_name",
+        "line": [17, 20],
+        "type": str,
+        "strf": lambda x: "%+3s" % x,
+    },
+    {
+        "id": "blank_2",
+        "line": [20, 21],
+        "type": str,
+        "strf": lambda x: "%-1s" % x,
+    },
+    {
+        "id": "chain_id",
+        "line": [21, 22],
+        "type": str,
+        "strf": lambda x: "%-1s" % x,
+    },
     {
         "id": "residue_number",
         "line": [22, 26],
         "type": int,
         "strf": lambda x: "%+4s" % str(x),
     },
-    {"id": "insertion", "line": [26, 27], "type": str, "strf": lambda x: "%-1s" % x},
-    {"id": "blank_3", "line": [27, 30], "type": str, "strf": lambda x: "%-3s" % x},
+    {
+        "id": "insertion",
+        "line": [26, 27],
+        "type": str,
+        "strf": lambda x: "%-1s" % x,
+    },
+    {
+        "id": "blank_3",
+        "line": [27, 30],
+        "type": str,
+        "strf": lambda x: "%-3s" % x,
+    },
     {
         "id": "x_coord",
         "line": [30, 38],
@@ -116,10 +156,24 @@
         "id": "b_factor",
         "line": [60, 66],
         "type": float,
-        "strf": lambda x: ("%+6.2f" % x).replace("+", " ") if len(str(int(x))) < 3 else ("%+6.2f" % x).replace("+", ""),
+        "strf": lambda x: (
+            ("%+6.2f" % x).replace("+", " ")
+            if len(str(int(x))) < 3
+            else ("%+6.2f" % x).replace("+", "")
+        ),
+    },
+    {
+        "id": "blank_4",
+        "line": [66, 72],
+        "type": str,
+        "strf": lambda x: "%-7s" % x,
+    },
+    {
+        "id": "segment_id",
+        "line": [72, 76],
+        "type": str,
+        "strf": lambda x: "%-3s" % x,
     },
-    {"id": "blank_4", "line": [66, 72], "type": str, "strf": lambda x: "%-7s" % x},
-    {"id": "segment_id", "line": [72, 76], "type": str, "strf": lambda x: "%-3s" % x},
     {
         "id": "element_symbol",
         "line": [76, 78],
@@ -130,45 +184,122 @@
         "id": "charge",
         "line": [78, 80],
         "type": float,
-        "strf": lambda x: (("%+2.1f" % x).replace("+", " ") if pd.notnull(x) else ""),
+        "strf": lambda x: (
+            ("%+2.1f" % x).replace("+", " ") if pd.notnull(x) else ""
+        ),
     },
 ]
 
 
 pdb_anisoudict = [
-    {"id": "record_name", "line": [0, 6], "type": str, "strf": lambda x: "%-6s" % x},
+    {
+        "id": "record_name",
+        "line": [0, 6],
+        "type": str,
+        "strf": lambda x: "%-6s" % x,
+    },
     {
         "id": "atom_number",
         "line": [6, 11],
         "type": int,
         "strf": lambda x: "%+5s" % str(x),
     },
-    {"id": "blank_1", "line": [11, 12], "type": str, "strf": lambda x: "%-1s" % x},
+    {
+        "id": "blank_1",
+        "line": [11, 12],
+        "type": str,
+        "strf": lambda x: "%-1s" % x,
+    },
     {
         "id": "atom_name",
         "line": [12, 16],
         "type": str,
         "strf": lambda x: (" %-3s" % x if len(x) < 4 else "%-4s" % x),
     },
-    {"id": "alt_loc", "line": [16, 17], "type": str, "strf": lambda x: "%-1s" % x},
-    {"id": "residue_name", "line": [17, 20], "type": str, "strf": lambda x: "%+3s" % x},
-    {"id": "blank_2", "line": [20, 21], "type": str, "strf": lambda x: "%-1s" % x},
-    {"id": "chain_id", "line": [21, 22], "type": str, "strf": lambda x: "%-1s" % x},
+    {
+        "id": "alt_loc",
+        "line": [16, 17],
+        "type": str,
+        "strf": lambda x: "%-1s" % x,
+    },
+    {
+        "id": "residue_name",
+        "line": [17, 20],
+        "type": str,
+        "strf": lambda x: "%+3s" % x,
+    },
+    {
+        "id": "blank_2",
+        "line": [20, 21],
+        "type": str,
+        "strf": lambda x: "%-1s" % x,
+    },
+    {
+        "id": "chain_id",
+        "line": [21, 22],
+        "type": str,
+        "strf": lambda x: "%-1s" % x,
+    },
     {
         "id": "residue_number",
         "line": [22, 26],
         "type": int,
         "strf": lambda x: "%+4s" % str(x),
     },
-    {"id": "insertion", "line": [26, 27], "type": str, "strf": lambda x: "%-1s" % x},
-    {"id": "blank_3", "line": [27, 28], "type": str, "strf": lambda x: "%-1s" % x},
-    {"id": "U(1,1)", "line": [28, 35], "type": int, "strf": lambda x: "%+7s" % str(x)},
-    {"id": "U(2,2)", "line": [35, 42], "type": int, "strf": lambda x: "%+7s" % str(x)},
-    {"id": "U(3,3)", "line": [42, 49], "type": int, "strf": lambda x: "%+7s" % str(x)},
-    {"id": "U(1,2)", "line": [49, 56], "type": int, "strf": lambda x: "%+7s" % str(x)},
-    {"id": "U(1,3)", "line": [56, 63], "type": int, "strf": lambda x: "%+7s" % str(x)},
-    {"id": "U(2,3)", "line": [63, 70], "type": int, "strf": lambda x: "%+7s" % str(x)},
-    {"id": "blank_4", "line": [70, 76], "type": str, "strf": lambda x: "%+6s" % x},
+    {
+        "id": "insertion",
+        "line": [26, 27],
+        "type": str,
+        "strf": lambda x: "%-1s" % x,
+    },
+    {
+        "id": "blank_3",
+        "line": [27, 28],
+        "type": str,
+        "strf": lambda x: "%-1s" % x,
+    },
+    {
+        "id": "U(1,1)",
+        "line": [28, 35],
+        "type": int,
+        "strf": lambda x: "%+7s" % str(x),
+    },
+    {
+        "id": "U(2,2)",
+        "line": [35, 42],
+        "type": int,
+        "strf": lambda x: "%+7s" % str(x),
+    },
+    {
+        "id": "U(3,3)",
+        "line": [42, 49],
+        "type": int,
+        "strf": lambda x: "%+7s" % str(x),
+    },
+    {
+        "id": "U(1,2)",
+        "line": [49, 56],
+        "type": int,
+        "strf": lambda x: "%+7s" % str(x),
+    },
+    {
+        "id": "U(1,3)",
+        "line": [56, 63],
+        "type": int,
+        "strf": lambda x: "%+7s" % str(x),
+    },
+    {
+        "id": "U(2,3)",
+        "line": [63, 70],
+        "type": int,
+        "strf": lambda x: "%+7s" % str(x),
+    },
+    {
+        "id": "blank_4",
+        "line": [70, 76],
+        "type": str,
+        "strf": lambda x: "%+6s" % x,
+    },
     {
         "id": "element_symbol",
         "line": [76, 78],
@@ -179,7 +310,9 @@
         "id": "charge",
         "line": [78, 80],
         "type": float,
-        "strf": lambda x: (("%+2.1f" % x).replace("+", " ") if pd.notnull(x) else ""),
+        "strf": lambda x: (
+            ("%+2.1f" % x).replace("+", " ") if pd.notnull(x) else ""
+        ),
     },
 ]
 
@@ -190,7 +323,12 @@
         "type": str,
         "strf": lambda x: "%s%s" % (x, " " * (6 - len(x))),
     },
-    {"id": "entry", "line": [6, -2], "type": str, "strf": lambda x: x.rstrip()},
+    {
+        "id": "entry",
+        "line": [6, -2],
+        "type": str,
+        "strf": lambda x: x.rstrip(),
+    },
 ]
 
 pdb_records = {
diff --git a/biopandas/pdb/tests/test_amino3to1.py b/biopandas/pdb/tests/test_amino3to1.py
index d8ac816..cabc9e4 100644
--- a/biopandas/pdb/tests/test_amino3to1.py
+++ b/biopandas/pdb/tests/test_amino3to1.py
@@ -4,13 +4,16 @@
 # Project Website: http://rasbt.github.io/biopandas/
 # Code Repository: https://github.com/rasbt/biopandas
 
+import os
+
 import numpy as np
 from biopandas.pdb import PandasPdb
-import os
 
 
 def test_defaults():
-    TESTDATA_1t48 = os.path.join(os.path.dirname(__file__), "data", "1t48_995.pdb")
+    TESTDATA_1t48 = os.path.join(
+        os.path.dirname(__file__), "data", "1t48_995.pdb"
+    )
     p1t48 = PandasPdb()
     p1t48.read_pdb(TESTDATA_1t48)
     expect_res = [
@@ -146,7 +149,9 @@ def test_defaults():
 
 
 def test_sameindex():
-    TESTDATA_1t48 = os.path.join(os.path.dirname(__file__), "data", "1t48_995.pdb")
+    TESTDATA_1t48 = os.path.join(
+        os.path.dirname(__file__), "data", "1t48_995.pdb"
+    )
     p1t48 = PandasPdb()
     p1t48.read_pdb(TESTDATA_1t48)
     print(p1t48)
@@ -482,8 +487,12 @@ def test_multichain():
     expect_chain = ["A" for _ in range(88)] + ["B" for _ in range(94)]
     got_chain = list(transl["chain_id"].values)
 
-    got_res_a = list(transl.loc[transl["chain_id"] == "A", "residue_name"].values)
-    got_res_b = list(transl.loc[transl["chain_id"] == "B", "residue_name"].values)
+    got_res_a = list(
+        transl.loc[transl["chain_id"] == "A", "residue_name"].values
+    )
+    got_res_b = list(
+        transl.loc[transl["chain_id"] == "B", "residue_name"].values
+    )
 
     assert expect_chain == got_chain
     assert expect_res_a == got_res_a
diff --git a/biopandas/pdb/tests/test_assign_df.py b/biopandas/pdb/tests/test_assign_df.py
index 700f638..f339609 100644
--- a/biopandas/pdb/tests/test_assign_df.py
+++ b/biopandas/pdb/tests/test_assign_df.py
@@ -4,10 +4,10 @@
 # Project Website: http://rasbt.github.io/biopandas/
 # Code Repository: https://github.com/rasbt/biopandas
 
-from biopandas.pdb import PandasPdb
-from biopandas.testutils import assert_raises
 import os
 
+from biopandas.pdb import PandasPdb
+from biopandas.testutils import assert_raises
 
 TESTDATA_FILENAME = os.path.join(os.path.dirname(__file__), "data", "3eiy.pdb")
 
diff --git a/biopandas/pdb/tests/test_distance.py b/biopandas/pdb/tests/test_distance.py
index 2c6e458..aec7f2e 100644
--- a/biopandas/pdb/tests/test_distance.py
+++ b/biopandas/pdb/tests/test_distance.py
@@ -4,44 +4,55 @@
 # Project Website: http://rasbt.github.io/biopandas/
 # Code Repository: https://github.com/rasbt/biopandas
 
+import os
+
 import pandas as pd
 from biopandas.pdb import PandasPdb
-import os
 
 
 def test_equal():
-    TESTDATA_1t48 = os.path.join(os.path.dirname(__file__), "data", "1t48_995.pdb")
+    TESTDATA_1t48 = os.path.join(
+        os.path.dirname(__file__), "data", "1t48_995.pdb"
+    )
 
     p1t48 = PandasPdb()
     p1t48.read_pdb(TESTDATA_1t48)
     dist = p1t48.distance(xyz=(70.785, 15.477, 23.359), records=("ATOM",))
 
     expect = pd.Series(
-        [2.533259, 1.520502, 0.000000, 1.257597, 1.252510], index=[12, 13, 14, 15, 16]
+        [2.533259, 1.520502, 0.000000, 1.257597, 1.252510],
+        index=[12, 13, 14, 15, 16],
     )
     assert dist[dist < 3].all() == expect.all()
 
 
 def test_deprecated_str_arg():
-    TESTDATA_1t48 = os.path.join(os.path.dirname(__file__), "data", "1t48_995.pdb")
+    TESTDATA_1t48 = os.path.join(
+        os.path.dirname(__file__), "data", "1t48_995.pdb"
+    )
 
     p1t48 = PandasPdb()
     p1t48.read_pdb(TESTDATA_1t48)
     dist = p1t48.distance(xyz=(70.785, 15.477, 23.359), records="ATOM")
 
     expect = pd.Series(
-        [2.533259, 1.520502, 0.000000, 1.257597, 1.252510], index=[12, 13, 14, 15, 16]
+        [2.533259, 1.520502, 0.000000, 1.257597, 1.252510],
+        index=[12, 13, 14, 15, 16],
     )
     assert dist[dist < 3].all() == expect.all()
 
 
 def test_use_external_df():
-    TESTDATA_1t48 = os.path.join(os.path.dirname(__file__), "data", "1t48_995.pdb")
+    TESTDATA_1t48 = os.path.join(
+        os.path.dirname(__file__), "data", "1t48_995.pdb"
+    )
 
     p1t48 = PandasPdb()
     p1t48.read_pdb(TESTDATA_1t48)
     new_df = p1t48.df["ATOM"].iloc[:-1, :].copy()
     dist = PandasPdb.distance_df(df=new_df, xyz=(70.785, 15.477, 23.359))
 
-    expect = pd.Series([2.533259, 1.520502, 0.000000, 1.257597], index=[12, 13, 14, 15])
+    expect = pd.Series(
+        [2.533259, 1.520502, 0.000000, 1.257597], index=[12, 13, 14, 15]
+    )
     assert dist[dist < 3].all() == expect.all()
diff --git a/biopandas/pdb/tests/test_gyradius.py b/biopandas/pdb/tests/test_gyradius.py
index 40f7227..ef4f4c8 100644
--- a/biopandas/pdb/tests/test_gyradius.py
+++ b/biopandas/pdb/tests/test_gyradius.py
@@ -4,9 +4,10 @@
 # Project Website: http://rasbt.github.io/biopandas/
 # Code Repository: https://github.com/rasbt/biopandas
 
-from biopandas.pdb import PandasPdb
 import os
+
 import pytest
+from biopandas.pdb import PandasPdb
 
 TESTDATA_1t48 = os.path.join(os.path.dirname(__file__), "data", "1t48_995.pdb")
 
@@ -62,7 +63,7 @@ def test_negative_decimals():
 
 @pytest.mark.xfail(raises=TypeError)
 def test_wrong_decimals_arg():
-    p1t48.gyradius(decimals='five')
+    p1t48.gyradius(decimals="five")
 
 
 def test_both_args():
diff --git a/biopandas/pdb/tests/test_impute.py b/biopandas/pdb/tests/test_impute.py
index 225f5c5..6035450 100644
--- a/biopandas/pdb/tests/test_impute.py
+++ b/biopandas/pdb/tests/test_impute.py
@@ -5,9 +5,10 @@
 # Code Repository: https://github.com/rasbt/biopandas
 
 
-from biopandas.pdb import PandasPdb
 import os
 
+from biopandas.pdb import PandasPdb
+
 TESTDATA_FILENAME = os.path.join(
     os.path.dirname(__file__), "data", "3eiy_stripped_no_ele.pdb"
 )
diff --git a/biopandas/pdb/tests/test_read_pdb.py b/biopandas/pdb/tests/test_read_pdb.py
index e6bd8fa..c41792f 100644
--- a/biopandas/pdb/tests/test_read_pdb.py
+++ b/biopandas/pdb/tests/test_read_pdb.py
@@ -6,11 +6,11 @@
 
 
 import os
-import pytest
 from urllib.error import HTTPError
 
 import numpy as np
 import pandas as pd
+import pytest
 from biopandas.pdb import PandasPdb
 from biopandas.testutils import assert_raises
 
@@ -18,7 +18,9 @@
 TESTDATA_FILENAME2 = os.path.join(
     os.path.dirname(__file__), "data", "4eiy_anisouchunk.pdb"
 )
-TESTDATA_FILENAME_GZ = os.path.join(os.path.dirname(__file__), "data", "3eiy.pdb.gz")
+TESTDATA_FILENAME_GZ = os.path.join(
+    os.path.dirname(__file__), "data", "3eiy.pdb.gz"
+)
 TESTDATA_FILENAME_AF2_V4 = os.path.join(
     os.path.dirname(__file__), "data", "AF-Q5VSL9-F1-model_v4.pdb"
 )
@@ -101,7 +103,8 @@ def test__read_pdb_raises():
     Test if ValueError is raised for wrong file formats."""
 
     expect = (
-        "Wrong file format; allowed file formats are " ".pdb, .pdb.gz, .ent, .ent.gz"
+        "Wrong file format; allowed file formats are "
+        ".pdb, .pdb.gz, .ent, .ent.gz"
     )
 
     def run_code_1():
diff --git a/biopandas/pdb/tests/test_rmsd.py b/biopandas/pdb/tests/test_rmsd.py
index dc7a707..88c142b 100644
--- a/biopandas/pdb/tests/test_rmsd.py
+++ b/biopandas/pdb/tests/test_rmsd.py
@@ -4,16 +4,23 @@
 # Project Website: http://rasbt.github.io/biopandas/
 # Code Repository: https://github.com/rasbt/biopandas
 
-from biopandas.pdb import PandasPdb
 import os
+
 import pytest
+from biopandas.pdb import PandasPdb
 
 TESTDATA_1t48 = os.path.join(os.path.dirname(__file__), "data", "1t48_995.pdb")
 TESTDATA_1t49 = os.path.join(os.path.dirname(__file__), "data", "1t49_995.pdb")
-TESTDATA_lig1 = os.path.join(os.path.dirname(__file__), "data", "lig_conf_1.pdb")
-TESTDATA_lig2 = os.path.join(os.path.dirname(__file__), "data", "lig_conf_2.pdb")
+TESTDATA_lig1 = os.path.join(
+    os.path.dirname(__file__), "data", "lig_conf_1.pdb"
+)
+TESTDATA_lig2 = os.path.join(
+    os.path.dirname(__file__), "data", "lig_conf_2.pdb"
+)
 
-TESTDATA_rna = os.path.join(os.path.dirname(__file__), "data", "1ehz-rna_short.pdb")
+TESTDATA_rna = os.path.join(
+    os.path.dirname(__file__), "data", "1ehz-rna_short.pdb"
+)
 
 p1t48 = PandasPdb()
 p1t48.read_pdb(TESTDATA_1t48)
@@ -47,7 +54,9 @@ def test_invalid_query():
 
 
 def test_protein():
-    r = PandasPdb.rmsd(p1t48.df["ATOM"], p1t49.df["ATOM"], s="c-alpha", invert=False)
+    r = PandasPdb.rmsd(
+        p1t48.df["ATOM"], p1t49.df["ATOM"], s="c-alpha", invert=False
+    )
     assert r == 0.4785, r
 
 
@@ -61,7 +70,9 @@ def test_rna_and_nonmatching_indices():
 
 
 def test_ligand():
-    r = PandasPdb.rmsd(pl1.df["HETATM"], pl2.df["HETATM"], s="hydrogen", invert=True)
+    r = PandasPdb.rmsd(
+        pl1.df["HETATM"], pl2.df["HETATM"], s="hydrogen", invert=True
+    )
     assert r == 1.9959, r
 
 
diff --git a/biopandas/pdb/tests/test_write_pdb.py b/biopandas/pdb/tests/test_write_pdb.py
index 22734ec..352e51b 100644
--- a/biopandas/pdb/tests/test_write_pdb.py
+++ b/biopandas/pdb/tests/test_write_pdb.py
@@ -4,11 +4,11 @@
 # Project Website: http://rasbt.github.io/biopandas/
 # Code Repository: https://github.com/rasbt/biopandas
 
-from biopandas.pdb import PandasPdb
-import warnings
-import pandas as pd
 import os
+import warnings
 
+import pandas as pd
+from biopandas.pdb import PandasPdb
 
 TESTDATA_FILENAME = os.path.join(os.path.dirname(__file__), "data", "3eiy.pdb")
 TESTDATA_FILENAME2 = os.path.join(
@@ -45,7 +45,9 @@ def test_defaults():
 def test_nonexpected_column():
     ppdb = PandasPdb()
     ppdb.read_pdb(TESTDATA_FILENAME)
-    ppdb.df["HETATM"]["test"] = pd.Series("test", index=ppdb.df["HETATM"].index)
+    ppdb.df["HETATM"]["test"] = pd.Series(
+        "test", index=ppdb.df["HETATM"].index
+    )
     with warnings.catch_warnings(record=True) as w:
         ppdb.to_pdb(path=OUTFILE, records=["HETATM"])
     with open(OUTFILE, "r") as f:
@@ -80,12 +82,12 @@ def test_add_remark():
     """Test adding a REMARK entry."""
     # Add remark
     code = 3
-    remark1 = 'THIS IS A HIGHLY IMPORTANT FREE-TEXT REMARK WHICH IS EXACTLY 80 CHARACTERS LONG.'
-    remark2 = ''
-    remark3 = 'THIS IS A NEXT MULTI-LINE INDENTED REMARK\n FOLLOWING THE BLANK REMARK.'
+    remark1 = "THIS IS A HIGHLY IMPORTANT FREE-TEXT REMARK WHICH IS EXACTLY 80 CHARACTERS LONG."
+    remark2 = ""
+    remark3 = "THIS IS A NEXT MULTI-LINE INDENTED REMARK\n FOLLOWING THE BLANK REMARK."
     ppdb = PandasPdb()
     ppdb.read_pdb(TESTDATA_FILENAME)
-    n_atoms = len(ppdb.df['ATOM'])
+    n_atoms = len(ppdb.df["ATOM"])
     ppdb.add_remark(code, remark1)
     ppdb.add_remark(code, remark2)
     ppdb.add_remark(code, remark3, 5)
@@ -110,18 +112,18 @@ def test_add_remark():
     ppdb = PandasPdb()
     ppdb.read_pdb(OUTFILE)
     os.remove(OUTFILE)
-    assert len(ppdb.df['ATOM']) == n_atoms
+    assert len(ppdb.df["ATOM"]) == n_atoms
 
 
 def test_introduce_remark():
     """Test introducing a REMARK entry to the file with no remarks."""
     # Add remark
     code = 3
-    remark = 'THIS IS A HIGHLY IMPORTANT FREE-TEXT REMARK WHICH IS EXACTLY 80 CHARACTERS LONG.'
+    remark = "THIS IS A HIGHLY IMPORTANT FREE-TEXT REMARK WHICH IS EXACTLY 80 CHARACTERS LONG."
     indent = 1
     ppdb = PandasPdb()
     ppdb.read_pdb(TESTDATA_FILENAME3)
-    n_atoms = len(ppdb.df['ATOM'])
+    n_atoms = len(ppdb.df["ATOM"])
     ppdb.add_remark(code, remark, indent)
     ppdb.to_pdb(path=OUTFILE)
 
@@ -138,16 +140,19 @@ def test_introduce_remark():
     ppdb = PandasPdb()
     ppdb.read_pdb(OUTFILE)
     os.remove(OUTFILE)
-    assert len(ppdb.df['ATOM']) == n_atoms
+    assert len(ppdb.df["ATOM"]) == n_atoms
+
 
-    
 def test_b_factor_shift():
     """Test b_factor shifting one white space when saving the fetched pdb."""
     ppdb = PandasPdb()
     ppdb.fetch_pdb("2e28")
     ppdb.to_pdb(path=OUTFILE, records=None)
-    tmp_df = ppdb.read_pdb(path=OUTFILE).df['ATOM']
+    tmp_df = ppdb.read_pdb(path=OUTFILE).df["ATOM"]
     os.remove(OUTFILE)
-    assert tmp_df[tmp_df["element_symbol"].isnull() | (tmp_df["element_symbol"] == '')].empty
-    assert not tmp_df[tmp_df["blank_4"].isnull() | (tmp_df["blank_4"] == '')].empty
-
+    assert tmp_df[
+        tmp_df["element_symbol"].isnull() | (tmp_df["element_symbol"] == "")
+    ].empty
+    assert not tmp_df[
+        tmp_df["blank_4"].isnull() | (tmp_df["blank_4"] == "")
+    ].empty

From 21a608050e1914a0353c521450de57a718a89341 Mon Sep 17 00:00:00 2001
From: Arian Jamasb <arian.jamasb@roche.com>
Date: Mon, 8 Jul 2024 18:42:29 +0100
Subject: [PATCH 06/21] linting

---
 biopandas/mmtf/pandas_mmtf.py                | 161 +++++++++++++------
 biopandas/mmtf/tests/test_amino3to1.py       |  24 ++-
 biopandas/mmtf/tests/test_assign_df.py       |   4 +-
 biopandas/mmtf/tests/test_distance.py        |  22 ++-
 biopandas/mmtf/tests/test_multiple_models.py |  17 +-
 biopandas/mmtf/tests/test_read_mmtf.py       |  51 +++---
 biopandas/mmtf/tests/test_rmsd.py            |   7 +-
 biopandas/mmtf/tests/test_write_mmtf.py      |  45 +++++-
 8 files changed, 231 insertions(+), 100 deletions(-)

diff --git a/biopandas/mmtf/pandas_mmtf.py b/biopandas/mmtf/pandas_mmtf.py
index 5897895..9912946 100644
--- a/biopandas/mmtf/pandas_mmtf.py
+++ b/biopandas/mmtf/pandas_mmtf.py
@@ -1,9 +1,10 @@
 """Class for working with MMTF files."""
+
 from __future__ import annotations
 
-import os
 import copy
 import gzip
+import os
 import warnings
 from string import ascii_uppercase
 from typing import Any, Dict, List, Union
@@ -11,10 +12,10 @@
 
 import numpy as np
 import pandas as pd
+from biopandas.constants import protein_letters_3to1_extended
 from looseversion import LooseVersion
-from mmtf import MMTFDecoder, MMTFEncoder, fetch, parse, parse_gzip
 
-from biopandas.constants import protein_letters_3to1_extended
+from mmtf import MMTFDecoder, MMTFEncoder, fetch, parse, parse_gzip
 
 from ..pdb.engines import amino3to1dict, pdb_df_columns, pdb_records
 
@@ -90,9 +91,9 @@ def impute_element(self, records=("ATOM", "HETATM"), inplace=False):
                 t[d] = self.df[d].copy()
 
         for sec in records:
-            t[sec]["element_symbol"] = t[sec][["atom_name", "element_symbol"]].apply(
-                lambda x: x[0][1] if len(x[1]) == 3 else x[0][0], axis=1
-            )
+            t[sec]["element_symbol"] = t[sec][
+                ["atom_name", "element_symbol"]
+            ].apply(lambda x: x[0][1] if len(x[1]) == 3 else x[0][0], axis=1)
         return t
 
     @staticmethod
@@ -235,14 +236,18 @@ def amino3to1(self, record="ATOM", residue_col="residue_name", fillna="?"):
         cmp = "placeholder"
         indices = []
 
-        residue_number_insertion = tmp["residue_number"].astype(str) + tmp["insertion"]
+        residue_number_insertion = (
+            tmp["residue_number"].astype(str) + tmp["insertion"]
+        )
 
         for num, ind in zip(residue_number_insertion, np.arange(tmp.shape[0])):
             if num != cmp:
                 indices.append(ind)
             cmp = num
 
-        transl = tmp.iloc[indices][residue_col].map(amino3to1dict).fillna(fillna)
+        transl = (
+            tmp.iloc[indices][residue_col].map(amino3to1dict).fillna(fillna)
+        )
 
         return pd.concat((tmp.iloc[indices]["chain_id"], transl), axis=1)
 
@@ -282,7 +287,9 @@ def distance(self, xyz=(0.00, 0.00, 0.00), records=("ATOM", "HETATM")):
 
         return np.sqrt(
             np.sum(
-                df[["x_coord", "y_coord", "z_coord"]].subtract(xyz, axis=1) ** 2, axis=1
+                df[["x_coord", "y_coord", "z_coord"]].subtract(xyz, axis=1)
+                ** 2,
+                axis=1,
             )
         )
 
@@ -308,7 +315,9 @@ def distance_df(df, xyz=(0.00, 0.00, 0.00)):
         """
         return np.sqrt(
             np.sum(
-                df[["x_coord", "y_coord", "z_coord"]].subtract(xyz, axis=1) ** 2, axis=1
+                df[["x_coord", "y_coord", "z_coord"]].subtract(xyz, axis=1)
+                ** 2,
+                axis=1,
             )
         )
 
@@ -354,7 +363,9 @@ def to_pdb(self, path, records=None, gz=False, append_newline=True):
                 if c in {"x_coord", "y_coord", "z_coord"}:
                     for idx in range(dfs[r][c].values.shape[0]):
                         if len(dfs[r][c].values[idx]) > 8:
-                            dfs[r][c].values[idx] = str(dfs[r][c].values[idx]).strip()
+                            dfs[r][c].values[idx] = str(
+                                dfs[r][c].values[idx]
+                            ).strip()
                 if c in {"line_idx", "OUT"}:
                     pass
                 elif r in {"ATOM", "HETATM"} and c not in pdb_df_columns:
@@ -398,7 +409,7 @@ def parse_sse(self):
         """Parse secondary structure elements"""
         raise NotImplementedError
 
-    def to_mmtf(self, path, records = ("ATOM", "HETATM")):
+    def to_mmtf(self, path, records=("ATOM", "HETATM")):
         """Write record DataFrames to an MMTF file.
 
         Parameters
@@ -413,7 +424,6 @@ def to_mmtf(self, path, records = ("ATOM", "HETATM")):
         df = pd.concat(objs=[self.df[i] for i in records])
         return write_mmtf(df, path)
 
-
     def get_model(self, model_index: int) -> PandasMmtf:
         """Returns a new PandasPDB object with the dataframes subset to the
         given model index.
@@ -432,7 +442,9 @@ def get_model(self, model_index: int) -> PandasMmtf:
         df = copy.deepcopy(self)
 
         if "ATOM" in df.df.keys():
-            df.df["ATOM"] = df.df["ATOM"].loc[df.df["ATOM"]["model_id"] == model_index]
+            df.df["ATOM"] = df.df["ATOM"].loc[
+                df.df["ATOM"]["model_id"] == model_index
+            ]
         if "HETATM" in df.df.keys():
             df.df["HETATM"] = df.df["HETATM"].loc[
                 df.df["HETATM"]["model_id"] == model_index
@@ -462,20 +474,28 @@ def get_models(self, model_indices: List[int]) -> PandasMmtf:
 
         if "ATOM" in df.df.keys():
             df.df["ATOM"] = df.df["ATOM"].loc[
-                [x in model_indices for x in df.df["ATOM"]["model_id"].tolist()]
+                [
+                    x in model_indices
+                    for x in df.df["ATOM"]["model_id"].tolist()
+                ]
             ]
         if "HETATM" in df.df.keys():
             df.df["HETATM"] = df.df["HETATM"].loc[
-                [x in model_indices for x in df.df["HETATM"]["model_id"].tolist()]
+                [
+                    x in model_indices
+                    for x in df.df["HETATM"]["model_id"].tolist()
+                ]
             ]
         if "ANISOU" in df.df.keys():
             df.df["ANISOU"] = df.df["ANISOU"].loc[
-                [x in model_indices for x in df.df["ANISOU"]["model_id"].tolist()]
+                [
+                    x in model_indices
+                    for x in df.df["ANISOU"]["model_id"].tolist()
+                ]
             ]
         return df
 
 
-
 def fetch_mmtf(pdb_code: str) -> pd.DataFrame:
     """Returns a dataframe from a PDB code.
 
@@ -496,7 +516,11 @@ def parse_mmtf(file_path: str) -> pd.DataFrame:
     :return: Dataframe of protein structure.
     :rtype: pd.DataFrame
     """
-    df = parse_gzip(file_path) if file_path.endswith(".gz") else parse(file_path)
+    df = (
+        parse_gzip(file_path)
+        if file_path.endswith(".gz")
+        else parse(file_path)
+    )
     return mmtf_to_df(df)
 
 
@@ -528,7 +552,9 @@ def mmtf_to_df(mmtf_obj: MMTFDecoder) -> pd.DataFrame:
         else:
             chain_indices[i] = chain_indices[i] + chain_indices[i - 1]
     model_indices = mmtf_obj.chains_per_model
-    model_indices = [sum(model_indices[:i+1]) for i in range(len(model_indices))]
+    model_indices = [
+        sum(model_indices[: i + 1]) for i in range(len(model_indices))
+    ]
     ch_idx = 0
 
     entity_types = {}
@@ -540,23 +566,25 @@ def mmtf_to_df(mmtf_obj: MMTFDecoder) -> pd.DataFrame:
     ch_idx = next(ch_idx_iter)
     for idx, i in enumerate(mmtf_obj.group_type_list):
         res = mmtf_obj.group_list[i]
-        #record = "HETATM" if res["chemCompType"] == "NON-POLYMER" else "ATOM"
-        #record = (
+        # record = "HETATM" if res["chemCompType"] == "NON-POLYMER" else "ATOM"
+        # record = (
         #    "ATOM"
         #    if res["chemCompType"] in ["L-PEPTIDE LINKING", "PEPTIDE LINKING"]
         #    else "HETATM"
-        #)
+        # )
         if idx == chain_indices[ch_idx]:
-            #ch_idx += 1
+            # ch_idx += 1
             ch_idx = next(ch_idx_iter)
         record = "ATOM" if entity_types[ch_idx] == "polymer" else "HETATM"
 
         for _ in res["atomNameList"]:
             data["residue_name"].append(res["groupName"])
             data["residue_number"].append(mmtf_obj.group_id_list[idx])
-            #data["chain_id"].append([mmtf_obj.chain_name_list[ch_idx]])
+            # data["chain_id"].append([mmtf_obj.chain_name_list[ch_idx]])
             data["chain_id"].append([mmtf_obj.chain_name_list[ch_idx]])
-            data["model_id"].append(int(np.argwhere(np.array(model_indices)>ch_idx)[0]) + 1)
+            data["model_id"].append(
+                int(np.argwhere(np.array(model_indices) > ch_idx)[0]) + 1
+            )
             data["record_name"].append(record)
             data["insertion"].append(mmtf_obj.ins_code_list[idx])
         data["atom_name"].append(res["atomNameList"])
@@ -576,16 +604,19 @@ def mmtf_to_df(mmtf_obj: MMTFDecoder) -> pd.DataFrame:
             "record_name",
             "insertion",
             "atom_number",
-            "model_id"
+            "model_id",
         ]:
             continue
         data[k] = [i for sublist in v for i in sublist]
 
-    df = pd.DataFrame.from_dict(data).sort_values(by=["model_id", "atom_number"])
+    df = pd.DataFrame.from_dict(data).sort_values(
+        by=["model_id", "atom_number"]
+    )
     df.alt_loc = df.alt_loc.str.replace("\x00", "")
     df.insertion = df.insertion.str.replace("\x00", "")
     return df
 
+
 def _seq1(seq, charmap: Dict[str, str], undef_code="X"):
     # sourcery skip: dict-assign-update-to-union
     """Convert protein sequence from three-letter to one-letter code.
@@ -629,8 +660,6 @@ def _seq1(seq, charmap: Dict[str, str], undef_code="X"):
     return "".join(onecode.get(aa.upper(), undef_code) for aa in seqlist)
 
 
-
-
 def write_mmtf(df: pd.DataFrame, file_path: str):
     """Writes a biopandas dataframe to an MMTF file.
 
@@ -668,7 +697,17 @@ def write_mmtf(df: pd.DataFrame, file_path: str):
         experimental_methods=None,
     )
 
-    node_ids = df.model_id.astype(str) + ":" + df.chain_id + ":" + df.residue_name + ":" + df.residue_number.astype(str) + ":" + df.insertion.astype(str)
+    node_ids = (
+        df.model_id.astype(str)
+        + ":"
+        + df.chain_id
+        + ":"
+        + df.residue_name
+        + ":"
+        + df.residue_number.astype(str)
+        + ":"
+        + df.insertion.astype(str)
+    )
     df["residue_id"] = node_ids
     # Tracks values to replace them at the end
     chains_per_model = []
@@ -684,10 +723,10 @@ def write_mmtf(df: pd.DataFrame, file_path: str):
         count_models += 1
         # Set the model info
         encoder.set_model_info(
-            #model_id=model_idx, # According to mmtf-python this is meaningless
-            model_id=model_idx, # According to mmtf-python this is meaningless
-            chain_count=0 # Set to 0 here and changed later
-            )
+            # model_id=model_idx, # According to mmtf-python this is meaningless
+            model_id=model_idx,  # According to mmtf-python this is meaningless
+            chain_count=0,  # Set to 0 here and changed later
+        )
         # Iterate over chains in model
         for chain_id in chains:
             seqs = []
@@ -725,51 +764,73 @@ def write_mmtf(df: pd.DataFrame, file_path: str):
                 #  structure object so we treat each molecule as a separate
                 #  entity
                 if residue_type != prev_res_type or (
-                        residue_type == "HETATM" and resname != prev_resname
-                    ):
+                    residue_type == "HETATM" and resname != prev_resname
+                ):
                     encoder.set_entity_info(
                         chain_indices=[count_chains],
-                        sequence="", # Set to empty here and changed later
+                        sequence="",  # Set to empty here and changed later
                         description="",
                         entity_type=entity_type,
                     )
                     encoder.set_chain_info(
                         chain_id=chain_id,
-                        chain_name="\x00" if len(chain_id.strip()) == 0 else chain_id,
-                        num_groups=0, # Set to 0 here and changed later
+                        chain_name=(
+                            "\x00" if len(chain_id.strip()) == 0 else chain_id
+                        ),
+                        num_groups=0,  # Set to 0 here and changed later
                     )
                     if count_chains > 0:
-                        groups_per_chain.append(count_groups - sum(groups_per_chain) -1)
+                        groups_per_chain.append(
+                            count_groups - sum(groups_per_chain) - 1
+                        )
                     if not first_chain:
                         seqs.append(seq)
                     first_chain = False
                     count_chains += 1
-                    seq=""
+                    seq = ""
 
                 if entity_type == "polymer":
-                    seq += _seq1(residue_df.residue_name.unique()[0], charmap=protein_letters_3to1_extended)
+                    seq += _seq1(
+                        residue_df.residue_name.unique()[0],
+                        charmap=protein_letters_3to1_extended,
+                    )
 
                 prev_res_type = residue_type
                 prev_resname = resname
 
-                group_type = "NON-POLYMER" if residue_type == "HETATM" else "L-PEPTIDE LINKING"
+                group_type = (
+                    "NON-POLYMER"
+                    if residue_type == "HETATM"
+                    else "L-PEPTIDE LINKING"
+                )
                 encoder.set_group_info(
                     group_name=residue_df.residue_name.unique()[0],
                     group_number=int(residue_df.residue_number.unique()[0]),
-                    insertion_code="\x00" if residue_df.insertion.unique()[0] == "" else residue_df.insertion.unique()[0],
-                    group_type=group_type, # Hack to ensure we can re-parse.
+                    insertion_code=(
+                        "\x00"
+                        if residue_df.insertion.unique()[0] == ""
+                        else residue_df.insertion.unique()[0]
+                    ),
+                    group_type=group_type,  # Hack to ensure we can re-parse.
                     atom_count=len(residue_df),
                     bond_count=0,
-                    single_letter_code=_seq1(df.residue_name.unique()[0], charmap=protein_letters_3to1_extended),
-                    sequence_index=len(seq) - 1 if entity_type == "polymer" else -1,
-                    secondary_structure_type=-1
+                    single_letter_code=_seq1(
+                        df.residue_name.unique()[0],
+                        charmap=protein_letters_3to1_extended,
+                    ),
+                    sequence_index=(
+                        len(seq) - 1 if entity_type == "polymer" else -1
+                    ),
+                    secondary_structure_type=-1,
                 )
                 for row in residue_df.itertuples():
                     count_atoms += 1
                     encoder.set_atom_info(
                         atom_name=row.atom_name,
                         serial_number=row.atom_number,
-                        alternative_location_id="\x00" if row.alt_loc == "" else row.alt_loc,
+                        alternative_location_id=(
+                            "\x00" if row.alt_loc == "" else row.alt_loc
+                        ),
                         x=row.x_coord,
                         y=row.y_coord,
                         z=row.z_coord,
diff --git a/biopandas/mmtf/tests/test_amino3to1.py b/biopandas/mmtf/tests/test_amino3to1.py
index e4ccdb9..081265f 100644
--- a/biopandas/mmtf/tests/test_amino3to1.py
+++ b/biopandas/mmtf/tests/test_amino3to1.py
@@ -11,7 +11,9 @@
 
 
 def test_defaults():
-    TESTDATA_1t48 = os.path.join(os.path.dirname(__file__), "data", "1t48.mmtf")
+    TESTDATA_1t48 = os.path.join(
+        os.path.dirname(__file__), "data", "1t48.mmtf"
+    )
     p1t48 = PandasMmtf()
     p1t48.read_mmtf(TESTDATA_1t48)
     expect_res = [
@@ -319,7 +321,9 @@ def test_defaults():
 
 
 def test_sameindex():
-    TESTDATA_1t48 = os.path.join(os.path.dirname(__file__), "data", "1t48.mmtf")
+    TESTDATA_1t48 = os.path.join(
+        os.path.dirname(__file__), "data", "1t48.mmtf"
+    )
     p1t48 = PandasMmtf()
     p1t48.read_mmtf(TESTDATA_1t48)
     p1t48.df["ATOM"].index = np.zeros(p1t48.df["ATOM"].shape[0], dtype=int)
@@ -628,7 +632,9 @@ def test_sameindex():
 
 
 def test_multichain():
-    TESTDATA_5mtn = os.path.join(os.path.dirname(__file__), "data", "5mtn.mmtf")
+    TESTDATA_5mtn = os.path.join(
+        os.path.dirname(__file__), "data", "5mtn.mmtf"
+    )
     mtn = PandasMmtf()
     mtn.read_mmtf(TESTDATA_5mtn)
     expect_res_a = [
@@ -823,8 +829,12 @@ def test_multichain():
     expect_chain = ["A" for _ in range(88)] + ["B" for _ in range(94)]
     got_chain = list(transl["chain_id"].values)
 
-    got_res_a = list(transl.loc[transl["chain_id"] == "A", "residue_name"].values)
-    got_res_b = list(transl.loc[transl["chain_id"] == "B", "residue_name"].values)
+    got_res_a = list(
+        transl.loc[transl["chain_id"] == "A", "residue_name"].values
+    )
+    got_res_b = list(
+        transl.loc[transl["chain_id"] == "B", "residue_name"].values
+    )
 
     assert expect_chain == got_chain
     assert expect_res_a == got_res_a
@@ -832,7 +842,9 @@ def test_multichain():
 
 
 def test_pdb_with_insertion_codes():
-    PDB_2D7T_PATH = os.path.join(os.path.dirname(__file__), "data", "2d7t.mmtf")
+    PDB_2D7T_PATH = os.path.join(
+        os.path.dirname(__file__), "data", "2d7t.mmtf"
+    )
 
     ppdb = PandasMmtf().read_mmtf(PDB_2D7T_PATH)
     sequence = ppdb.amino3to1()
diff --git a/biopandas/mmtf/tests/test_assign_df.py b/biopandas/mmtf/tests/test_assign_df.py
index 66bd936..f9dd1bd 100644
--- a/biopandas/mmtf/tests/test_assign_df.py
+++ b/biopandas/mmtf/tests/test_assign_df.py
@@ -9,7 +9,9 @@
 from biopandas.mmtf import PandasMmtf
 from biopandas.testutils import assert_raises
 
-TESTDATA_FILENAME = os.path.join(os.path.dirname(__file__), "data", "3eiy.mmtf")
+TESTDATA_FILENAME = os.path.join(
+    os.path.dirname(__file__), "data", "3eiy.mmtf"
+)
 
 
 def test_overwrite_df():
diff --git a/biopandas/mmtf/tests/test_distance.py b/biopandas/mmtf/tests/test_distance.py
index d876fcc..96291e3 100644
--- a/biopandas/mmtf/tests/test_distance.py
+++ b/biopandas/mmtf/tests/test_distance.py
@@ -11,38 +11,48 @@
 
 
 def test_equal():
-    TESTDATA_1t48 = os.path.join(os.path.dirname(__file__), "data", "1t48.mmtf")
+    TESTDATA_1t48 = os.path.join(
+        os.path.dirname(__file__), "data", "1t48.mmtf"
+    )
 
     p1t48 = PandasMmtf()
     p1t48.read_mmtf(TESTDATA_1t48)
     dist = p1t48.distance(xyz=(70.785, 15.477, 23.359), records=("ATOM",))
 
     expect = pd.Series(
-        [2.533259, 1.520502, 0.000000, 1.257597, 1.252510], index=[12, 13, 14, 15, 16]
+        [2.533259, 1.520502, 0.000000, 1.257597, 1.252510],
+        index=[12, 13, 14, 15, 16],
     )
     assert dist[dist < 3].all() == expect.all()
 
 
 def test_deprecated_str_arg():
-    TESTDATA_1t48 = os.path.join(os.path.dirname(__file__), "data", "1t48.mmtf")
+    TESTDATA_1t48 = os.path.join(
+        os.path.dirname(__file__), "data", "1t48.mmtf"
+    )
 
     p1t48 = PandasMmtf()
     p1t48.read_mmtf(TESTDATA_1t48)
     dist = p1t48.distance(xyz=(70.785, 15.477, 23.359), records="ATOM")
 
     expect = pd.Series(
-        [2.533259, 1.520502, 0.000000, 1.257597, 1.252510], index=[12, 13, 14, 15, 16]
+        [2.533259, 1.520502, 0.000000, 1.257597, 1.252510],
+        index=[12, 13, 14, 15, 16],
     )
     assert dist[dist < 3].all() == expect.all()
 
 
 def test_use_external_df():
-    TESTDATA_1t48 = os.path.join(os.path.dirname(__file__), "data", "1t48.mmtf")
+    TESTDATA_1t48 = os.path.join(
+        os.path.dirname(__file__), "data", "1t48.mmtf"
+    )
 
     p1t48 = PandasMmtf()
     p1t48.read_mmtf(TESTDATA_1t48)
     new_df = p1t48.df["ATOM"].iloc[:-1, :].copy()
     dist = PandasMmtf.distance_df(df=new_df, xyz=(70.785, 15.477, 23.359))
 
-    expect = pd.Series([2.533259, 1.520502, 0.000000, 1.257597], index=[12, 13, 14, 15])
+    expect = pd.Series(
+        [2.533259, 1.520502, 0.000000, 1.257597], index=[12, 13, 14, 15]
+    )
     assert dist[dist < 3].all() == expect.all()
diff --git a/biopandas/mmtf/tests/test_multiple_models.py b/biopandas/mmtf/tests/test_multiple_models.py
index 5a77e9e..30be1cc 100644
--- a/biopandas/mmtf/tests/test_multiple_models.py
+++ b/biopandas/mmtf/tests/test_multiple_models.py
@@ -6,11 +6,12 @@
 # Code Repository: https://github.com/rasbt/biopandas
 import os
 
-from pandas.testing import assert_frame_equal
-
 from biopandas.mmtf import PandasMmtf
+from pandas.testing import assert_frame_equal
 
-TESTDATA_FILENAME = os.path.join(os.path.dirname(__file__), "data", "2jyf.mmtf")
+TESTDATA_FILENAME = os.path.join(
+    os.path.dirname(__file__), "data", "2jyf.mmtf"
+)
 
 
 def test_label_models():
@@ -37,8 +38,14 @@ def test_get_models():
     written = PandasMmtf().read_mmtf("test.mmtf")
 
     # Note: No way to preserve model ID as far as I can tell
-    assert_frame_equal(df.df["ATOM"].drop("model_id", axis=1).reset_index(drop=True), written.df["ATOM"].drop("model_id", axis=1).reset_index(drop=True))
-    assert_frame_equal(df.df["HETATM"].drop("model_id", axis=1).reset_index(drop=True), written.df["HETATM"].drop("model_id", axis=1).reset_index(drop=True))
+    assert_frame_equal(
+        df.df["ATOM"].drop("model_id", axis=1).reset_index(drop=True),
+        written.df["ATOM"].drop("model_id", axis=1).reset_index(drop=True),
+    )
+    assert_frame_equal(
+        df.df["HETATM"].drop("model_id", axis=1).reset_index(drop=True),
+        written.df["HETATM"].drop("model_id", axis=1).reset_index(drop=True),
+    )
 
     # Clean
     os.remove("test.mmtf")
diff --git a/biopandas/mmtf/tests/test_read_mmtf.py b/biopandas/mmtf/tests/test_read_mmtf.py
index cc3c1f9..045ff1d 100644
--- a/biopandas/mmtf/tests/test_read_mmtf.py
+++ b/biopandas/mmtf/tests/test_read_mmtf.py
@@ -4,39 +4,53 @@
 # Project Website: http://rasbt.github.io/biopandas/
 # Code Repository: https://github.com/rasbt/biopandas
 
-import unittest
 import os
+import unittest
 
 import pandas as pd
-
 from biopandas.mmtf import PandasMmtf
 from biopandas.pdb import PandasPdb
 
-MMTF_TESTDATA_FILENAME = os.path.join(os.path.dirname(__file__), "data", "3eiy.mmtf")
-MMTF_TESTDATA_FILENAME_GZ = os.path.join(os.path.dirname(__file__), "data", "3eiy.mmtf.gz")
-
-PDB_TESTDATA_FILENAME = os.path.join(os.path.dirname(__file__), "..", "..", "pdb", "tests", "data", "3eiy.pdb")
-PDB_TESTDATA_FILENAME_GZ = os.path.join(os.path.dirname(__file__), "..", "..", "pdb", "tests", "data", "3eiy.pdb.gz")
+MMTF_TESTDATA_FILENAME = os.path.join(
+    os.path.dirname(__file__), "data", "3eiy.mmtf"
+)
+MMTF_TESTDATA_FILENAME_GZ = os.path.join(
+    os.path.dirname(__file__), "data", "3eiy.mmtf.gz"
+)
+
+PDB_TESTDATA_FILENAME = os.path.join(
+    os.path.dirname(__file__), "..", "..", "pdb", "tests", "data", "3eiy.pdb"
+)
+PDB_TESTDATA_FILENAME_GZ = os.path.join(
+    os.path.dirname(__file__),
+    "..",
+    "..",
+    "pdb",
+    "tests",
+    "data",
+    "3eiy.pdb.gz",
+)
 
 
 ATOM_DF_COLUMNS = [
     "record_name",
     "atom_number",
     "atom_name",
-    #"alt_loc",
+    # "alt_loc",
     "residue_name",
     "chain_id",
     "residue_number",
-    #"insertion",
+    # "insertion",
     "x_coord",
     "y_coord",
     "z_coord",
     "occupancy",
     "b_factor",
     "element_symbol",
-    #"charge",
+    # "charge",
 ]
 
+
 @unittest.skip(reason="PDB No longer serves MMTF files.")
 def test_fetch_pdb():
     """Test fetch_pdb"""
@@ -56,14 +70,14 @@ def test__read_mmtf():
     pd.testing.assert_frame_equal(
         pmmtf.df["ATOM"][ATOM_DF_COLUMNS].reset_index(drop=True),
         ppdb.df["ATOM"][ATOM_DF_COLUMNS].reset_index(drop=True),
-        )
+    )
 
     ATOM_DF_COLUMNS.remove("atom_number")
     ATOM_DF_COLUMNS.remove("element_symbol")
     pd.testing.assert_frame_equal(
         pmmtf.df["HETATM"][ATOM_DF_COLUMNS].reset_index(drop=True),
         ppdb.df["HETATM"][ATOM_DF_COLUMNS].reset_index(drop=True),
-        )
+    )
 
 
 def test__read_mmtf_gz():
@@ -73,15 +87,14 @@ def test__read_mmtf_gz():
     pmmtf.read_mmtf(MMTF_TESTDATA_FILENAME_GZ)
     ppdb = ppdb.read_pdb(PDB_TESTDATA_FILENAME_GZ)
 
-
-    pmmtf.df["ATOM"].alt_loc.replace('\x00', "", inplace=True)
-    pmmtf.df["HETATM"].alt_loc.replace('\x00', "", inplace=True)
+    pmmtf.df["ATOM"].alt_loc.replace("\x00", "", inplace=True)
+    pmmtf.df["HETATM"].alt_loc.replace("\x00", "", inplace=True)
 
     pd.testing.assert_frame_equal(
         pmmtf.df["ATOM"][ATOM_DF_COLUMNS].reset_index(drop=True),
         ppdb.df["ATOM"][ATOM_DF_COLUMNS].reset_index(drop=True),
-        )
-    #pd.testing.assert_frame_equal(
+    )
+    # pd.testing.assert_frame_equal(
     #    pmmtf.df["HETATM"][ATOM_DF_COLUMNS].reset_index(drop=True),
     #    ppdb.df["HETATM"][ATOM_DF_COLUMNS].reset_index(drop=True),
     #    )
@@ -92,7 +105,3 @@ def test_read_mmtf():
     ppdb = PandasMmtf()
     ppdb.read_mmtf(MMTF_TESTDATA_FILENAME)
     assert ppdb.mmtf_path == MMTF_TESTDATA_FILENAME
-
-
-
-
diff --git a/biopandas/mmtf/tests/test_rmsd.py b/biopandas/mmtf/tests/test_rmsd.py
index 1d02e06..1131009 100644
--- a/biopandas/mmtf/tests/test_rmsd.py
+++ b/biopandas/mmtf/tests/test_rmsd.py
@@ -5,11 +5,10 @@
 # Code Repository: https://github.com/rasbt/biopandas
 
 import os
-import pytest
 
+import pytest
 from biopandas.mmtf import PandasMmtf
 
-
 TESTDATA_1t48 = os.path.join(os.path.dirname(__file__), "data", "1t48.mmtf")
 TESTDATA_1t49 = os.path.join(os.path.dirname(__file__), "data", "1t49.mmtf")
 # TESTDATA_lig1 = os.path.join(os.path.dirname(__file__), "data", "lig_conf_1.pdb")
@@ -52,7 +51,9 @@ def test_invalid_query():
 
 
 def test_protein():
-    r = PandasMmtf.rmsd(p1t48.df["ATOM"], p1t49.df["ATOM"], s="c-alpha", invert=False)
+    r = PandasMmtf.rmsd(
+        p1t48.df["ATOM"], p1t49.df["ATOM"], s="c-alpha", invert=False
+    )
     assert r == 0.4785, r
 
 
diff --git a/biopandas/mmtf/tests/test_write_mmtf.py b/biopandas/mmtf/tests/test_write_mmtf.py
index ce7bab2..69452e8 100644
--- a/biopandas/mmtf/tests/test_write_mmtf.py
+++ b/biopandas/mmtf/tests/test_write_mmtf.py
@@ -2,13 +2,22 @@
 import unittest
 
 import pandas as pd
+from biopandas.mmtf.pandas_mmtf import PandasMmtf, write_mmtf
 from pandas.testing import assert_frame_equal
 
-from biopandas.mmtf.pandas_mmtf import PandasMmtf, write_mmtf
 
 @unittest.skip(reason="PDB No longer serves MMTF files.")
 def test_write_mmtf_bp():
-    PDB_CODES = ["4hhb", "3eiy", "1t48", "1ehz", "4ggb", "1bxa", "1cbn", "1rcf"]
+    PDB_CODES = [
+        "4hhb",
+        "3eiy",
+        "1t48",
+        "1ehz",
+        "4ggb",
+        "1bxa",
+        "1cbn",
+        "1rcf",
+    ]
     for pdb in PDB_CODES:
         print(pdb)
         pm1 = PandasMmtf().fetch_mmtf(pdb)
@@ -16,15 +25,30 @@ def test_write_mmtf_bp():
         assert os.path.exists("test.mmtf")
 
         pm2 = PandasMmtf().read_mmtf("test.mmtf")
-        assert_frame_equal(pm1.df["ATOM"].reset_index(drop=True), pm2.df["ATOM"].reset_index(drop=True))
-        assert_frame_equal(pm1.df["HETATM"].reset_index(drop=True), pm2.df["HETATM"].reset_index(drop=True))
+        assert_frame_equal(
+            pm1.df["ATOM"].reset_index(drop=True),
+            pm2.df["ATOM"].reset_index(drop=True),
+        )
+        assert_frame_equal(
+            pm1.df["HETATM"].reset_index(drop=True),
+            pm2.df["HETATM"].reset_index(drop=True),
+        )
 
     os.remove("test.mmtf")
 
 
 @unittest.skip(reason="PDB No longer serves MMTF files.")
 def test_write_mmtf():
-    PDB_CODES = ["4hhb", "3eiy", "1t48", "1ehz", "4ggb", "1bxa", "1cbn", "1rcf"]
+    PDB_CODES = [
+        "4hhb",
+        "3eiy",
+        "1t48",
+        "1ehz",
+        "4ggb",
+        "1bxa",
+        "1cbn",
+        "1rcf",
+    ]
     for pdb in PDB_CODES:
         print(pdb)
         pm1 = PandasMmtf().fetch_mmtf(pdb)
@@ -32,8 +56,13 @@ def test_write_mmtf():
         assert os.path.exists("test.mmtf")
 
         pm2 = PandasMmtf().read_mmtf("test.mmtf")
-        assert_frame_equal(pm1.df["ATOM"].reset_index(drop=True), pm2.df["ATOM"].reset_index(drop=True))
-        assert_frame_equal(pm1.df["HETATM"].reset_index(drop=True), pm2.df["HETATM"].reset_index(drop=True))
+        assert_frame_equal(
+            pm1.df["ATOM"].reset_index(drop=True),
+            pm2.df["ATOM"].reset_index(drop=True),
+        )
+        assert_frame_equal(
+            pm1.df["HETATM"].reset_index(drop=True),
+            pm2.df["HETATM"].reset_index(drop=True),
+        )
 
     os.remove("test.mmtf")
-

From e3234f364d065b6440556181ce39bff0e68f0d87 Mon Sep 17 00:00:00 2001
From: Arian Jamasb <arian.jamasb@roche.com>
Date: Mon, 8 Jul 2024 18:42:44 +0100
Subject: [PATCH 07/21] linting

---
 biopandas/mol2/__init__.py               |  2 +-
 biopandas/mol2/pandas_mol2.py            | 17 ++++++++++++-----
 biopandas/mol2/tests/test_mol2_io.py     | 13 ++++++++++---
 biopandas/mol2/tests/test_pandas_mol2.py |  6 +++++-
 4 files changed, 28 insertions(+), 10 deletions(-)

diff --git a/biopandas/mol2/__init__.py b/biopandas/mol2/__init__.py
index 79358e2..38453f0 100644
--- a/biopandas/mol2/__init__.py
+++ b/biopandas/mol2/__init__.py
@@ -9,7 +9,7 @@
 files in pandas DataFrames.
 """
 
-from .pandas_mol2 import PandasMol2
 from .mol2_io import split_multimol2
+from .pandas_mol2 import PandasMol2
 
 __all__ = ["PandasMol2", "split_multimol2"]
diff --git a/biopandas/mol2/pandas_mol2.py b/biopandas/mol2/pandas_mol2.py
index cbb893c..556a9fc 100644
--- a/biopandas/mol2/pandas_mol2.py
+++ b/biopandas/mol2/pandas_mol2.py
@@ -1,14 +1,15 @@
 """ Class for working with Tripos MOL2 files"""
+
 # BioPandas
 # Author: Sebastian Raschka <mail@sebastianraschka.com>
 # License: BSD 3 clause
 # Project Website: http://rasbt.github.io/biopandas/
 # Code Repository: https://github.com/rasbt/biopandas
 
-import pandas as pd
 import numpy as np
-from .mol2_io import split_multimol2
+import pandas as pd
 
+from .mol2_io import split_multimol2
 
 COLUMN_NAMES = (
     "atom_id",
@@ -167,7 +168,9 @@ def read_mol2_from_list(self, mol2_lines, mol2_code, columns=None):
     def _construct_df(self, mol2_lines, col_names, col_types):
         """Construct DataFrames from list of PDB lines."""
         return self._atomsection_to_pandas(
-            self._get_atomsection(mol2_lines), col_names=col_names, col_types=col_types
+            self._get_atomsection(mol2_lines),
+            col_names=col_names,
+            col_types=col_types,
         )
 
     @staticmethod
@@ -195,7 +198,9 @@ def _get_atomsection(mol2_lst):
     @staticmethod
     def _atomsection_to_pandas(mol2_atom_lst, col_names, col_types):
 
-        df = pd.DataFrame([lst.split() for lst in mol2_atom_lst], columns=col_names)
+        df = pd.DataFrame(
+            [lst.split() for lst in mol2_atom_lst], columns=col_names
+        )
 
         for i in range(df.shape[1]):
             df[col_names[i]] = df[col_names[i]].astype(col_types[i])
@@ -281,4 +286,6 @@ def distance_df(df, xyz=(0.00, 0.00, 0.00)):
 
         """
 
-        return np.sqrt(np.sum(df[["x", "y", "z"]].subtract(xyz, axis=1) ** 2, axis=1))
+        return np.sqrt(
+            np.sum(df[["x", "y", "z"]].subtract(xyz, axis=1) ** 2, axis=1)
+        )
diff --git a/biopandas/mol2/tests/test_mol2_io.py b/biopandas/mol2/tests/test_mol2_io.py
index dfc3e93..b6c4c98 100644
--- a/biopandas/mol2/tests/test_mol2_io.py
+++ b/biopandas/mol2/tests/test_mol2_io.py
@@ -5,6 +5,7 @@
 # Code Repository: https://github.com/rasbt/biopandas
 
 import os
+
 from biopandas.mol2.mol2_io import split_multimol2
 from biopandas.testutils import assert_raises
 
@@ -13,7 +14,9 @@
 
 def test_split_multimol2():
     all_mol2 = []
-    for i in split_multimol2(os.path.join(this_dir, "data", "40_mol2_files.mol2")):
+    for i in split_multimol2(
+        os.path.join(this_dir, "data", "40_mol2_files.mol2")
+    ):
         all_mol2.append(i[0])
     assert all_mol2[1] == "ZINC04084113"
     assert len(all_mol2) == 40
@@ -21,7 +24,9 @@ def test_split_multimol2():
 
 def test_split_multimol2_wrong_format():
 
-    expect = "Wrong file format;" "allowed file formats are .mol2 and .mol2.gz."
+    expect = (
+        "Wrong file format;" "allowed file formats are .mol2 and .mol2.gz."
+    )
 
     def run_code():
         next(split_multimol2("40_mol2_files.pdb"))
@@ -31,7 +36,9 @@ def run_code():
 
 def test_split_multimol2_gz():
     all_mol2 = []
-    for i in split_multimol2(os.path.join(this_dir, "data", "40_mol2_files.mol2.gz")):
+    for i in split_multimol2(
+        os.path.join(this_dir, "data", "40_mol2_files.mol2.gz")
+    ):
         all_mol2.append(i[0])
     assert all_mol2[1].decode() == "ZINC04084113"
     assert len(all_mol2) == 40
diff --git a/biopandas/mol2/tests/test_pandas_mol2.py b/biopandas/mol2/tests/test_pandas_mol2.py
index 6cb4dba..ba88dd0 100644
--- a/biopandas/mol2/tests/test_pandas_mol2.py
+++ b/biopandas/mol2/tests/test_pandas_mol2.py
@@ -1,4 +1,5 @@
 """ Utility function for reading Tripos MOL2 files from files"""
+
 # BioPandas
 # Author: Sebastian Raschka <mail@sebastianraschka.com>
 # License: BSD 3 clause
@@ -6,6 +7,7 @@
 # Code Repository: https://github.com/rasbt/biopandas
 
 import os
+
 from biopandas.mol2 import PandasMol2
 from biopandas.mol2.mol2_io import split_multimol2
 from biopandas.testutils import assert_raises
@@ -48,7 +50,9 @@ def test_read_mol2_from_list():
     data_path = os.path.join(this_dir, "data", "40_mol2_files.mol2")
     mol2 = next(split_multimol2(data_path))
 
-    pdmol = PandasMol2().read_mol2_from_list(mol2_lines=mol2[1], mol2_code=mol2[0])
+    pdmol = PandasMol2().read_mol2_from_list(
+        mol2_lines=mol2[1], mol2_code=mol2[0]
+    )
     assert pdmol.df.shape == (65, 9)
     assert pdmol.code == "ZINC38611810"
 

From 561344d0f3d63eb902a825d63af694ed5dec1f08 Mon Sep 17 00:00:00 2001
From: Arian Jamasb <arian.jamasb@roche.com>
Date: Mon, 8 Jul 2024 18:43:16 +0100
Subject: [PATCH 08/21] linting

---
 biopandas/mmcif/mmcif_parser.py          |  21 +++-
 biopandas/mmcif/pandas_mmcif.py          | 144 +++++++++++++++--------
 biopandas/mmcif/tests/test_amino3to1.py  |   8 +-
 biopandas/mmcif/tests/test_distance.py   |  10 +-
 biopandas/mmcif/tests/test_read_mmcif.py |  21 ++--
 biopandas/mmcif/tests/test_rmsd.py       |   6 +-
 6 files changed, 145 insertions(+), 65 deletions(-)

diff --git a/biopandas/mmcif/mmcif_parser.py b/biopandas/mmcif/mmcif_parser.py
index 96d0a31..91556bb 100644
--- a/biopandas/mmcif/mmcif_parser.py
+++ b/biopandas/mmcif/mmcif_parser.py
@@ -22,19 +22,28 @@ def __init__(self, parser_obj):
         self.names_defined = False
 
     def add_name(self, name):
-        cat_name = type(name) == str and partition_string(name, ".") or ["", "", ""]
+        cat_name = (
+            type(name) == str and partition_string(name, ".") or ["", "", ""]
+        )
         if cat_name[1]:
             if cat_name[0] not in self.parser_obj.current_target[-2]:
                 self.parser_obj.current_target[-2][cat_name[0]] = {}
-            if cat_name[2] not in self.parser_obj.current_target[-2][cat_name[0]]:
-                self.parser_obj.current_target[-2][cat_name[0]][cat_name[2]] = []
+            if (
+                cat_name[2]
+                not in self.parser_obj.current_target[-2][cat_name[0]]
+            ):
+                self.parser_obj.current_target[-2][cat_name[0]][
+                    cat_name[2]
+                ] = []
             self.ref_list.append(
                 self.parser_obj.current_target[-2][cat_name[0]][cat_name[2]]
             )
         else:
             if cat_name[0] not in self.parser_obj.current_target[-2]:
                 self.parser_obj.current_target[-2][cat_name[0]] = []
-            self.ref_list.append(self.parser_obj.current_target[-2][cat_name[0]])
+            self.ref_list.append(
+                self.parser_obj.current_target[-2][cat_name[0]]
+            )
         self.length = len(self.ref_list)
 
     def push_value(self, value):
@@ -289,7 +298,9 @@ def __dump_str__(inp):
         return str(inp)
     if re.search(__CIF_STR_NL_CHECK__, inp) is not None:
         return "\n;%s\n;" % inp
-    return "'%s'" % inp if re.search(__CIF_STR_CHECK__, inp) is not None else inp
+    return (
+        "'%s'" % inp if re.search(__CIF_STR_CHECK__, inp) is not None else inp
+    )
 
 
 def __pad_string__(inp, flength):
diff --git a/biopandas/mmcif/pandas_mmcif.py b/biopandas/mmcif/pandas_mmcif.py
index 167b79e..e00c1f5 100644
--- a/biopandas/mmcif/pandas_mmcif.py
+++ b/biopandas/mmcif/pandas_mmcif.py
@@ -1,4 +1,5 @@
 """Class for working with MMCIF files."""
+
 # BioPandas
 # Authors: Arian Jamasb <arian@jamasb.io>,
 # Authors: Sebastian Raschka <mail@sebastianraschka.com>
@@ -69,56 +70,76 @@ def read_mmcif(self, path):
         self.code = self.data["entry"]["id"][0].lower()
         return self
 
-    def fetch_mmcif(self, pdb_code: Optional[str] = None, uniprot_id: Optional[str] = None, source: str = "pdb"):
+    def fetch_mmcif(
+        self,
+        pdb_code: Optional[str] = None,
+        uniprot_id: Optional[str] = None,
+        source: str = "pdb",
+    ):
         """Fetches mmCIF file contents from the Protein Databank at rcsb.org or AlphaFold database at https://alphafold.ebi.ac.uk/.
-.
+        .
 
-        Parameters
-        ----------
-        pdb_code : str, optional
-            A 4-letter PDB code, e.g., `"3eiy"` to retrieve structures from the PDB. Defaults to `None`.
+                Parameters
+                ----------
+                pdb_code : str, optional
+                    A 4-letter PDB code, e.g., `"3eiy"` to retrieve structures from the PDB. Defaults to `None`.
 
-        uniprot_id : str, optional
-            A UniProt Identifier, e.g., `"Q5VSL9"` to retrieve structures from the AF2 database. Defaults to `None`.
+                uniprot_id : str, optional
+                    A UniProt Identifier, e.g., `"Q5VSL9"` to retrieve structures from the AF2 database. Defaults to `None`.
 
-        source : str
-            The source to retrieve the structure from 
-            (`"pdb"`, `"alphafold2-v3"` or `"alphafold2-v4"`). Defaults to `"pdb"`.
+                source : str
+                    The source to retrieve the structure from
+                    (`"pdb"`, `"alphafold2-v3"` or `"alphafold2-v4"`). Defaults to `"pdb"`.
 
-        Returns
-        ---------
-        self
+                Returns
+                ---------
+                self
 
         """
         # Sanitize input
         invalid_input_identifier_1 = pdb_code is None and uniprot_id is None
-        invalid_input_identifier_2 = pdb_code is not None and uniprot_id is not None
-        invalid_input_combination_1 = uniprot_id is not None and source == "pdb"
+        invalid_input_identifier_2 = (
+            pdb_code is not None and uniprot_id is not None
+        )
+        invalid_input_combination_1 = (
+            uniprot_id is not None and source == "pdb"
+        )
         invalid_input_combination_2 = pdb_code is not None and source in {
-            "alphafold2-v3", "alphafold2-v4"}
+            "alphafold2-v3",
+            "alphafold2-v4",
+        }
 
         if invalid_input_identifier_1 or invalid_input_identifier_2:
             raise ValueError(
-                "Please provide either a PDB code or a UniProt ID.")
+                "Please provide either a PDB code or a UniProt ID."
+            )
 
         if invalid_input_combination_1:
             raise ValueError(
-                "Please use a 'pdb_code' instead of 'uniprot_id' for source='pdb'.")
+                "Please use a 'pdb_code' instead of 'uniprot_id' for source='pdb'."
+            )
         elif invalid_input_combination_2:
             raise ValueError(
-                f"Please use a 'uniprot_id' instead of 'pdb_code' for source={source}.")
+                f"Please use a 'uniprot_id' instead of 'pdb_code' for source={source}."
+            )
 
         if source == "pdb":
             self.mmcif_path, self.mmcif_text = self._fetch_mmcif(pdb_code)
         elif source == "alphafold2-v3":
             af2_version = 3
-            self.mmcif_path, self.mmcif_text = self._fetch_af2(uniprot_id, af2_version)
+            self.mmcif_path, self.mmcif_text = self._fetch_af2(
+                uniprot_id, af2_version
+            )
         elif source == "alphafold2-v4":
             af2_version = 4
-            self.mmcif_path, self.mmcif_text = self._fetch_af2(uniprot_id, af2_version)
+            self.mmcif_path, self.mmcif_text = self._fetch_af2(
+                uniprot_id, af2_version
+            )
         else:
-            raise ValueError(f"Invalid source: {source}."
-                " Please use one of 'pdb', 'alphafold2-v3' or 'alphafold2-v4'.")
+            raise ValueError(
+                f"Invalid source: {source}."
+                " Please use one of 'pdb', 'alphafold2-v3' or 'alphafold2-v4'."
+            )
 
         self._df = self._construct_df(text=self.mmcif_text)
         return self
@@ -129,7 +150,8 @@ def _construct_df(self, text: str):
         self.data = data
         df: Dict[str, pd.DataFrame] = {}
         full_df = pd.DataFrame.from_dict(
-            data["atom_site"], orient="index").transpose()
+            data["atom_site"], orient="index"
+        ).transpose()
         full_df = full_df.astype(mmcif_col_types, errors="ignore")
         df["ATOM"] = pd.DataFrame(full_df[full_df.group_PDB == "ATOM"])
         df["HETATM"] = pd.DataFrame(full_df[full_df.group_PDB == "HETATM"])
@@ -148,8 +170,9 @@ def _fetch_mmcif(pdb_code):
             response = urlopen(url)
             txt = response.read()
             txt = (
-                txt.decode(
-                    "utf-8") if sys.version_info[0] >= 3 else txt.encode("ascii")
+                txt.decode("utf-8")
+                if sys.version_info[0] >= 3
+                else txt.encode("ascii")
             )
         except HTTPError as e:
             print(f"HTTP Error {e.code}")
@@ -166,11 +189,15 @@ def _fetch_af2(uniprot_id: str, af2_version: int = 3):
         try:
             response = urlopen(url)
             txt = response.read()
-            txt = txt.decode('utf-8') if sys.version_info[0] >= 3 else txt.encode('ascii')
+            txt = (
+                txt.decode("utf-8")
+                if sys.version_info[0] >= 3
+                else txt.encode("ascii")
+            )
         except HTTPError as e:
-            print(f'HTTP Error {e.code}')
+            print(f"HTTP Error {e.code}")
         except URLError as e:
-            print(f'URL Error {e.args}')
+            print(f"URL Error {e.args}")
         return url, txt
 
     @staticmethod
@@ -184,7 +211,8 @@ def _read_mmcif(path):
             openf = gzip.open
         else:
             allowed_formats = ", ".join(
-                (".cif", ".cif.gz", ".mmcif", ".mmcif.gz"))
+                (".cif", ".cif.gz", ".mmcif", ".mmcif.gz")
+            )
             raise ValueError(
                 f"Wrong file format; allowed file formats are {allowed_formats}"
             )
@@ -194,8 +222,9 @@ def _read_mmcif(path):
 
         if path.endswith(".gz"):
             txt = (
-                txt.decode(
-                    "utf-8") if sys.version_info[0] >= 3 else txt.encode("ascii")
+                txt.decode("utf-8")
+                if sys.version_info[0] >= 3
+                else txt.encode("ascii")
             )
         return path, txt
 
@@ -271,14 +300,19 @@ def _get_mainchain(
     def _get_hydrogen(df, invert):
         """Return only hydrogen atom entries from a DataFrame"""
         return (
-            df[(df["type_symbol"] != "H")] if invert else df[(
-                df["type_symbol"] == "H")]
+            df[(df["type_symbol"] != "H")]
+            if invert
+            else df[(df["type_symbol"] == "H")]
         )
 
     @staticmethod
     def _get_heavy(df, invert):
         """Return only heavy atom entries from a DataFrame"""
-        return df[df["type_symbol"] == "H"] if invert else df[df["type_symbol"] != "H"]
+        return (
+            df[df["type_symbol"] == "H"]
+            if invert
+            else df[df["type_symbol"] != "H"]
+        )
 
     @staticmethod
     def _get_calpha(df, invert, atom_col: str = "auth_atom_id"):
@@ -288,7 +322,11 @@ def _get_calpha(df, invert, atom_col: str = "auth_atom_id"):
     @staticmethod
     def _get_carbon(df, invert):
         """Return carbon atom entries from a DataFrame"""
-        return df[df["type_symbol"] != "C"] if invert else df[df["type_symbol"] == "C"]
+        return (
+            df[df["type_symbol"] != "C"]
+            if invert
+            else df[df["type_symbol"] == "C"]
+        )
 
     def amino3to1(
         self,
@@ -339,8 +377,9 @@ def amino3to1(
                 indices.append(ind)
             cmp = num
 
-        transl = tmp.iloc[indices][residue_col].map(
-            amino3to1dict).fillna(fillna)
+        transl = (
+            tmp.iloc[indices][residue_col].map(amino3to1dict).fillna(fillna)
+        )
 
         return pd.concat((tmp.iloc[indices][chain_col], transl), axis=1)
 
@@ -425,7 +464,9 @@ def distance(self, xyz=(0.00, 0.00, 0.00), records=("ATOM", "HETATM")):
 
         return np.sqrt(
             np.sum(
-                df[["Cartn_x", "Cartn_y", "Cartn_z"]].subtract(xyz, axis=1) ** 2, axis=1
+                df[["Cartn_x", "Cartn_y", "Cartn_z"]].subtract(xyz, axis=1)
+                ** 2,
+                axis=1,
             )
         )
 
@@ -451,7 +492,9 @@ def distance_df(df, xyz=(0.00, 0.00, 0.00)):
         """
         return np.sqrt(
             np.sum(
-                df[["Cartn_x", "Cartn_y", "Cartn_z"]].subtract(xyz, axis=1) ** 2, axis=1
+                df[["Cartn_x", "Cartn_y", "Cartn_z"]].subtract(xyz, axis=1)
+                ** 2,
+                axis=1,
             )
         )
 
@@ -485,7 +528,11 @@ def read_mmcif_from_list(self, mmcif_lines):
         self.code = self.data["entry"]["id"][0].lower()
         return self
 
-    def convert_to_pandas_pdb(self, offset_chains: bool = True, records: List[str] = ["ATOM", "HETATM"]) -> PandasPdb:
+    def convert_to_pandas_pdb(
+        self,
+        offset_chains: bool = True,
+        records: List[str] = ["ATOM", "HETATM"],
+    ) -> PandasPdb:
         """Returns a PandasPdb object with the same data as the PandasMmcif
         object.
 
@@ -525,10 +572,15 @@ def convert_to_pandas_pdb(self, offset_chains: bool = True, records: List[str] =
 
         # Update atom numbers
         if offset_chains:
-            offsets = pandaspdb.df["ATOM"]["chain_id"].astype(
-                "category").cat.codes
-            pandaspdb.df["ATOM"]["atom_number"] = pandaspdb.df["ATOM"]["atom_number"] + offsets
+            offsets = (
+                pandaspdb.df["ATOM"]["chain_id"].astype("category").cat.codes
+            )
+            pandaspdb.df["ATOM"]["atom_number"] = (
+                pandaspdb.df["ATOM"]["atom_number"] + offsets
+            )
             hetatom_offset = offsets.max() + 1
-            pandaspdb.df["HETATM"]["atom_number"] = pandaspdb.df["HETATM"]["atom_number"] + hetatom_offset
+            pandaspdb.df["HETATM"]["atom_number"] = (
+                pandaspdb.df["HETATM"]["atom_number"] + hetatom_offset
+            )
 
         return pandaspdb
diff --git a/biopandas/mmcif/tests/test_amino3to1.py b/biopandas/mmcif/tests/test_amino3to1.py
index a03c364..83a671c 100644
--- a/biopandas/mmcif/tests/test_amino3to1.py
+++ b/biopandas/mmcif/tests/test_amino3to1.py
@@ -805,8 +805,12 @@ def test_multichain():
     expect_chain = ["A" for _ in range(88)] + ["B" for _ in range(94)]
     got_chain = list(transl["auth_asym_id"].values)
 
-    got_res_a = list(transl.loc[transl["auth_asym_id"] == "A", "auth_comp_id"].values)
-    got_res_b = list(transl.loc[transl["auth_asym_id"] == "B", "auth_comp_id"].values)
+    got_res_a = list(
+        transl.loc[transl["auth_asym_id"] == "A", "auth_comp_id"].values
+    )
+    got_res_b = list(
+        transl.loc[transl["auth_asym_id"] == "B", "auth_comp_id"].values
+    )
 
     assert expect_chain == got_chain
     assert expect_res_a == got_res_a
diff --git a/biopandas/mmcif/tests/test_distance.py b/biopandas/mmcif/tests/test_distance.py
index f827d01..e7cd116 100644
--- a/biopandas/mmcif/tests/test_distance.py
+++ b/biopandas/mmcif/tests/test_distance.py
@@ -18,7 +18,8 @@ def test_equal():
     dist = p1t48.distance(xyz=(70.785, 15.477, 23.359), records=("ATOM",))
 
     expect = pd.Series(
-        [2.533259, 1.520502, 0.000000, 1.257597, 1.252510], index=[12, 13, 14, 15, 16]
+        [2.533259, 1.520502, 0.000000, 1.257597, 1.252510],
+        index=[12, 13, 14, 15, 16],
     )
     assert dist[dist < 3].all() == expect.all()
 
@@ -31,7 +32,8 @@ def test_deprecated_str_arg():
     dist = p1t48.distance(xyz=(70.785, 15.477, 23.359), records="ATOM")
 
     expect = pd.Series(
-        [2.533259, 1.520502, 0.000000, 1.257597, 1.252510], index=[12, 13, 14, 15, 16]
+        [2.533259, 1.520502, 0.000000, 1.257597, 1.252510],
+        index=[12, 13, 14, 15, 16],
     )
     assert dist[dist < 3].all() == expect.all()
 
@@ -44,5 +46,7 @@ def test_use_external_df():
     new_df = p1t48.df["ATOM"].iloc[:-1, :].copy()
     dist = PandasMmcif.distance_df(df=new_df, xyz=(70.785, 15.477, 23.359))
 
-    expect = pd.Series([2.533259, 1.520502, 0.000000, 1.257597], index=[12, 13, 14, 15])
+    expect = pd.Series(
+        [2.533259, 1.520502, 0.000000, 1.257597], index=[12, 13, 14, 15]
+    )
     assert dist[dist < 3].all() == expect.all()
diff --git a/biopandas/mmcif/tests/test_read_mmcif.py b/biopandas/mmcif/tests/test_read_mmcif.py
index 7189702..983e848 100644
--- a/biopandas/mmcif/tests/test_read_mmcif.py
+++ b/biopandas/mmcif/tests/test_read_mmcif.py
@@ -6,11 +6,11 @@
 
 
 import os
-import pytest
-from urllib.error import HTTPError
 from pathlib import Path
+from urllib.error import HTTPError
 
 import pandas as pd
+import pytest
 from biopandas.mmcif import PandasMmcif
 from biopandas.pdb import PandasPdb
 from biopandas.testutils import assert_raises
@@ -22,8 +22,12 @@
 # TESTDATA_FILENAME2 = os.path.join(
 #    os.path.dirname(__file__), "data", "4eiy_anisouchunk.cif"
 # )
-TESTDATA_FILENAME2 = os.path.join(os.path.dirname(__file__), "data", "4eiy.cif")
-TESTDATA_FILENAME_GZ = os.path.join(os.path.dirname(__file__), "data", "3eiy.cif.gz")
+TESTDATA_FILENAME2 = os.path.join(
+    os.path.dirname(__file__), "data", "4eiy.cif"
+)
+TESTDATA_FILENAME_GZ = os.path.join(
+    os.path.dirname(__file__), "data", "3eiy.cif.gz"
+)
 
 TESTDATA_FILENAME_AF2_V4 = os.path.join(
     os.path.dirname(__file__), "data", "AF-Q5VSL9-F1-model_v4.cif"
@@ -90,7 +94,6 @@
     af2_test_struct_v3 = f.read()
 
 
-
 def test__read_pdb():
     """Test private _read_pdb"""
     ppdb = PandasMmcif()
@@ -334,7 +337,9 @@ def test_mmcif_pdb_conversion():
     )
     assert_frame_equal(
         pdb.df["HETATM"].drop(columns=["line_idx"]),
-        mmcif_pdb.df["HETATM"].drop(columns=["line_idx"]).reset_index(drop=True),
+        mmcif_pdb.df["HETATM"]
+        .drop(columns=["line_idx"])
+        .reset_index(drop=True),
     )
 
     # single chain test
@@ -348,5 +353,7 @@ def test_mmcif_pdb_conversion():
     )
     assert_frame_equal(
         pdb.df["HETATM"].drop(columns=["line_idx"]),
-        mmcif_pdb.df["HETATM"].drop(columns=["line_idx"]).reset_index(drop=True),
+        mmcif_pdb.df["HETATM"]
+        .drop(columns=["line_idx"])
+        .reset_index(drop=True),
     )
diff --git a/biopandas/mmcif/tests/test_rmsd.py b/biopandas/mmcif/tests/test_rmsd.py
index 5507059..054f3b2 100644
--- a/biopandas/mmcif/tests/test_rmsd.py
+++ b/biopandas/mmcif/tests/test_rmsd.py
@@ -5,8 +5,8 @@
 # Code Repository: https://github.com/rasbt/biopandas
 
 import os
-import pytest
 
+import pytest
 from biopandas.mmcif import PandasMmcif
 
 TESTDATA_1t48 = os.path.join(os.path.dirname(__file__), "data", "1t48.cif")
@@ -48,7 +48,9 @@ def test_invalid_query():
 
 
 def test_protein():
-    r = PandasMmcif.rmsd(p1t48.df["ATOM"], p1t49.df["ATOM"], s="c-alpha", invert=False)
+    r = PandasMmcif.rmsd(
+        p1t48.df["ATOM"], p1t49.df["ATOM"], s="c-alpha", invert=False
+    )
     assert r == 0.4923, r
 
 

From a3bf27b8d6817ebabd87aeafa86d6a0aa6619fc5 Mon Sep 17 00:00:00 2001
From: Arian Jamasb <arian.jamasb@roche.com>
Date: Mon, 8 Jul 2024 18:45:51 +0100
Subject: [PATCH 09/21] remove unused variables

---
 biopandas/pdb/tests/test_read_pdb.py | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/biopandas/pdb/tests/test_read_pdb.py b/biopandas/pdb/tests/test_read_pdb.py
index c41792f..4477830 100644
--- a/biopandas/pdb/tests/test_read_pdb.py
+++ b/biopandas/pdb/tests/test_read_pdb.py
@@ -123,11 +123,11 @@ def test_fetch_pdb():
 
     try:
         ppdb = PandasPdb()
-        url, txt = ppdb._fetch_pdb("3eiy")
+        _, txt = ppdb._fetch_pdb("3eiy")
     except HTTPError:
-        url, txt = None, None
+        _, txt = None, None
     except ConnectionResetError:
-        url, txt = None, None
+        _, txt = None, None
 
     if txt:  # skip if PDB down
         txt[:100] == three_eiy[:100]
@@ -141,11 +141,11 @@ def test_fetch_af2():
     # Check latest release
     try:
         ppdb = PandasPdb()
-        url, txt = ppdb._fetch_af2("Q5VSL9", af2_version=4)
+        _, txt = ppdb._fetch_af2("Q5VSL9", af2_version=4)
     except HTTPError:
-        url, txt = None, None
+        _, txt = None, None
     except ConnectionResetError:
-        url, txt = None, None
+        _, txt = None, None
 
     if txt:  # skip if AF2 DB down
         txt[:100] == af_test_struct_v4[:100]
@@ -159,11 +159,11 @@ def test_fetch_af2():
     # Check legacy release
     try:
         ppdb = PandasPdb()
-        url, txt = ppdb._fetch_af2("Q5VSL9", af2_version=3)
+        _, txt = ppdb._fetch_af2("Q5VSL9", af2_version=3)
     except HTTPError:
-        url, txt = None, None
+        _, txt = None, None
     except ConnectionResetError:
-        url, txt = None, None
+        _, txt = None, None
 
     if txt:  # skip if AF2 DB down
         txt[:100] == af_test_struct_v3[:100]
@@ -178,7 +178,7 @@ def test_fetch_af2():
 def test__read_pdb_gz():
     """Test public _read_pdb with gzip files"""
     ppdb = PandasPdb()
-    path, txt = ppdb._read_pdb(TESTDATA_FILENAME_GZ)
+    _, txt = ppdb._read_pdb(TESTDATA_FILENAME_GZ)
     assert txt == three_eiy
 
 

From 108ead68e8314ababf1d9bf5aa87b46f4ecd75eb Mon Sep 17 00:00:00 2001
From: Arian Jamasb <arian.jamasb@roche.com>
Date: Mon, 8 Jul 2024 18:50:00 +0100
Subject: [PATCH 10/21] reorder imports in pandas_mmtf

---
 biopandas/mmtf/pandas_mmtf.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/biopandas/mmtf/pandas_mmtf.py b/biopandas/mmtf/pandas_mmtf.py
index 9912946..1ebc0e9 100644
--- a/biopandas/mmtf/pandas_mmtf.py
+++ b/biopandas/mmtf/pandas_mmtf.py
@@ -2,9 +2,9 @@
 
 from __future__ import annotations
 
+import os
 import copy
 import gzip
-import os
 import warnings
 from string import ascii_uppercase
 from typing import Any, Dict, List, Union
@@ -12,11 +12,11 @@
 
 import numpy as np
 import pandas as pd
-from biopandas.constants import protein_letters_3to1_extended
 from looseversion import LooseVersion
-
 from mmtf import MMTFDecoder, MMTFEncoder, fetch, parse, parse_gzip
 
+from biopandas.constants import protein_letters_3to1_extended
+
 from ..pdb.engines import amino3to1dict, pdb_df_columns, pdb_records
 
 pd_version = LooseVersion(pd.__version__)

From 33b5f9f54220d223891299bd200e102633a9472c Mon Sep 17 00:00:00 2001
From: Arian Jamasb <arian.jamasb@roche.com>
Date: Mon, 8 Jul 2024 18:55:09 +0100
Subject: [PATCH 11/21] bump changelog

---
 docs/CHANGELOG.md | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md
index f4176fc..8eb8d2a 100755
--- a/docs/CHANGELOG.md
+++ b/docs/CHANGELOG.md
@@ -4,6 +4,10 @@
 The CHANGELOG for the current development version is available at
 [https://github.com/rasbt/biopandas/blob/main/docs/sources/CHANGELOG.md](https://github.com/rasbt/biopandas/blob/main/docs/sources/CHANGELOG.md).
 
+### 0.5.1dev1 (UNRELEASED)
+
+- Dev: switched testing framework entirely to pytest. Drops nose dependency due to version conflicts with Python 3.12 (`nose`) and 3.8 (`nose`)
+
 
 ### 0.5.0dev1 (31/7/2023)
 - Implement add_remark for PandasPdb, (Via [Anton Bushuiev](https://github.com/anton-bushuiev) PR #[129](https://github.com/BioPandas/biopandas/pull/129))

From 45188a8a8e32d5cf84c946c227377ee27d01fcb3 Mon Sep 17 00:00:00 2001
From: Arian Jamasb <arian.jamasb@roche.com>
Date: Mon, 8 Jul 2024 18:56:10 +0100
Subject: [PATCH 12/21] add changelog enforcer test

---
 .github/workflows/changelog-enforcer.yaml | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)
 create mode 100644 .github/workflows/changelog-enforcer.yaml

diff --git a/.github/workflows/changelog-enforcer.yaml b/.github/workflows/changelog-enforcer.yaml
new file mode 100644
index 0000000..f9ba8c4
--- /dev/null
+++ b/.github/workflows/changelog-enforcer.yaml
@@ -0,0 +1,16 @@
+name: Changelog Enforcer
+
+on:  # yamllint disable-line rule:truthy
+  pull_request:
+    types: [opened, synchronize, reopened, ready_for_review, labeled, unlabeled]
+
+jobs:
+
+  changelog:
+    runs-on: ubuntu-latest
+
+    steps:
+      - uses: actions/checkout@v3
+      - uses: dangoslen/changelog-enforcer@v3
+        with:
+          skipLabels: 'skip-changelog'
\ No newline at end of file

From 25f590806bfc4250936d18d07b1850ea92fb34ab Mon Sep 17 00:00:00 2001
From: Arian Jamasb <arian.jamasb@roche.com>
Date: Mon, 8 Jul 2024 19:05:31 +0100
Subject: [PATCH 13/21] fix type comparison

---
 biopandas/mmcif/mmcif_parser.py | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/biopandas/mmcif/mmcif_parser.py b/biopandas/mmcif/mmcif_parser.py
index 91556bb..c0dbfcb 100644
--- a/biopandas/mmcif/mmcif_parser.py
+++ b/biopandas/mmcif/mmcif_parser.py
@@ -23,7 +23,7 @@ def __init__(self, parser_obj):
 
     def add_name(self, name):
         cat_name = (
-            type(name) == str and partition_string(name, ".") or ["", "", ""]
+            isinstance(name, str) and partition_string(name, ".") or ["", "", ""]
         )
         if cat_name[1]:
             if cat_name[0] not in self.parser_obj.current_target[-2]:
@@ -228,7 +228,7 @@ def __cif_float_range__(inp):
     try:
         pos = inp.index("-", 1)
         return (__CIFFloat__(inp[:pos]), __CIFFloat__(inp[pos + 1 :]))
-    except:
+    except Exception:
         return (__CIFFloat__(inp),)
 
 
@@ -236,7 +236,7 @@ def __cif_int_range__(inp):
     try:
         pos = inp.index("-", 1)
         return (__CIFInt__(inp[:pos]), __CIFInt__(inp[pos + 1 :]))
-    except:
+    except Exception:
         return (__CIFInt__(inp),)
 
 
@@ -248,12 +248,12 @@ def __load_cif_dic__(dic_file, force=False):
         if force:
             throw
         dic = json.loads(open(jsf).read())
-    except:
+    except Exception:
         parser = CIFParser()
         parser.parse(open(dic_file))
         json.dump(parser.data, open(jsf_dic, "w"))
         for k, v in parser.data["data_mmcif_pdbx.dic"].items():
-            if type(v) != dict or "item_type" not in v:
+            if not isinstance(v, dict) or "item_type" not in v:
                 continue
             name = partition_string(k[6:], ".")
             if name[0] not in dic:
@@ -294,7 +294,7 @@ def __dump_cif__(jso):
 def __dump_str__(inp):
     if inp is None:
         return "?"
-    if type(inp) is not str:
+    if not isinstance(inp, str):
         return str(inp)
     if re.search(__CIF_STR_NL_CHECK__, inp) is not None:
         return "\n;%s\n;" % inp
@@ -365,7 +365,7 @@ def __dump_part__(jso):
 
 def load_cif_data(data, do_clean=True, do_type=True):
     parser = CIFParser()
-    if type(data) == str:
+    if isinstance(data, str):
         parser.parse_string(data)
     else:
         parser.parse(data)  # fileobj

From e2364de5cab3dba07a3d99121177d77d7a29e519 Mon Sep 17 00:00:00 2001
From: Arian Jamasb <arian.jamasb@roche.com>
Date: Mon, 8 Jul 2024 19:06:51 +0100
Subject: [PATCH 14/21] clean up unused variable

---
 biopandas/mmcif/tests/test_read_mmcif.py | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/biopandas/mmcif/tests/test_read_mmcif.py b/biopandas/mmcif/tests/test_read_mmcif.py
index 983e848..bfba806 100644
--- a/biopandas/mmcif/tests/test_read_mmcif.py
+++ b/biopandas/mmcif/tests/test_read_mmcif.py
@@ -97,7 +97,7 @@
 def test__read_pdb():
     """Test private _read_pdb"""
     ppdb = PandasMmcif()
-    path, txt = ppdb._read_mmcif(TESTDATA_FILENAME)
+    _, txt = ppdb._read_mmcif(TESTDATA_FILENAME)
     print(txt)
     assert txt == three_eiy
 
@@ -127,9 +127,9 @@ def test_fetch_pdb():
 
     try:
         ppdb = PandasMmcif()
-        url, txt = ppdb._fetch_mmcif("3eiy")
+        _, txt = ppdb._fetch_mmcif("3eiy")
     except (HTTPError, ConnectionResetError):
-        url, txt = None, None
+        _, txt = None, None
     if txt:  # skip if PDB down
         txt[:100] == three_eiy[:100]
         ppdb.fetch_mmcif("3eiy")
@@ -142,9 +142,9 @@ def test_fetch_af2():
     # Test latest release
     try:
         ppdb = PandasMmcif()
-        url, txt = ppdb._fetch_af2("Q5VSL9", af2_version=4)
+        _, txt = ppdb._fetch_af2("Q5VSL9", af2_version=4)
     except (HTTPError, ConnectionResetError):
-        url, txt = None, None
+        _, txt = None, None
     if txt:  # skip if AF DB down
         txt[:100] == af2_test_struct_v4[:100]
         ppdb.fetch_mmcif(uniprot_id="Q5VSL9", source="alphafold2-v4")
@@ -157,9 +157,9 @@ def test_fetch_af2():
     # Test legacy release
     try:
         ppdb = PandasMmcif()
-        url, txt = ppdb._fetch_af2("Q5VSL9", af2_version=3)
+        _, txt = ppdb._fetch_af2("Q5VSL9", af2_version=3)
     except (HTTPError, ConnectionResetError):
-        url, txt = None, None
+        _, txt = None, None
     if txt:  # skip if AF DB down
         txt[:100] == af2_test_struct_v3[:100]
         ppdb.fetch_mmcif(uniprot_id="Q5VSL9", source="alphafold2-v3")
@@ -173,7 +173,7 @@ def test_fetch_af2():
 def test__read_pdb_gz():
     """Test public _read_pdb with gzip files"""
     ppdb = PandasMmcif()
-    path, txt = ppdb._read_mmcif(TESTDATA_FILENAME_GZ)
+    _, txt = ppdb._read_mmcif(TESTDATA_FILENAME_GZ)
     assert txt == three_eiy
 
 

From 0f5cf0dcaec6bdcad94cf56c2a4f2a5e49a2787e Mon Sep 17 00:00:00 2001
From: Arian Jamasb <arian.jamasb@roche.com>
Date: Mon, 8 Jul 2024 19:08:31 +0100
Subject: [PATCH 15/21] fix whitespace

---
 biopandas/mmcif/mmcif_parser.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/biopandas/mmcif/mmcif_parser.py b/biopandas/mmcif/mmcif_parser.py
index c0dbfcb..bce2dae 100644
--- a/biopandas/mmcif/mmcif_parser.py
+++ b/biopandas/mmcif/mmcif_parser.py
@@ -227,7 +227,7 @@ def __repr__(self):
 def __cif_float_range__(inp):
     try:
         pos = inp.index("-", 1)
-        return (__CIFFloat__(inp[:pos]), __CIFFloat__(inp[pos + 1 :]))
+        return (__CIFFloat__(inp[:pos]), __CIFFloat__(inp[pos + 1:]))
     except Exception:
         return (__CIFFloat__(inp),)
 
@@ -235,7 +235,7 @@ def __cif_float_range__(inp):
 def __cif_int_range__(inp):
     try:
         pos = inp.index("-", 1)
-        return (__CIFInt__(inp[:pos]), __CIFInt__(inp[pos + 1 :]))
+        return (__CIFInt__(inp[:pos]), __CIFInt__(inp[pos + 1:]))
     except Exception:
         return (__CIFInt__(inp),)
 

From 8474381ee415189cad398bb756f0cd0ed2269c65 Mon Sep 17 00:00:00 2001
From: Arian Jamasb <arian.jamasb@roche.com>
Date: Mon, 8 Jul 2024 19:09:20 +0100
Subject: [PATCH 16/21] remove unused ascii_uppercase import from pandas_mmtf

---
 biopandas/mmtf/pandas_mmtf.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/biopandas/mmtf/pandas_mmtf.py b/biopandas/mmtf/pandas_mmtf.py
index 1ebc0e9..a2db5de 100644
--- a/biopandas/mmtf/pandas_mmtf.py
+++ b/biopandas/mmtf/pandas_mmtf.py
@@ -6,7 +6,6 @@
 import copy
 import gzip
 import warnings
-from string import ascii_uppercase
 from typing import Any, Dict, List, Union
 from warnings import warn
 

From cd3582d52066e70b71152ad0f1fc7c127fce5473 Mon Sep 17 00:00:00 2001
From: Arian Jamasb <arian.jamasb@roche.com>
Date: Mon, 8 Jul 2024 19:11:57 +0100
Subject: [PATCH 17/21] remove whitespace

---
 biopandas/pdb/pandas_pdb.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/biopandas/pdb/pandas_pdb.py b/biopandas/pdb/pandas_pdb.py
index fb93182..102d9d5 100644
--- a/biopandas/pdb/pandas_pdb.py
+++ b/biopandas/pdb/pandas_pdb.py
@@ -247,13 +247,13 @@ def impute_element(self, records=("ATOM", "HETATM"), inplace=False):
                 lambda x: x[0][1] if len(x[1]) == 3 else x[0][0], axis=1
             )
         return t
-    
+
     def add_remark(self, code, text='', indent=0):
         """Add custom REMARK entry.
 
         The remark will be inserted to preserve the ordering of REMARK codes, i.e. if the code is
         `n` it will be added after all remarks with codes less or equal to `n`. If the object does
-        not store any remarks the remark will be inserted right before the first of ATOM, HETATM or 
+        not store any remarks the remark will be inserted right before the first of ATOM, HETATM or
         ANISOU records.
 
         Parameters
@@ -263,9 +263,9 @@ def add_remark(self, code, text='', indent=0):
 
         text : str
             The text of the remark. If the text does not fit into a single line it will be wrapped
-            into multiple lines of REMARK entries. Likewise, if the text contains new line 
+            into multiple lines of REMARK entries. Likewise, if the text contains new line
             characters it will be split accordingly.
-        
+
         indent : int, default: 0
             Number of white spaces inserted before the text of the remark.
 

From 0f91baa4c6a906902582770f043ede6138144f75 Mon Sep 17 00:00:00 2001
From: Arian Jamasb <arian.jamasb@roche.com>
Date: Mon, 8 Jul 2024 19:12:38 +0100
Subject: [PATCH 18/21] remove whitespace

---
 biopandas/pdb/pandas_pdb.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/biopandas/pdb/pandas_pdb.py b/biopandas/pdb/pandas_pdb.py
index 102d9d5..2b76db4 100644
--- a/biopandas/pdb/pandas_pdb.py
+++ b/biopandas/pdb/pandas_pdb.py
@@ -131,7 +131,7 @@ def fetch_pdb(self, pdb_code: Optional[str] = None, uniprot_id: Optional[str] =
             Defaults to `None`.
 
         source : str
-            The source to retrieve the structure from 
+            The source to retrieve the structure from
             (`"pdb"`, `"alphafold2-v3"`, `"alphafold2-v4"`(latest)).
             Defaults to `"pdb"`.
 

From 15340bafb6b773edddb33986f9cba8f95cb71bb4 Mon Sep 17 00:00:00 2001
From: Arian Jamasb <arian.jamasb@roche.com>
Date: Mon, 8 Jul 2024 19:14:03 +0100
Subject: [PATCH 19/21] rename ambiguous variable

---
 biopandas/pdb/pandas_pdb.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/biopandas/pdb/pandas_pdb.py b/biopandas/pdb/pandas_pdb.py
index 2b76db4..f36b11e 100644
--- a/biopandas/pdb/pandas_pdb.py
+++ b/biopandas/pdb/pandas_pdb.py
@@ -298,7 +298,7 @@ def add_remark(self, code, text='', indent=0):
 
         # Wrap remark to fit into 80 characters per line and add indentation
         wrapper = textwrap.TextWrapper(width=80 - (11 + indent))
-        lines = sum([wrapper.wrap(l.strip()) or [' '] for l in text.split('\n')], [])
+        lines = sum([wrapper.wrap(line.strip()) or [' '] for line in text.split('\n')], [])
         lines = list(map(lambda x: f'{code:4} ' +  indent*' ' + x, lines))
 
         # Shift data frame indices and row indices to create space for the remark

From 61ff468ca31ab70a81f5d45a993a977698fabd5a Mon Sep 17 00:00:00 2001
From: Arian Jamasb <arian.jamasb@roche.com>
Date: Mon, 8 Jul 2024 19:14:24 +0100
Subject: [PATCH 20/21] reduce whitespace

---
 biopandas/pdb/pandas_pdb.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/biopandas/pdb/pandas_pdb.py b/biopandas/pdb/pandas_pdb.py
index f36b11e..2d0c03e 100644
--- a/biopandas/pdb/pandas_pdb.py
+++ b/biopandas/pdb/pandas_pdb.py
@@ -299,7 +299,7 @@ def add_remark(self, code, text='', indent=0):
         # Wrap remark to fit into 80 characters per line and add indentation
         wrapper = textwrap.TextWrapper(width=80 - (11 + indent))
         lines = sum([wrapper.wrap(line.strip()) or [' '] for line in text.split('\n')], [])
-        lines = list(map(lambda x: f'{code:4} ' +  indent*' ' + x, lines))
+        lines = list(map(lambda x: f'{code:4} ' + indent*' ' + x, lines))
 
         # Shift data frame indices and row indices to create space for the remark
         # Create space in OTHERS

From 6e41d7fd00da40516ece337575a88b68382afd89 Mon Sep 17 00:00:00 2001
From: Arian Jamasb <arian.jamasb@roche.com>
Date: Mon, 8 Jul 2024 19:15:35 +0100
Subject: [PATCH 21/21] reduce whitespace

---
 biopandas/mmtf/pandas_mmtf.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/biopandas/mmtf/pandas_mmtf.py b/biopandas/mmtf/pandas_mmtf.py
index a2db5de..c98ec00 100644
--- a/biopandas/mmtf/pandas_mmtf.py
+++ b/biopandas/mmtf/pandas_mmtf.py
@@ -655,7 +655,7 @@ def _seq1(seq, charmap: Dict[str, str], undef_code="X"):
     onecode = {k.upper(): v for k, v in charmap.items()}
     # add the given termination codon code and custom maps
     onecode.update((k.upper(), v) for k, v in charmap.items())
-    seqlist = [seq[3 * i : 3 * (i + 1)] for i in range(len(seq) // 3)]
+    seqlist = [seq[3 * i:3 * (i + 1)] for i in range(len(seq) // 3)]
     return "".join(onecode.get(aa.upper(), undef_code) for aa in seqlist)