Explicit UTF-8 encoding for VASP input files with zopen, and open for other text files (#4218)

* explicit utf-8 encoding for kpoints from file
* explicit utf-8 elsewhere
* fix root level and dev_scripts
* simplify PMG PKG path
* fix analysis, cli, command_line
* fix electronic_structure, entries and ext
* fix io, phonon and symmetry
* fix alchemy and analysis tests
* fix apps, command_line, core, elec_struct, entries, ext and vis tests
* finish io and phonon tests
* remove unnecessary seek
* revert encoding for json dump
* type custom paths
* revert another json dump
* ignore userwarning by default
* relocate test-only env var
* remove unneeded default tag for non-userwarning
* also explicit utf-8 for json dump though forced ASCII
* utf8 is alias to utf-8 in codecs, but maybe prefer the standard name
* fix missing encoding in comment
* add test for Γ decoding
* better error message

---------

Signed-off-by: Matthew Horton <[email protected]>
Co-authored-by: Shyue Ping Ong <[email protected]>
Co-authored-by: Matthew Horton <[email protected]>
materialsproject · Jan 9, 2025 · 777a6b2 · 777a6b2
1 parent 361106f
commit 777a6b2
Show file tree

Hide file tree

Showing 109 changed files with 319 additions and 304 deletions.
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
@@ -52,12 +52,10 @@ jobs:
         split: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
 
     runs-on: ${{ matrix.config.os }}
-
     env:
       MPLBACKEND: Agg  # non-interactive backend for matplotlib
       PMG_MAPI_KEY: ${{ secrets.PMG_MAPI_KEY }}
       PYTHONWARNDEFAULTENCODING: "true"  # PEP 597: Enable optional EncodingWarning
-
     steps:
       - name: Check out repo
         uses: actions/checkout@v4
@@ -107,6 +105,8 @@ jobs:
 
       - name: pytest split ${{ matrix.split }}
         env:
+          MPLBACKEND: Agg  # non-interactive backend for matplotlib
+          PMG_MAPI_KEY: ${{ secrets.PMG_MAPI_KEY }}
           PMG_TEST_FILES_DIR: "${{ github.workspace }}/tests/files"
         run: |
           micromamba activate pmg

diff --git a/dev_scripts/chemenv/explicit_permutations.py b/dev_scripts/chemenv/explicit_permutations.py
@@ -93,5 +93,5 @@ class Algo:
         cg._algorithms = [ExplicitPermutationsAlgorithm(permutations=explicit_permutations)]
         new_geom_dir = "new_geometry_files"
         os.makedirs(new_geom_dir, exist_ok=True)
-        with open(f"{new_geom_dir}/{cg_symbol}.json", mode="w") as file:
+        with open(f"{new_geom_dir}/{cg_symbol}.json", mode="w", encoding="utf-8") as file:
             json.dump(cg.as_dict(), file)
diff --git a/dev_scripts/chemenv/explicit_permutations_plane_algorithm.py b/dev_scripts/chemenv/explicit_permutations_plane_algorithm.py
@@ -159,5 +159,5 @@
     if test == "y":
         cg._algorithms = new_algos
         cg_dict = cg.as_dict()
-        with open(f"../coordination_geometries_files_new/{cg_symbol}.json", mode="w") as file:
+        with open(f"../coordination_geometries_files_new/{cg_symbol}.json", mode="w", encoding="utf-8") as file:
             json.dump(cg_dict, file)
diff --git a/dev_scripts/chemenv/get_plane_permutations_optimized.py b/dev_scripts/chemenv/get_plane_permutations_optimized.py
@@ -444,5 +444,5 @@ def random_permutations_iterator(initial_permutation, n_permutations):
         if test == "y":
             new_geom_dir = "new_geometry_files"
             os.makedirs(new_geom_dir, exist_ok=True)
-            with open(f"{new_geom_dir}/{cg_symbol}.json", mode="w") as file:
+            with open(f"{new_geom_dir}/{cg_symbol}.json", mode="w", encoding="utf-8") as file:
                 json.dump(cg.as_dict(), file)
diff --git a/dev_scripts/regen_libxcfunc.py b/dev_scripts/regen_libxcfunc.py
@@ -10,10 +10,11 @@
 from __future__ import annotations
 
 import json
-import os
 import sys
 from copy import deepcopy
 
+from pymatgen.core import PKG_DIR
+
 
 def parse_libxc_docs(path):
     """Parse libxc_docs.txt file, return dictionary {libxc_id: info_dict}."""
@@ -27,7 +28,7 @@ def parse_section(section):
         return int(dct["Number"]), dct
 
     dct = {}
-    with open(path) as file:
+    with open(path, encoding="utf-8") as file:
         section = []
         for line in file:
             if not line.startswith("-"):
@@ -62,7 +63,7 @@ def write_libxc_docs_json(xc_funcs, json_path):
             if desc is not None:
                 xc_funcs[num][opt] = desc
 
-    with open(json_path, "w") as fh:
+    with open(json_path, "w", encoding="utf-8") as fh:
         json.dump(xc_funcs, fh)
 
     return xc_funcs
@@ -85,8 +86,7 @@ def main():
     xc_funcs = parse_libxc_docs(path)
 
     # Generate new JSON file in pycore
-    pmg_core = os.path.abspath("../pymatgen/core/")
-    json_path = f"{pmg_core}/libxc_docs.json"
+    json_path = f"{PKG_DIR}/core/libxc_docs.json"
     write_libxc_docs_json(xc_funcs, json_path)
 
     # Build new enum list.
@@ -99,8 +99,8 @@ def main():
 
     # Re-generate enumerations.
     # [0] read py module.
-    xc_funcpy_path = f"{pmg_core}/libxcfunc.py"
-    with open(xc_funcpy_path) as file:
+    xc_funcpy_path = f"{PKG_DIR}/core/libxcfunc.py"
+    with open(xc_funcpy_path, encoding="utf-8") as file:
         lines = file.readlines()
 
     # [1] insert new enum values in list

diff --git a/dev_scripts/update_pt_data.py b/dev_scripts/update_pt_data.py
@@ -13,7 +13,7 @@
 from monty.serialization import dumpfn, loadfn
 from ruamel import yaml
 
-from pymatgen.core import Element, get_el_sp
+from pymatgen.core import PKG_DIR, Element, get_el_sp
 
 try:
     from bs4 import BeautifulSoup
@@ -25,7 +25,7 @@
 
 def parse_oxi_state():
     data = loadfn(PTABLE_YAML_PATH)
-    with open("oxidation_states.txt") as file:
+    with open("oxidation_states.txt", encoding="utf-8") as file:
         oxi_data = file.read()
     oxi_data = re.sub("[\n\r]", "", oxi_data)
     patt = re.compile("<tr>(.*?)</tr>", re.MULTILINE)
@@ -57,13 +57,13 @@ def parse_oxi_state():
             data[el]["Common oxidation states"] = common_oxi
         else:
             print(el)
-    with open("periodic_table2.yaml", mode="w") as file:
+    with open("periodic_table2.yaml", mode="w", encoding="utf-8") as file:
         yaml.dump(data, file)
 
 
 def parse_ionic_radii():
     data = loadfn(PTABLE_YAML_PATH)
-    with open("ionic_radii.csv") as file:
+    with open("ionic_radii.csv", encoding="utf-8") as file:
         radii_data = file.read()
     radii_data = radii_data.split("\r")
     header = radii_data[0].split(",")
@@ -87,13 +87,13 @@ def parse_ionic_radii():
                 data[el]["Ionic_radii"] = ionic_radii
         else:
             print(el)
-    with open("periodic_table2.yaml", mode="w") as file:
+    with open("periodic_table2.yaml", mode="w", encoding="utf-8") as file:
         yaml.dump(data, file)
 
 
 def parse_radii():
     data = loadfn(PTABLE_YAML_PATH)
-    with open("radii.csv") as file:
+    with open("radii.csv", encoding="utf-8") as file:
         radii_data = file.read()
     radii_data = radii_data.split("\r")
 
@@ -121,9 +121,9 @@ def parse_radii():
             data[el]["Van der waals radius"] = vdw_radii
         else:
             print(el)
-    with open("periodic_table2.yaml", mode="w") as file:
+    with open("periodic_table2.yaml", mode="w", encoding="utf-8") as file:
         yaml.dump(data, file)
-    with open("../pymatgen/core/periodic_table.json", mode="w") as file:
+    with open(f"{PKG_DIR}/core/periodic_table.json", mode="w", encoding="utf-8") as file:
         json.dump(data, file)
 
 
@@ -140,9 +140,9 @@ def update_ionic_radii():
         if "Ionic_radii_ls" in dct:
             dct["Ionic radii ls"] = {k: v / 100 for k, v in dct["Ionic_radii_ls"].items()}
             del dct["Ionic_radii_ls"]
-    with open("periodic_table2.yaml", mode="w") as file:
+    with open("periodic_table2.yaml", mode="w", encoding="utf-8") as file:
         yaml.dump(data, file)
-    with open("../pymatgen/core/periodic_table.json", mode="w") as file:
+    with open(f"{PKG_DIR}/core/periodic_table.json", mode="w", encoding="utf-8") as file:
         json.dump(data, file)
 
 
@@ -180,19 +180,19 @@ def parse_shannon_radii():
             data[el]["Shannon radii"] = dict(radii[el])
 
     dumpfn(data, PTABLE_YAML_PATH)
-    with open("../pymatgen/core/periodic_table.json", mode="w") as file:
+    with open(f"{PKG_DIR}/core/periodic_table.json", mode="w", encoding="utf-8") as file:
         json.dump(data, file)
 
 
 def gen_periodic_table():
     data = loadfn(PTABLE_YAML_PATH)
 
-    with open("../pymatgen/core/periodic_table.json", mode="w") as file:
+    with open(f"{PKG_DIR}/core/periodic_table.json", mode="w", encoding="utf-8") as file:
         json.dump(data, file)
 
 
 def gen_iupac_ordering():
-    periodic_table = loadfn("../pymatgen/core/periodic_table.json")
+    periodic_table = loadfn(f"{PKG_DIR}/core/periodic_table.json")
     order = [
         ([18], range(6, 0, -1)),  # noble gasses
         ([1], range(7, 1, -1)),  # alkali metals
@@ -274,16 +274,16 @@ def add_electron_affinities():
         missing_electron_affinities = set(range(1, 93)) - Z_set
         raise ValueError(f"{missing_electron_affinities=}")
     print(element_electron_affinities)
-    pt = loadfn("../pymatgen/core/periodic_table.json")
+    pt = loadfn(f"{PKG_DIR}/core/periodic_table.json")
     for key, val in pt.items():
         val["Electron affinity"] = element_electron_affinities.get(Element(key).long_name)
-    dumpfn(pt, "../pymatgen/core/periodic_table.json")
+    dumpfn(pt, f"{PKG_DIR}/core/periodic_table.json")
 
 
 def add_ionization_energies():
     """Update the periodic table data file with ground level and ionization energies from NIST."""
 
-    with open("NIST Atomic Ionization Energies Output.html") as file:
+    with open("NIST Atomic Ionization Energies Output.html", encoding="utf-8") as file:
         soup = BeautifulSoup(file.read(), "html.parser")
     table = None
     for table in soup.find_all("table"):
@@ -302,11 +302,11 @@ def add_ionization_energies():
     if not set(data).issuperset(range(1, 93)):
         raise RuntimeError("Failed to get data up to Uranium")
 
-    pt = loadfn("../pymatgen/core/periodic_table.json")
+    pt = loadfn(f"{PKG_DIR}/core/periodic_table.json")
     for key, val in pt.items():
         del val["Ionization energy"]
         val["Ionization energies"] = data.get(Element(key).long_name, [])
-    dumpfn(pt, "../pymatgen/core/periodic_table.json")
+    dumpfn(pt, f"{PKG_DIR}/core/periodic_table.json")
 
 
 if __name__ == "__main__":

diff --git a/docs/usage.md b/docs/usage.md
diff --git a/src/pymatgen/analysis/chemenv/coordination_environments/coordination_geometries.py b/src/pymatgen/analysis/chemenv/coordination_environments/coordination_geometries.py
@@ -861,18 +861,18 @@ def __init__(self, permutations_safe_override=False, only_symbols=None):
         dict.__init__(self)
         self.cg_list: list[CoordinationGeometry] = []
         if only_symbols is None:
-            with open(f"{MODULE_DIR}/coordination_geometries_files/allcg.txt") as file:
+            with open(f"{MODULE_DIR}/coordination_geometries_files/allcg.txt", encoding="utf-8") as file:
                 data = file.readlines()
             for line in data:
                 cg_file = f"{MODULE_DIR}/{line.strip()}"
-                with open(cg_file) as file:
+                with open(cg_file, encoding="utf-8") as file:
                     dd = json.load(file)
                 self.cg_list.append(CoordinationGeometry.from_dict(dd))
         else:
             for symbol in only_symbols:
                 fsymbol = symbol.replace(":", "#")
                 cg_file = f"{MODULE_DIR}/coordination_geometries_files/{fsymbol}.json"
-                with open(cg_file) as file:
+                with open(cg_file, encoding="utf-8") as file:
                     dd = json.load(file)
                 self.cg_list.append(CoordinationGeometry.from_dict(dd))
 

diff --git a/src/pymatgen/analysis/chemenv/utils/chemenv_config.py b/src/pymatgen/analysis/chemenv/utils/chemenv_config.py
@@ -153,7 +153,7 @@ def save(self, root_dir=None):
             if test != "Y":
                 print("Configuration not saved")
                 return config_file
-        with open(config_file, mode="w") as file:
+        with open(config_file, mode="w", encoding="utf-8") as file:
             json.dump(config_dict, file)
         print("Configuration saved")
         return config_file
@@ -171,7 +171,7 @@ def auto_load(cls, root_dir=None):
             root_dir = f"{home}/.chemenv"
         config_file = f"{root_dir}/config.json"
         try:
-            with open(config_file) as file:
+            with open(config_file, encoding="utf-8") as file:
                 config_dict = json.load(file)
             return ChemEnvConfig(package_options=config_dict["package_options"])
 

diff --git a/src/pymatgen/analysis/chempot_diagram.py b/src/pymatgen/analysis/chempot_diagram.py
@@ -23,7 +23,6 @@
 from __future__ import annotations
 
 import json
-import os
 import warnings
 from functools import lru_cache
 from itertools import groupby
@@ -36,6 +35,7 @@
 from scipy.spatial import ConvexHull, HalfspaceIntersection
 
 from pymatgen.analysis.phase_diagram import PDEntry, PhaseDiagram
+from pymatgen.core import PKG_DIR
 from pymatgen.core.composition import Composition, Element
 from pymatgen.util.coord import Simplex
 from pymatgen.util.due import Doi, due
@@ -44,7 +44,7 @@
 if TYPE_CHECKING:
     from pymatgen.entries.computed_entries import ComputedEntry
 
-with open(f"{os.path.dirname(__file__)}/../util/plotly_chempot_layouts.json") as file:
+with open(f"{PKG_DIR}/util/plotly_chempot_layouts.json", encoding="utf-8") as file:
     plotly_layouts = json.load(file)
 
 

diff --git a/src/pymatgen/analysis/cost.py b/src/pymatgen/analysis/cost.py
@@ -85,7 +85,7 @@ def __init__(self, filename):
         # read in data from file
         self._chemsys_entries = defaultdict(list)
         filename = os.path.join(os.path.dirname(__file__), filename)
-        with open(filename) as file:
+        with open(filename, encoding="utf-8") as file:
             reader = csv.reader(file, quotechar="|")
             for row in reader:
                 comp = Composition(row[0])

diff --git a/src/pymatgen/analysis/graphs.py b/src/pymatgen/analysis/graphs.py
@@ -975,7 +975,7 @@ def draw_graph_to_file(
 
         write_dot(g, f"{basename}.dot")
 
-        with open(filename, mode="w") as file:
+        with open(filename, mode="w", encoding="utf-8") as file:
             args = [algo, "-T", extension, f"{basename}.dot"]
             with subprocess.Popen(args, stdout=file, stdin=subprocess.PIPE, close_fds=True) as rs:
                 rs.communicate()
@@ -2644,7 +2644,7 @@ def draw_graph_to_file(
 
         write_dot(g, f"{basename}.dot")
 
-        with open(filename, mode="w") as file:
+        with open(filename, mode="w", encoding="utf-8") as file:
             args = [algo, "-T", extension, f"{basename}.dot"]
             with subprocess.Popen(args, stdout=file, stdin=subprocess.PIPE, close_fds=True) as rs:
                 rs.communicate()

diff --git a/src/pymatgen/analysis/hhi.py b/src/pymatgen/analysis/hhi.py
@@ -37,7 +37,7 @@ def __init__(self):
         """Init for HHIModel."""
         self.symbol_hhip_hhir = {}  # symbol->(HHI_production, HHI reserve)
 
-        with open(HHI_CSV_PATH) as file:
+        with open(HHI_CSV_PATH, encoding="utf-8") as file:
             for line in file:
                 if line[0] != "#":
                     symbol, hhi_production, hhi_reserve = line.split(",")

diff --git a/src/pymatgen/analysis/interface_reactions.py b/src/pymatgen/analysis/interface_reactions.py
@@ -6,7 +6,6 @@
 from __future__ import annotations
 
 import json
-import os
 import warnings
 from typing import TYPE_CHECKING
 
@@ -18,6 +17,7 @@
 
 from pymatgen.analysis.phase_diagram import GrandPotentialPhaseDiagram, PhaseDiagram
 from pymatgen.analysis.reaction_calculator import Reaction
+from pymatgen.core import PKG_DIR
 from pymatgen.core.composition import Composition
 from pymatgen.util.due import Doi, due
 from pymatgen.util.plotting import pretty_plot
@@ -31,7 +31,7 @@
 __email__ = "[email protected]"
 __date__ = "Sep 1, 2021"
 
-with open(os.path.join(os.path.dirname(__file__), "..", "util", "plotly_interface_rxn_layouts.json")) as file:
+with open(f"{PKG_DIR}/util/plotly_interface_rxn_layouts.json", encoding="utf-8") as file:
     plotly_layouts = json.load(file)
 
 

diff --git a/src/pymatgen/analysis/structure_prediction/substitution_probability.py b/src/pymatgen/analysis/structure_prediction/substitution_probability.py
@@ -59,7 +59,7 @@ def __init__(self, lambda_table=None, alpha=-5):
         else:
             module_dir = os.path.dirname(__file__)
             json_file = f"{module_dir}/data/lambda.json"
-            with open(json_file) as file:
+            with open(json_file, encoding="utf-8") as file:
                 self._lambda_table = json.load(file)
 
         # build map of specie pairs to lambdas

diff --git a/src/pymatgen/cli/pmg_potcar.py b/src/pymatgen/cli/pmg_potcar.py
@@ -32,7 +32,7 @@ def gen_potcar(dirname, filename):
     """
     if filename == "POTCAR.spec":
         fullpath = os.path.join(dirname, filename)
-        with open(fullpath) as file:
+        with open(fullpath, encoding="utf-8") as file:
             elements = file.readlines()
         symbols = [el.strip() for el in elements if el.strip() != ""]
         potcar = Potcar(symbols)