Skip to content

Commit

Permalink
Explicit UTF-8 encoding for VASP input files with zopen, and open
Browse files Browse the repository at this point in the history
… for other text files (#4218)

* explicit utf-8 encoding for kpoints from file

* explicit utf-8 elsewhere

* fix root level and dev_scripts

* simplify PMG PKG path

* fix analysis, cli, command_line

* fix electronic_structure, entries and ext

* fix io, phonon and symmetry

* fix alchemy and analysis tests

* fix apps, command_line, core, elec_struct, entries, ext and vis tests

* finish io and phonon tests

* remove unnecessary seek

* revert encoding for json dump

* type custom paths

* revert another json dump

* ignore UserWarning by default

* relocate test-only env var

* remove unneeded default tag for non-userwarning

* also explicit utf-8 for json dump though forced ASCII

* utf8 is an alias for utf-8 in codecs, but maybe prefer the standard name

* fix missing encoding in comment

* add test for Γ decoding

* better error message

---------

Signed-off-by: Matthew Horton <[email protected]>
Co-authored-by: Shyue Ping Ong <[email protected]>
Co-authored-by: Matthew Horton <[email protected]>
  • Loading branch information
3 people authored Jan 9, 2025
1 parent 361106f commit 777a6b2
Show file tree
Hide file tree
Showing 109 changed files with 319 additions and 304 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -52,12 +52,10 @@ jobs:
split: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]

runs-on: ${{ matrix.config.os }}

env:
MPLBACKEND: Agg # non-interactive backend for matplotlib
PMG_MAPI_KEY: ${{ secrets.PMG_MAPI_KEY }}
PYTHONWARNDEFAULTENCODING: "true" # PEP 597: Enable optional EncodingWarning

steps:
- name: Check out repo
uses: actions/checkout@v4
Expand Down Expand Up @@ -107,6 +105,8 @@ jobs:
- name: pytest split ${{ matrix.split }}
env:
MPLBACKEND: Agg # non-interactive backend for matplotlib
PMG_MAPI_KEY: ${{ secrets.PMG_MAPI_KEY }}
PMG_TEST_FILES_DIR: "${{ github.workspace }}/tests/files"
run: |
micromamba activate pmg
Expand Down
2 changes: 1 addition & 1 deletion dev_scripts/chemenv/explicit_permutations.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,5 +93,5 @@ class Algo:
cg._algorithms = [ExplicitPermutationsAlgorithm(permutations=explicit_permutations)]
new_geom_dir = "new_geometry_files"
os.makedirs(new_geom_dir, exist_ok=True)
with open(f"{new_geom_dir}/{cg_symbol}.json", mode="w") as file:
with open(f"{new_geom_dir}/{cg_symbol}.json", mode="w", encoding="utf-8") as file:
json.dump(cg.as_dict(), file)
Original file line number Diff line number Diff line change
Expand Up @@ -159,5 +159,5 @@
if test == "y":
cg._algorithms = new_algos
cg_dict = cg.as_dict()
with open(f"../coordination_geometries_files_new/{cg_symbol}.json", mode="w") as file:
with open(f"../coordination_geometries_files_new/{cg_symbol}.json", mode="w", encoding="utf-8") as file:
json.dump(cg_dict, file)
2 changes: 1 addition & 1 deletion dev_scripts/chemenv/get_plane_permutations_optimized.py
Original file line number Diff line number Diff line change
Expand Up @@ -444,5 +444,5 @@ def random_permutations_iterator(initial_permutation, n_permutations):
if test == "y":
new_geom_dir = "new_geometry_files"
os.makedirs(new_geom_dir, exist_ok=True)
with open(f"{new_geom_dir}/{cg_symbol}.json", mode="w") as file:
with open(f"{new_geom_dir}/{cg_symbol}.json", mode="w", encoding="utf-8") as file:
json.dump(cg.as_dict(), file)
14 changes: 7 additions & 7 deletions dev_scripts/regen_libxcfunc.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,11 @@
from __future__ import annotations

import json
import os
import sys
from copy import deepcopy

from pymatgen.core import PKG_DIR


def parse_libxc_docs(path):
"""Parse libxc_docs.txt file, return dictionary {libxc_id: info_dict}."""
Expand All @@ -27,7 +28,7 @@ def parse_section(section):
return int(dct["Number"]), dct

dct = {}
with open(path) as file:
with open(path, encoding="utf-8") as file:
section = []
for line in file:
if not line.startswith("-"):
Expand Down Expand Up @@ -62,7 +63,7 @@ def write_libxc_docs_json(xc_funcs, json_path):
if desc is not None:
xc_funcs[num][opt] = desc

with open(json_path, "w") as fh:
with open(json_path, "w", encoding="utf-8") as fh:
json.dump(xc_funcs, fh)

return xc_funcs
Expand All @@ -85,8 +86,7 @@ def main():
xc_funcs = parse_libxc_docs(path)

# Generate new JSON file in pycore
pmg_core = os.path.abspath("../pymatgen/core/")
json_path = f"{pmg_core}/libxc_docs.json"
json_path = f"{PKG_DIR}/core/libxc_docs.json"
write_libxc_docs_json(xc_funcs, json_path)

# Build new enum list.
Expand All @@ -99,8 +99,8 @@ def main():

# Re-generate enumerations.
# [0] read py module.
xc_funcpy_path = f"{pmg_core}/libxcfunc.py"
with open(xc_funcpy_path) as file:
xc_funcpy_path = f"{PKG_DIR}/core/libxcfunc.py"
with open(xc_funcpy_path, encoding="utf-8") as file:
lines = file.readlines()

# [1] insert new enum values in list
Expand Down
36 changes: 18 additions & 18 deletions dev_scripts/update_pt_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
from monty.serialization import dumpfn, loadfn
from ruamel import yaml

from pymatgen.core import Element, get_el_sp
from pymatgen.core import PKG_DIR, Element, get_el_sp

try:
from bs4 import BeautifulSoup
Expand All @@ -25,7 +25,7 @@

def parse_oxi_state():
data = loadfn(PTABLE_YAML_PATH)
with open("oxidation_states.txt") as file:
with open("oxidation_states.txt", encoding="utf-8") as file:
oxi_data = file.read()
oxi_data = re.sub("[\n\r]", "", oxi_data)
patt = re.compile("<tr>(.*?)</tr>", re.MULTILINE)
Expand Down Expand Up @@ -57,13 +57,13 @@ def parse_oxi_state():
data[el]["Common oxidation states"] = common_oxi
else:
print(el)
with open("periodic_table2.yaml", mode="w") as file:
with open("periodic_table2.yaml", mode="w", encoding="utf-8") as file:
yaml.dump(data, file)


def parse_ionic_radii():
data = loadfn(PTABLE_YAML_PATH)
with open("ionic_radii.csv") as file:
with open("ionic_radii.csv", encoding="utf-8") as file:
radii_data = file.read()
radii_data = radii_data.split("\r")
header = radii_data[0].split(",")
Expand All @@ -87,13 +87,13 @@ def parse_ionic_radii():
data[el]["Ionic_radii"] = ionic_radii
else:
print(el)
with open("periodic_table2.yaml", mode="w") as file:
with open("periodic_table2.yaml", mode="w", encoding="utf-8") as file:
yaml.dump(data, file)


def parse_radii():
data = loadfn(PTABLE_YAML_PATH)
with open("radii.csv") as file:
with open("radii.csv", encoding="utf-8") as file:
radii_data = file.read()
radii_data = radii_data.split("\r")

Expand Down Expand Up @@ -121,9 +121,9 @@ def parse_radii():
data[el]["Van der waals radius"] = vdw_radii
else:
print(el)
with open("periodic_table2.yaml", mode="w") as file:
with open("periodic_table2.yaml", mode="w", encoding="utf-8") as file:
yaml.dump(data, file)
with open("../pymatgen/core/periodic_table.json", mode="w") as file:
with open(f"{PKG_DIR}/core/periodic_table.json", mode="w", encoding="utf-8") as file:
json.dump(data, file)


Expand All @@ -140,9 +140,9 @@ def update_ionic_radii():
if "Ionic_radii_ls" in dct:
dct["Ionic radii ls"] = {k: v / 100 for k, v in dct["Ionic_radii_ls"].items()}
del dct["Ionic_radii_ls"]
with open("periodic_table2.yaml", mode="w") as file:
with open("periodic_table2.yaml", mode="w", encoding="utf-8") as file:
yaml.dump(data, file)
with open("../pymatgen/core/periodic_table.json", mode="w") as file:
with open(f"{PKG_DIR}/core/periodic_table.json", mode="w", encoding="utf-8") as file:
json.dump(data, file)


Expand Down Expand Up @@ -180,19 +180,19 @@ def parse_shannon_radii():
data[el]["Shannon radii"] = dict(radii[el])

dumpfn(data, PTABLE_YAML_PATH)
with open("../pymatgen/core/periodic_table.json", mode="w") as file:
with open(f"{PKG_DIR}/core/periodic_table.json", mode="w", encoding="utf-8") as file:
json.dump(data, file)


def gen_periodic_table():
data = loadfn(PTABLE_YAML_PATH)

with open("../pymatgen/core/periodic_table.json", mode="w") as file:
with open(f"{PKG_DIR}/core/periodic_table.json", mode="w", encoding="utf-8") as file:
json.dump(data, file)


def gen_iupac_ordering():
periodic_table = loadfn("../pymatgen/core/periodic_table.json")
periodic_table = loadfn(f"{PKG_DIR}/core/periodic_table.json")
order = [
([18], range(6, 0, -1)), # noble gasses
([1], range(7, 1, -1)), # alkali metals
Expand Down Expand Up @@ -274,16 +274,16 @@ def add_electron_affinities():
missing_electron_affinities = set(range(1, 93)) - Z_set
raise ValueError(f"{missing_electron_affinities=}")
print(element_electron_affinities)
pt = loadfn("../pymatgen/core/periodic_table.json")
pt = loadfn(f"{PKG_DIR}/core/periodic_table.json")
for key, val in pt.items():
val["Electron affinity"] = element_electron_affinities.get(Element(key).long_name)
dumpfn(pt, "../pymatgen/core/periodic_table.json")
dumpfn(pt, f"{PKG_DIR}/core/periodic_table.json")


def add_ionization_energies():
"""Update the periodic table data file with ground level and ionization energies from NIST."""

with open("NIST Atomic Ionization Energies Output.html") as file:
with open("NIST Atomic Ionization Energies Output.html", encoding="utf-8") as file:
soup = BeautifulSoup(file.read(), "html.parser")
table = None
for table in soup.find_all("table"):
Expand All @@ -302,11 +302,11 @@ def add_ionization_energies():
if not set(data).issuperset(range(1, 93)):
raise RuntimeError("Failed to get data up to Uranium")

pt = loadfn("../pymatgen/core/periodic_table.json")
pt = loadfn(f"{PKG_DIR}/core/periodic_table.json")
for key, val in pt.items():
del val["Ionization energy"]
val["Ionization energies"] = data.get(Element(key).long_name, [])
dumpfn(pt, "../pymatgen/core/periodic_table.json")
dumpfn(pt, f"{PKG_DIR}/core/periodic_table.json")


if __name__ == "__main__":
Expand Down
4 changes: 2 additions & 2 deletions docs/usage.md

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Original file line number Diff line number Diff line change
Expand Up @@ -861,18 +861,18 @@ def __init__(self, permutations_safe_override=False, only_symbols=None):
dict.__init__(self)
self.cg_list: list[CoordinationGeometry] = []
if only_symbols is None:
with open(f"{MODULE_DIR}/coordination_geometries_files/allcg.txt") as file:
with open(f"{MODULE_DIR}/coordination_geometries_files/allcg.txt", encoding="utf-8") as file:
data = file.readlines()
for line in data:
cg_file = f"{MODULE_DIR}/{line.strip()}"
with open(cg_file) as file:
with open(cg_file, encoding="utf-8") as file:
dd = json.load(file)
self.cg_list.append(CoordinationGeometry.from_dict(dd))
else:
for symbol in only_symbols:
fsymbol = symbol.replace(":", "#")
cg_file = f"{MODULE_DIR}/coordination_geometries_files/{fsymbol}.json"
with open(cg_file) as file:
with open(cg_file, encoding="utf-8") as file:
dd = json.load(file)
self.cg_list.append(CoordinationGeometry.from_dict(dd))

Expand Down
4 changes: 2 additions & 2 deletions src/pymatgen/analysis/chemenv/utils/chemenv_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -153,7 +153,7 @@ def save(self, root_dir=None):
if test != "Y":
print("Configuration not saved")
return config_file
with open(config_file, mode="w") as file:
with open(config_file, mode="w", encoding="utf-8") as file:
json.dump(config_dict, file)
print("Configuration saved")
return config_file
Expand All @@ -171,7 +171,7 @@ def auto_load(cls, root_dir=None):
root_dir = f"{home}/.chemenv"
config_file = f"{root_dir}/config.json"
try:
with open(config_file) as file:
with open(config_file, encoding="utf-8") as file:
config_dict = json.load(file)
return ChemEnvConfig(package_options=config_dict["package_options"])

Expand Down
4 changes: 2 additions & 2 deletions src/pymatgen/analysis/chempot_diagram.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@
from __future__ import annotations

import json
import os
import warnings
from functools import lru_cache
from itertools import groupby
Expand All @@ -36,6 +35,7 @@
from scipy.spatial import ConvexHull, HalfspaceIntersection

from pymatgen.analysis.phase_diagram import PDEntry, PhaseDiagram
from pymatgen.core import PKG_DIR
from pymatgen.core.composition import Composition, Element
from pymatgen.util.coord import Simplex
from pymatgen.util.due import Doi, due
Expand All @@ -44,7 +44,7 @@
if TYPE_CHECKING:
from pymatgen.entries.computed_entries import ComputedEntry

with open(f"{os.path.dirname(__file__)}/../util/plotly_chempot_layouts.json") as file:
with open(f"{PKG_DIR}/util/plotly_chempot_layouts.json", encoding="utf-8") as file:
plotly_layouts = json.load(file)


Expand Down
2 changes: 1 addition & 1 deletion src/pymatgen/analysis/cost.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ def __init__(self, filename):
# read in data from file
self._chemsys_entries = defaultdict(list)
filename = os.path.join(os.path.dirname(__file__), filename)
with open(filename) as file:
with open(filename, encoding="utf-8") as file:
reader = csv.reader(file, quotechar="|")
for row in reader:
comp = Composition(row[0])
Expand Down
4 changes: 2 additions & 2 deletions src/pymatgen/analysis/graphs.py
Original file line number Diff line number Diff line change
Expand Up @@ -975,7 +975,7 @@ def draw_graph_to_file(

write_dot(g, f"{basename}.dot")

with open(filename, mode="w") as file:
with open(filename, mode="w", encoding="utf-8") as file:
args = [algo, "-T", extension, f"{basename}.dot"]
with subprocess.Popen(args, stdout=file, stdin=subprocess.PIPE, close_fds=True) as rs:
rs.communicate()
Expand Down Expand Up @@ -2644,7 +2644,7 @@ def draw_graph_to_file(

write_dot(g, f"{basename}.dot")

with open(filename, mode="w") as file:
with open(filename, mode="w", encoding="utf-8") as file:
args = [algo, "-T", extension, f"{basename}.dot"]
with subprocess.Popen(args, stdout=file, stdin=subprocess.PIPE, close_fds=True) as rs:
rs.communicate()
Expand Down
2 changes: 1 addition & 1 deletion src/pymatgen/analysis/hhi.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ def __init__(self):
"""Init for HHIModel."""
self.symbol_hhip_hhir = {} # symbol->(HHI_production, HHI reserve)

with open(HHI_CSV_PATH) as file:
with open(HHI_CSV_PATH, encoding="utf-8") as file:
for line in file:
if line[0] != "#":
symbol, hhi_production, hhi_reserve = line.split(",")
Expand Down
4 changes: 2 additions & 2 deletions src/pymatgen/analysis/interface_reactions.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
from __future__ import annotations

import json
import os
import warnings
from typing import TYPE_CHECKING

Expand All @@ -18,6 +17,7 @@

from pymatgen.analysis.phase_diagram import GrandPotentialPhaseDiagram, PhaseDiagram
from pymatgen.analysis.reaction_calculator import Reaction
from pymatgen.core import PKG_DIR
from pymatgen.core.composition import Composition
from pymatgen.util.due import Doi, due
from pymatgen.util.plotting import pretty_plot
Expand All @@ -31,7 +31,7 @@
__email__ = "[email protected]"
__date__ = "Sep 1, 2021"

with open(os.path.join(os.path.dirname(__file__), "..", "util", "plotly_interface_rxn_layouts.json")) as file:
with open(f"{PKG_DIR}/util/plotly_interface_rxn_layouts.json", encoding="utf-8") as file:
plotly_layouts = json.load(file)


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ def __init__(self, lambda_table=None, alpha=-5):
else:
module_dir = os.path.dirname(__file__)
json_file = f"{module_dir}/data/lambda.json"
with open(json_file) as file:
with open(json_file, encoding="utf-8") as file:
self._lambda_table = json.load(file)

# build map of specie pairs to lambdas
Expand Down
2 changes: 1 addition & 1 deletion src/pymatgen/cli/pmg_potcar.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ def gen_potcar(dirname, filename):
"""
if filename == "POTCAR.spec":
fullpath = os.path.join(dirname, filename)
with open(fullpath) as file:
with open(fullpath, encoding="utf-8") as file:
elements = file.readlines()
symbols = [el.strip() for el in elements if el.strip() != ""]
potcar = Potcar(symbols)
Expand Down
Loading

0 comments on commit 777a6b2

Please sign in to comment.