From e8dfaf68c398fbcd964b8ea2c579890e274323f4 Mon Sep 17 00:00:00 2001 From: mrastgoo Date: Sat, 7 Dec 2024 21:27:04 +0100 Subject: [PATCH 01/12] adding write_html for table_report --- skrub/_reporting/_table_report.py | 21 ++++++++++++ skrub/_reporting/tests/test_table_report.py | 38 +++++++++++++++++++++ 2 files changed, 59 insertions(+) diff --git a/skrub/_reporting/_table_report.py b/skrub/_reporting/_table_report.py index 9504ca549..df43ef6c4 100644 --- a/skrub/_reporting/_table_report.py +++ b/skrub/_reporting/_table_report.py @@ -1,5 +1,6 @@ import functools import json +from pathlib import Path from ._html import to_html from ._serve import open_in_browser @@ -189,6 +190,26 @@ def _repr_mimebundle_(self, include=None, exclude=None): def _repr_html_(self): return self._repr_mimebundle_()["text/html"] + def write_html(self, filename): + """saving an html report + + Parameters + ---------- + filename : str, pathlib.Path or file object. + """ + + if isinstance(filename, (str, Path)): + if isinstance(filename, str): + filename = Path(filename) + if filename.suffix != ".html": + raise ValueError("Not ending with .html") + file_object = open(filename, "w", encoding="utf-8") + else: + # already a file object + file_object = filename + file_object.write(self.html()) + file_object.close() + def open(self): """Open the HTML report in a web browser.""" open_in_browser(self.html()) diff --git a/skrub/_reporting/tests/test_table_report.py b/skrub/_reporting/tests/test_table_report.py index 380291847..929fd2893 100644 --- a/skrub/_reporting/tests/test_table_report.py +++ b/skrub/_reporting/tests/test_table_report.py @@ -2,6 +2,10 @@ import json import re import warnings +from pathlib import Path +from tempfile import TemporaryDirectory + +import pytest from skrub import TableReport, ToDatetime from skrub import _dataframe as sbd @@ -121,3 +125,37 @@ def test_duration(df_module): {"a": [datetime.timedelta(days=2), datetime.timedelta(days=3)]} ) assert re.search(r"2(\.0)?\s+days", TableReport(df).html()) + + +@pytest.mark.parametrize("filename_path", ["str", "Path", "file_object"]) +def test_write_html(pd_module, filename_path): + df = pd_module.make_dataframe({"a": [1, 2], "b": [3, 4]}) + report = TableReport(df) + + with TemporaryDirectory() as td: + f_name = Path(td) / Path("report.html") + + if filename_path == "str": + report.write_html(f_name.absolute()) + + if filename_path == "Path": + report.write_html(f_name) + + if filename_path == "file_object": + file_object = open(f_name, "w", encoding="utf-8") + report.write_html(file_object) + + # Check if the file exists + assert f_name.exists() + + +def test_write_html_with_no_suffix(pd_module): + df = pd_module.make_dataframe({"a": [1, 2], "b": [3, 4]}) + report = TableReport(df) + with TemporaryDirectory() as td: + f_name = Path(td) / Path("report") + with pytest.raises(ValueError, match="Not ending with .html"): + report.write_html(f_name) + + # Check if the file exists + assert not f_name.exists() From 048cec53d53246443c66de50e65a8c184e8821f2 Mon Sep 17 00:00:00 2001 From: mrastgoo Date: Sat, 7 Dec 2024 21:50:31 +0100 Subject: [PATCH 02/12] Update skrub/_reporting/_table_report.py Co-authored-by: Guillaume Lemaitre --- skrub/_reporting/_table_report.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/skrub/_reporting/_table_report.py b/skrub/_reporting/_table_report.py index 82e0b8fbd..33ef6db6f 100644 --- a/skrub/_reporting/_table_report.py +++ b/skrub/_reporting/_table_report.py @@ -203,7 +203,8 @@ def write_html(self, filename): Parameters ---------- - filename : str, pathlib.Path or file object. + filename : str, pathlib.Path or file object + The file object or path of the file to store the HTML output. """ if isinstance(filename, (str, Path)): From 15c7b060db2e25548b92986bbddcbbd4c0301450 Mon Sep 17 00:00:00 2001 From: mrastgoo Date: Sat, 7 Dec 2024 22:03:12 +0100 Subject: [PATCH 03/12] Apply suggestions from code review Co-authored-by: Guillaume Lemaitre --- skrub/_reporting/_table_report.py | 7 +++++-- skrub/_reporting/tests/test_table_report.py | 4 +--- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/skrub/_reporting/_table_report.py b/skrub/_reporting/_table_report.py index 33ef6db6f..8d06e06b1 100644 --- a/skrub/_reporting/_table_report.py +++ b/skrub/_reporting/_table_report.py @@ -199,7 +199,7 @@ def _repr_html_(self): return self._repr_mimebundle_()["text/html"] def write_html(self, filename): - """saving an html report + """Store the report into an HTML file. Parameters ---------- @@ -211,7 +211,10 @@ def write_html(self, filename): if isinstance(filename, str): filename = Path(filename) if filename.suffix != ".html": - raise ValueError("Not ending with .html") + raise ValueError( + "The filename does not end with the suffix `.html`. " + f"Instead, got {filename.suffix}." + ) file_object = open(filename, "w", encoding="utf-8") else: # already a file object diff --git a/skrub/_reporting/tests/test_table_report.py b/skrub/_reporting/tests/test_table_report.py index 284a59d02..b4c4fcab2 100644 --- a/skrub/_reporting/tests/test_table_report.py +++ b/skrub/_reporting/tests/test_table_report.py @@ -127,7 +127,7 @@ def test_duration(df_module): assert re.search(r"2(\.0)?\s+days", TableReport(df).html()) -@pytest.mark.parametrize("filename_path", ["str", "Path", "file_object"]) +@pytest.mark.parametrize("filename_type", ["str", "Path", "file_object"]) def test_write_html(pd_module, filename_path): df = pd_module.make_dataframe({"a": [1, 2], "b": [3, 4]}) report = TableReport(df) @@ -145,7 +145,6 @@ def test_write_html(pd_module, filename_path): file_object = open(f_name, "w", encoding="utf-8") report.write_html(file_object) - # Check if the file exists assert f_name.exists() @@ -157,7 +156,6 @@ def test_write_html_with_no_suffix(pd_module): with pytest.raises(ValueError, match="Not ending with .html"): report.write_html(f_name) - # Check if the file exists assert not f_name.exists() From 0debd0bc520ba021d6fff7ef505ea090c768df5d Mon Sep 17 00:00:00 2001 From: mrastgoo Date: Sat, 7 Dec 2024 22:18:45 +0100 Subject: [PATCH 04/12] addressing the reviews --- skrub/_reporting/tests/test_table_report.py | 38 ++++++++++++--------- 1 file changed, 21 insertions(+), 17 deletions(-) diff --git a/skrub/_reporting/tests/test_table_report.py b/skrub/_reporting/tests/test_table_report.py index b4c4fcab2..749152b96 100644 --- a/skrub/_reporting/tests/test_table_report.py +++ b/skrub/_reporting/tests/test_table_report.py @@ -128,35 +128,39 @@ def test_duration(df_module): @pytest.mark.parametrize("filename_type", ["str", "Path", "file_object"]) -def test_write_html(pd_module, filename_path): +def test_write_html(pd_module, filename_type): df = pd_module.make_dataframe({"a": [1, 2], "b": [3, 4]}) report = TableReport(df) with TemporaryDirectory() as td: - f_name = Path(td) / Path("report.html") + tmp_file_path = Path(td) / Path("report.html") - if filename_path == "str": - report.write_html(f_name.absolute()) + if filename_type == "str": + filename = str(tmp_file_path) + elif filename_type == "file_object": + filename = open(tmp_file_path, "w", encoding="utf-8") + else: + filename = tmp_file_path - if filename_path == "Path": - report.write_html(f_name) - - if filename_path == "file_object": - file_object = open(f_name, "w", encoding="utf-8") - report.write_html(file_object) - - assert f_name.exists() + report.write_html(filename) + assert tmp_file_path.exists() def test_write_html_with_no_suffix(pd_module): df = pd_module.make_dataframe({"a": [1, 2], "b": [3, 4]}) report = TableReport(df) with TemporaryDirectory() as td: - f_name = Path(td) / Path("report") - with pytest.raises(ValueError, match="Not ending with .html"): - report.write_html(f_name) - - assert not f_name.exists() + filename = Path(td) / Path("report.txt") + with pytest.raises( + ValueError, + match=( + "The filename does not end with the suffix `.html`. " + f"Instead, got {filename.suffix}" + ), + ): + report.write_html(filename) + + assert not filename.exists() def test_verbosity_parameter(df_module, capsys): From 684a9c1cf5c934afea862cf8cfd647e8599b5040 Mon Sep 17 00:00:00 2001 From: mrastgoo Date: Mon, 9 Dec 2024 22:26:37 +0100 Subject: [PATCH 05/12] checking the encoding and TypeError --- CHANGES.rst | 3 ++ skrub/_reporting/_table_report.py | 44 ++++++++++------ skrub/_reporting/tests/test_table_report.py | 56 ++++++++++++--------- 3 files changed, 64 insertions(+), 39 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index 87c83d84e..94045576e 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -18,6 +18,9 @@ New features Changes ------- +* :class: `TableReport` has `write_html` method + :pr:`1190` by :user: `Mojdeh Rastgoo`. + * A new parameter `verbose` has been added to the :class:`TableReport` to toggle on or off the printing of progress information when a report is being generated. :pr:`1182` by :user:`Priscilla Baah`. diff --git a/skrub/_reporting/_table_report.py b/skrub/_reporting/_table_report.py index 8d06e06b1..3aacd9b6f 100644 --- a/skrub/_reporting/_table_report.py +++ b/skrub/_reporting/_table_report.py @@ -1,5 +1,7 @@ +import codecs import functools import json +import locale from pathlib import Path from ._html import to_html @@ -198,29 +200,41 @@ def _repr_mimebundle_(self, include=None, exclude=None): def _repr_html_(self): return self._repr_mimebundle_()["text/html"] - def write_html(self, filename): + def write_html(self, file): """Store the report into an HTML file. Parameters ---------- - filename : str, pathlib.Path or file object + file : str, pathlib.Path or file object The file object or path of the file to store the HTML output. """ - - if isinstance(filename, (str, Path)): - if isinstance(filename, str): - filename = Path(filename) - if filename.suffix != ".html": + html = self.html() + if isinstance(file, (str, Path)): + with open(file, "w", encoding="utf8") as stream: + stream.write(html) + return + try: + file.write(html.encode("utf-8")) + return + except TypeError: + pass + + if (encoding := getattr(file, "encoding", None)) is not None: + try: + assert codecs.lookup(encoding).name == "utf-8" + except (AssertionError, LookupError): raise ValueError( - "The filename does not end with the suffix `.html`. " - f"Instead, got {filename.suffix}." + "If `file` is a text file it should use utf-8 encoding; got:" + f" {encoding!r}" ) - file_object = open(filename, "w", encoding="utf-8") - else: - # already a file object - file_object = filename - file_object.write(self.html()) - file_object.close() + elif locale.getencodeing().lower() != "utf-8": + # when encoding=None, it will default on the platform-specific encoding + # raise if not utf-8 + raise ValueError( + f"Platform encoding is not utf-8; got {locale.getencoding()}" + ) + + file.write(html) def open(self): """Open the HTML report in a web browser.""" diff --git a/skrub/_reporting/tests/test_table_report.py b/skrub/_reporting/tests/test_table_report.py index 749152b96..311753fa0 100644 --- a/skrub/_reporting/tests/test_table_report.py +++ b/skrub/_reporting/tests/test_table_report.py @@ -3,7 +3,6 @@ import re import warnings from pathlib import Path -from tempfile import TemporaryDirectory import pytest @@ -127,39 +126,48 @@ def test_duration(df_module): assert re.search(r"2(\.0)?\s+days", TableReport(df).html()) -@pytest.mark.parametrize("filename_type", ["str", "Path", "file_object"]) -def test_write_html(pd_module, filename_type): +@pytest.mark.parametrize( + "filename_type", + ["str", "Path", "file_object", "binary_mode", "file_object_encoding_None"], +) +def test_write_html(tmp_path, pd_module, filename_type): df = pd_module.make_dataframe({"a": [1, 2], "b": [3, 4]}) report = TableReport(df) - with TemporaryDirectory() as td: - tmp_file_path = Path(td) / Path("report.html") + tmp_file_path = tmp_path / Path("report.html") - if filename_type == "str": - filename = str(tmp_file_path) - elif filename_type == "file_object": - filename = open(tmp_file_path, "w", encoding="utf-8") - else: - filename = tmp_file_path + if filename_type == "str": + filename = str(tmp_file_path) + elif filename_type == "file_object": + filename = open(tmp_file_path, "w", encoding="utf-8") + elif filename_type == "file_object_encoding_None": + filename = open(tmp_file_path, "w") + elif filename_type == "binary_mode": + filename = open(tmp_file_path, "wb") + else: + filename = tmp_file_path - report.write_html(filename) - assert tmp_file_path.exists() + report.write_html(filename) + assert tmp_file_path.exists() + + with open(tmp_file_path, "r") as file: + saved_content = file.read() + assert "" in saved_content -def test_write_html_with_no_suffix(pd_module): +def test_write_html_with_not_utf8_encoding(tmp_path, pd_module): df = pd_module.make_dataframe({"a": [1, 2], "b": [3, 4]}) report = TableReport(df) - with TemporaryDirectory() as td: - filename = Path(td) / Path("report.txt") - with pytest.raises( - ValueError, - match=( - "The filename does not end with the suffix `.html`. " - f"Instead, got {filename.suffix}" - ), - ): - report.write_html(filename) + filename = open(tmp_path / Path("report.html"), "w", encoding="latin-1") + encoding = getattr(filename, "encoding", None) + with pytest.raises( + ValueError, + match=( + f"If `file` is a text file it should use utf-8 encoding; got: {encoding!r}" + ), + ): + report.write_html(filename) assert not filename.exists() From 597bc0ebee2acbc76425776c249bd1293d074302 Mon Sep 17 00:00:00 2001 From: mrastgoo Date: Mon, 9 Dec 2024 23:04:25 +0100 Subject: [PATCH 06/12] ignoring test on platform encoding --- skrub/_reporting/_table_report.py | 3 ++- skrub/_reporting/tests/test_table_report.py | 4 +--- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/skrub/_reporting/_table_report.py b/skrub/_reporting/_table_report.py index 3aacd9b6f..e83b9b41d 100644 --- a/skrub/_reporting/_table_report.py +++ b/skrub/_reporting/_table_report.py @@ -219,6 +219,7 @@ def write_html(self, file): except TypeError: pass + print(getattr(file, "encoding", None)) if (encoding := getattr(file, "encoding", None)) is not None: try: assert codecs.lookup(encoding).name == "utf-8" @@ -227,7 +228,7 @@ def write_html(self, file): "If `file` is a text file it should use utf-8 encoding; got:" f" {encoding!r}" ) - elif locale.getencodeing().lower() != "utf-8": + elif locale.getencoding().lower() != "utf-8": # when encoding=None, it will default on the platform-specific encoding # raise if not utf-8 raise ValueError( diff --git a/skrub/_reporting/tests/test_table_report.py b/skrub/_reporting/tests/test_table_report.py index 311753fa0..d631ee024 100644 --- a/skrub/_reporting/tests/test_table_report.py +++ b/skrub/_reporting/tests/test_table_report.py @@ -128,7 +128,7 @@ def test_duration(df_module): @pytest.mark.parametrize( "filename_type", - ["str", "Path", "file_object", "binary_mode", "file_object_encoding_None"], + ["str", "Path", "file_object", "binary_mode"], ) def test_write_html(tmp_path, pd_module, filename_type): df = pd_module.make_dataframe({"a": [1, 2], "b": [3, 4]}) @@ -140,8 +140,6 @@ def test_write_html(tmp_path, pd_module, filename_type): filename = str(tmp_file_path) elif filename_type == "file_object": filename = open(tmp_file_path, "w", encoding="utf-8") - elif filename_type == "file_object_encoding_None": - filename = open(tmp_file_path, "w") elif filename_type == "binary_mode": filename = open(tmp_file_path, "wb") else: From bbb50d97fa6d159886d3467a83430c551a9b36e9 Mon Sep 17 00:00:00 2001 From: mrastgoo Date: Sun, 15 Dec 2024 11:56:02 +0100 Subject: [PATCH 07/12] fixing the test --- skrub/_reporting/tests/test_table_report.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/skrub/_reporting/tests/test_table_report.py b/skrub/_reporting/tests/test_table_report.py index d631ee024..08fb1217a 100644 --- a/skrub/_reporting/tests/test_table_report.py +++ b/skrub/_reporting/tests/test_table_report.py @@ -148,7 +148,7 @@ def test_write_html(tmp_path, pd_module, filename_type): report.write_html(filename) assert tmp_file_path.exists() - with open(tmp_file_path, "r") as file: + with open(tmp_file_path, "r", encoding="utf-8") as file: saved_content = file.read() assert "" in saved_content From f3c1e8b81a6ff52201f7117c9af8cb5a491cada4 Mon Sep 17 00:00:00 2001 From: mrastgoo Date: Mon, 16 Dec 2024 18:50:29 +0100 Subject: [PATCH 08/12] Update skrub/_reporting/_table_report.py MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Jérôme Dockès --- skrub/_reporting/_table_report.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/skrub/_reporting/_table_report.py b/skrub/_reporting/_table_report.py index e83b9b41d..02570d67c 100644 --- a/skrub/_reporting/_table_report.py +++ b/skrub/_reporting/_table_report.py @@ -228,12 +228,6 @@ def write_html(self, file): "If `file` is a text file it should use utf-8 encoding; got:" f" {encoding!r}" ) - elif locale.getencoding().lower() != "utf-8": - # when encoding=None, it will default on the platform-specific encoding - # raise if not utf-8 - raise ValueError( - f"Platform encoding is not utf-8; got {locale.getencoding()}" - ) file.write(html) From 760b7b321bbfe307b56254ae7c30ecb5dc21d51a Mon Sep 17 00:00:00 2001 From: mrastgoo Date: Mon, 16 Dec 2024 18:51:37 +0100 Subject: [PATCH 09/12] Update CHANGES.rst MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Jérôme Dockès --- CHANGES.rst | 1 - 1 file changed, 1 deletion(-) diff --git a/CHANGES.rst b/CHANGES.rst index 5cd655994..f98acddcf 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -32,7 +32,6 @@ Changes * :class: `TableReport` has `write_html` method :pr:`1190` by :user: `Mojdeh Rastgoo`. -* A new parameter `verbose` has been added to the :class:`TableReport` to toggle on or off the * A new parameter ``verbose`` has been added to the :class:`TableReport` to toggle on or off the printing of progress information when a report is being generated. :pr:`1182` by :user:`Priscilla Baah`. From 0789ac557c67858665101dbc1387baf447179961 Mon Sep 17 00:00:00 2001 From: mrastgoo Date: Mon, 16 Dec 2024 20:48:08 +0100 Subject: [PATCH 10/12] test modification - context manager --- skrub/_reporting/_table_report.py | 1 - skrub/_reporting/tests/test_table_report.py | 49 ++++++++++++--------- 2 files changed, 28 insertions(+), 22 deletions(-) diff --git a/skrub/_reporting/_table_report.py b/skrub/_reporting/_table_report.py index 02570d67c..e6788e017 100644 --- a/skrub/_reporting/_table_report.py +++ b/skrub/_reporting/_table_report.py @@ -1,7 +1,6 @@ import codecs import functools import json -import locale from pathlib import Path from ._html import to_html diff --git a/skrub/_reporting/tests/test_table_report.py b/skrub/_reporting/tests/test_table_report.py index 08fb1217a..e2f5d7a98 100644 --- a/skrub/_reporting/tests/test_table_report.py +++ b/skrub/_reporting/tests/test_table_report.py @@ -1,3 +1,4 @@ +import contextlib import datetime import json import re @@ -128,7 +129,7 @@ def test_duration(df_module): @pytest.mark.parametrize( "filename_type", - ["str", "Path", "file_object", "binary_mode"], + ["str", "Path", "text_file_object", "binary_file_object"], ) def test_write_html(tmp_path, pd_module, filename_type): df = pd_module.make_dataframe({"a": [1, 2], "b": [3, 4]}) @@ -136,17 +137,18 @@ def test_write_html(tmp_path, pd_module, filename_type): tmp_file_path = tmp_path / Path("report.html") - if filename_type == "str": - filename = str(tmp_file_path) - elif filename_type == "file_object": - filename = open(tmp_file_path, "w", encoding="utf-8") - elif filename_type == "binary_mode": - filename = open(tmp_file_path, "wb") - else: - filename = tmp_file_path + with contextlib.ExitStack() as stack: + if filename_type == "str": + filename = str(tmp_file_path) + elif filename_type == "text_file_object": + filename = stack.enter_context(open(tmp_file_path, "w", encoding="utf-8")) + elif filename_type == "binary_file_object": + filename = stack.enter_context(open(tmp_file_path, "wb")) + else: + filename = tmp_file_path - report.write_html(filename) - assert tmp_file_path.exists() + report.write_html(filename) + assert tmp_file_path.exists() with open(tmp_file_path, "r", encoding="utf-8") as file: saved_content = file.read() @@ -156,17 +158,22 @@ def test_write_html(tmp_path, pd_module, filename_type): def test_write_html_with_not_utf8_encoding(tmp_path, pd_module): df = pd_module.make_dataframe({"a": [1, 2], "b": [3, 4]}) report = TableReport(df) + tmp_file_path = tmp_path / Path("report.html") - filename = open(tmp_path / Path("report.html"), "w", encoding="latin-1") - encoding = getattr(filename, "encoding", None) - with pytest.raises( - ValueError, - match=( - f"If `file` is a text file it should use utf-8 encoding; got: {encoding!r}" - ), - ): - report.write_html(filename) - assert not filename.exists() + with open(tmp_file_path, "w", encoding="latin-1") as file: + encoding = getattr(file, "encoding", None) + with pytest.raises( + ValueError, + match=( + "If `file` is a text file it should use utf-8 encoding; got:" + f" {encoding!r}" + ), + ): + report.write_html(file) + + with open(tmp_file_path, "r", encoding="latin-1") as file: + saved_content = file.read() + assert "" not in saved_content def test_verbosity_parameter(df_module, capsys): From 911eb5a09ba9104430a710a55f861b5e1747bc59 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A9r=C3=B4me=20Dock=C3=A8s?= Date: Tue, 17 Dec 2024 09:02:12 +0100 Subject: [PATCH 11/12] Update skrub/_reporting/_table_report.py --- skrub/_reporting/_table_report.py | 1 - 1 file changed, 1 deletion(-) diff --git a/skrub/_reporting/_table_report.py b/skrub/_reporting/_table_report.py index e6788e017..fc89047e7 100644 --- a/skrub/_reporting/_table_report.py +++ b/skrub/_reporting/_table_report.py @@ -218,7 +218,6 @@ def write_html(self, file): except TypeError: pass - print(getattr(file, "encoding", None)) if (encoding := getattr(file, "encoding", None)) is not None: try: assert codecs.lookup(encoding).name == "utf-8" From 036db59016c3c928d4d49a890529a7029c3e8d91 Mon Sep 17 00:00:00 2001 From: mrastgoo Date: Thu, 19 Dec 2024 00:14:38 +0100 Subject: [PATCH 12/12] adding comment --- skrub/_reporting/_table_report.py | 8 +++++++- skrub/_reporting/tests/test_table_report.py | 2 ++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/skrub/_reporting/_table_report.py b/skrub/_reporting/_table_report.py index fc89047e7..263d43c86 100644 --- a/skrub/_reporting/_table_report.py +++ b/skrub/_reporting/_table_report.py @@ -212,10 +212,15 @@ def write_html(self, file): with open(file, "w", encoding="utf8") as stream: stream.write(html) return + try: + # We don't have information about the write mode of the provided + # file-object. We start by writing bytes into it. file.write(html.encode("utf-8")) return except TypeError: + # We end-up here if the file-object was open in text mode + # Let's give it another chance in this mode. pass if (encoding := getattr(file, "encoding", None)) is not None: @@ -226,7 +231,8 @@ def write_html(self, file): "If `file` is a text file it should use utf-8 encoding; got:" f" {encoding!r}" ) - + # We write into the file-object expecting it to be in text mode at this + # stage and with a UTF-8 encoding. file.write(html) def open(self): diff --git a/skrub/_reporting/tests/test_table_report.py b/skrub/_reporting/tests/test_table_report.py index e2f5d7a98..e9a6f1fcf 100644 --- a/skrub/_reporting/tests/test_table_report.py +++ b/skrub/_reporting/tests/test_table_report.py @@ -137,6 +137,8 @@ def test_write_html(tmp_path, pd_module, filename_type): tmp_file_path = tmp_path / Path("report.html") + # making sure we are closing the open files, and dealing with the first + # condition which doesn't require opening any file with contextlib.ExitStack() as stack: if filename_type == "str": filename = str(tmp_file_path)