diff --git a/CHANGES.rst b/CHANGES.rst
index 73003c4d5..f98acddcf 100644
--- a/CHANGES.rst
+++ b/CHANGES.rst
@@ -29,6 +29,9 @@ Release 0.4.1
 Changes
 -------
 
+* :class:`TableReport` now has a ``write_html`` method to save the report to
+  an HTML file. :pr:`1190` by :user:`Mojdeh Rastgoo`.
+
 * A new parameter ``verbose`` has been added to the :class:`TableReport` to toggle on or off the
   printing of progress information when a report is being generated.
   :pr:`1182` by :user:`Priscilla Baah`.
diff --git a/skrub/_reporting/_table_report.py b/skrub/_reporting/_table_report.py
index 8c60f7e6b..263d43c86 100644
--- a/skrub/_reporting/_table_report.py
+++ b/skrub/_reporting/_table_report.py
@@ -1,5 +1,7 @@
+import codecs
 import functools
 import json
+from pathlib import Path
 
 from ._html import to_html
 from ._serve import open_in_browser
@@ -197,6 +199,55 @@ def _repr_mimebundle_(self, include=None, exclude=None):
     def _repr_html_(self):
         return self._repr_mimebundle_()["text/html"]
 
+    def write_html(self, file):
+        """Store the report into an HTML file.
+
+        Parameters
+        ----------
+        file : str, pathlib.Path or file object
+            The file object or path of the file to store the HTML output.
+            File objects may be opened in binary mode or in text mode; text
+            files must use the UTF-8 encoding.
+
+        Raises
+        ------
+        ValueError
+            If ``file`` is a text file object whose encoding is not UTF-8.
+        """
+        html = self.html()
+        if isinstance(file, (str, Path)):
+            with open(file, "w", encoding="utf8") as stream:
+                stream.write(html)
+            return
+
+        try:
+            # We don't have information about the write mode of the provided
+            # file-object. We start by writing bytes into it.
+            file.write(html.encode("utf-8"))
+            return
+        except TypeError:
+            # We end-up here if the file-object was open in text mode
+            # Let's give it another chance in this mode.
+            pass
+
+        if (encoding := getattr(file, "encoding", None)) is not None:
+            # Validate with an explicit comparison rather than ``assert``:
+            # assertions are stripped when Python runs with ``-O``, which
+            # would silently disable the encoding check.
+            try:
+                is_utf8 = codecs.lookup(encoding).name == "utf-8"
+            except LookupError:
+                # Unknown codec name: it cannot be UTF-8.
+                is_utf8 = False
+            if not is_utf8:
+                raise ValueError(
+                    "If `file` is a text file it should use utf-8 encoding; got:"
+                    f" {encoding!r}"
+                )
+        # We write into the file-object expecting it to be in text mode at this
+        # stage and with a UTF-8 encoding.
+        file.write(html)
+
     def open(self):
         """Open the HTML report in a web browser."""
         open_in_browser(self.html())
diff --git a/skrub/_reporting/tests/test_table_report.py b/skrub/_reporting/tests/test_table_report.py
index c878a0d92..e9a6f1fcf 100644
--- a/skrub/_reporting/tests/test_table_report.py
+++ b/skrub/_reporting/tests/test_table_report.py
@@ -1,7 +1,11 @@
+import contextlib
 import datetime
 import json
 import re
 import warnings
+from pathlib import Path
+
+import pytest
 
 from skrub import TableReport, ToDatetime
 from skrub import _dataframe as sbd
@@ -123,6 +127,57 @@ def test_duration(df_module):
     assert re.search(r"2(\.0)?\s+days", TableReport(df).html())
 
 
+@pytest.mark.parametrize(
+    "filename_type",
+    ["str", "Path", "text_file_object", "binary_file_object"],
+)
+def test_write_html(tmp_path, pd_module, filename_type):
+    df = pd_module.make_dataframe({"a": [1, 2], "b": [3, 4]})
+    report = TableReport(df)
+
+    tmp_file_path = tmp_path / Path("report.html")
+
+    # making sure we are closing the open files, and dealing with the first
+    # condition which doesn't require opening any file
+    with contextlib.ExitStack() as stack:
+        if filename_type == "str":
+            filename = str(tmp_file_path)
+        elif filename_type == "text_file_object":
+            filename = stack.enter_context(open(tmp_file_path, "w", encoding="utf-8"))
+        elif filename_type == "binary_file_object":
+            filename = stack.enter_context(open(tmp_file_path, "wb"))
+        else:
+            filename = tmp_file_path
+
+        report.write_html(filename)
+        assert tmp_file_path.exists()
+
+    with open(tmp_file_path, "r", encoding="utf-8") as file:
+        saved_content = file.read()
+    assert "</html>" in saved_content
+
+
+def test_write_html_with_not_utf8_encoding(tmp_path, pd_module):
+    df = pd_module.make_dataframe({"a": [1, 2], "b": [3, 4]})
+    report = TableReport(df)
+    tmp_file_path = tmp_path / Path("report.html")
+
+    with open(tmp_file_path, "w", encoding="latin-1") as file:
+        encoding = getattr(file, "encoding", None)
+        with pytest.raises(
+            ValueError,
+            match=(
+                "If `file` is a text file it should use utf-8 encoding; got:"
+                f" {encoding!r}"
+            ),
+        ):
+            report.write_html(file)
+
+    with open(tmp_file_path, "r", encoding="latin-1") as file:
+        saved_content = file.read()
+    assert "</html>" not in saved_content
+
+
 def test_verbosity_parameter(df_module, capsys):
     df = df_module.make_dataframe(
         dict(