From 6357f96acb05b939e5e0b6c8bfb22b62671ebcb0 Mon Sep 17 00:00:00 2001
From: Albert Villanova del Moral <8515462+albertvillanova@users.noreply.github.com>
Date: Wed, 10 Jul 2024 15:22:16 +0200
Subject: [PATCH] Test to_json batch_size

---
 tests/test_arrow_dataset.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/tests/test_arrow_dataset.py b/tests/test_arrow_dataset.py
index bae4fa1580c..77d89555cd8 100644
--- a/tests/test_arrow_dataset.py
+++ b/tests/test_arrow_dataset.py
@@ -4041,10 +4041,11 @@ def test_dataset_from_sql_keep_in_memory(keep_in_memory, sqlite_path, tmp_path,
     _check_sql_dataset(dataset, expected_features)
 
 
+@pytest.mark.parametrize("batch_size", [None, 5])
 @pytest.mark.parametrize("lines", [True, False])
-def test_dataset_to_json(lines, dataset, tmp_path):
+def test_dataset_to_json(lines, batch_size, dataset, tmp_path):
     file_path = tmp_path / "test_path.jsonl"
-    bytes_written = dataset.to_json(path_or_buf=file_path, lines=lines)
+    bytes_written = dataset.to_json(path_or_buf=file_path, lines=lines, batch_size=batch_size)
     assert file_path.is_file()
     assert bytes_written == file_path.stat().st_size
     df = pd.read_json(file_path, orient="records", lines=lines)