From 5a021ae0272712340439af1409612797d49a58e2 Mon Sep 17 00:00:00 2001 From: Aki Ariga Date: Fri, 13 Sep 2024 08:33:43 -0700 Subject: [PATCH] Set the chunk number cap to 1000 To avoid OSError when writing large files, the number of chunks is capped at 1000 by raising the per-chunk record count as needed. --- pytd/writer.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/pytd/writer.py b/pytd/writer.py index d43d650..efa1628 100644 --- a/pytd/writer.py +++ b/pytd/writer.py @@ -450,11 +450,13 @@ def write_dataframe( fps.append(fp) elif fmt == "msgpack": _replace_pd_na(dataframe) - + num_rows = len(dataframe) + # the number of chunks should not exceed 1000 to avoid OSError + _chunk_record_size = max(chunk_record_size, num_rows//1000) try: - for start in range(0, len(dataframe), chunk_record_size): + for start in range(0, num_rows, _chunk_record_size): records = dataframe.iloc[ start : start + _chunk_record_size ].to_dict(orient="records") fp = tempfile.NamedTemporaryFile( suffix=".msgpack.gz", delete=False