From 0d2757339da404f945b99ad7760b8e68c108b0ea Mon Sep 17 00:00:00 2001 From: Martin Durant Date: Tue, 6 Feb 2024 14:56:20 -0500 Subject: [PATCH 1/6] Update action versions --- .github/workflows/test_wheel.yaml | 6 ++-- .github/workflows/wheel.yml | 40 +++++++++++++------------- fastparquet/evolve.py | 0 fastparquet/parquet_thrift/__init__.py | 2 +- 4 files changed, 24 insertions(+), 24 deletions(-) create mode 100644 fastparquet/evolve.py diff --git a/.github/workflows/test_wheel.yaml b/.github/workflows/test_wheel.yaml index ad74ba06..ff03aff0 100644 --- a/.github/workflows/test_wheel.yaml +++ b/.github/workflows/test_wheel.yaml @@ -38,7 +38,7 @@ jobs: fetch-depth: 0 - name: Setup Python - uses: actions/setup-python@v4 + uses: actions/setup-python@v5 with: python-version: "3.11" @@ -50,7 +50,7 @@ jobs: - name: Add msbuild to PATH if: runner.os == 'Windows' - uses: microsoft/setup-msbuild@v1.3 + uses: microsoft/setup-msbuild@v2 - name: delvewheel install if: runner.os == 'Windows' @@ -58,7 +58,7 @@ jobs: python -m pip install delvewheel cython - name: Build wheels - uses: joerick/cibuildwheel@v2.16.2 + uses: joerick/cibuildwheel@v2.16.5 - name: Install wheels shell: bash -l {0} diff --git a/.github/workflows/wheel.yml b/.github/workflows/wheel.yml index d1b97832..389f17e7 100644 --- a/.github/workflows/wheel.yml +++ b/.github/workflows/wheel.yml @@ -32,7 +32,7 @@ jobs: fetch-depth: 0 - name: Setup Python - uses: actions/setup-python@v4 + uses: actions/setup-python@v5 with: python-version: "3.11" @@ -44,7 +44,7 @@ jobs: - name: Add msbuild to PATH if: runner.os == 'Windows' - uses: microsoft/setup-msbuild@v1 + uses: microsoft/setup-msbuild@v2 - name: delvewheel install if: runner.os == 'Windows' @@ -52,9 +52,9 @@ jobs: python -m pip install delvewheel cython - name: Build wheels - uses: joerick/cibuildwheel@v2.16.2 + uses: joerick/cibuildwheel@v2.16.5 - - uses: actions/upload-artifact@v3 + - uses: actions/upload-artifact@v4 with: path: ./wheelhouse/*.whl name: wheels @@ -87,7 +87,7 @@ jobs: fetch-depth: 0 - name: Setup Python - uses: actions/setup-python@v4 + uses: actions/setup-python@v5 with: python-version: "3.11" @@ -99,7 +99,7 @@ jobs: - name: Add msbuild to PATH if: runner.os == 'Windows' - uses: microsoft/setup-msbuild@v1 + uses: microsoft/setup-msbuild@v2 - name: delvewheel install if: runner.os == 'Windows' @@ -107,9 +107,9 @@ jobs: python -m pip install delvewheel cython - name: Build wheels - uses: joerick/cibuildwheel@v2.16.2 + uses: joerick/cibuildwheel@v2.16.5 - - uses: actions/upload-artifact@v3 + - uses: actions/upload-artifact@v4 with: path: ./wheelhouse/*.whl name: wheels @@ -142,7 +142,7 @@ jobs: fetch-depth: 0 - name: Setup Python - uses: actions/setup-python@v4 + uses: actions/setup-python@v5 with: python-version: "3.11" @@ -154,7 +154,7 @@ jobs: - name: Add msbuild to PATH if: runner.os == 'Windows' - uses: microsoft/setup-msbuild@v1 + uses: microsoft/setup-msbuild@v2 - name: delvewheel install if: runner.os == 'Windows' @@ -162,9 +162,9 @@ jobs: python -m pip install delvewheel cython - name: Build wheels - uses: joerick/cibuildwheel@v2.16.2 + uses: joerick/cibuildwheel@v2.16.5 - - uses: actions/upload-artifact@v3 + - uses: actions/upload-artifact@v4 with: path: ./wheelhouse/*.whl name: wheels @@ -197,9 +197,9 @@ jobs: fetch-depth: 0 - name: Setup Python - uses: actions/setup-python@v4 + uses: actions/setup-python@v5 with: - python-version: "3.11" + python-version: "3.12" - name: Set up QEMU if: runner.os == 'Linux' @@ -209,7 +209,7 @@ jobs: - name: Add msbuild to PATH if: runner.os == 'Windows' - uses: microsoft/setup-msbuild@v1 + uses: microsoft/setup-msbuild@v2 - name: delvewheel install if: runner.os == 'Windows' @@ -217,9 +217,9 @@ jobs: python -m pip install delvewheel cython - name: Build wheels - uses: joerick/cibuildwheel@v2.16.2 + uses: joerick/cibuildwheel@v2.16.5 - - uses: actions/upload-artifact@v3 + - uses: actions/upload-artifact@v4 with: path: ./wheelhouse/*.whl name: wheels @@ -246,14 +246,14 @@ jobs: fetch-depth: 0 - name: Setup Python - uses: actions/setup-python@v4 + uses: actions/setup-python@v5 with: python-version: "3.11" - name: Build wheels - uses: joerick/cibuildwheel@v2.16.2 + uses: joerick/cibuildwheel@v2.16.5 - - uses: actions/upload-artifact@v3 + - uses: actions/upload-artifact@v4 with: path: ./wheelhouse/*.whl name: wheels diff --git a/fastparquet/evolve.py b/fastparquet/evolve.py new file mode 100644 index 00000000..e69de29b diff --git a/fastparquet/parquet_thrift/__init__.py b/fastparquet/parquet_thrift/__init__.py index c71820ef..85860548 100644 --- a/fastparquet/parquet_thrift/__init__.py +++ b/fastparquet/parquet_thrift/__init__.py @@ -4,7 +4,7 @@ def __getattr__(name): # for compatability with coe that calls, e.g., parquet_thrift.RowGroup(...) - from ..cencoding import ThriftObject + from fastparquet.cencoding import ThriftObject if name[0].isupper(): return partial(ThriftObject.from_fields, thrift_name=name) raise AttributeError(name) From 75f509b6dadecefb94130651f2ef60f416c9f3c7 Mon Sep 17 00:00:00 2001 From: Martin Durant Date: Tue, 6 Feb 2024 14:57:56 -0500 Subject: [PATCH 2/6] py versions --- .github/workflows/wheel.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/wheel.yml b/.github/workflows/wheel.yml index 389f17e7..72444f89 100644 --- a/.github/workflows/wheel.yml +++ b/.github/workflows/wheel.yml @@ -34,7 +34,7 @@ jobs: - name: Setup Python uses: actions/setup-python@v5 with: - python-version: "3.11" + python-version: "3.12" - name: Set up QEMU if: runner.os == 'Linux' @@ -89,7 +89,7 @@ jobs: - name: Setup Python uses: actions/setup-python@v5 with: - python-version: "3.11" + python-version: "3.12" - name: Set up QEMU if: runner.os == 'Linux' @@ -144,7 +144,7 @@ jobs: - name: Setup Python uses: actions/setup-python@v5 with: - python-version: "3.11" + python-version: "3.12" - name: Set up QEMU if: runner.os == 'Linux' @@ -248,7 +248,7 @@ jobs: - name: Setup Python uses: actions/setup-python@v5 with: - python-version: "3.11" + python-version: "3.12" - name: Build wheels uses: joerick/cibuildwheel@v2.16.5 From 98dce7ddc4e2878983b695fcadea4ea670d6320f Mon Sep 17 00:00:00 2001 From: Martin Durant Date: Tue, 6 Feb 2024 15:29:48 -0500 Subject: [PATCH 3/6] rollback upload version https://github.com/actions/upload-artifact/issues/478 --- .github/workflows/wheel.yml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/workflows/wheel.yml b/.github/workflows/wheel.yml index 72444f89..e3cbcb21 100644 --- a/.github/workflows/wheel.yml +++ b/.github/workflows/wheel.yml @@ -54,7 +54,7 @@ jobs: - name: Build wheels uses: joerick/cibuildwheel@v2.16.5 - - uses: actions/upload-artifact@v4 + - uses: actions/upload-artifact@v3 with: path: ./wheelhouse/*.whl name: wheels @@ -109,7 +109,7 @@ jobs: - name: Build wheels uses: joerick/cibuildwheel@v2.16.5 - - uses: actions/upload-artifact@v4 + - uses: actions/upload-artifact@v3 with: path: ./wheelhouse/*.whl name: wheels @@ -164,7 +164,7 @@ jobs: - name: Build wheels uses: joerick/cibuildwheel@v2.16.5 - - uses: actions/upload-artifact@v4 + - uses: actions/upload-artifact@v3 with: path: ./wheelhouse/*.whl name: wheels @@ -219,7 +219,7 @@ jobs: - name: Build wheels uses: joerick/cibuildwheel@v2.16.5 - - uses: actions/upload-artifact@v4 + - uses: actions/upload-artifact@v3 with: path: ./wheelhouse/*.whl name: wheels @@ -253,7 +253,7 @@ jobs: - name: Build wheels uses: joerick/cibuildwheel@v2.16.5 - - uses: actions/upload-artifact@v4 + - uses: actions/upload-artifact@v3 with: path: ./wheelhouse/*.whl name: wheels From c991f608a45d431c5e1048f974f536fc7ae768cc Mon Sep 17 00:00:00 2001 From: Martin Durant Date: Wed, 7 Feb 2024 09:23:36 -0500 Subject: [PATCH 4/6] empty From 9a24e8f91296a6e89788a26a42e1af623328c7d8 Mon Sep 17 00:00:00 2001 From: Martin Durant Date: Wed, 7 Feb 2024 12:54:57 -0500 Subject: [PATCH 5/6] All absolute imports --- fastparquet/__init__.py | 10 +++++----- fastparquet/api.py | 13 ++++++------- fastparquet/compression.py | 2 +- fastparquet/converted_types.py | 6 +++--- fastparquet/core.py | 19 +++++++++---------- fastparquet/dataframe.py | 2 +- fastparquet/encoding.py | 6 +++--- fastparquet/schema.py | 2 +- fastparquet/thrift_structures.py | 7 +++++-- fastparquet/util.py | 4 ++-- fastparquet/writer.py | 18 ++++++++---------- 11 files changed, 44 insertions(+), 45 deletions(-) diff --git a/fastparquet/__init__.py b/fastparquet/__init__.py index 03ac88ae..17e7f4b6 100755 --- a/fastparquet/__init__.py +++ b/fastparquet/__init__.py @@ -1,8 +1,8 @@ """parquet - read parquet files.""" -from ._version import __version__ -from .writer import write, update_file_custom_metadata -from . import core, schema, converted_types, api -from .api import ParquetFile -from .util import ParquetException +from fastparquet._version import __version__ +from fastparquet.writer import write, update_file_custom_metadata +from fastparquet import core, schema, converted_types, api +from fastparquet.api import ParquetFile +from fastparquet.util import ParquetException diff --git a/fastparquet/api.py b/fastparquet/api.py index 53caba12..58b4760f 100644 --- a/fastparquet/api.py +++ b/fastparquet/api.py @@ -6,16 +6,15 @@ import numpy as np import fsspec -from fastparquet.util import join_path import pandas as pd -from . import core, schema, converted_types, encoding, dataframe, writer -from . import parquet_thrift -from .cencoding import ThriftObject, from_buffer -from .json import json_decoder -from .util import (default_open, default_remove, ParquetException, val_to_num, +from fastparquet import core, schema, converted_types, encoding, dataframe, writer +from fastparquet import parquet_thrift +from fastparquet.cencoding import ThriftObject, from_buffer +from fastparquet.json import json_decoder +from fastparquet.util import (default_open, default_remove, ParquetException, val_to_num, ops, ensure_bytes, ensure_str, check_column_names, metadata_from_many, - ex_from_sep, _strip_path_tail, get_fs, PANDAS_VERSION) + ex_from_sep, _strip_path_tail, get_fs, PANDAS_VERSION, jion_path) # Find in names of partition files the integer matching "**part.*.parquet", diff --git a/fastparquet/compression.py b/fastparquet/compression.py index f62fc72f..01188d70 100644 --- a/fastparquet/compression.py +++ b/fastparquet/compression.py @@ -1,7 +1,7 @@ import cramjam import numpy as np -from . import parquet_thrift +from fastparquet import parquet_thrift # TODO: use stream/direct-to-buffer conversions instead of memcopy diff --git a/fastparquet/converted_types.py b/fastparquet/converted_types.py index cd8858e7..d21a5e37 100644 --- a/fastparquet/converted_types.py +++ b/fastparquet/converted_types.py @@ -10,9 +10,9 @@ import numpy as np import pandas as pd -from . import parquet_thrift -from .cencoding import time_shift -from .json import json_decoder +from fastparquet import parquet_thrift +from fastparquet.cencoding import time_shift +from fastparquet.json import json_decoder logger = logging.getLogger('parquet') # pylint: disable=invalid-name diff --git a/fastparquet/core.py b/fastparquet/core.py index 3facf893..79c17762 100644 --- a/fastparquet/core.py +++ b/fastparquet/core.py @@ -1,17 +1,16 @@ -import warnings import numpy as np import pandas as pd -from . import encoding -from . encoding import read_plain +from fastparquet import encoding +from fastparquet.encoding import read_plain import fastparquet.cencoding as encoding -from .compression import decompress_data, rev_map, decom_into -from .converted_types import convert, simple, converts_inplace -from .schema import _is_list_like, _is_map_like -from .speedups import unpack_byte_array -from . import parquet_thrift -from .cencoding import ThriftObject, read_thrift -from .util import val_to_num, ex_from_sep +from fastparquet.compression import decompress_data, rev_map, decom_into +from fastparquet.converted_types import convert, simple, converts_inplace +from fastparquet.schema import _is_list_like, _is_map_like +from fastparquet.speedups import unpack_byte_array +from fastparquet import parquet_thrift +from fastparquet.cencoding import ThriftObject +from fastparquet.util import val_to_num def _read_page(file_obj, page_header, column_metadata): diff --git a/fastparquet/dataframe.py b/fastparquet/dataframe.py index 51ba40c4..afa7d01b 100644 --- a/fastparquet/dataframe.py +++ b/fastparquet/dataframe.py @@ -11,7 +11,7 @@ from pandas.core.arrays.masked import BaseMaskedDtype import warnings -from .util import PANDAS_VERSION +from fastparquet.util import PANDAS_VERSION class Dummy(object): diff --git a/fastparquet/encoding.py b/fastparquet/encoding.py index 3525c18f..8e69c344 100755 --- a/fastparquet/encoding.py +++ b/fastparquet/encoding.py @@ -1,8 +1,8 @@ """encoding.py - methods for reading parquet encoded data blocks.""" import numpy as np -from .cencoding import read_bitpacked1, NumpyIO -from .speedups import unpack_byte_array -from . import parquet_thrift +from fastparquet.cencoding import read_bitpacked1, NumpyIO +from fastparquet.speedups import unpack_byte_array +from fastparquet import parquet_thrift def read_plain_boolean(raw_bytes, count, out=None): diff --git a/fastparquet/schema.py b/fastparquet/schema.py index 2a0e988e..83a43afd 100755 --- a/fastparquet/schema.py +++ b/fastparquet/schema.py @@ -1,7 +1,7 @@ """Utils for working with the parquet thrift models.""" from collections import OrderedDict -from . import parquet_thrift +from fastparquet import parquet_thrift def schema_tree(schema, i=0): diff --git a/fastparquet/thrift_structures.py b/fastparquet/thrift_structures.py index 75410a4a..b8ae51d2 100644 --- a/fastparquet/thrift_structures.py +++ b/fastparquet/thrift_structures.py @@ -1,2 +1,5 @@ -from . import parquet_thrift -from .cencoding import ThriftObject +from fastparquet import parquet_thrift +from fastparquet.cencoding import ThriftObject + + +__all__ = ["ThriftObject", "parquet_thrift"] diff --git a/fastparquet/util.py b/fastparquet/util.py index 4e1c3115..7e93f37b 100644 --- a/fastparquet/util.py +++ b/fastparquet/util.py @@ -14,8 +14,8 @@ import fsspec -from . import parquet_thrift -from .cencoding import ThriftObject +from fastparquet import parquet_thrift +from fastparquet.cencoding import ThriftObject from fastparquet import __version__ PANDAS_VERSION = Version(pd.__version__) diff --git a/fastparquet/writer.py b/fastparquet/writer.py index 873d6954..b023eb2e 100644 --- a/fastparquet/writer.py +++ b/fastparquet/writer.py @@ -11,19 +11,17 @@ from fastparquet.util import join_path -from . import parquet_thrift -from .api import ParquetFile, partitions, part_ids -from .compression import compress_data -from .converted_types import tobson -from .json import json_encoder -from .util import (default_open, default_mkdirs, check_column_names, +from fastparquet import parquet_thrift, __version__, cencoding +from fastparquet.api import ParquetFile, partitions, part_ids +from fastparquet.compression import compress_data +from fastparquet.converted_types import tobson +from fastparquet.json import json_encoder +from fastparquet.util import (default_open, default_mkdirs, check_column_names, created_by, get_column_metadata, norm_col_name, path_string, reset_row_idx, get_fs, update_custom_metadata) -from . import __version__ -from .speedups import array_encode_utf8, pack_byte_array -from . import cencoding -from .cencoding import NumpyIO, ThriftObject, from_buffer +from fastparquet.speedups import array_encode_utf8, pack_byte_array +from fastparquet.cencoding import NumpyIO, ThriftObject, from_buffer from decimal import Decimal MARKER = b'PAR1' From 4175374d2ab2ae994506d4e71cf982d6efb9cdb4 Mon Sep 17 00:00:00 2001 From: Martin Durant Date: Wed, 7 Feb 2024 13:11:18 -0500 Subject: [PATCH 6/6] typo --- fastparquet/api.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fastparquet/api.py b/fastparquet/api.py index 58b4760f..f6e55426 100644 --- a/fastparquet/api.py +++ b/fastparquet/api.py @@ -14,7 +14,7 @@ from fastparquet.json import json_decoder from fastparquet.util import (default_open, default_remove, ParquetException, val_to_num, ops, ensure_bytes, ensure_str, check_column_names, metadata_from_many, - ex_from_sep, _strip_path_tail, get_fs, PANDAS_VERSION, jion_path) + ex_from_sep, _strip_path_tail, get_fs, PANDAS_VERSION, join_path) # Find in names of partition files the integer matching "**part.*.parquet",