From 8a0345f1bedf83380e283b6230ac546575dc33ab Mon Sep 17 00:00:00 2001 From: Stijn de Gooijer Date: Sun, 19 Jan 2025 20:49:05 +0100 Subject: [PATCH] ci: Unpin NumPy in type checking workflow (#20792) --- .github/workflows/lint-python.yml | 4 +- py-polars/polars/series/series.py | 5 +- .../unit/interop/numpy/test_to_numpy_df.py | 2 +- .../interop/numpy/test_to_numpy_series.py | 28 +++++----- py-polars/tests/unit/series/test_series.py | 55 ++++++++++--------- 5 files changed, 49 insertions(+), 45 deletions(-) diff --git a/.github/workflows/lint-python.yml b/.github/workflows/lint-python.yml index dc1fdd8a34f4..895a6a85b576 100644 --- a/.github/workflows/lint-python.yml +++ b/.github/workflows/lint-python.yml @@ -58,9 +58,7 @@ jobs: - name: Install Python dependencies working-directory: py-polars - # TODO: Fix typing issues for newer NumPy versions - # https://github.com/pola-rs/polars/issues/20561 - run: uv pip install -r requirements-dev.txt -r requirements-lint.txt 'numpy<2.1' + run: uv pip install -r requirements-dev.txt -r requirements-lint.txt # Allow untyped calls for older Python versions - name: Run mypy diff --git a/py-polars/polars/series/series.py b/py-polars/polars/series/series.py index 03aaabc85c57..875aef8be515 100644 --- a/py-polars/polars/series/series.py +++ b/py-polars/polars/series/series.py @@ -1421,7 +1421,7 @@ def __array_ufunc__( # Get minimum dtype needed to be able to cast all input arguments to the # same dtype. - dtype_char_minimum = np.result_type(*args).char + dtype_char_minimum: str = np.result_type(*args).char # Get all possible output dtypes for ufunc. # Input dtypes and output dtypes seem to always match for ufunc.types, @@ -4756,7 +4756,8 @@ def scatter( └─────────┘ """ if not isinstance(indices, Iterable): - indices = [indices] # type: ignore[list-item] + index: Any = indices # Workaround for older NumPy versions + indices = [index] indices = Series(values=indices) if indices.is_empty(): return self diff --git a/py-polars/tests/unit/interop/numpy/test_to_numpy_df.py b/py-polars/tests/unit/interop/numpy/test_to_numpy_df.py index fa1815823c34..f45edc05cd0b 100644 --- a/py-polars/tests/unit/interop/numpy/test_to_numpy_df.py +++ b/py-polars/tests/unit/interop/numpy/test_to_numpy_df.py @@ -160,7 +160,7 @@ def test_df_to_numpy_zero_copy_path_temporal() -> None: s = pl.Series(values) df = pl.DataFrame({"a": s[:4], "b": s[4:8], "c": s[8:]}) - result = df.to_numpy(allow_copy=False) + result: npt.NDArray[np.generic] = df.to_numpy(allow_copy=False) assert result.flags.f_contiguous is True assert result.flags.writeable is False assert result.tolist() == [list(row) for row in df.iter_rows()] diff --git a/py-polars/tests/unit/interop/numpy/test_to_numpy_series.py b/py-polars/tests/unit/interop/numpy/test_to_numpy_series.py index f1816c5ef963..663640641059 100644 --- a/py-polars/tests/unit/interop/numpy/test_to_numpy_series.py +++ b/py-polars/tests/unit/interop/numpy/test_to_numpy_series.py @@ -52,7 +52,7 @@ def test_series_to_numpy_numeric_zero_copy( dtype: PolarsDataType, expected_dtype: npt.DTypeLike ) -> None: s = pl.Series([1, 2, 3]).cast(dtype) - result = s.to_numpy(allow_copy=False) + result: npt.NDArray[np.generic] = s.to_numpy(allow_copy=False) assert_zero_copy(s, result) assert result.tolist() == s.to_list() @@ -78,7 +78,7 @@ def test_series_to_numpy_numeric_with_nulls( dtype: PolarsDataType, expected_dtype: npt.DTypeLike ) -> None: s = pl.Series([1, 2, None], dtype=dtype, strict=False) - result = s.to_numpy() + result: npt.NDArray[np.generic] = s.to_numpy() assert result.tolist()[:-1] == s.to_list()[:-1] assert np.isnan(result[-1]) @@ -104,7 +104,7 @@ def test_series_to_numpy_temporal_zero_copy( ) -> None: values = [0, 2_000, 1_000_000] s = pl.Series(values, dtype=dtype, strict=False) - result = s.to_numpy(allow_copy=False) + result: npt.NDArray[np.generic] = s.to_numpy(allow_copy=False) assert_zero_copy(s, result) # NumPy tolist returns integers for ns precision @@ -118,7 +118,7 @@ def test_series_to_numpy_temporal_zero_copy( def test_series_to_numpy_datetime_with_tz_zero_copy() -> None: values = [datetime(1970, 1, 1), datetime(2024, 2, 28)] s = pl.Series(values).dt.convert_time_zone("Europe/Amsterdam").rechunk() - result = s.to_numpy(allow_copy=False) + result: npt.NDArray[np.generic] = s.to_numpy(allow_copy=False) assert_zero_copy(s, result) assert result.tolist() == values @@ -129,7 +129,7 @@ def test_series_to_numpy_date() -> None: values = [date(1970, 1, 1), date(2024, 2, 28)] s = pl.Series(values) - result = s.to_numpy() + result: npt.NDArray[np.generic] = s.to_numpy() assert s.to_list() == result.tolist() assert result.dtype == np.dtype("datetime64[D]") @@ -168,7 +168,7 @@ def test_series_to_numpy_temporal_with_nulls( ) -> None: values = [0, 2_000, 1_000_000, None] s = pl.Series(values, dtype=dtype, strict=False) - result = s.to_numpy() + result: npt.NDArray[np.generic] = s.to_numpy() # NumPy tolist returns integers for ns precision if getattr(s.dtype, "time_unit", None) == "ns": @@ -182,7 +182,7 @@ def test_series_to_numpy_temporal_with_nulls( def test_series_to_numpy_datetime_with_tz_with_nulls() -> None: values = [datetime(1970, 1, 1), datetime(2024, 2, 28), None] s = pl.Series(values).dt.convert_time_zone("Europe/Amsterdam") - result = s.to_numpy() + result: npt.NDArray[np.generic] = s.to_numpy() assert result.tolist() == values assert result.dtype == np.dtype("datetime64[us]") @@ -209,7 +209,7 @@ def test_to_numpy_object_dtypes( values.append(None) s = pl.Series(values, dtype=dtype) - result = s.to_numpy() + result: npt.NDArray[np.generic] = s.to_numpy() assert result.tolist() == values assert result.dtype == np.object_ @@ -218,7 +218,7 @@ def test_to_numpy_object_dtypes( def test_series_to_numpy_bool() -> None: s = pl.Series([True, False]) - result = s.to_numpy() + result: npt.NDArray[np.generic] = s.to_numpy() assert s.to_list() == result.tolist() assert result.dtype == np.bool_ @@ -228,7 +228,7 @@ def test_series_to_numpy_bool() -> None: def test_series_to_numpy_bool_with_nulls() -> None: s = pl.Series([True, False, None]) - result = s.to_numpy() + result: npt.NDArray[np.generic] = s.to_numpy() assert s.to_list() == result.tolist() assert result.dtype == np.object_ @@ -249,7 +249,7 @@ def test_series_to_numpy_array_of_int() -> None: def test_series_to_numpy_array_of_str() -> None: values = [["1", "2", "3"], ["4", "5", "10000"]] s = pl.Series(values, dtype=pl.Array(pl.String, 3)) - result = s.to_numpy() + result: npt.NDArray[np.generic] = s.to_numpy() assert result.tolist() == values assert result.dtype == np.object_ @@ -343,7 +343,7 @@ def test_to_numpy_chunked() -> None: s2 = pl.Series([3, 4]) s = pl.concat([s1, s2], rechunk=False) - result = s.to_numpy() + result: npt.NDArray[np.generic] = s.to_numpy() assert result.tolist() == s.to_list() assert result.dtype == np.int64 @@ -362,7 +362,7 @@ def test_to_numpy_chunked_temporal_nested() -> None: s2 = pl.Series([[datetime(2022, 1, 1)], [datetime(2023, 1, 1)]], dtype=dtype) s = pl.concat([s1, s2], rechunk=False) - result = s.to_numpy() + result: npt.NDArray[np.generic] = s.to_numpy() assert result.tolist() == s.to_list() assert result.dtype == np.dtype("datetime64[us]") @@ -375,7 +375,7 @@ def test_zero_copy_only_deprecated() -> None: values = [1, 2] s = pl.Series([1, 2]) with pytest.deprecated_call(): - result = s.to_numpy(zero_copy_only=True) + result: npt.NDArray[np.generic] = s.to_numpy(zero_copy_only=True) assert result.tolist() == values diff --git a/py-polars/tests/unit/series/test_series.py b/py-polars/tests/unit/series/test_series.py index c4d704e8bbbe..c5d5a3517da0 100644 --- a/py-polars/tests/unit/series/test_series.py +++ b/py-polars/tests/unit/series/test_series.py @@ -498,35 +498,40 @@ def test_cast() -> None: pl.Series(["1", "2", "3", "4", "foobar"]).cast(int) -def test_to_pandas() -> None: - for test_data in ( +@pytest.mark.parametrize( + "test_data", + [ [1, None, 2], ["abc", None, "xyz"], [None, datetime.now()], [[1, 2], [3, 4], None], - ): - a = pl.Series("s", test_data) - b = a.to_pandas() - - assert a.name == b.name - assert b.isnull().sum() == 1 - - if a.dtype == pl.List: - vals_b = [(None if x is None else x.tolist()) for x in b] - else: - vals_b = b.replace({np.nan: None}).values.tolist() - - assert vals_b == test_data - - try: - c = a.to_pandas(use_pyarrow_extension_array=True) - assert a.name == c.name - assert c.isnull().sum() == 1 - vals_c = [None if x is pd.NA else x for x in c.tolist()] - assert vals_c == test_data - except ModuleNotFoundError: - # Skip test if pandas>=1.5.0 or Pyarrow>=8.0.0 is not installed. - pass + ], +) +def test_to_pandas(test_data: list[Any]) -> None: + a = pl.Series("s", test_data) + b = a.to_pandas() + + assert a.name == b.name + assert b.isnull().sum() == 1 + + vals_b: list[Any] + if a.dtype == pl.List: + vals_b = [(None if x is None else x.tolist()) for x in b] + else: + v = b.replace({np.nan: None}).values.tolist() + vals_b = cast(list[Any], v) + + assert vals_b == test_data + + try: + c = a.to_pandas(use_pyarrow_extension_array=True) + assert a.name == c.name + assert c.isnull().sum() == 1 + vals_c = [None if x is pd.NA else x for x in c.tolist()] + assert vals_c == test_data + except ModuleNotFoundError: + # Skip test if pandas>=1.5.0 or Pyarrow>=8.0.0 is not installed. + pass def test_series_to_list() -> None: