ci: Unpin NumPy in type checking workflow (pola-rs#20792)
stinodego authored Jan 19, 2025
1 parent 87b03aa commit 8a0345f
Showing 5 changed files with 49 additions and 45 deletions.
4 changes: 1 addition & 3 deletions .github/workflows/lint-python.yml
@@ -58,9 +58,7 @@ jobs:
- name: Install Python dependencies
working-directory: py-polars
-# TODO: Fix typing issues for newer NumPy versions
-# https://github.com/pola-rs/polars/issues/20561
-run: uv pip install -r requirements-dev.txt -r requirements-lint.txt 'numpy<2.1'
+run: uv pip install -r requirements-dev.txt -r requirements-lint.txt

# Allow untyped calls for older Python versions
- name: Run mypy
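Note (not part of this commit): with the 'numpy<2.1' constraint dropped, the lint environment resolves to whatever NumPy release uv picks. A minimal, hypothetical sanity check one could run locally to see which version that is:

import numpy as np

# Print the NumPy version the unpinned install resolved to, and whether it is
# at or beyond the previously pinned boundary (2.1).
major, minor = (int(part) for part in np.__version__.split(".")[:2])
print(f"NumPy {np.__version__} (>= 2.1: {(major, minor) >= (2, 1)})")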
5 changes: 3 additions & 2 deletions py-polars/polars/series/series.py
@@ -1421,7 +1421,7 @@ def __array_ufunc__(

# Get minimum dtype needed to be able to cast all input arguments to the
# same dtype.
-dtype_char_minimum = np.result_type(*args).char
+dtype_char_minimum: str = np.result_type(*args).char

# Get all possible output dtypes for ufunc.
# Input dtypes and output dtypes seem to always match for ufunc.types,
@@ -4756,7 +4756,8 @@ def scatter(
└─────────┘
"""
if not isinstance(indices, Iterable):
-    indices = [indices]  # type: ignore[list-item]
+    index: Any = indices  # Workaround for older NumPy versions
+    indices = [index]
indices = Series(values=indices)
if indices.is_empty():
return self
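Note (standalone sketch, not the Polars implementation): both series.py hunks add explicit annotations so the code type-checks under the previously pinned NumPy stubs and the newer ones alike. The two patterns in isolation, with normalize as a hypothetical helper name:

from __future__ import annotations

from collections.abc import Iterable
from typing import Any

import numpy as np

# Pattern 1: np.result_type promotes its arguments to a common dtype; the
# dtype's .char attribute is a plain str at runtime, so the annotation is safe.
dtype_char_minimum: str = np.result_type(np.int8, np.float32).char  # 'f'

# Pattern 2: widen a scalar to Any before wrapping it in a list, rather than
# silencing mypy's list-item error with a type: ignore comment.
def normalize(indices: int | Iterable[int]) -> list[Any]:
    if not isinstance(indices, Iterable):
        index: Any = indices
        return [index]
    return list(indices)

print(dtype_char_minimum, normalize(3), normalize([1, 2]))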
2 changes: 1 addition & 1 deletion py-polars/tests/unit/interop/numpy/test_to_numpy_df.py
@@ -160,7 +160,7 @@ def test_df_to_numpy_zero_copy_path_temporal() -> None:
s = pl.Series(values)
df = pl.DataFrame({"a": s[:4], "b": s[4:8], "c": s[8:]})

-result = df.to_numpy(allow_copy=False)
+result: npt.NDArray[np.generic] = df.to_numpy(allow_copy=False)
assert result.flags.f_contiguous is True
assert result.flags.writeable is False
assert result.tolist() == [list(row) for row in df.iter_rows()]
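Note (hypothetical illustration, not part of this commit): the assertions above rely on ndarray.flags, which reports memory layout and mutability. A minimal standalone example of the two flags being checked:

import numpy as np

# A Fortran-ordered array marked read-only exposes both properties via flags.
arr = np.asfortranarray(np.zeros((3, 2)))
arr.flags.writeable = False
assert arr.flags.f_contiguous is True
assert arr.flags.writeable is False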
28 changes: 14 additions & 14 deletions py-polars/tests/unit/interop/numpy/test_to_numpy_series.py
@@ -52,7 +52,7 @@ def test_series_to_numpy_numeric_zero_copy(
dtype: PolarsDataType, expected_dtype: npt.DTypeLike
) -> None:
s = pl.Series([1, 2, 3]).cast(dtype)
-result = s.to_numpy(allow_copy=False)
+result: npt.NDArray[np.generic] = s.to_numpy(allow_copy=False)

assert_zero_copy(s, result)
assert result.tolist() == s.to_list()
@@ -78,7 +78,7 @@ def test_series_to_numpy_numeric_with_nulls(
dtype: PolarsDataType, expected_dtype: npt.DTypeLike
) -> None:
s = pl.Series([1, 2, None], dtype=dtype, strict=False)
-result = s.to_numpy()
+result: npt.NDArray[np.generic] = s.to_numpy()

assert result.tolist()[:-1] == s.to_list()[:-1]
assert np.isnan(result[-1])
@@ -104,7 +104,7 @@ def test_series_to_numpy_temporal_zero_copy(
) -> None:
values = [0, 2_000, 1_000_000]
s = pl.Series(values, dtype=dtype, strict=False)
-result = s.to_numpy(allow_copy=False)
+result: npt.NDArray[np.generic] = s.to_numpy(allow_copy=False)

assert_zero_copy(s, result)
# NumPy tolist returns integers for ns precision
@@ -118,7 +118,7 @@ def test_series_to_numpy_temporal_zero_copy(
def test_series_to_numpy_datetime_with_tz_zero_copy() -> None:
values = [datetime(1970, 1, 1), datetime(2024, 2, 28)]
s = pl.Series(values).dt.convert_time_zone("Europe/Amsterdam").rechunk()
-result = s.to_numpy(allow_copy=False)
+result: npt.NDArray[np.generic] = s.to_numpy(allow_copy=False)

assert_zero_copy(s, result)
assert result.tolist() == values
@@ -129,7 +129,7 @@ def test_series_to_numpy_date() -> None:
values = [date(1970, 1, 1), date(2024, 2, 28)]
s = pl.Series(values)

-result = s.to_numpy()
+result: npt.NDArray[np.generic] = s.to_numpy()

assert s.to_list() == result.tolist()
assert result.dtype == np.dtype("datetime64[D]")
@@ -168,7 +168,7 @@ def test_series_to_numpy_temporal_with_nulls(
) -> None:
values = [0, 2_000, 1_000_000, None]
s = pl.Series(values, dtype=dtype, strict=False)
-result = s.to_numpy()
+result: npt.NDArray[np.generic] = s.to_numpy()

# NumPy tolist returns integers for ns precision
if getattr(s.dtype, "time_unit", None) == "ns":
@@ -182,7 +182,7 @@ def test_series_to_numpy_temporal_with_nulls(
def test_series_to_numpy_datetime_with_tz_with_nulls() -> None:
values = [datetime(1970, 1, 1), datetime(2024, 2, 28), None]
s = pl.Series(values).dt.convert_time_zone("Europe/Amsterdam")
-result = s.to_numpy()
+result: npt.NDArray[np.generic] = s.to_numpy()

assert result.tolist() == values
assert result.dtype == np.dtype("datetime64[us]")
@@ -209,7 +209,7 @@ def test_to_numpy_object_dtypes(
values.append(None)

s = pl.Series(values, dtype=dtype)
-result = s.to_numpy()
+result: npt.NDArray[np.generic] = s.to_numpy()

assert result.tolist() == values
assert result.dtype == np.object_
@@ -218,7 +218,7 @@ def test_to_numpy_object_dtypes(

def test_series_to_numpy_bool() -> None:
s = pl.Series([True, False])
-result = s.to_numpy()
+result: npt.NDArray[np.generic] = s.to_numpy()

assert s.to_list() == result.tolist()
assert result.dtype == np.bool_
@@ -228,7 +228,7 @@ def test_series_to_numpy_bool() -> None:

def test_series_to_numpy_bool_with_nulls() -> None:
s = pl.Series([True, False, None])
-result = s.to_numpy()
+result: npt.NDArray[np.generic] = s.to_numpy()

assert s.to_list() == result.tolist()
assert result.dtype == np.object_
@@ -249,7 +249,7 @@ def test_series_to_numpy_array_of_int() -> None:
def test_series_to_numpy_array_of_str() -> None:
values = [["1", "2", "3"], ["4", "5", "10000"]]
s = pl.Series(values, dtype=pl.Array(pl.String, 3))
-result = s.to_numpy()
+result: npt.NDArray[np.generic] = s.to_numpy()
assert result.tolist() == values
assert result.dtype == np.object_

@@ -343,7 +343,7 @@ def test_to_numpy_chunked() -> None:
s2 = pl.Series([3, 4])
s = pl.concat([s1, s2], rechunk=False)

-result = s.to_numpy()
+result: npt.NDArray[np.generic] = s.to_numpy()

assert result.tolist() == s.to_list()
assert result.dtype == np.int64
@@ -362,7 +362,7 @@ def test_to_numpy_chunked_temporal_nested() -> None:
s2 = pl.Series([[datetime(2022, 1, 1)], [datetime(2023, 1, 1)]], dtype=dtype)
s = pl.concat([s1, s2], rechunk=False)

-result = s.to_numpy()
+result: npt.NDArray[np.generic] = s.to_numpy()

assert result.tolist() == s.to_list()
assert result.dtype == np.dtype("datetime64[us]")
@@ -375,7 +375,7 @@ def test_zero_copy_only_deprecated() -> None:
values = [1, 2]
s = pl.Series([1, 2])
with pytest.deprecated_call():
-result = s.to_numpy(zero_copy_only=True)
+result: npt.NDArray[np.generic] = s.to_numpy(zero_copy_only=True)
assert result.tolist() == values


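Note (illustrative sketch, not from this commit): every hunk in this file adds the same return-type annotation, npt.NDArray[np.generic], which covers an ndarray of any scalar dtype. A minimal standalone example of the pattern, using a hypothetical to_array helper:

import numpy as np
import numpy.typing as npt

def to_array(values: list[int]) -> npt.NDArray[np.generic]:
    # NDArray is parametrized by a scalar type; np.generic is the common base
    # class of all NumPy scalars, so any dtype satisfies the annotation.
    return np.asarray(values)

result: npt.NDArray[np.generic] = to_array([1, 2, 3])
assert result.tolist() == [1, 2, 3]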
55 changes: 30 additions & 25 deletions py-polars/tests/unit/series/test_series.py
@@ -498,35 +498,40 @@ def test_cast() -> None:
pl.Series(["1", "2", "3", "4", "foobar"]).cast(int)


-def test_to_pandas() -> None:
-    for test_data in (
+@pytest.mark.parametrize(
+    "test_data",
+    [
        [1, None, 2],
        ["abc", None, "xyz"],
        [None, datetime.now()],
        [[1, 2], [3, 4], None],
-    ):
-        a = pl.Series("s", test_data)
-        b = a.to_pandas()
-
-        assert a.name == b.name
-        assert b.isnull().sum() == 1
-
-        if a.dtype == pl.List:
-            vals_b = [(None if x is None else x.tolist()) for x in b]
-        else:
-            vals_b = b.replace({np.nan: None}).values.tolist()
-
-        assert vals_b == test_data
-
-        try:
-            c = a.to_pandas(use_pyarrow_extension_array=True)
-            assert a.name == c.name
-            assert c.isnull().sum() == 1
-            vals_c = [None if x is pd.NA else x for x in c.tolist()]
-            assert vals_c == test_data
-        except ModuleNotFoundError:
-            # Skip test if pandas>=1.5.0 or Pyarrow>=8.0.0 is not installed.
-            pass
+    ],
+)
+def test_to_pandas(test_data: list[Any]) -> None:
+    a = pl.Series("s", test_data)
+    b = a.to_pandas()
+
+    assert a.name == b.name
+    assert b.isnull().sum() == 1
+
+    vals_b: list[Any]
+    if a.dtype == pl.List:
+        vals_b = [(None if x is None else x.tolist()) for x in b]
+    else:
+        v = b.replace({np.nan: None}).values.tolist()
+        vals_b = cast(list[Any], v)
+
+    assert vals_b == test_data
+
+    try:
+        c = a.to_pandas(use_pyarrow_extension_array=True)
+        assert a.name == c.name
+        assert c.isnull().sum() == 1
+        vals_c = [None if x is pd.NA else x for x in c.tolist()]
+        assert vals_c == test_data
+    except ModuleNotFoundError:
+        # Skip test if pandas>=1.5.0 or Pyarrow>=8.0.0 is not installed.
+        pass


def test_series_to_list() -> None:
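Note (illustrative sketch, not part of this commit): the refactor above replaces a for-loop over inputs with pytest.mark.parametrize, so each input runs as its own test case and a failure reports the offending value directly. A minimal standalone example of the pattern:

import pytest

@pytest.mark.parametrize(
    "values",
    [
        [1, None, 2],
        ["abc", None, "xyz"],
    ],
)
def test_contains_exactly_one_null(values: list[object]) -> None:
    # Each parametrized case is collected and reported as a separate test.
    assert sum(v is None for v in values) == 1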
