TYP: mostly Hashtable and ArrowExtensionArray #56689

Merged · 9 commits · Jan 2, 2024

Changes from 1 commit
2 changes: 1 addition & 1 deletion pandas/_typing.py
@@ -137,7 +137,7 @@ def __len__(self) -> int:
def __iter__(self) -> Iterator[_T_co]:
...

def index(self, value: Any, /, start: int = 0, stop: int = ...) -> int:
def index(self, value: Any, start: int = ..., stop: int = ..., /) -> int:
...

def count(self, value: Any, /) -> int:
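Note on the _typing.py change above: moving start and stop before the "/" makes every parameter of the protocol's index method positional-only (and gives start an ellipsis default), so concrete sequences whose index implementation rejects keyword arguments, such as list and tuple, still satisfy the protocol structurally. A minimal sketch of the pattern — the HasIndex and first_position names are made up for illustration:

# Illustrative sketch, not part of the diff.
from typing import Any, Protocol


class HasIndex(Protocol):
    # All parameters positional-only, mirroring the protocol method above.
    def index(self, value: Any, start: int = ..., stop: int = ..., /) -> int:
        ...


def first_position(seq: HasIndex, value: Any) -> int:
    return seq.index(value)


first_position([10, 20, 30], 20)   # list.index matches the protocol
first_position((10, 20, 30), 30)   # so does tuple.index
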
2 changes: 1 addition & 1 deletion pandas/core/accessor.py
@@ -54,7 +54,7 @@ class PandasDelegate:
def _delegate_property_get(self, name: str, *args, **kwargs):
raise TypeError(f"You cannot access the property {name}")

def _delegate_property_set(self, name: str, value, *args, **kwargs):
def _delegate_property_set(self, name: str, value, *args, **kwargs) -> None:
raise TypeError(f"The property {name} cannot be set")

def _delegate_method(self, name: str, *args, **kwargs):
2 changes: 1 addition & 1 deletion pandas/core/arrays/arrow/extension_types.py
@@ -145,7 +145,7 @@ def patch_pyarrow() -> None:
return

class ForbiddenExtensionType(pyarrow.ExtensionType):
def __arrow_ext_serialize__(self):
def __arrow_ext_serialize__(self) -> bytes:
return b""

@classmethod
6 changes: 4 additions & 2 deletions pandas/core/arrays/base.py
@@ -266,7 +266,9 @@ class ExtensionArray:
# ------------------------------------------------------------------------

@classmethod
def _from_sequence(cls, scalars, *, dtype: Dtype | None = None, copy: bool = False):
def _from_sequence(
cls, scalars, *, dtype: Dtype | None = None, copy: bool = False
) -> Self:
"""
Construct a new ExtensionArray from a sequence of scalars.

@@ -329,7 +331,7 @@ def _from_scalars(cls, scalars, *, dtype: DtypeObj) -> Self:
@classmethod
def _from_sequence_of_strings(
cls, strings, *, dtype: Dtype | None = None, copy: bool = False
):
) -> Self:
"""
Construct a new ExtensionArray from a sequence of strings.

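Note on the base.py changes above: annotating _from_sequence and _from_sequence_of_strings as returning Self means a subclass that inherits these classmethods gets its own type back instead of the base class. A rough sketch of the pattern, with a made-up MyArray class standing in for ExtensionArray:

# Illustrative sketch, not part of the diff.
from typing import Self  # Python 3.11+; use typing_extensions.Self on older versions


class MyArray:
    def __init__(self, data: list) -> None:
        self.data = data

    @classmethod
    def _from_sequence(cls, scalars, *, copy: bool = False) -> Self:
        # cls is the concrete (sub)class, so the checker infers its type.
        return cls(list(scalars))


class MySubArray(MyArray):
    pass


arr = MySubArray._from_sequence(range(3))  # inferred as MySubArray, not MyArray
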
23 changes: 17 additions & 6 deletions pandas/core/arrays/categorical.py
@@ -597,7 +597,7 @@ def astype(self, dtype: AstypeArg, copy: bool = True) -> ArrayLike:

return result

def to_list(self):
def to_list(self) -> list:
"""
Alias for tolist.
"""
@@ -1017,7 +1017,9 @@ def as_unordered(self) -> Self:
"""
return self.set_ordered(False)

def set_categories(self, new_categories, ordered=None, rename: bool = False):
def set_categories(
self, new_categories, ordered=None, rename: bool = False
) -> Self:
"""
Set the categories to the specified new categories.

@@ -1870,7 +1872,7 @@ def check_for_ordered(self, op) -> None:

def argsort(
self, *, ascending: bool = True, kind: SortKind = "quicksort", **kwargs
):
) -> npt.NDArray[np.intp]:
"""
Return the indices that would sort the Categorical.

@@ -2618,7 +2620,15 @@ def isin(self, values: ArrayLike) -> npt.NDArray[np.bool_]:
code_values = code_values[null_mask | (code_values >= 0)]
return algorithms.isin(self.codes, code_values)

def _replace(self, *, to_replace, value, inplace: bool = False):
@overload
def _replace(self, *, to_replace, value, inplace: Literal[False] = ...) -> Self:
...

@overload
def _replace(self, *, to_replace, value, inplace: Literal[True]) -> None:
...

def _replace(self, *, to_replace, value, inplace: bool = False) -> Self | None:
from pandas import Index

orig_dtype = self.dtype
@@ -2666,6 +2676,7 @@ def _replace(self, *, to_replace, value, inplace: bool = False):
)
if not inplace:
return cat
return None

# ------------------------------------------------------------------------
# String methods interface
@@ -2901,8 +2912,8 @@ def _delegate_property_get(self, name: str):

# error: Signature of "_delegate_property_set" incompatible with supertype
# "PandasDelegate"
def _delegate_property_set(self, name: str, new_values): # type: ignore[override]
return setattr(self._parent, name, new_values)
def _delegate_property_set(self, name: str, new_values) -> None: # type: ignore[override]
setattr(self._parent, name, new_values)

@property
def codes(self) -> Series:
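Note on the categorical.py changes above: the new overloads encode that _replace with inplace=False (the default) returns a new Categorical while inplace=True returns None, and the explicit return None keeps the implementation consistent with the declared Self | None. A small sketch of the same Literal-keyed overload pattern on a hypothetical Box class:

# Illustrative sketch, not part of the diff.
from typing import Literal, overload


class Box:
    def __init__(self, value: int) -> None:
        self.value = value

    @overload
    def replace(self, value: int, *, inplace: Literal[False] = ...) -> "Box":
        ...

    @overload
    def replace(self, value: int, *, inplace: Literal[True]) -> None:
        ...

    def replace(self, value: int, *, inplace: bool = False) -> "Box | None":
        if inplace:
            self.value = value
            return None
        return Box(value)


new_box = Box(1).replace(2)       # type checkers see Box here
Box(1).replace(2, inplace=True)   # and None here
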
42 changes: 17 additions & 25 deletions pandas/core/arrays/datetimelike.py
@@ -344,7 +344,7 @@ def _format_native_types(
"""
raise AbstractMethodError(self)

def _formatter(self, boxed: bool = False):
def _formatter(self, boxed: bool = False) -> Callable[[object], str]:
# TODO: Remove Datetime & DatetimeTZ formatters.
return "'{}'".format

@@ -808,9 +808,8 @@ def isin(self, values: ArrayLike) -> npt.NDArray[np.bool_]:

if self.dtype.kind in "mM":
self = cast("DatetimeArray | TimedeltaArray", self)
# error: Item "ExtensionArray" of "ExtensionArray | ndarray[Any, Any]"
# has no attribute "as_unit"
values = values.as_unit(self.unit) # type: ignore[union-attr]
# error: "DatetimeLikeArrayMixin" has no attribute "as_unit"
values = values.as_unit(self.unit) # type: ignore[attr-defined]

try:
# error: Argument 1 to "_check_compatible_with" of "DatetimeLikeArrayMixin"
@@ -1209,7 +1208,7 @@ def _add_timedeltalike_scalar(self, other):
self, other = self._ensure_matching_resos(other)
return self._add_timedeltalike(other)

def _add_timedelta_arraylike(self, other: TimedeltaArray):
def _add_timedelta_arraylike(self, other: TimedeltaArray) -> Self:
"""
Add a delta of a TimedeltaIndex

@@ -1222,53 +1221,46 @@ def _add_timedelta_arraylike(self, other: TimedeltaArray) -> Self:
if len(self) != len(other):
raise ValueError("cannot add indices of unequal length")

self = cast("DatetimeArray | TimedeltaArray", self)

self, other = self._ensure_matching_resos(other)
self, other = cast(
"DatetimeArray | TimedeltaArray", self
)._ensure_matching_resos(other)
return self._add_timedeltalike(other)

@final
def _add_timedeltalike(self, other: Timedelta | TimedeltaArray):
self = cast("DatetimeArray | TimedeltaArray", self)

def _add_timedeltalike(self, other: Timedelta | TimedeltaArray) -> Self:
other_i8, o_mask = self._get_i8_values_and_mask(other)
new_values = add_overflowsafe(self.asi8, np.asarray(other_i8, dtype="i8"))
res_values = new_values.view(self._ndarray.dtype)

new_freq = self._get_arithmetic_result_freq(other)

# error: Argument "dtype" to "_simple_new" of "DatetimeArray" has
# incompatible type "Union[dtype[datetime64], DatetimeTZDtype,
# dtype[timedelta64]]"; expected "Union[dtype[datetime64], DatetimeTZDtype]"
# error: Unexpected keyword argument "freq" for "_simple_new" of "NDArrayBacked"
return type(self)._simple_new(
res_values, dtype=self.dtype, freq=new_freq # type: ignore[arg-type]
res_values, dtype=self.dtype, freq=new_freq # type: ignore[call-arg]
)

@final
def _add_nat(self):
def _add_nat(self) -> Self:
"""
Add pd.NaT to self
"""
if isinstance(self.dtype, PeriodDtype):
raise TypeError(
f"Cannot add {type(self).__name__} and {type(NaT).__name__}"
)
self = cast("TimedeltaArray | DatetimeArray", self)

# GH#19124 pd.NaT is treated like a timedelta for both timedelta
# and datetime dtypes
result = np.empty(self.shape, dtype=np.int64)
result.fill(iNaT)
result = result.view(self._ndarray.dtype) # preserve reso
# error: Argument "dtype" to "_simple_new" of "DatetimeArray" has
# incompatible type "Union[dtype[timedelta64], dtype[datetime64],
# DatetimeTZDtype]"; expected "Union[dtype[datetime64], DatetimeTZDtype]"
# error: Unexpected keyword argument "freq" for "_simple_new" of "NDArrayBacked"
return type(self)._simple_new(
result, dtype=self.dtype, freq=None # type: ignore[arg-type]
result, dtype=self.dtype, freq=None # type: ignore[call-arg]
)

@final
def _sub_nat(self):
def _sub_nat(self) -> np.ndarray:
"""
Subtract pd.NaT from self
"""
@@ -1313,7 +1305,7 @@ def _sub_periodlike(self, other: Period | PeriodArray) -> npt.NDArray[np.object_
return new_data

@final
def _addsub_object_array(self, other: npt.NDArray[np.object_], op):
def _addsub_object_array(self, other: npt.NDArray[np.object_], op) -> np.ndarray:
"""
Add or subtract array-like of DateOffset objects

@@ -1364,7 +1356,7 @@ def __add__(self, other):

# scalar others
if other is NaT:
result = self._add_nat()
result: np.ndarray | DatetimeLikeArrayMixin = self._add_nat()
elif isinstance(other, (Tick, timedelta, np.timedelta64)):
result = self._add_timedeltalike_scalar(other)
elif isinstance(other, BaseOffset):
@@ -1424,7 +1416,7 @@ def __sub__(self, other):

# scalar others
if other is NaT:
result = self._sub_nat()
result: np.ndarray | DatetimeLikeArrayMixin = self._sub_nat()
elif isinstance(other, (Tick, timedelta, np.timedelta64)):
result = self._add_timedeltalike_scalar(-other)
elif isinstance(other, BaseOffset):
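Note on the datetimelike.py changes above: _add_nat now returns Self and _sub_nat returns np.ndarray, so in __add__/__sub__ the result variable is annotated with the union at its first assignment; otherwise mypy would pin result to the type of the first branch and reject the others. The type: ignore comments were also switched to the error code mypy actually reports for the unexpected freq keyword (call-arg rather than arg-type). A minimal sketch of the first-assignment annotation, using a hypothetical pick function:

# Illustrative sketch, not part of the diff.
import numpy as np


def pick(flag: bool) -> "np.ndarray | list[int]":
    # Without the annotation, mypy infers ndarray from the first assignment
    # and then rejects the list assigned in the other branch.
    result: np.ndarray | list[int] = np.arange(3)
    if flag:
        result = [1, 2, 3]
    return result
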
24 changes: 16 additions & 8 deletions pandas/core/arrays/datetimes.py
@@ -7,6 +7,8 @@
)
from typing import (
TYPE_CHECKING,
Generator,
TypeVar,
cast,
overload,
)
@@ -86,9 +88,15 @@
npt,
)

from pandas import DataFrame
from pandas import (
DataFrame,
Timedelta,
)
from pandas.core.arrays import PeriodArray

_TimestampNoneT1 = TypeVar("_TimestampNoneT1", Timestamp, None)
_TimestampNoneT2 = TypeVar("_TimestampNoneT2", Timestamp, None)


_ITER_CHUNKSIZE = 10_000

@@ -326,7 +334,7 @@ def _simple_new( # type: ignore[override]
return result

@classmethod
def _from_sequence(cls, scalars, *, dtype=None, copy: bool = False):
def _from_sequence(cls, scalars, *, dtype=None, copy: bool = False) -> Self:
return cls._from_sequence_not_strict(scalars, dtype=dtype, copy=copy)

@classmethod
@@ -2125,7 +2133,7 @@ def std(
ddof: int = 1,
keepdims: bool = False,
skipna: bool = True,
):
) -> Timedelta:
"""
Return sample standard deviation over requested axis.

@@ -2191,7 +2199,7 @@ def _sequence_to_dt64(
yearfirst: bool = False,
ambiguous: TimeAmbiguous = "raise",
out_unit: str | None = None,
):
) -> tuple[np.ndarray, tzinfo | None]:
"""
Parameters
----------
@@ -2360,7 +2368,7 @@ def objects_to_datetime64(
errors: DateTimeErrorChoices = "raise",
allow_object: bool = False,
out_unit: str = "ns",
):
) -> tuple[np.ndarray, tzinfo | None]:
"""
Convert data to array of timestamps.

@@ -2665,8 +2673,8 @@ def _infer_tz_from_endpoints(


def _maybe_normalize_endpoints(
start: Timestamp | None, end: Timestamp | None, normalize: bool
):
start: _TimestampNoneT1, end: _TimestampNoneT2, normalize: bool
) -> tuple[_TimestampNoneT1, _TimestampNoneT2]:
if normalize:
if start is not None:
start = start.normalize()
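
Note on the hunk above: _maybe_normalize_endpoints now uses TypeVars constrained to Timestamp or None, so the returned tuple preserves, per argument, whether a Timestamp or None was passed in. A rough sketch of the constrained-TypeVar pattern, using datetime in place of Timestamp so it stands alone (normalize_endpoints and the TypeVar names are made up):

# Illustrative sketch, not part of the diff.
from datetime import datetime
from typing import TypeVar

_StartT = TypeVar("_StartT", datetime, None)
_EndT = TypeVar("_EndT", datetime, None)


def normalize_endpoints(start: _StartT, end: _EndT) -> tuple[_StartT, _EndT]:
    # Truncate each endpoint to midnight, leaving None untouched.
    if isinstance(start, datetime):
        start = start.replace(hour=0, minute=0, second=0, microsecond=0)
    if isinstance(end, datetime):
        end = end.replace(hour=0, minute=0, second=0, microsecond=0)
    return start, end


s, e = normalize_endpoints(datetime(2024, 1, 2, 15, 30), None)  # (datetime, None)
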
@@ -2717,7 +2725,7 @@ def _generate_range(
offset: BaseOffset,
*,
unit: str,
):
) -> Generator[Timestamp, None, None]:
"""
Generates a sequence of dates corresponding to the specified time
offset. Similar to dateutil.rrule except uses pandas DateOffset
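Note on the _generate_range change above: the function only yields Timestamp objects and neither receives sent values nor returns one, hence Generator[Timestamp, None, None] (the diff imports Generator from typing; collections.abc.Generator or Iterator[Timestamp] would express the same contract). A small self-contained sketch with date standing in for Timestamp:

# Illustrative sketch, not part of the diff.
from collections.abc import Generator
from datetime import date, timedelta


def generate_range(start: date, periods: int, step: timedelta) -> Generator[date, None, None]:
    # Yield `periods` dates spaced `step` apart, starting at `start`.
    current = start
    for _ in range(periods):
        yield current
        current = current + step


list(generate_range(date(2024, 1, 1), 3, timedelta(days=1)))
# [date(2024, 1, 1), date(2024, 1, 2), date(2024, 1, 3)]
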
13 changes: 9 additions & 4 deletions pandas/core/arrays/interval.py
@@ -8,6 +8,7 @@
import textwrap
from typing import (
TYPE_CHECKING,
Callable,
Literal,
Union,
overload,
@@ -232,7 +233,7 @@ def __new__(
dtype: Dtype | None = None,
copy: bool = False,
verify_integrity: bool = True,
):
) -> Self:
data = extract_array(data, extract_numpy=True)

if isinstance(data, cls):
@@ -1241,7 +1242,7 @@ def value_counts(self, dropna: bool = True) -> Series:
# ---------------------------------------------------------------------
# Rendering Methods

def _formatter(self, boxed: bool = False):
def _formatter(self, boxed: bool = False) -> Callable[[object], str]:
# returning 'str' here causes us to render as e.g. "(0, 1]" instead of
# "Interval(0, 1, closed='right')"
return str
@@ -1842,9 +1843,13 @@ def _from_combined(self, combined: np.ndarray) -> IntervalArray:
dtype = self._left.dtype
if needs_i8_conversion(dtype):
assert isinstance(self._left, (DatetimeArray, TimedeltaArray))
new_left = type(self._left)._from_sequence(nc[:, 0], dtype=dtype)
new_left: DatetimeArray | TimedeltaArray | np.ndarray = type(
self._left
)._from_sequence(nc[:, 0], dtype=dtype)
assert isinstance(self._right, (DatetimeArray, TimedeltaArray))
new_right = type(self._right)._from_sequence(nc[:, 1], dtype=dtype)
new_right: DatetimeArray | TimedeltaArray | np.ndarray = type(
self._right
)._from_sequence(nc[:, 1], dtype=dtype)
else:
assert isinstance(dtype, np.dtype)
new_left = nc[:, 0].view(dtype)
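Note on the _formatter changes (here and in datetimelike.py above): the method now advertises that it returns a formatting callable, Callable[[object], str]; both str itself and a bound "'{}'".format fit that shape. A tiny sketch with a hypothetical Quoted class:

# Illustrative sketch, not part of the diff.
from typing import Callable


class Quoted:
    def _formatter(self, boxed: bool = False) -> Callable[[object], str]:
        if boxed:
            return "'{}'".format  # quote values when rendered inside a container
        return str


print(Quoted()._formatter(boxed=True)(3.14))   # '3.14'
print(Quoted()._formatter()(3.14))             # 3.14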