diff --git a/pandas-stubs/_libs/properties.pyi b/pandas-stubs/_libs/properties.pyi index 761a00cea..f3351308b 100644 --- a/pandas-stubs/_libs/properties.pyi +++ b/pandas-stubs/_libs/properties.pyi @@ -5,7 +5,10 @@ class CachedProperty: def __get__(self, obj, typ): ... def __set__(self, obj, value) -> None: ... -cache_readonly: CachedProperty = ... +# note: this is a lie to make type checkers happy (they special +# case property). cache_readonly uses attribute names similar to +# property (fget) but it does not provide fset and fdel. +cache_readonly = property class AxisProperty: def __init__(self, axis: int = ..., doc: str = ...) -> None: ... diff --git a/pandas-stubs/_typing.pyi b/pandas-stubs/_typing.pyi index d8f655b50..89a7cbe72 100644 --- a/pandas-stubs/_typing.pyi +++ b/pandas-stubs/_typing.pyi @@ -48,6 +48,11 @@ from pandas.core.dtypes.dtypes import ( from pandas.io.formats.format import EngFormatter +# `Incomplete` is equivalent to `Any`. Use it to annotate symbols that you don't +# know the type of yet and that should be changed in the future. Use `Any` only +# where it is the only acceptable type. +Incomplete: TypeAlias = Any + ArrayLike: TypeAlias = ExtensionArray | np.ndarray AnyArrayLike: TypeAlias = Index | Series | np.ndarray PythonScalar: TypeAlias = str | bool | complex @@ -80,6 +85,10 @@ class FulldatetimeDict(YearMonthDayDict, total=False): us: DatetimeDictArg ns: DatetimeDictArg +CorrelationMethod: TypeAlias = ( + Literal["pearson", "kendall", "spearman"] + | Callable[[np.ndarray, np.ndarray], float] +) # dtypes NpDtype: TypeAlias = str | np.dtype[np.generic] | type[str | complex | bool | object] Dtype: TypeAlias = ExtensionDtype | NpDtype @@ -444,6 +453,7 @@ class SequenceNotStr(Protocol[_T_co]): IndexLabel: TypeAlias = Hashable | Sequence[Hashable] Label: TypeAlias = Hashable | None Level: TypeAlias = Hashable | int +Shape: TypeAlias = tuple[int, ...] Suffixes: TypeAlias = tuple[str | None, str | None] Ordered: TypeAlias = bool | None JSONSerializable: TypeAlias = PythonScalar | list | dict @@ -469,8 +479,11 @@ AggFuncTypeSeriesToFrame: TypeAlias = list[AggFuncTypeBase] | AggFuncTypeDictSer AggFuncTypeFrame: TypeAlias = ( AggFuncTypeBase | list[AggFuncTypeBase] | AggFuncTypeDictFrame ) +AggFuncTypeDict: TypeAlias = AggFuncTypeDictSeries | AggFuncTypeDictFrame +AggFuncType: TypeAlias = AggFuncTypeBase | list[AggFuncTypeBase] | AggFuncTypeDict num: TypeAlias = complex +AxisInt: TypeAlias = int AxisIndex: TypeAlias = Literal["index", 0] AxisColumn: TypeAlias = Literal["columns", 1] Axis: TypeAlias = AxisIndex | AxisColumn @@ -563,6 +576,9 @@ IndexT = TypeVar("IndexT", bound=Index) IntervalT = TypeVar("IntervalT", bound=Interval) IntervalClosedType: TypeAlias = Literal["left", "right", "both", "neither"] +ScalarIndexer: TypeAlias = int | np.integer +SequenceIndexer: TypeAlias = slice | list[int] | np.ndarray +PositionalIndexer: TypeAlias = ScalarIndexer | SequenceIndexer TakeIndexer: TypeAlias = Sequence[int] | Sequence[np.integer] | npt.NDArray[np.integer] IgnoreRaiseCoerce: TypeAlias = Literal["ignore", "raise", "coerce"] @@ -758,5 +774,9 @@ RandomState: TypeAlias = ( | np.random.BitGenerator | np.random.RandomState ) +Frequency: TypeAlias = str | BaseOffset +TimeGrouperOrigin: TypeAlias = ( + Timestamp | Literal["epoch", "start", "start_day", "end", "end_day"] +) __all__ = ["npt", "type_t"] diff --git a/pandas-stubs/core/base.pyi b/pandas-stubs/core/base.pyi index fb31f13f6..5b30bd13d 100644 --- a/pandas-stubs/core/base.pyi +++ b/pandas-stubs/core/base.pyi @@ -1,7 +1,12 @@ -from collections.abc import Iterator +from collections.abc import ( + Hashable, + Iterator, +) from typing import ( + Any, Generic, Literal, + final, ) import numpy as np @@ -19,13 +24,19 @@ from pandas._typing import ( Scalar, npt, ) +from pandas.util._decorators import cache_readonly class NoNewAttributesMixin: - def __setattr__(self, key, value) -> None: ... + def __setattr__(self, key: str, value: Any) -> None: ... class SelectionMixin(Generic[NDFrameT]): + obj: NDFrameT + exclusions: frozenset[Hashable] + @final + @cache_readonly def ndim(self) -> int: ... def __getitem__(self, key): ... + def aggregate(self, func, *args, **kwargs): ... class IndexOpsMixin(OpsMixin, Generic[S1]): __array_priority__: int = ... diff --git a/pandas-stubs/core/frame.pyi b/pandas-stubs/core/frame.pyi index ff34e31ad..45f02ca25 100644 --- a/pandas-stubs/core/frame.pyi +++ b/pandas-stubs/core/frame.pyi @@ -41,7 +41,6 @@ from pandas.core.indexing import ( _LocIndexer, ) from pandas.core.interchange.dataframe_protocol import DataFrame as DataFrameXchg -from pandas.core.resample import Resampler from pandas.core.series import Series from pandas.core.window import ( Expanding, @@ -54,6 +53,7 @@ from pandas.core.window.rolling import ( from typing_extensions import Self import xarray as xr +from pandas._libs.lib import NoDefault from pandas._libs.missing import NAType from pandas._libs.tslibs import BaseOffset from pandas._libs.tslibs.nattype import NaTType @@ -1006,104 +1006,96 @@ class DataFrame(NDFrame, OpsMixin): def groupby( self, by: Scalar, - axis: AxisIndex = ..., + axis: AxisIndex | NoDefault = ..., level: IndexLabel | None = ..., as_index: _bool = ..., sort: _bool = ..., group_keys: _bool = ..., - squeeze: _bool = ..., - observed: _bool = ..., + observed: _bool | NoDefault = ..., dropna: _bool = ..., ) -> DataFrameGroupBy[Scalar]: ... @overload def groupby( self, by: DatetimeIndex, - axis: AxisIndex = ..., + axis: AxisIndex | NoDefault = ..., level: IndexLabel | None = ..., as_index: _bool = ..., sort: _bool = ..., group_keys: _bool = ..., - squeeze: _bool = ..., - observed: _bool = ..., + observed: _bool | NoDefault = ..., dropna: _bool = ..., ) -> DataFrameGroupBy[Timestamp]: ... @overload def groupby( self, by: TimedeltaIndex, - axis: AxisIndex = ..., + axis: AxisIndex | NoDefault = ..., level: IndexLabel | None = ..., as_index: _bool = ..., sort: _bool = ..., group_keys: _bool = ..., - squeeze: _bool = ..., - observed: _bool = ..., + observed: _bool | NoDefault = ..., dropna: _bool = ..., ) -> DataFrameGroupBy[Timedelta]: ... @overload def groupby( self, by: PeriodIndex, - axis: AxisIndex = ..., + axis: AxisIndex | NoDefault = ..., level: IndexLabel | None = ..., as_index: _bool = ..., sort: _bool = ..., group_keys: _bool = ..., - squeeze: _bool = ..., - observed: _bool = ..., + observed: _bool | NoDefault = ..., dropna: _bool = ..., ) -> DataFrameGroupBy[Period]: ... @overload def groupby( self, by: IntervalIndex[IntervalT], - axis: AxisIndex = ..., + axis: AxisIndex | NoDefault = ..., level: IndexLabel | None = ..., as_index: _bool = ..., sort: _bool = ..., group_keys: _bool = ..., - squeeze: _bool = ..., - observed: _bool = ..., + observed: _bool | NoDefault = ..., dropna: _bool = ..., ) -> DataFrameGroupBy[IntervalT]: ... @overload def groupby( self, by: MultiIndex | GroupByObjectNonScalar | None = ..., - axis: AxisIndex = ..., + axis: AxisIndex | NoDefault = ..., level: IndexLabel | None = ..., as_index: _bool = ..., sort: _bool = ..., group_keys: _bool = ..., - squeeze: _bool = ..., - observed: _bool = ..., + observed: _bool | NoDefault = ..., dropna: _bool = ..., ) -> DataFrameGroupBy[tuple]: ... @overload def groupby( self, by: Series[SeriesByT], - axis: AxisIndex = ..., + axis: AxisIndex | NoDefault = ..., level: IndexLabel | None = ..., as_index: _bool = ..., sort: _bool = ..., group_keys: _bool = ..., - squeeze: _bool = ..., - observed: _bool = ..., + observed: _bool | NoDefault = ..., dropna: _bool = ..., ) -> DataFrameGroupBy[SeriesByT]: ... @overload def groupby( self, by: CategoricalIndex | Index | Series, - axis: AxisIndex = ..., + axis: AxisIndex | NoDefault = ..., level: IndexLabel | None = ..., as_index: _bool = ..., sort: _bool = ..., group_keys: _bool = ..., - squeeze: _bool = ..., - observed: _bool = ..., + observed: _bool | NoDefault = ..., dropna: _bool = ..., ) -> DataFrameGroupBy[Any]: ... def pivot( @@ -1921,21 +1913,6 @@ class DataFrame(NDFrame, OpsMixin): *, inplace: Literal[False] = ..., ) -> DataFrame: ... - def resample( - self, - rule, - axis: Axis = ..., - closed: _str | None = ..., - label: _str | None = ..., - convention: TimestampConvention = ..., - kind: Literal["timestamp", "period"] | None = ..., - on: _str | None = ..., - level: Level | None = ..., - origin: Timestamp - | Literal["epoch", "start", "start_day", "end", "end_day"] = ..., - offset: dt.timedelta | Timedelta | _str | None = ..., - group_keys: _bool = ..., - ) -> Resampler[DataFrame]: ... def rfloordiv( self, other, diff --git a/pandas-stubs/core/generic.pyi b/pandas-stubs/core/generic.pyi index e491b918f..38a298a2d 100644 --- a/pandas-stubs/core/generic.pyi +++ b/pandas-stubs/core/generic.pyi @@ -5,6 +5,7 @@ from collections.abc import ( Mapping, Sequence, ) +import datetime as dt import sqlite3 from typing import ( Any, @@ -17,6 +18,7 @@ from typing import ( import numpy as np from pandas import Index import pandas.core.indexing as indexing +from pandas.core.resample import DatetimeIndexResampler from pandas.core.series import Series import sqlalchemy.engine from typing_extensions import ( @@ -24,6 +26,7 @@ from typing_extensions import ( Self, ) +from pandas._libs.lib import NoDefault from pandas._typing import ( S1, ArrayLike, @@ -37,6 +40,7 @@ from pandas._typing import ( FilePath, FileWriteMode, FillnaOptions, + Frequency, HashableT1, HashableT2, HDFCompLib, @@ -48,6 +52,10 @@ from pandas._typing import ( SortKind, StorageOptions, T, + TimedeltaConvertibleTypes, + TimeGrouperOrigin, + TimestampConvention, + TimestampConvertibleTypes, WriteBuffer, ) @@ -432,6 +440,21 @@ class NDFrame(indexing.IndexingMixin): end_time, axis=..., ) -> Self: ... + @final + def resample( + self, + rule: Frequency | dt.timedelta, + axis: Axis | NoDefault = ..., + closed: Literal["right", "left"] | None = ..., + label: Literal["right", "left"] | None = ..., + convention: TimestampConvention = ..., + kind: Literal["period", "timestamp"] | None = ..., + on: Level | None = ..., + level: Level | None = ..., + origin: TimeGrouperOrigin | TimestampConvertibleTypes = ..., + offset: TimedeltaConvertibleTypes | None = ..., + group_keys: _bool = ..., + ) -> DatetimeIndexResampler[Self]: ... def first(self, offset) -> Self: ... def last(self, offset) -> Self: ... def rank( diff --git a/pandas-stubs/core/groupby/__init__.pyi b/pandas-stubs/core/groupby/__init__.pyi index 178542b2f..27ed3b004 100644 --- a/pandas-stubs/core/groupby/__init__.pyi +++ b/pandas-stubs/core/groupby/__init__.pyi @@ -1,2 +1,15 @@ -from pandas.core.groupby.generic import NamedAgg as NamedAgg +from pandas.core.groupby.generic import ( + DataFrameGroupBy as DataFrameGroupBy, + NamedAgg as NamedAgg, + SeriesGroupBy as SeriesGroupBy, +) +from pandas.core.groupby.groupby import GroupBy as GroupBy from pandas.core.groupby.grouper import Grouper as Grouper + +__all__ = [ + "DataFrameGroupBy", + "NamedAgg", + "SeriesGroupBy", + "GroupBy", + "Grouper", +] diff --git a/pandas-stubs/core/groupby/base.pyi b/pandas-stubs/core/groupby/base.pyi index 199eb6b3d..f56b6a324 100644 --- a/pandas-stubs/core/groupby/base.pyi +++ b/pandas-stubs/core/groupby/base.pyi @@ -1,4 +1,3 @@ -# from pandas.core.dtypes.common import is_list_like as is_list_like, is_scalar as is_scalar from collections.abc import Hashable import dataclasses diff --git a/pandas-stubs/core/groupby/categorical.pyi b/pandas-stubs/core/groupby/categorical.pyi index 002de503b..e69de29bb 100644 --- a/pandas-stubs/core/groupby/categorical.pyi +++ b/pandas-stubs/core/groupby/categorical.pyi @@ -1,6 +0,0 @@ -from pandas.core.arrays.categorical import ( # , CategoricalDtype as CategoricalDtype - Categorical, -) - -def recode_for_groupby(c: Categorical, sort: bool, observed: bool): ... -def recode_from_groupby(c: Categorical, sort: bool, ci): ... diff --git a/pandas-stubs/core/groupby/generic.pyi b/pandas-stubs/core/groupby/generic.pyi index 878ea633d..e14c104eb 100644 --- a/pandas-stubs/core/groupby/generic.pyi +++ b/pandas-stubs/core/groupby/generic.pyi @@ -1,7 +1,9 @@ from collections.abc import ( Callable, + Hashable, Iterable, Iterator, + Mapping, Sequence, ) from typing import ( @@ -9,56 +11,88 @@ from typing import ( Generic, Literal, NamedTuple, + final, overload, ) -from matplotlib.axes import ( - Axes as PlotAxes, - SubplotBase as AxesSubplot, -) +from matplotlib.axes import Axes as PlotAxes +import numpy as np from pandas.core.frame import DataFrame -from pandas.core.generic import NDFrame -from pandas.core.groupby.groupby import ( # , get_groupby as get_groupby - GroupBy as GroupBy, +from pandas.core.groupby.groupby import ( + GroupBy, + GroupByPlot, ) -from pandas.core.groupby.grouper import Grouper from pandas.core.series import Series -from typing_extensions import TypeAlias +from typing_extensions import ( + Self, + TypeAlias, +) +from pandas._libs.lib import NoDefault from pandas._typing import ( S1, AggFuncTypeBase, AggFuncTypeFrame, + ArrayLike, Axis, ByT, + CorrelationMethod, + Dtype, + IndexLabel, Level, ListLike, - RandomState, Scalar, + TakeIndexer, + WindowingEngine, + WindowingEngineKwargs, ) AggScalar: TypeAlias = str | Callable[..., Any] -ScalarResult = ... class NamedAgg(NamedTuple): - column: str = ... - aggfunc: AggScalar = ... - -def generate_property(name: str, klass: type[NDFrame]): ... + column: str + aggfunc: AggScalar -class SeriesGroupBy(GroupBy, Generic[S1, ByT]): - def any(self, skipna: bool = ...) -> Series[bool]: ... - def all(self, skipna: bool = ...) -> Series[bool]: ... - def apply(self, func, *args, **kwargs) -> Series: ... +class SeriesGroupBy(GroupBy[Series[S1]], Generic[S1, ByT]): @overload - def aggregate(self, func: list[AggFuncTypeBase], *args, **kwargs) -> DataFrame: ... + def aggregate( + self, + func: list[AggFuncTypeBase], + *args, + engine: WindowingEngine = ..., + engine_kwargs: WindowingEngineKwargs = ..., + **kwargs, + ) -> DataFrame: ... @overload - def aggregate(self, func: AggFuncTypeBase, *args, **kwargs) -> Series: ... + def aggregate( + self, + func: AggFuncTypeBase | None = ..., + *args, + engine: WindowingEngine = ..., + engine_kwargs: WindowingEngineKwargs = ..., + **kwargs, + ) -> Series: ... agg = aggregate - def transform(self, func: Callable | str, *args, **kwargs) -> Series: ... - def filter(self, func, dropna: bool = ..., *args, **kwargs): ... - def nunique(self, dropna: bool = ...) -> Series: ... - def describe(self, **kwargs) -> DataFrame: ... + def transform( + self, + func: Callable | str, + *args, + engine: WindowingEngine = ..., + engine_kwargs: WindowingEngineKwargs = ..., + **kwargs, + ) -> Series: ... + def filter( + self, func: Callable | str, dropna: bool = ..., *args, **kwargs + ) -> Series: ... + def nunique(self, dropna: bool = ...) -> Series[int]: ... + # describe delegates to super() method but here it has keyword-only parameters + def describe( # type: ignore[override] # pyright: ignore[reportIncompatibleMethodOverride] + self, + *, + percentiles: Iterable[float] | None = ..., + include: Literal["all"] | list[Dtype] | None = ..., + exclude: list[Dtype] | None = ..., + ) -> DataFrame: ... @overload def value_counts( self, @@ -77,52 +111,56 @@ class SeriesGroupBy(GroupBy, Generic[S1, ByT]): bins=..., dropna: bool = ..., ) -> Series[float]: ... - def count(self) -> Series[int]: ... - def pct_change( - self, - periods: int = ..., - fill_method: str = ..., - limit=..., - freq=..., - axis: Axis = ..., - ) -> Series[float]: ... - # Overrides and others from original pylance stubs - @property - def is_monotonic_increasing(self) -> bool: ... - @property - def is_monotonic_decreasing(self) -> bool: ... - def bfill(self, limit: int | None = ...) -> Series[S1]: ... - def cummax(self, axis: Axis = ..., **kwargs) -> Series[S1]: ... - def cummin(self, axis: Axis = ..., **kwargs) -> Series[S1]: ... - def cumprod(self, axis: Axis = ..., **kwargs) -> Series[S1]: ... - def cumsum(self, axis: Axis = ..., **kwargs) -> Series[S1]: ... - def ffill(self, limit: int | None = ...) -> Series[S1]: ... - def first(self, **kwargs) -> Series[S1]: ... - def head(self, n: int = ...) -> Series[S1]: ... - def last(self, **kwargs) -> Series[S1]: ... - def max(self, **kwargs) -> Series[S1]: ... - def mean(self, **kwargs) -> Series[S1]: ... - def median(self, **kwargs) -> Series[S1]: ... - def min(self, **kwargs) -> Series[S1]: ... - def nlargest(self, n: int = ..., keep: str = ...) -> Series[S1]: ... - def nsmallest(self, n: int = ..., keep: str = ...) -> Series[S1]: ... - def nth(self, n: int | Sequence[int], dropna: str | None = ...) -> Series[S1]: ... - def sum( + def fillna( + self, + value: ( + Scalar | ArrayLike | Series | DataFrame | Mapping[Hashable, Scalar] | None + ) = ..., + method: Literal["bfill", "ffill"] | None = ..., + axis: Axis | None | NoDefault = ..., + inplace: bool = ..., + limit: int | None = ..., + downcast: dict | None | NoDefault = ..., + ) -> Series[S1] | None: ... + def take( self, + indices: TakeIndexer, + axis: Axis | NoDefault = ..., + **kwargs, + ) -> Series[S1]: ... + def skew( + self, + axis: Axis | NoDefault = ..., + skipna: bool = ..., numeric_only: bool = ..., - min_count: int = ..., - engine=..., - engine_kwargs=..., + **kwargs, + ) -> Series: ... + @property + def plot(self) -> GroupByPlot[Self]: ... + def nlargest( + self, n: int = ..., keep: Literal["first", "last", "all"] = ... ) -> Series[S1]: ... - def prod(self, numeric_only: bool = ..., min_count: int = ...) -> Series[S1]: ... - def sem(self, ddof: int = ..., numeric_only: bool = ...) -> Series[float]: ... - def std(self, ddof: int = ..., numeric_only: bool = ...) -> Series[float]: ... - def var(self, ddof: int = ..., numeric_only: bool = ...) -> Series[float]: ... - def tail(self, n: int = ...) -> Series[S1]: ... - def unique(self) -> Series: ... + def nsmallest( + self, n: int = ..., keep: Literal["first", "last", "all"] = ... + ) -> Series[S1]: ... + def idxmin(self, axis: Axis | NoDefault = ..., skipna: bool = ...) -> Series: ... + def idxmax(self, axis: Axis | NoDefault = ..., skipna: bool = ...) -> Series: ... + def corr( + self, + other: Series, + method: CorrelationMethod = ..., + min_periods: int | None = ..., + ) -> Series: ... + def cov( + self, other: Series, min_periods: int | None = ..., ddof: int | None = ... + ) -> Series: ... + @property + def is_monotonic_increasing(self) -> Series[bool]: ... + @property + def is_monotonic_decreasing(self) -> Series[bool]: ... def hist( self, - by=..., + by: IndexLabel | None = ..., ax: PlotAxes | None = ..., grid: bool = ..., xlabelsize: int | None = ..., @@ -130,21 +168,23 @@ class SeriesGroupBy(GroupBy, Generic[S1, ByT]): ylabelsize: int | None = ..., yrot: float | None = ..., figsize: tuple[float, float] | None = ..., - bins: int | Sequence = ..., + bins: int | Sequence[int] = ..., backend: str | None = ..., legend: bool = ..., **kwargs, - ) -> AxesSubplot: ... - def idxmax(self, axis: Axis = ..., skipna: bool = ...) -> Series: ... - def idxmin(self, axis: Axis = ..., skipna: bool = ...) -> Series: ... - def __iter__(self) -> Iterator[tuple[ByT, Series[S1]]]: ... - def diff(self, periods: int = ..., axis: Axis = ...) -> Series: ... + ) -> Series: ... # Series[Axes] but this is not allowed + @property + def dtype(self) -> Series: ... + def unique(self) -> Series: ... + # Overrides that provide more precise return types over the GroupBy class + @final # type: ignore[misc] + def __iter__( # pyright: ignore[reportIncompatibleMethodOverride] + self, + ) -> Iterator[tuple[ByT, Series[S1]]]: ... -class DataFrameGroupBy(GroupBy, Generic[ByT]): - def any(self, skipna: bool = ...) -> DataFrame: ... - def all(self, skipna: bool = ...) -> DataFrame: ... +class DataFrameGroupBy(GroupBy[DataFrame], Generic[ByT]): # error: Overload 3 for "apply" will never be used because its parameters overlap overload 1 - @overload + @overload # type: ignore[override] def apply( # type: ignore[overload-overlap] self, func: Callable[[DataFrame], Scalar | list | dict], @@ -159,7 +199,7 @@ class DataFrameGroupBy(GroupBy, Generic[ByT]): **kwargs, ) -> DataFrame: ... @overload - def apply( # pyright: ignore[reportOverlappingOverload] + def apply( # pyright: ignore[reportOverlappingOverload,reportIncompatibleMethodOverride] self, func: Callable[[Iterable], float], *args, @@ -167,26 +207,55 @@ class DataFrameGroupBy(GroupBy, Generic[ByT]): ) -> DataFrame: ... # error: overload 1 overlaps overload 2 because of different return types @overload - def aggregate(self, arg: Literal["size"]) -> Series: ... # type: ignore[overload-overlap] # pyright: ignore[reportOverlappingOverload] + def aggregate(self, func: Literal["size"]) -> Series: ... # type: ignore[overload-overlap] # pyright: ignore[reportOverlappingOverload] @overload - def aggregate(self, arg: AggFuncTypeFrame = ..., *args, **kwargs) -> DataFrame: ... + def aggregate( + self, + func: AggFuncTypeFrame | None = ..., + *args, + engine: WindowingEngine = ..., + engine_kwargs: WindowingEngineKwargs = ..., + **kwargs, + ) -> DataFrame: ... agg = aggregate - def transform(self, func: Callable | str, *args, **kwargs) -> DataFrame: ... + def transform( + self, + func: Callable | str, + *args, + engine: WindowingEngine = ..., + engine_kwargs: WindowingEngineKwargs = ..., + **kwargs, + ) -> DataFrame: ... def filter( self, func: Callable, dropna: bool = ..., *args, **kwargs ) -> DataFrame: ... - def nunique(self, dropna: bool = ...) -> DataFrame: ... @overload - def __getitem__(self, item: str) -> SeriesGroupBy[Any, ByT]: ... + def __getitem__( # type: ignore[overload-overlap] + self, key: Scalar | Hashable | tuple[Hashable, ...] + ) -> SeriesGroupBy[Any, ByT]: ... + @overload + def __getitem__( # pyright: ignore[reportIncompatibleMethodOverride] + self, key: Iterable[Hashable] | slice + ) -> DataFrameGroupBy[ByT]: ... + def nunique(self, dropna: bool = ...) -> DataFrame: ... + def idxmax( + self, + axis: Axis | None | NoDefault = ..., + skipna: bool = ..., + numeric_only: bool = ..., + ) -> DataFrame: ... + def idxmin( + self, + axis: Axis | None | NoDefault = ..., + skipna: bool = ..., + numeric_only: bool = ..., + ) -> DataFrame: ... @overload - def __getitem__(self, item: list[str]) -> DataFrameGroupBy[ByT]: ... - def count(self) -> DataFrame: ... def boxplot( self, - grouped: DataFrame, - subplots: bool = ..., - column: str | Sequence | None = ..., - fontsize: float | str = ..., + subplots: Literal[True] = ..., + column: IndexLabel | None = ..., + fontsize: float | str | None = ..., rot: float = ..., grid: bool = ..., ax: PlotAxes | None = ..., @@ -194,108 +263,77 @@ class DataFrameGroupBy(GroupBy, Generic[ByT]): layout: tuple[int, int] | None = ..., sharex: bool = ..., sharey: bool = ..., - bins: int | Sequence = ..., backend: str | None = ..., **kwargs, - ) -> AxesSubplot | Sequence[AxesSubplot]: ... - # Overrides and others from original pylance stubs - # These are "properties" but properties can't have all these arguments?! - def corr(self, method: str | Callable, min_periods: int = ...) -> DataFrame: ... - def cov(self, min_periods: int = ...) -> DataFrame: ... - def diff(self, periods: int = ..., axis: Axis = ...) -> DataFrame: ... - def bfill(self, limit: int | None = ...) -> DataFrame: ... - def corrwith( - self, - other: DataFrame, - axis: Axis = ..., - drop: bool = ..., - method: str = ..., - ) -> Series: ... - def cummax( - self, axis: Axis = ..., numeric_only: bool = ..., **kwargs - ) -> DataFrame: ... - def cummin( - self, axis: Axis = ..., numeric_only: bool = ..., **kwargs - ) -> DataFrame: ... - def cumprod(self, axis: Axis = ..., **kwargs) -> DataFrame: ... - def cumsum(self, axis: Axis = ..., **kwargs) -> DataFrame: ... - def describe(self, **kwargs) -> DataFrame: ... - def ffill(self, limit: int | None = ...) -> DataFrame: ... - def fillna( - self, - value, - method: str | None = ..., - axis: Axis = ..., - inplace: Literal[False] = ..., - limit: int | None = ..., - downcast: dict | None = ..., - ) -> DataFrame: ... - def first(self, **kwargs) -> DataFrame: ... - def head(self, n: int = ...) -> DataFrame: ... - def hist( + ) -> Series: ... # Series[PlotAxes] but this is not allowed + @overload + def boxplot( self, - data: DataFrame, - column: str | Sequence | None = ..., - by=..., + subplots: Literal[False], + column: IndexLabel | None = ..., + fontsize: float | str | None = ..., + rot: float = ..., grid: bool = ..., - xlabelsize: int | None = ..., - xrot: float | None = ..., - ylabelsize: int | None = ..., - yrot: float | None = ..., ax: PlotAxes | None = ..., + figsize: tuple[float, float] | None = ..., + layout: tuple[int, int] | None = ..., sharex: bool = ..., sharey: bool = ..., + backend: str | None = ..., + **kwargs, + ) -> PlotAxes: ... + @overload + def boxplot( + self, + subplots: bool, + column: IndexLabel | None = ..., + fontsize: float | str | None = ..., + rot: float = ..., + grid: bool = ..., + ax: PlotAxes | None = ..., figsize: tuple[float, float] | None = ..., layout: tuple[int, int] | None = ..., - bins: int | Sequence = ..., + sharex: bool = ..., + sharey: bool = ..., backend: str | None = ..., **kwargs, - ) -> AxesSubplot | Sequence[AxesSubplot]: ... - def idxmax( - self, axis: Axis = ..., skipna: bool = ..., numeric_only: bool = ... - ) -> DataFrame: ... - def idxmin( - self, axis: Axis = ..., skipna: bool = ..., numeric_only: bool = ... - ) -> DataFrame: ... - def last(self, **kwargs) -> DataFrame: ... - def max(self, **kwargs) -> DataFrame: ... - def mean(self, **kwargs) -> DataFrame: ... - def median(self, **kwargs) -> DataFrame: ... - def min(self, **kwargs) -> DataFrame: ... - def nth(self, n: int | Sequence[int], dropna: str | None = ...) -> DataFrame: ... - def pct_change( - self, - periods: int = ..., - fill_method: str = ..., - limit=..., - freq=..., - axis: Axis = ..., - ) -> DataFrame: ... - def prod(self, numeric_only: bool = ..., min_count: int = ...) -> DataFrame: ... - def quantile( - self, q: float = ..., interpolation: str = ..., numeric_only: bool = ... - ) -> DataFrame: ... - def resample(self, rule, *args, **kwargs) -> Grouper: ... - def sample( - self, - n: int | None = ..., - frac: float | None = ..., - replace: bool = ..., - weights: ListLike | None = ..., - random_state: RandomState | None = ..., - ) -> DataFrame: ... - def sem(self, ddof: int = ..., numeric_only: bool = ...) -> DataFrame: ... - def shift( + ) -> PlotAxes | Series: ... # Series[PlotAxes] + @overload + def value_counts( self, - periods: int = ..., - freq: str = ..., - axis: Axis = ..., - fill_value=..., + subset: ListLike | None = ..., + normalize: Literal[False] = ..., + sort: bool = ..., + ascending: bool = ..., + dropna: bool = ..., + ) -> Series[int]: ... + @overload + def value_counts( + self, + subset: ListLike | None, + normalize: Literal[True], + sort: bool = ..., + ascending: bool = ..., + dropna: bool = ..., + ) -> Series[float]: ... + def fillna( + self, + value: ( + Scalar | ArrayLike | Series | DataFrame | Mapping[Hashable, Scalar] | None + ) = ..., + method: Literal["bfill", "ffill"] | None = ..., + axis: Axis | None | NoDefault = ..., + inplace: Literal[False] = ..., + limit: int | None = ..., + downcast: dict | None | NoDefault = ..., + ) -> DataFrame: ... + def take( + self, indices: TakeIndexer, axis: Axis | None | NoDefault = ..., **kwargs ) -> DataFrame: ... @overload - def skew( + def skew( # type: ignore[overload-overlap] self, - axis: Axis = ..., + axis: Axis | None | NoDefault = ..., skipna: bool = ..., numeric_only: bool = ..., *, @@ -305,41 +343,59 @@ class DataFrameGroupBy(GroupBy, Generic[ByT]): @overload def skew( self, - axis: Axis = ..., + axis: Axis | None | NoDefault = ..., skipna: bool = ..., - level: None = ..., numeric_only: bool = ..., + *, + level: None = ..., **kwargs, ) -> Series: ... - def std(self, ddof: int = ..., numeric_only: bool = ...) -> DataFrame: ... - def sum( + @property + def plot(self) -> GroupByPlot[Self]: ... + def corr( self, + method: str | Callable[[np.ndarray, np.ndarray], float] = ..., + min_periods: int = ..., numeric_only: bool = ..., - min_count: int = ..., - engine=..., - engine_kwargs=..., ) -> DataFrame: ... - def tail(self, n: int = ...) -> DataFrame: ... - def take(self, indices: Sequence, axis: Axis = ..., **kwargs) -> DataFrame: ... - def tshift(self, periods: int, freq=..., axis: Axis = ...) -> DataFrame: ... - def var(self, ddof: int = ..., numeric_only: bool = ...) -> DataFrame: ... - @overload - def value_counts( + def cov( self, - subset: ListLike | None = ..., - normalize: Literal[False] = ..., - sort: bool = ..., - ascending: bool = ..., - dropna: bool = ..., - ) -> Series[int]: ... - @overload - def value_counts( + min_periods: int | None = ..., + ddof: int | None = ..., + numeric_only: bool = ..., + ) -> DataFrame: ... + def hist( self, - subset: ListLike | None, - normalize: Literal[True], - sort: bool = ..., - ascending: bool = ..., - dropna: bool = ..., - ) -> Series[float]: ... + column: IndexLabel | None = ..., + by: IndexLabel | None = ..., + grid: bool = ..., + xlabelsize: int | None = ..., + xrot: float | None = ..., + ylabelsize: int | None = ..., + yrot: float | None = ..., + ax: PlotAxes | None = ..., + sharex: bool = ..., + sharey: bool = ..., + figsize: tuple[float, float] | None = ..., + layout: tuple[int, int] | None = ..., + bins: int | Sequence[int] = ..., + backend: str | None = ..., + legend: bool = ..., + **kwargs, + ) -> Series: ... # Series[Axes] but this is not allowed + @property + def dtypes(self) -> Series: ... + def corrwith( + self, + other: DataFrame | Series, + axis: Axis | NoDefault = ..., + drop: bool = ..., + method: CorrelationMethod = ..., + numeric_only: bool = ..., + ) -> DataFrame: ... def __getattr__(self, name: str) -> SeriesGroupBy[Any, ByT]: ... - def __iter__(self) -> Iterator[tuple[ByT, DataFrame]]: ... + # Overrides that provide more precise return types over the GroupBy class + @final # type: ignore[misc] + def __iter__( # pyright: ignore[reportIncompatibleMethodOverride] + self, + ) -> Iterator[tuple[ByT, DataFrame]]: ... diff --git a/pandas-stubs/core/groupby/groupby.pyi b/pandas-stubs/core/groupby/groupby.pyi index 32942ab16..cac030850 100644 --- a/pandas-stubs/core/groupby/groupby.pyi +++ b/pandas-stubs/core/groupby/groupby.pyi @@ -1,115 +1,403 @@ from collections.abc import ( Callable, Hashable, + Iterable, + Iterator, + Mapping, + Sequence, +) +import datetime as dt +from typing import ( + Any, + Generic, + Literal, + TypeVar, + final, + overload, ) import numpy as np from pandas.core.base import SelectionMixin from pandas.core.frame import DataFrame -from pandas.core.generic import NDFrame -from pandas.core.groupby import ops +from pandas.core.groupby import ( + generic, + ops, +) +from pandas.core.groupby.indexing import ( + GroupByIndexingMixin, + GroupByNthSelector, +) +from pandas.core.indexers import BaseIndexer from pandas.core.indexes.api import Index +from pandas.core.resample import ( + DatetimeIndexResamplerGroupby, + PeriodIndexResamplerGroupby, + TimedeltaIndexResamplerGroupby, +) from pandas.core.series import Series +from pandas.core.window import ( + ExpandingGroupby, + ExponentialMovingWindowGroupby, + RollingGroupby, +) +from typing_extensions import ( + Concatenate, + Self, + TypeAlias, +) +from pandas._libs.lib import NoDefault +from pandas._libs.tslibs import BaseOffset from pandas._typing import ( + S1, + AnyArrayLike, Axis, - KeysArgType, + AxisInt, + CalculationMethod, + Dtype, + Frequency, + IndexLabel, + IntervalClosedType, + MaskType, NDFrameT, + P, + RandomState, + Scalar, + T, + TimedeltaConvertibleTypes, + TimeGrouperOrigin, + TimestampConvention, + TimestampConvertibleTypes, + WindowingEngine, + WindowingEngineKwargs, npt, ) -class GroupByPlot: - def __init__(self, groupby) -> None: ... - def __call__(self, *args, **kwargs): ... - def __getattr__(self, name: str): ... +from pandas.plotting import PlotAccessor -class BaseGroupBy(SelectionMixin[NDFrameT]): - level = ... - as_index = ... - keys = ... - sort = ... - group_keys = ... - squeeze = ... - observed = ... - mutated = ... - @property - def obj(self) -> NDFrameT: ... - axis = ... - grouper = ... - exclusions = ... +_GroupByT = TypeVar("_GroupByT", bound=GroupBy) + +_KeysArgType: TypeAlias = ( + Hashable + | list[Hashable] + | Callable[[Hashable], Hashable] + | list[Callable[[Hashable], Hashable]] + | Mapping[Hashable, Hashable] +) + +_ResamplerGroupBy: TypeAlias = ( + DatetimeIndexResamplerGroupby[NDFrameT] + | PeriodIndexResamplerGroupby[NDFrameT] + | TimedeltaIndexResamplerGroupby[NDFrameT] +) + +# GroupByPlot does not really inherit from PlotAccessor but it delegates +# to it using __call__ and __getattr__. We lie here to avoid repeating the +# whole stub of PlotAccessor +@final +class GroupByPlot(PlotAccessor, Generic[_GroupByT]): + def __init__(self, groupby: _GroupByT) -> None: ... + # The following methods are inherited from the fake parent class PlotAccessor + # def __call__(self, *args, **kwargs): ... + # def __getattr__(self, name: str): ... + +class BaseGroupBy(SelectionMixin[NDFrameT], GroupByIndexingMixin): + axis: AxisInt + grouper: ops.BaseGrouper + keys: _KeysArgType | None + level: IndexLabel | None + group_keys: bool + @final def __len__(self) -> int: ... + @final + def __repr__(self) -> str: ... # noqa: PYI029 __repr__ here is final + @final @property - def groups(self) -> dict[Hashable, list[Hashable]]: ... + def groups(self) -> dict[Hashable, Index]: ... + @final @property def ngroups(self) -> int: ... + @final @property def indices(self) -> dict[Hashable, Index | npt.NDArray[np.int_] | list[int]]: ... - def pipe(self, func: Callable, *args, **kwargs): ... - plot = ... + @overload + def pipe( + self, + func: Callable[Concatenate[Self, P], T], + *args: P.args, + **kwargs: P.kwargs, + ) -> T: ... + @overload + def pipe( + self, + func: tuple[Callable[..., T], str], + *args: Any, + **kwargs: Any, + ) -> T: ... + @final def get_group(self, name, obj: NDFrameT | None = ...) -> NDFrameT: ... + @final + def __iter__(self) -> Iterator[tuple[Hashable, NDFrameT]]: ... + @overload + def __getitem__(self: BaseGroupBy[DataFrame], key: Scalar | Hashable | tuple[Hashable, ...]) -> generic.SeriesGroupBy: ... # type: ignore[overload-overlap] + @overload + def __getitem__( + self: BaseGroupBy[DataFrame], key: Iterable[Hashable] | slice + ) -> generic.DataFrameGroupBy: ... + @overload + def __getitem__( + self: BaseGroupBy[Series[S1]], + idx: ( + list[str] + | Index + | Series[S1] + | slice + | MaskType + | tuple[Hashable | slice, ...] + ), + ) -> generic.SeriesGroupBy: ... + @overload + def __getitem__(self: BaseGroupBy[Series[S1]], idx: Scalar) -> S1: ... class GroupBy(BaseGroupBy[NDFrameT]): - def count(self) -> DataFrame | Series: ... - def mean(self, **kwargs) -> DataFrame | Series: ... - def median(self, **kwargs) -> DataFrame | Series: ... - def std(self, ddof: int = ..., numeric_only: bool = ...) -> DataFrame | Series: ... - def var(self, ddof: int = ..., numeric_only: bool = ...) -> DataFrame | Series: ... - def sem(self, ddof: int = ..., numeric_only: bool = ...) -> DataFrame | Series: ... + as_index: bool + sort: bool + observed: bool + def __getattr__(self, attr: str) -> Any: ... + def apply(self, func: Callable | str, *args, **kwargs) -> NDFrameT: ... + @final + @overload + def any(self: GroupBy[Series], skipna: bool = ...) -> Series[bool]: ... + @overload + def any(self: GroupBy[DataFrame], skipna: bool = ...) -> DataFrame: ... + @final + @overload + def all(self: GroupBy[Series], skipna: bool = ...) -> Series[bool]: ... + @overload + def all(self: GroupBy[DataFrame], skipna: bool = ...) -> DataFrame: ... + @final + def count(self) -> NDFrameT: ... + @final + def mean( + self, + numeric_only: bool = ..., + engine: WindowingEngine = ..., + engine_kwargs: WindowingEngineKwargs = ..., + ) -> NDFrameT: ... + @final + def median(self, numeric_only: bool = ...) -> NDFrameT: ... + @final + @overload + def std( + self: GroupBy[Series], + ddof: int = ..., + engine: WindowingEngine = ..., + engine_kwargs: WindowingEngineKwargs = ..., + numeric_only: bool = ..., + ) -> Series[float]: ... + @overload + def std( + self: GroupBy[DataFrame], + ddof: int = ..., + engine: WindowingEngine = ..., + engine_kwargs: WindowingEngineKwargs = ..., + numeric_only: bool = ..., + ) -> DataFrame: ... + @final + @overload + def var( + self: GroupBy[Series], + ddof: int = ..., + engine: WindowingEngine = ..., + engine_kwargs: WindowingEngineKwargs = ..., + numeric_only: bool = ..., + ) -> Series[float]: ... + @overload + def var( + self: GroupBy[DataFrame], + ddof: int = ..., + engine: WindowingEngine = ..., + engine_kwargs: WindowingEngineKwargs = ..., + numeric_only: bool = ..., + ) -> DataFrame: ... + @final + @overload + def sem( + self: GroupBy[Series], ddof: int = ..., numeric_only: bool = ... + ) -> Series[float]: ... + @overload + def sem( + self: GroupBy[DataFrame], ddof: int = ..., numeric_only: bool = ... + ) -> DataFrame: ... + @final + @overload + def size(self: GroupBy[Series]) -> Series[int]: ... + @overload # return type depends on `as_index` for dataframe groupby + def size(self: GroupBy[DataFrame]) -> DataFrame | Series[int]: ... + @final + def sum( + self, + numeric_only: bool = ..., + min_count: int = ..., + engine: WindowingEngine = ..., + engine_kwargs: WindowingEngineKwargs = ..., + ) -> NDFrameT: ... + @final + def prod(self, numeric_only: bool = ..., min_count: int = ...) -> NDFrameT: ... + @final + def min( + self, + numeric_only: bool = ..., + min_count: int = ..., + engine: WindowingEngine = ..., + engine_kwargs: WindowingEngineKwargs = ..., + ) -> NDFrameT: ... + @final + def max( + self, + numeric_only: bool = ..., + min_count: int = ..., + engine: WindowingEngine = ..., + engine_kwargs: WindowingEngineKwargs = ..., + ) -> NDFrameT: ... + @final + def first(self, numeric_only: bool = ..., min_count: int = ...) -> NDFrameT: ... + @final + def last(self, numeric_only: bool = ..., min_count: int = ...) -> NDFrameT: ... + @final def ohlc(self) -> DataFrame: ... - def describe(self, **kwargs) -> DataFrame | Series: ... - def resample(self, rule, *args, **kwargs): ... - def rolling(self, *args, **kwargs): ... - def expanding(self, *args, **kwargs): ... - def ffill(self, limit: int | None = ...) -> DataFrame | Series: ... - def bfill(self, limit: int | None = ...) -> DataFrame | Series: ... - def nth( - self, n: int | list[int], dropna: str | None = ... - ) -> DataFrame | Series: ... - def quantile(self, q=..., interpolation: str = ...): ... - def ngroup(self, ascending: bool = ...) -> Series: ... - def cumcount(self, ascending: bool = ...) -> Series: ... + def describe( + self, + percentiles: Iterable[float] | None = ..., + include: Literal["all"] | list[Dtype] | None = ..., + exclude: list[Dtype] | None = ..., + ) -> DataFrame: ... + @final + def resample( + self, + rule: Frequency | dt.timedelta, + how: str | None = ..., + fill_method: str | None = ..., + limit: int | None = ..., + kind: str | None = ..., + on: Hashable | None = ..., + *, + closed: Literal["left", "right"] | None = ..., + label: Literal["left", "right"] | None = ..., + axis: Axis = ..., + convention: TimestampConvention | None = ..., + origin: TimeGrouperOrigin | TimestampConvertibleTypes = ..., + offset: TimedeltaConvertibleTypes | None = ..., + group_keys: bool = ..., + **kwargs, + ) -> _ResamplerGroupBy[NDFrameT]: ... + @final + def rolling( + self, + window: int | dt.timedelta | str | BaseOffset | BaseIndexer | None = ..., + min_periods: int | None = ..., + center: bool | None = ..., + win_type: str | None = ..., + axis: Axis = ..., + on: str | Index | None = ..., + closed: IntervalClosedType | None = ..., + method: CalculationMethod = ..., + *, + selection: IndexLabel | None = ..., + ) -> RollingGroupby[NDFrameT]: ... + @final + def expanding( + self, + min_periods: int = ..., + axis: Axis = ..., + method: CalculationMethod = ..., + selection: IndexLabel | None = ..., + ) -> ExpandingGroupby[NDFrameT]: ... + @final + def ewm( + self, + com: float | None = ..., + span: float | None = ..., + halflife: TimedeltaConvertibleTypes | None = ..., + alpha: float | None = ..., + min_periods: int | None = ..., + adjust: bool = ..., + ignore_na: bool = ..., + axis: Axis = ..., + times: str | np.ndarray | Series | np.timedelta64 | None = ..., + method: CalculationMethod = ..., + *, + selection: IndexLabel | None = ..., + ) -> ExponentialMovingWindowGroupby[NDFrameT]: ... + @final + def ffill(self, limit: int | None = ...) -> NDFrameT: ... + @final + def bfill(self, limit: int | None = ...) -> NDFrameT: ... + @final + @property + def nth(self) -> GroupByNthSelector[Self]: ... + @final + def quantile( + self, + q: float | AnyArrayLike = ..., + interpolation: str = ..., + numeric_only: bool = ..., + ) -> NDFrameT: ... + @final + def ngroup(self, ascending: bool = ...) -> Series[int]: ... + @final + def cumcount(self, ascending: bool = ...) -> Series[int]: ... + @final def rank( self, method: str = ..., ascending: bool = ..., na_option: str = ..., pct: bool = ..., - axis: int = ..., - ) -> DataFrame: ... - def cummax(self, axis: Axis = ..., **kwargs) -> DataFrame | Series: ... - def cummin(self, axis: Axis = ..., **kwargs) -> DataFrame | Series: ... - def cumprod(self, axis: Axis = ..., **kwargs) -> DataFrame | Series: ... - def cumsum(self, axis: Axis = ..., **kwargs) -> DataFrame | Series: ... - def shift(self, periods: int = ..., freq=..., axis: Axis = ..., fill_value=...): ... + axis: AxisInt | NoDefault = ..., + ) -> NDFrameT: ... + @final + def cumprod(self, axis: Axis | NoDefault = ..., *args, **kwargs) -> NDFrameT: ... + @final + def cumsum(self, axis: Axis | NoDefault = ..., *args, **kwargs) -> NDFrameT: ... + @final + def cummin( + self, axis: AxisInt | NoDefault = ..., numeric_only: bool = ..., **kwargs + ) -> NDFrameT: ... + @final + def cummax( + self, axis: AxisInt | NoDefault = ..., numeric_only: bool = ..., **kwargs + ) -> NDFrameT: ... + @final + def shift( + self, + periods: int | Sequence[int] = ..., + freq: Frequency | None = ..., + axis: Axis | NoDefault = ..., + fill_value=..., + suffix: str | None = ..., + ) -> NDFrameT: ... + @final + def diff(self, periods: int = ..., axis: AxisInt | NoDefault = ...) -> NDFrameT: ... + @final def pct_change( self, periods: int = ..., - fill_method: str = ..., - limit=..., + fill_method: Literal["bfill", "ffill"] | None | NoDefault = ..., + limit: int | None | NoDefault = ..., freq=..., - axis: Axis = ..., - ) -> DataFrame | Series: ... - def head(self, n: int = ...) -> DataFrame | Series: ... - def tail(self, n: int = ...) -> DataFrame | Series: ... - # Surplus methods from original pylance stubs; should they go away? - def first(self, **kwargs) -> DataFrame | Series: ... - def last(self, **kwargs) -> DataFrame | Series: ... - def max(self, **kwargs) -> DataFrame | Series: ... - def min(self, **kwargs) -> DataFrame | Series: ... - def size(self) -> Series[int]: ... - -def get_groupby( - obj: NDFrame, - by: KeysArgType | None = ..., - axis: int = ..., - level=..., - grouper: ops.BaseGrouper | None = ..., - exclusions=..., - selection=..., - as_index: bool = ..., - sort: bool = ..., - group_keys: bool = ..., - squeeze: bool = ..., - observed: bool = ..., - mutated: bool = ..., -) -> GroupBy: ... + axis: Axis | NoDefault = ..., + ) -> NDFrameT: ... + @final + def head(self, n: int = ...) -> NDFrameT: ... + @final + def tail(self, n: int = ...) -> NDFrameT: ... + @final + def sample( + self, + n: int | None = ..., + frac: float | None = ..., + replace: bool = ..., + weights: Sequence | Series | None = ..., + random_state: RandomState | None = ..., + ) -> NDFrameT: ... diff --git a/pandas-stubs/core/groupby/grouper.pyi b/pandas-stubs/core/groupby/grouper.pyi index 65d76109b..666cbc9ab 100644 --- a/pandas-stubs/core/groupby/grouper.pyi +++ b/pandas-stubs/core/groupby/grouper.pyi @@ -1,4 +1,11 @@ -from collections.abc import Hashable +from collections.abc import ( + Hashable, + Iterator, +) +from typing import ( + final, + overload, +) import numpy as np from pandas import ( @@ -6,68 +13,64 @@ from pandas import ( Index, Series, ) -from pandas.core.groupby.ops import BaseGrouper +from pandas.core.resample import TimeGrouper +from typing_extensions import Self -from pandas._typing import NDFrameT +from pandas._libs.lib import NoDefault +from pandas._typing import ( + ArrayLike, + Axis, + Frequency, + Incomplete, + KeysArgType, + Level, + ListLikeHashable, + npt, +) +from pandas.util._decorators import cache_readonly class Grouper: - def __new__(cls, *args, **kwargs): ... - key = ... - level = ... - freq = ... - axis = ... - sort = ... - grouper = ... - obj = ... - indexer = ... - binner = ... - def __init__( - self, key=..., level=..., freq=..., axis: int = ..., sort: bool = ... - ) -> None: ... - @property - def ax(self): ... - @property - def groups(self): ... + key: KeysArgType | None + level: Level | ListLikeHashable[Level] | None + freq: Frequency | None + axis: Axis + sort: bool + dropna: bool + binner: Incomplete + @overload + def __new__( + cls, + key: KeysArgType | None = ..., + level: Level | ListLikeHashable[Level] | None = ..., + axis: Axis | NoDefault = ..., + sort: bool = ..., + dropna: bool = ..., + ) -> Self: ... + @overload + def __new__(cls, *args, freq: Frequency, **kwargs) -> TimeGrouper: ... + @final + def __repr__(self) -> str: ... # noqa: PYI029 __repr__ here is final +@final class Grouping: - name = ... - level = ... - grouper = ... - all_grouper = ... - index = ... - sort = ... - obj = ... - observed = ... - in_axis = ... - def __init__( - self, - index: Index, - grouper=..., - obj: DataFrame | Series | None = ..., - name=..., - level=..., - sort: bool = ..., - observed: bool = ..., - in_axis: bool = ..., - ) -> None: ... - def __iter__(self): ... - @property + level: Level | None + obj: DataFrame | Series | None + in_axis: bool + grouping_vector: Incomplete + def __iter__(self) -> Iterator[Hashable]: ... + @cache_readonly + def name(self) -> Hashable: ... + @cache_readonly def ngroups(self) -> int: ... - def indices(self): ... + @cache_readonly + def indices(self) -> dict[Hashable, npt.NDArray[np.intp]]: ... @property - def codes(self) -> np.ndarray: ... + def codes(self) -> npt.NDArray[np.signedinteger]: ... + @cache_readonly + def group_arraylike(self) -> ArrayLike: ... + @cache_readonly def result_index(self) -> Index: ... - @property + @cache_readonly def group_index(self) -> Index: ... + @cache_readonly def groups(self) -> dict[Hashable, np.ndarray]: ... - -def get_grouper( - obj: NDFrameT, - key=..., - axis: int = ..., - level=..., - sort: bool = ..., - observed: bool = ..., - mutated: bool = ..., - validate: bool = ..., -) -> tuple[BaseGrouper, list[Hashable], NDFrameT]: ... diff --git a/pandas-stubs/core/groupby/indexing.pyi b/pandas-stubs/core/groupby/indexing.pyi new file mode 100644 index 000000000..c013d4c74 --- /dev/null +++ b/pandas-stubs/core/groupby/indexing.pyi @@ -0,0 +1,32 @@ +from typing import ( + Any, + Generic, + Literal, + TypeVar, +) + +from pandas import ( + DataFrame, + Series, +) +from pandas.core.groupby import groupby + +from pandas._typing import PositionalIndexer + +_GroupByT = TypeVar("_GroupByT", bound=groupby.GroupBy[Any]) + +class GroupByIndexingMixin: ... + +class GroupByPositionalSelector: + groupby_object: groupby.GroupBy + def __getitem__(self, arg: PositionalIndexer | tuple) -> DataFrame | Series: ... + +class GroupByNthSelector(Generic[_GroupByT]): + groupby_object: _GroupByT + + def __call__( + self, + n: PositionalIndexer | tuple, + dropna: Literal["any", "all", None] = ..., + ) -> DataFrame | Series: ... + def __getitem__(self, n: PositionalIndexer | tuple) -> DataFrame | Series: ... diff --git a/pandas-stubs/core/groupby/ops.pyi b/pandas-stubs/core/groupby/ops.pyi index 2f67fb38e..2f2665a01 100644 --- a/pandas-stubs/core/groupby/ops.pyi +++ b/pandas-stubs/core/groupby/ops.pyi @@ -1,98 +1,103 @@ -from collections.abc import Sequence +from collections.abc import ( + Callable, + Hashable, + Iterator, +) +from typing import ( + Generic, + final, +) import numpy as np from pandas import ( - DataFrame, Index, Series, ) from pandas.core.groupby import grouper +from pandas._typing import ( + ArrayLike, + AxisInt, + Incomplete, + NDFrameT, + Shape, + T, + npt, +) +from pandas.util._decorators import cache_readonly + class BaseGrouper: - axis = ... - sort = ... - group_keys = ... - mutated = ... - indexer = ... - def __init__( - self, - axis: Index, - groupings: Sequence[grouper.Grouping], - sort: bool = ..., - group_keys: bool = ..., - mutated: bool = ..., - indexer: np.ndarray | None = ..., - ) -> None: ... + axis: Index + dropna: bool @property def groupings(self) -> list[grouper.Grouping]: ... @property - def shape(self): ... - def __iter__(self): ... + def shape(self) -> Shape: ... + def __iter__(self) -> Iterator: ... @property def nkeys(self) -> int: ... - def get_iterator(self, data: DataFrame | Series, axis: int = ...): ... - def apply(self, f, data: DataFrame | Series, axis: int = ...): ... - def indices(self): ... + def get_iterator( + self, data: NDFrameT, axis: AxisInt = ... + ) -> Iterator[tuple[Hashable, NDFrameT]]: ... + @final + @cache_readonly + def group_keys_seq(self): ... + @cache_readonly + def indices(self) -> dict[Hashable, npt.NDArray[np.intp]]: ... + @final + def result_ilocs(self) -> npt.NDArray[np.intp]: ... + @final @property - def codes(self) -> list[np.ndarray]: ... + def codes(self) -> list[npt.NDArray[np.signedinteger]]: ... @property def levels(self) -> list[Index]: ... @property - def names(self): ... + def names(self) -> list: ... + @final def size(self) -> Series: ... - def groups(self): ... + @cache_readonly + def groups(self) -> dict[Hashable, np.ndarray]: ... + @final + @cache_readonly def is_monotonic(self) -> bool: ... - def group_info(self): ... - def codes_info(self) -> np.ndarray: ... + @final + @cache_readonly + def has_dropped_na(self) -> bool: ... + @cache_readonly + def group_info(self) -> tuple[npt.NDArray[np.intp], npt.NDArray[np.intp], int]: ... + @cache_readonly + def codes_info(self) -> npt.NDArray[np.intp]: ... + @final + @cache_readonly def ngroups(self) -> int: ... @property - def reconstructed_codes(self) -> list[np.ndarray]: ... + def reconstructed_codes(self) -> list[npt.NDArray[np.intp]]: ... + @cache_readonly def result_index(self) -> Index: ... - def get_group_levels(self): ... - def agg_series(self, obj: Series, func): ... - -class BinGrouper(BaseGrouper): - bins = ... - binlabels = ... - mutated = ... - indexer = ... - def __init__( + @final + def get_group_levels(self) -> list[ArrayLike]: ... + @final + def agg_series( self, - bins, - binlabels, - filter_empty: bool = ..., - mutated: bool = ..., - indexer=..., - ) -> None: ... - def groups(self): ... - @property - def nkeys(self) -> int: ... - def get_iterator(self, data: DataFrame | Series, axis: int = ...): ... - def indices(self): ... - def group_info(self): ... - @property - def reconstructed_codes(self) -> list[np.ndarray]: ... - def result_index(self): ... - @property - def levels(self): ... - @property - def names(self): ... - @property - def groupings(self) -> list[grouper.Grouping]: ... - def agg_series(self, obj: Series, func): ... + obj: Series, + func: Callable[[Series], object], + preserve_dtype: bool = ..., + ) -> ArrayLike: ... + @final + def apply_groupwise( + self, f: Callable[[NDFrameT], T], data: NDFrameT, axis: AxisInt = ... + ) -> tuple[list[T], bool]: ... -class DataSplitter: - data = ... - labels = ... - ngroups = ... - axis = ... - def __init__( - self, data: DataFrame | Series, labels, ngroups: int, axis: int = ... - ) -> None: ... - def slabels(self): ... - def __iter__(self): ... - -class SeriesSplitter(DataSplitter): ... -class FrameSplitter(DataSplitter): ... +class BinGrouper(BaseGrouper): + bins: npt.NDArray[np.int64] + binlabels: Index + indexer: npt.NDArray[np.intp] + @cache_readonly + def indices(self) -> dict[Incomplete, list[int]]: ... # type: ignore[override] # pyright: ignore[reportIncompatibleMethodOverride] -def get_splitter(data: DataFrame | Series, *args, **kwargs) -> DataSplitter: ... +class DataSplitter(Generic[NDFrameT]): + data: NDFrameT + labels: npt.NDArray[np.intp] + ngroups: int + axis: AxisInt + def __iter__(self) -> Iterator[NDFrameT]: ... diff --git a/pandas-stubs/core/indexes/base.pyi b/pandas-stubs/core/indexes/base.pyi index e045d476a..bbf518881 100644 --- a/pandas-stubs/core/indexes/base.pyi +++ b/pandas-stubs/core/indexes/base.pyi @@ -13,6 +13,7 @@ from typing import ( Any, ClassVar, Literal, + final, overload, ) @@ -400,6 +401,7 @@ class Index(IndexOpsMixin[S1]): def set_value(self, arr, key, value) -> None: ... def get_indexer_non_unique(self, target): ... def get_indexer_for(self, target, **kwargs): ... + @final def groupby(self, values) -> dict[Hashable, np.ndarray]: ... def map(self, mapper, na_action=...) -> Index: ... def isin(self, values, level=...) -> np_ndarray_bool: ... diff --git a/pandas-stubs/core/resample.pyi b/pandas-stubs/core/resample.pyi index a21b7dfcf..9b1d8ee96 100644 --- a/pandas-stubs/core/resample.pyi +++ b/pandas-stubs/core/resample.pyi @@ -1,11 +1,9 @@ from collections.abc import ( Callable, - Generator, Hashable, Mapping, ) from typing import ( - Generic, Literal, overload, ) @@ -13,18 +11,31 @@ from typing import ( import numpy as np from pandas import ( DataFrame, + DatetimeIndex, Index, + PeriodIndex, Series, + Timedelta, + TimedeltaIndex, ) from pandas.core.groupby.generic import SeriesGroupBy from pandas.core.groupby.groupby import BaseGroupBy -from typing_extensions import TypeAlias +from pandas.core.groupby.grouper import Grouper +from pandas.core.groupby.ops import BinGrouper +from typing_extensions import ( + Self, + TypeAlias, +) +from pandas._libs.lib import NoDefault from pandas._typing import ( + S1, Axis, InterpolateOptions, NDFrameT, Scalar, + TimeGrouperOrigin, + TimestampConvention, npt, ) @@ -51,58 +62,19 @@ _SeriesGroupByFuncArgs: TypeAlias = ( _SeriesGroupByFuncTypes | Mapping[Hashable, _SeriesGroupByFunc | str] ) -class Resampler(BaseGroupBy, Generic[NDFrameT]): +class Resampler(BaseGroupBy[NDFrameT]): + grouper: BinGrouper # pyright: ignore[reportIncompatibleVariableOverride] # variance incompatibility + binner: DatetimeIndex | TimedeltaIndex | PeriodIndex + exclusions: frozenset[Hashable] + ax: Index def __getattr__(self, attr: str) -> SeriesGroupBy: ... - def __iter__(self) -> Generator[tuple[Hashable, NDFrameT], None, None]: ... - @property - def obj(self) -> NDFrameT: ... - @property - def ax(self) -> Index: ... - @overload - def pipe( - self: Resampler[DataFrame], - func: Callable[..., DataFrame] - | tuple[Callable[..., DataFrame], str] - | Callable[..., Series] - | tuple[Callable[..., Series], str], - *args, - **kwargs, - ) -> DataFrame: ... - @overload - def pipe( - self: Resampler[DataFrame], - func: Callable[..., Scalar] | tuple[Callable[..., Scalar], str], - *args, - **kwargs, - ) -> Series: ... - @overload - def pipe( - self: Resampler[Series], - func: Callable[..., Series] | tuple[Callable[..., Series], str], - *args, - **kwargs, - ) -> Series: ... - @overload - def pipe( - self: Resampler[Series], - func: Callable[..., Scalar] | tuple[Callable[..., Scalar], str], - *args, - **kwargs, - ) -> Scalar: ... - @overload - def pipe( - self: Resampler[Series], - func: Callable[..., DataFrame] | tuple[Callable[..., DataFrame], str], - *args, - **kwargs, - ) -> DataFrame: ... @overload def aggregate( self: Resampler[DataFrame], func: _FrameGroupByFuncArgs | None = ..., *args, **kwargs, - ) -> Series | DataFrame: ... + ) -> DataFrame: ... @overload def aggregate( self: Resampler[Series], @@ -112,17 +84,17 @@ class Resampler(BaseGroupBy, Generic[NDFrameT]): ) -> Series | DataFrame: ... agg = aggregate apply = aggregate + @overload def transform( - self, arg: Callable[[Series], Series], *args, **kwargs - ) -> NDFrameT: ... + self: Resampler[Series], arg: Callable[[Series], Series[S1]], *args, **kwargs + ) -> Series[S1]: ... + @overload + def transform( + self: Resampler[DataFrame], arg: Callable[[Series], Series[S1]], *args, **kwargs + ) -> DataFrame: ... def ffill(self, limit: int | None = ...) -> NDFrameT: ... def nearest(self, limit: int | None = ...) -> NDFrameT: ... def bfill(self, limit: int | None = ...) -> NDFrameT: ... - def fillna( - self, - method: Literal["pad", "backfill", "ffill", "bfill", "nearest"], - limit: int | None = ..., - ) -> NDFrameT: ... @overload def interpolate( self, @@ -133,7 +105,7 @@ class Resampler(BaseGroupBy, Generic[NDFrameT]): inplace: Literal[True], limit_direction: Literal["forward", "backward", "both"] = ..., limit_area: Literal["inside", "outside"] | None = ..., - downcast: Literal["infer"] | None = ..., + downcast: Literal["infer"] | None | NoDefault = ..., **kwargs, ) -> None: ... @overload @@ -146,23 +118,10 @@ class Resampler(BaseGroupBy, Generic[NDFrameT]): inplace: Literal[False] = ..., limit_direction: Literal["forward", "backward", "both"] = ..., limit_area: Literal["inside", "outside"] | None = ..., - downcast: Literal["infer"] | None = ..., + downcast: Literal["infer"] | None | NoDefault = ..., **kwargs, ) -> NDFrameT: ... def asfreq(self, fill_value: Scalar | None = ...) -> NDFrameT: ... - def std( - self, ddof: int = ..., numeric_only: bool = ..., *args, **kwargs - ) -> NDFrameT: ... - def var( - self, ddof: int = ..., numeric_only: bool = ..., *args, **kwargs - ) -> NDFrameT: ... - def size(self) -> Series: ... - def count(self) -> NDFrameT: ... - def quantile( - self, - q: float | list[float] | npt.NDArray[np.float_] | Series[float] = ..., - **kwargs, - ) -> NDFrameT: ... def sum( self, numeric_only: bool = ..., min_count: int = ..., *args, **kwargs ) -> NDFrameT: ... @@ -181,8 +140,68 @@ class Resampler(BaseGroupBy, Generic[NDFrameT]): def last( self, numeric_only: bool = ..., min_count: int = ..., *args, **kwargs ) -> NDFrameT: ... - def mean(self, numeric_only: bool = ..., *args, **kwargs) -> NDFrameT: ... - def sem(self, numeric_only: bool = ..., *args, **kwargs) -> NDFrameT: ... def median(self, numeric_only: bool = ..., *args, **kwargs) -> NDFrameT: ... + def mean(self, numeric_only: bool = ..., *args, **kwargs) -> NDFrameT: ... + def std( + self, ddof: int = ..., numeric_only: bool = ..., *args, **kwargs + ) -> NDFrameT: ... + def var( + self, ddof: int = ..., numeric_only: bool = ..., *args, **kwargs + ) -> NDFrameT: ... + def sem( + self, ddof: int = ..., numeric_only: bool = ..., *args, **kwargs + ) -> NDFrameT: ... def ohlc(self, *args, **kwargs) -> DataFrame: ... - def nunique(self, *args, **kwargs) -> NDFrameT: ... + @overload + def nunique(self: Resampler[Series], *args, **kwargs) -> Series[int]: ... + @overload + def nunique(self: Resampler[DataFrame], *args, **kwargs) -> DataFrame: ... + def size(self) -> Series[int]: ... + @overload + def count(self: Resampler[Series]) -> Series[int]: ... + @overload + def count(self: Resampler[DataFrame]) -> DataFrame: ... + def quantile( + self, + q: float | list[float] | npt.NDArray[np.float_] | Series[float] = ..., + **kwargs, + ) -> NDFrameT: ... + +# We lie about inheriting from Resampler because at runtime inherits all Resampler +# attributes via setattr +class _GroupByMixin(Resampler[NDFrameT]): + key: str | list[str] | None + def __getitem__(self, key) -> Self: ... # type: ignore[override] # pyright: ignore[reportIncompatibleMethodOverride] + +class DatetimeIndexResampler(Resampler[NDFrameT]): ... + +class DatetimeIndexResamplerGroupby( + _GroupByMixin[NDFrameT], DatetimeIndexResampler[NDFrameT] +): + def __getattr__(self, attr: str) -> Self: ... # type: ignore[override] # pyright: ignore[reportIncompatibleMethodOverride] + +class PeriodIndexResampler(DatetimeIndexResampler[NDFrameT]): ... + +class PeriodIndexResamplerGroupby( + _GroupByMixin[NDFrameT], PeriodIndexResampler[NDFrameT] +): + def __getattr__(self, attr: str) -> Self: ... # type: ignore[override] # pyright: ignore[reportIncompatibleMethodOverride] + +class TimedeltaIndexResampler(DatetimeIndexResampler[NDFrameT]): ... + +class TimedeltaIndexResamplerGroupby( + _GroupByMixin[NDFrameT], TimedeltaIndexResampler[NDFrameT] +): + def __getattr__(self, attr: str) -> Self: ... # type: ignore[override] # pyright: ignore[reportIncompatibleMethodOverride] + +class TimeGrouper(Grouper): + closed: Literal["left", "right"] + label: Literal["left", "right"] + kind: str | None + convention: TimestampConvention + how: str + fill_method: str | None + limit: int | None + group_keys: bool + origin: TimeGrouperOrigin + offset: Timedelta | None diff --git a/pandas-stubs/core/series.pyi b/pandas-stubs/core/series.pyi index 3f16abefa..ab963e52a 100644 --- a/pandas-stubs/core/series.pyi +++ b/pandas-stubs/core/series.pyi @@ -45,6 +45,7 @@ from pandas.core.base import IndexOpsMixin from pandas.core.frame import DataFrame from pandas.core.generic import NDFrame from pandas.core.groupby.generic import SeriesGroupBy +from pandas.core.groupby.groupby import BaseGroupBy from pandas.core.indexers import BaseIndexer from pandas.core.indexes.accessors import ( CombinedDatetimelikeProperties, @@ -66,7 +67,6 @@ from pandas.core.indexing import ( _IndexSliceTuple, _LocIndexer, ) -from pandas.core.resample import Resampler from pandas.core.strings import StringMethods from pandas.core.window import ( Expanding, @@ -87,6 +87,7 @@ from pandas._libs.interval import ( Interval, _OrderableT, ) +from pandas._libs.lib import NoDefault from pandas._libs.missing import NAType from pandas._libs.tslibs import BaseOffset from pandas._typing import ( @@ -307,7 +308,7 @@ class Series(IndexOpsMixin[S1], NDFrame): @overload def __new__( cls, - data: Scalar | _ListLike | dict[HashableT1, Any] | None = ..., + data: Scalar | _ListLike | dict[HashableT1, Any] | BaseGroupBy | None = ..., index: Axes | None = ..., *, dtype: Dtype = ..., @@ -555,8 +556,7 @@ class Series(IndexOpsMixin[S1], NDFrame): as_index: _bool = ..., sort: _bool = ..., group_keys: _bool = ..., - squeeze: _bool = ..., - observed: _bool = ..., + observed: _bool | NoDefault = ..., dropna: _bool = ..., ) -> SeriesGroupBy[S1, Scalar]: ... @overload @@ -568,8 +568,7 @@ class Series(IndexOpsMixin[S1], NDFrame): as_index: _bool = ..., sort: _bool = ..., group_keys: _bool = ..., - squeeze: _bool = ..., - observed: _bool = ..., + observed: _bool | NoDefault = ..., dropna: _bool = ..., ) -> SeriesGroupBy[S1, Timestamp]: ... @overload @@ -581,8 +580,7 @@ class Series(IndexOpsMixin[S1], NDFrame): as_index: _bool = ..., sort: _bool = ..., group_keys: _bool = ..., - squeeze: _bool = ..., - observed: _bool = ..., + observed: _bool | NoDefault = ..., dropna: _bool = ..., ) -> SeriesGroupBy[S1, Timedelta]: ... @overload @@ -594,8 +592,7 @@ class Series(IndexOpsMixin[S1], NDFrame): as_index: _bool = ..., sort: _bool = ..., group_keys: _bool = ..., - squeeze: _bool = ..., - observed: _bool = ..., + observed: _bool | NoDefault = ..., dropna: _bool = ..., ) -> SeriesGroupBy[S1, Period]: ... @overload @@ -607,24 +604,47 @@ class Series(IndexOpsMixin[S1], NDFrame): as_index: _bool = ..., sort: _bool = ..., group_keys: _bool = ..., - squeeze: _bool = ..., - observed: _bool = ..., + observed: _bool | NoDefault = ..., dropna: _bool = ..., ) -> SeriesGroupBy[S1, IntervalT]: ... @overload def groupby( self, - by: MultiIndex | GroupByObjectNonScalar = ..., + by: MultiIndex | GroupByObjectNonScalar, axis: AxisIndex = ..., level: IndexLabel | None = ..., as_index: _bool = ..., sort: _bool = ..., group_keys: _bool = ..., - squeeze: _bool = ..., - observed: _bool = ..., + observed: _bool | NoDefault = ..., dropna: _bool = ..., ) -> SeriesGroupBy[S1, tuple]: ... @overload + def groupby( + self, + by: None, + axis: AxisIndex, + level: IndexLabel, # level is required when by=None (passed as positional) + as_index: _bool = ..., + sort: _bool = ..., + group_keys: _bool = ..., + observed: _bool | NoDefault = ..., + dropna: _bool = ..., + ) -> SeriesGroupBy[S1, Scalar]: ... + @overload + def groupby( + self, + by: None = ..., + axis: AxisIndex = ..., + *, + level: IndexLabel, # level is required when by=None (passed as keyword) + as_index: _bool = ..., + sort: _bool = ..., + group_keys: _bool = ..., + observed: _bool | NoDefault = ..., + dropna: _bool = ..., + ) -> SeriesGroupBy[S1, Scalar]: ... + @overload def groupby( self, by: Series[SeriesByT], @@ -633,8 +653,7 @@ class Series(IndexOpsMixin[S1], NDFrame): as_index: _bool = ..., sort: _bool = ..., group_keys: _bool = ..., - squeeze: _bool = ..., - observed: _bool = ..., + observed: _bool | NoDefault = ..., dropna: _bool = ..., ) -> SeriesGroupBy[S1, SeriesByT]: ... @overload @@ -646,8 +665,7 @@ class Series(IndexOpsMixin[S1], NDFrame): as_index: _bool = ..., sort: _bool = ..., group_keys: _bool = ..., - squeeze: _bool = ..., - observed: _bool = ..., + observed: _bool | NoDefault = ..., dropna: _bool = ..., ) -> SeriesGroupBy[S1, Any]: ... # need the ignore because None is Hashable @@ -1343,23 +1361,6 @@ class Series(IndexOpsMixin[S1], NDFrame): end_time: _str | time, axis: AxisIndex | None = ..., ) -> Series[S1]: ... - def resample( - self, - rule, - axis: AxisIndex = ..., - closed: _str | None = ..., - label: _str | None = ..., - convention: TimestampConvention = ..., - kind: Literal["timestamp", "period"] | None = ..., - loffset=..., - base: int = ..., - on: _str | None = ..., - level: Level | None = ..., - origin: datetime - | Timestamp - | Literal["epoch", "start", "start_day", "end", "end_day"] = ..., - offset: timedelta | Timedelta | _str | None = ..., - ) -> Resampler[Series]: ... def first(self, offset) -> Series[S1]: ... def last(self, offset) -> Series[S1]: ... def rank( diff --git a/pandas-stubs/core/window/__init__.pyi b/pandas-stubs/core/window/__init__.pyi index b9504df8d..d5dbe6956 100644 --- a/pandas-stubs/core/window/__init__.pyi +++ b/pandas-stubs/core/window/__init__.pyi @@ -1,4 +1,7 @@ -from pandas.core.window.ewm import ExponentialMovingWindow as ExponentialMovingWindow +from pandas.core.window.ewm import ( + ExponentialMovingWindow as ExponentialMovingWindow, + ExponentialMovingWindowGroupby as ExponentialMovingWindowGroupby, +) from pandas.core.window.expanding import ( Expanding as Expanding, ExpandingGroupby as ExpandingGroupby, diff --git a/pandas-stubs/core/window/ewm.pyi b/pandas-stubs/core/window/ewm.pyi index 548464021..cc9d8cbb7 100644 --- a/pandas-stubs/core/window/ewm.pyi +++ b/pandas-stubs/core/window/ewm.pyi @@ -1,47 +1,33 @@ -from typing import Generic - -import numpy as np from pandas import ( DataFrame, Series, ) -from pandas.core.window.rolling import BaseWindow +from pandas.core.window.rolling import ( + BaseWindow, + BaseWindowGroupby, +) from pandas._typing import ( - Axis, - CalculationMethod, NDFrameT, - TimedeltaConvertibleTypes, WindowingEngine, WindowingEngineKwargs, ) -class ExponentialMovingWindow(BaseWindow[NDFrameT], Generic[NDFrameT]): - def __init__( +class ExponentialMovingWindow(BaseWindow[NDFrameT]): + def online( self, - obj: NDFrameT, - com: float | None = ..., - span: float | None = ..., - halflife: TimedeltaConvertibleTypes | None = ..., - alpha: float | None = ..., - min_periods: int | None = ..., - adjust: bool = ..., - ignore_na: bool = ..., - axis: Axis = ..., - times: str | np.ndarray | Series | None | np.timedelta64 = ..., - method: CalculationMethod = ..., - ) -> None: ... + engine: WindowingEngine = ..., + engine_kwargs: WindowingEngineKwargs = ..., + ) -> OnlineExponentialMovingWindow[NDFrameT]: ... def mean( self, numeric_only: bool = ..., - *, engine: WindowingEngine = ..., engine_kwargs: WindowingEngineKwargs = ..., ) -> NDFrameT: ... def sum( self, numeric_only: bool = ..., - *, engine: WindowingEngine = ..., engine_kwargs: WindowingEngineKwargs = ..., ) -> NDFrameT: ... @@ -60,3 +46,29 @@ class ExponentialMovingWindow(BaseWindow[NDFrameT], Generic[NDFrameT]): pairwise: bool | None = ..., numeric_only: bool = ..., ) -> NDFrameT: ... + +class ExponentialMovingWindowGroupby( + BaseWindowGroupby[NDFrameT], ExponentialMovingWindow[NDFrameT] +): ... + +class OnlineExponentialMovingWindow(ExponentialMovingWindow[NDFrameT]): + def reset(self) -> None: ... + def aggregate(self, func, *args, **kwargs): ... + def std(self, bias: bool = ..., *args, **kwargs): ... + def corr( + self, + other: DataFrame | Series | None = ..., + pairwise: bool | None = ..., + numeric_only: bool = ..., + ): ... + def cov( + self, + other: DataFrame | Series | None = ..., + pairwise: bool | None = ..., + bias: bool = ..., + numeric_only: bool = ..., + ): ... + def var(self, bias: bool = ..., numeric_only: bool = ...): ... + def mean( + self, *args, update: NDFrameT | None = ..., update_times: None = ..., **kwargs + ) -> NDFrameT: ... diff --git a/pandas-stubs/core/window/expanding.pyi b/pandas-stubs/core/window/expanding.pyi index 9ac04cf4e..e5c8e586c 100644 --- a/pandas-stubs/core/window/expanding.pyi +++ b/pandas-stubs/core/window/expanding.pyi @@ -1,119 +1,9 @@ -from collections.abc import Callable -from typing import Any - -from pandas import ( - DataFrame, - Series, -) from pandas.core.window.rolling import ( BaseWindowGroupby, RollingAndExpandingMixin, ) -from pandas._typing import ( - NDFrameT, - QuantileInterpolation, - WindowingEngine, - WindowingEngineKwargs, - WindowingRankType, -) - -class Expanding(RollingAndExpandingMixin[NDFrameT]): - def count(self) -> NDFrameT: ... - def apply( - self, - func: Callable[..., Any], - raw: bool = ..., - engine: WindowingEngine = ..., - engine_kwargs: WindowingEngineKwargs = ..., - args: tuple[Any, ...] | None = ..., - kwargs: dict[str, Any] | None = ..., - ) -> NDFrameT: ... - def sum( - self, - numeric_only: bool = ..., - *, - engine: WindowingEngine = ..., - engine_kwargs: WindowingEngineKwargs = ..., - ) -> NDFrameT: ... - def max( - self, - numeric_only: bool = ..., - *, - engine: WindowingEngine = ..., - engine_kwargs: WindowingEngineKwargs = ..., - ) -> NDFrameT: ... - def min( - self, - numeric_only: bool = ..., - *, - engine: WindowingEngine = ..., - engine_kwargs: WindowingEngineKwargs = ..., - ) -> NDFrameT: ... - def mean( - self, - numeric_only: bool = ..., - *, - engine: WindowingEngine = ..., - engine_kwargs: WindowingEngineKwargs = ..., - ) -> NDFrameT: ... - def median( - self, - numeric_only: bool = ..., - engine: WindowingEngine = ..., - engine_kwargs: WindowingEngineKwargs = ..., - ) -> NDFrameT: ... - def std( - self, - ddof: int = ..., - numeric_only: bool = ..., - *, - engine: WindowingEngine = ..., - engine_kwargs: WindowingEngineKwargs = ..., - ) -> NDFrameT: ... - def var( - self, - ddof: int = ..., - numeric_only: bool = ..., - *, - engine: WindowingEngine = ..., - engine_kwargs: WindowingEngineKwargs = ..., - ) -> NDFrameT: ... - def sem(self, ddof: int = ..., numeric_only: bool = ...) -> NDFrameT: ... - def skew( - self, - numeric_only: bool = ..., - ) -> NDFrameT: ... - def kurt( - self, - numeric_only: bool = ..., - ) -> NDFrameT: ... - def quantile( - self, - quantile: float, - interpolation: QuantileInterpolation = ..., - numeric_only: bool = ..., - ) -> NDFrameT: ... - def rank( - self, - method: WindowingRankType = ..., - ascending: bool = ..., - pct: bool = ..., - numeric_only: bool = ..., - ) -> NDFrameT: ... - def cov( - self, - other: DataFrame | Series | None = ..., - pairwise: bool | None = ..., - ddof: int = ..., - numeric_only: bool = ..., - ) -> NDFrameT: ... - def corr( - self, - other: DataFrame | Series | None = ..., - pairwise: bool | None = ..., - ddof: int = ..., - numeric_only: bool = ..., - ) -> NDFrameT: ... +from pandas._typing import NDFrameT -class ExpandingGroupby(BaseWindowGroupby, Expanding): ... +class Expanding(RollingAndExpandingMixin[NDFrameT]): ... +class ExpandingGroupby(BaseWindowGroupby[NDFrameT], Expanding[NDFrameT]): ... diff --git a/pandas-stubs/core/window/rolling.pyi b/pandas-stubs/core/window/rolling.pyi index d1d3b675b..06774ef1a 100644 --- a/pandas-stubs/core/window/rolling.pyi +++ b/pandas-stubs/core/window/rolling.pyi @@ -1,20 +1,30 @@ -from collections.abc import Callable +from collections.abc import ( + Callable, + Iterator, +) +import datetime as dt from typing import ( Any, - Generic, overload, ) from pandas import ( DataFrame, + Index, Series, ) from pandas.core.base import SelectionMixin +from pandas.core.indexers import BaseIndexer +from typing_extensions import Self +from pandas._libs.tslibs import BaseOffset from pandas._typing import ( AggFuncTypeBase, AggFuncTypeFrame, AggFuncTypeSeriesToFrame, + AxisInt, + CalculationMethod, + IntervalClosedType, NDFrameT, QuantileInterpolation, WindowingEngine, @@ -22,9 +32,19 @@ from pandas._typing import ( WindowingRankType, ) -class BaseWindow(SelectionMixin[NDFrameT], Generic[NDFrameT]): - def __getattr__(self, attr: str): ... - def __iter__(self): ... +class BaseWindow(SelectionMixin[NDFrameT]): + on: str | Index | None + closed: IntervalClosedType | None + step: int | None + window: int | dt.timedelta | str | BaseOffset | BaseIndexer | None + min_periods: int | None + center: bool | None + win_type: str | None + axis: AxisInt + method: CalculationMethod + def __getitem__(self, key) -> Self: ... + def __getattr__(self, attr: str) -> Self: ... + def __iter__(self) -> Iterator[NDFrameT]: ... @overload def aggregate( self: BaseWindow[Series], func: AggFuncTypeBase, *args: Any, **kwargs: Any @@ -51,14 +71,14 @@ class Window(BaseWindow[NDFrameT]): def sum(self, numeric_only: bool = ..., **kwargs: Any) -> NDFrameT: ... def mean(self, numeric_only: bool = ..., **kwargs: Any) -> NDFrameT: ... def var( - self, ddof: int = ..., numeric_only: bool = ..., *args: Any, **kwargs: Any + self, ddof: int = ..., numeric_only: bool = ..., **kwargs: Any ) -> NDFrameT: ... def std( - self, ddof: int = ..., numeric_only: bool = ..., *args: Any, **kwargs: Any + self, ddof: int = ..., numeric_only: bool = ..., **kwargs: Any ) -> NDFrameT: ... -class RollingAndExpandingMixin(BaseWindow[NDFrameT], Generic[NDFrameT]): - def count(self) -> NDFrameT: ... +class RollingAndExpandingMixin(BaseWindow[NDFrameT]): + def count(self, numeric_only: bool = ...) -> NDFrameT: ... def apply( self, func: Callable[..., Any], @@ -71,28 +91,24 @@ class RollingAndExpandingMixin(BaseWindow[NDFrameT], Generic[NDFrameT]): def sum( self, numeric_only: bool = ..., - *, engine: WindowingEngine = ..., engine_kwargs: WindowingEngineKwargs = ..., ) -> NDFrameT: ... def max( self, numeric_only: bool = ..., - *, engine: WindowingEngine = ..., engine_kwargs: WindowingEngineKwargs = ..., ) -> NDFrameT: ... def min( self, numeric_only: bool = ..., - *, engine: WindowingEngine = ..., engine_kwargs: WindowingEngineKwargs = ..., ) -> NDFrameT: ... def mean( self, numeric_only: bool = ..., - *, engine: WindowingEngine = ..., engine_kwargs: WindowingEngineKwargs = ..., ) -> NDFrameT: ... @@ -106,7 +122,6 @@ class RollingAndExpandingMixin(BaseWindow[NDFrameT], Generic[NDFrameT]): self, ddof: int = ..., numeric_only: bool = ..., - *, engine: WindowingEngine = ..., engine_kwargs: WindowingEngineKwargs = ..., ) -> NDFrameT: ... @@ -114,20 +129,15 @@ class RollingAndExpandingMixin(BaseWindow[NDFrameT], Generic[NDFrameT]): self, ddof: int = ..., numeric_only: bool = ..., - *, engine: WindowingEngine = ..., engine_kwargs: WindowingEngineKwargs = ..., ) -> NDFrameT: ... def skew(self, numeric_only: bool = ...) -> NDFrameT: ... - def sem( - self, - ddof: int = ..., - numeric_only: bool = ..., - ) -> NDFrameT: ... + def sem(self, ddof: int = ..., numeric_only: bool = ...) -> NDFrameT: ... def kurt(self, numeric_only: bool = ...) -> NDFrameT: ... def quantile( self, - quantile: float, + q: float, interpolation: QuantileInterpolation = ..., numeric_only: bool = ..., ) -> NDFrameT: ... @@ -153,15 +163,5 @@ class RollingAndExpandingMixin(BaseWindow[NDFrameT], Generic[NDFrameT]): numeric_only: bool = ..., ) -> NDFrameT: ... -class Rolling(RollingAndExpandingMixin[NDFrameT]): - def apply( - self, - func: Callable[..., Any], - raw: bool = ..., - engine: WindowingEngine = ..., - engine_kwargs: WindowingEngineKwargs | None = ..., - args: tuple[Any, ...] | None = ..., - kwargs: dict[str, Any] | None = ..., - ) -> NDFrameT: ... - -class RollingGroupby(BaseWindowGroupby[NDFrameT], Rolling): ... +class Rolling(RollingAndExpandingMixin[NDFrameT]): ... +class RollingGroupby(BaseWindowGroupby[NDFrameT], Rolling[NDFrameT]): ... diff --git a/pandas-stubs/util/_decorators.pyi b/pandas-stubs/util/_decorators.pyi index f6f50ee41..ecc2a69f7 100644 --- a/pandas-stubs/util/_decorators.pyi +++ b/pandas-stubs/util/_decorators.pyi @@ -1,25 +1 @@ -from collections.abc import ( - Callable, - Mapping, -) -from typing import Any - -def deprecate( - name: str, - alternative: Callable[..., Any], - version: str, - alt_name: str | None = ..., - klass: type[Warning] | None = ..., - stacklevel: int = ..., - msg: str | None = ..., -) -> Callable[..., Any]: ... -def deprecate_kwarg( - old_arg_name: str, - new_arg_name: str | None, - mapping: Mapping[Any, Any] | Callable[[Any], Any] | None = ..., - stacklevel: int = ..., -) -> Callable[..., Any]: ... -def rewrite_axis_style_signature( - name: str, extra_params: list[tuple[str, Any]] -) -> Callable[..., Any]: ... -def indent(text: str | None, indents: int = ...) -> str: ... +from pandas._libs.properties import cache_readonly as cache_readonly diff --git a/pyproject.toml b/pyproject.toml index e44933eec..d10abb29c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -39,7 +39,7 @@ mypy = "1.8.0" pandas = "2.1.4" pyarrow = ">=10.0.1" pytest = ">=7.1.2" -pyright = ">=1.1.344" +pyright = "==1.1.346" poethepoet = ">=0.16.5" loguru = ">=0.6.0" typing-extensions = ">=4.4.0" diff --git a/tests/__init__.py b/tests/__init__.py index 847f7f52a..166b5a3e5 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -13,6 +13,7 @@ ) import pandas as pd +from pandas.core.groupby.groupby import BaseGroupBy from pandas.util.version import Version import pytest @@ -33,6 +34,8 @@ def check(actual: T, klass: type, dtype: type | None = None, attr: str = "left") value = actual.iloc[0] elif isinstance(actual, pd.Index): value = actual[0] # type: ignore[assignment] + elif isinstance(actual, BaseGroupBy): + value = actual.obj elif hasattr(actual, "__iter__"): value = next(iter(actual)) # pyright: ignore[reportGeneralTypeIssues] else: diff --git a/tests/test_frame.py b/tests/test_frame.py index 8056e2835..cd4ace1ff 100644 --- a/tests/test_frame.py +++ b/tests/test_frame.py @@ -30,7 +30,10 @@ import numpy.typing as npt import pandas as pd from pandas._testing import ensure_clean -from pandas.core.resample import Resampler # noqa: F401 +from pandas.core.resample import ( + DatetimeIndexResampler, + Resampler, +) from pandas.core.series import Series import pytest from typing_extensions import ( @@ -953,7 +956,7 @@ def test_types_groupby() -> None: df6: pd.DataFrame = df.groupby(by=["col1", "col2"]).nunique() with pytest_warns_bounded( FutureWarning, - "The provided callable is currently using", + "(The provided callable is currently using|The behavior of DataFrame.sum with)", lower="2.0.99", ): df7: pd.DataFrame = df.groupby(by="col1").apply(sum) @@ -1249,7 +1252,7 @@ def test_types_agg() -> None: with pytest_warns_bounded( FutureWarning, - r"The provided callable is currently using", + r"The provided callable <(built-in function (min|max|mean)|function mean at 0x\w+)> is currently using", lower="2.0.99", ): check(assert_type(df.agg(min), pd.Series), pd.Series) @@ -1382,6 +1385,8 @@ def test_types_resample() -> None: df.resample("M", on="date") df.resample("20min", origin="epoch", offset=pd.Timedelta(2, "minutes"), on="date") df.resample("20min", origin="epoch", offset=datetime.timedelta(2), on="date") + df.resample(pd.Timedelta(20, "minutes"), origin="epoch", on="date") + df.resample(datetime.timedelta(minutes=20), origin="epoch", on="date") def test_types_to_dict() -> None: @@ -1420,8 +1425,9 @@ def test_types_from_dict() -> None: def test_pipe() -> None: - def foo(df: pd.DataFrame) -> pd.DataFrame: - return pd.DataFrame(df) + def resampler_foo(resampler: Resampler[pd.DataFrame]) -> pd.DataFrame: + assert isinstance(resampler, Resampler) + return pd.DataFrame(resampler) with pytest_warns_bounded(FutureWarning, "'M' is deprecated", lower="2.1.99"): val = ( @@ -1433,9 +1439,12 @@ def foo(df: pd.DataFrame) -> pd.DataFrame: ) .assign(week_starting=pd.date_range("01/01/2018", periods=8, freq="W")) .resample("M", on="week_starting") - .pipe(foo) + .pipe(resampler_foo) ) + def foo(df: pd.DataFrame) -> pd.DataFrame: + return pd.DataFrame(df) + df = pd.DataFrame({"a": [1], "b": [2]}) check(assert_type(val, pd.DataFrame), pd.DataFrame) @@ -1702,7 +1711,7 @@ def test_types_regressions() -> None: df = pd.DataFrame({"A": [1, 2, 3], "B": [5, 6, 7]}) with pytest_warns_bounded( FutureWarning, - "The 'closed' keyword in DatetimeIndex construction is deprecated", + "The '(closed|normalize)' keyword in DatetimeIndex construction is deprecated", lower="2.0.99", ): pd.DatetimeIndex( @@ -2685,12 +2694,15 @@ def test_resample_150_changes() -> None: frame = pd.DataFrame(np.random.standard_normal((700, 1)), index=idx, columns=["a"]) with pytest_warns_bounded(FutureWarning, "'M' is deprecated", lower="2.1.99"): resampler = frame.resample("M", group_keys=True) - assert_type(resampler, "Resampler[pd.DataFrame]") + check( + assert_type(resampler, "DatetimeIndexResampler[pd.DataFrame]"), + DatetimeIndexResampler, + ) def f(s: pd.DataFrame) -> pd.Series: return s.mean() - check(assert_type(resampler.apply(f), Union[pd.Series, pd.DataFrame]), pd.DataFrame) + check(assert_type(resampler.apply(f), pd.DataFrame), pd.DataFrame) def test_df_accepting_dicts_iterator() -> None: diff --git a/tests/test_groupby.py b/tests/test_groupby.py new file mode 100644 index 000000000..5b5560162 --- /dev/null +++ b/tests/test_groupby.py @@ -0,0 +1,952 @@ +from __future__ import annotations + +from collections.abc import Iterator +import datetime as dt +from typing import ( + TYPE_CHECKING, + Literal, + Union, + cast, +) + +import numpy as np +from pandas import ( + DataFrame, + DatetimeIndex, + Index, + Series, + Timedelta, + date_range, +) +from pandas.core.groupby.generic import ( + DataFrameGroupBy, + SeriesGroupBy, +) +from pandas.core.resample import ( + DatetimeIndexResamplerGroupby, + Resampler, +) +from pandas.core.window import ( + ExpandingGroupby, + ExponentialMovingWindowGroupby, + RollingGroupby, +) +from typing_extensions import assert_type + +from tests import ( + PD_LTE_21, + TYPE_CHECKING_INVALID_USAGE, + check, + pytest_warns_bounded, +) + +if TYPE_CHECKING: + from pandas.core.groupby.groupby import _ResamplerGroupBy # noqa: F401 + +DR = date_range("1999-1-1", periods=365, freq="D") +DF_ = DataFrame(np.random.standard_normal((365, 1)), index=DR) +BY = Series(np.random.choice([1, 2], 365), index=DR) +S = DF_.iloc[:, 0] +DF = DataFrame({"col1": S, "col2": S, "col3": BY}) +GB_DF = DF.groupby("col3") +GB_S = cast("SeriesGroupBy[float, int]", GB_DF.col1) + +M = "M" if PD_LTE_21 else "ME" + + +def test_frame_groupby_resample() -> None: + # basic + check( + assert_type(GB_DF.resample(M), "_ResamplerGroupBy[DataFrame]"), + DatetimeIndexResamplerGroupby, + DataFrame, + ) + check( + assert_type(GB_DF.resample(Timedelta(days=30)), "_ResamplerGroupBy[DataFrame]"), + DatetimeIndexResamplerGroupby, + DataFrame, + ) + check( + assert_type( + GB_DF.resample(dt.timedelta(days=30)), "_ResamplerGroupBy[DataFrame]" + ), + DatetimeIndexResamplerGroupby, + DataFrame, + ) + + # props + check(assert_type(GB_DF.resample(M).obj, DataFrame), DataFrame) + check(assert_type(GB_DF.resample(M).ax, Index), DatetimeIndex) + + # agg funcs + check(assert_type(GB_DF.resample(M).sum(), DataFrame), DataFrame) + check(assert_type(GB_DF.resample(M).prod(), DataFrame), DataFrame) + check(assert_type(GB_DF.resample(M).min(), DataFrame), DataFrame) + check(assert_type(GB_DF.resample(M).max(), DataFrame), DataFrame) + check(assert_type(GB_DF.resample(M).first(), DataFrame), DataFrame) + check(assert_type(GB_DF.resample(M).last(), DataFrame), DataFrame) + check(assert_type(GB_DF.resample(M).mean(), DataFrame), DataFrame) + check(assert_type(GB_DF.resample(M).sum(), DataFrame), DataFrame) + check(assert_type(GB_DF.resample(M).median(), DataFrame), DataFrame) + check(assert_type(GB_DF.resample(M).ohlc(), DataFrame), DataFrame) + check(assert_type(GB_DF.resample(M).nunique(), DataFrame), DataFrame) + + # quantile + check(assert_type(GB_DF.resample(M).quantile(0.5), DataFrame), DataFrame) + check(assert_type(GB_DF.resample(M).quantile([0.5, 0.7]), DataFrame), DataFrame) + check( + assert_type(GB_DF.resample(M).quantile(np.array([0.5, 0.7])), DataFrame), + DataFrame, + ) + + # std / var + check(assert_type(GB_DF.resample(M).std(), DataFrame), DataFrame) + check(assert_type(GB_DF.resample(M).var(2), DataFrame), DataFrame) + + # size / count + check(assert_type(GB_DF.resample(M).size(), "Series[int]"), Series, np.integer) + check(assert_type(GB_DF.resample(M).count(), DataFrame), DataFrame) + + # filling + check(assert_type(GB_DF.resample(M).ffill(), DataFrame), DataFrame) + check(assert_type(GB_DF.resample(M).nearest(), DataFrame), DataFrame) + check(assert_type(GB_DF.resample(M).bfill(), DataFrame), DataFrame) + + # fillna (deprecated) + if TYPE_CHECKING_INVALID_USAGE: + GB_DF.resample(M).fillna("ffill") # type: ignore[operator] # pyright: ignore + + # aggregate / apply + with pytest_warns_bounded( + FutureWarning, + r"The provided callable is currently using ", + lower="2.0.99", + ): + check(assert_type(GB_DF.resample(M).aggregate(np.sum), DataFrame), DataFrame) + check(assert_type(GB_DF.resample(M).agg(np.sum), DataFrame), DataFrame) + check(assert_type(GB_DF.resample(M).apply(np.sum), DataFrame), DataFrame) + check( + assert_type(GB_DF.resample(M).aggregate([np.sum, np.mean]), DataFrame), + DataFrame, + ) + check( + assert_type(GB_DF.resample(M).aggregate(["sum", np.mean]), DataFrame), + DataFrame, + ) + check( + assert_type( + GB_DF.resample(M).aggregate({"col1": "sum", "col2": np.mean}), + DataFrame, + ), + DataFrame, + ) + check( + assert_type( + GB_DF.resample(M).aggregate( + {"col1": ["sum", np.mean], "col2": np.mean} + ), + DataFrame, + ), + DataFrame, + ) + + def f(val: DataFrame) -> Series: + return val.mean() + + check(assert_type(GB_DF.resample(M).aggregate(f), DataFrame), DataFrame) + + # aggregate combinations + def df2frame(val: DataFrame) -> DataFrame: + return DataFrame(val) + + def df2series(val: DataFrame) -> Series: + return val.mean() + + def df2scalar(val: DataFrame) -> float: + return float(val.mean().mean()) + + with pytest_warns_bounded( + FutureWarning, + r"The provided callable is currently using ", + lower="2.0.99", + ): + check(GB_DF.resample(M).aggregate(np.sum), DataFrame) + check(GB_DF.resample(M).aggregate([np.mean]), DataFrame) + check(GB_DF.resample(M).aggregate(["sum", np.mean]), DataFrame) + check(GB_DF.resample(M).aggregate({"col1": np.sum}), DataFrame) + check( + GB_DF.resample(M).aggregate({"col1": np.sum, "col2": np.mean}), + DataFrame, + ) + check( + GB_DF.resample(M).aggregate({"col1": [np.sum], "col2": ["sum", np.mean]}), + DataFrame, + ) + check( + GB_DF.resample(M).aggregate({"col1": np.sum, "col2": ["sum", np.mean]}), + DataFrame, + ) + check( + GB_DF.resample(M).aggregate({"col1": "sum", "col2": [np.mean]}), + DataFrame, + ) + check(GB_DF.resample(M).aggregate("sum"), DataFrame) + check(GB_DF.resample(M).aggregate(df2frame), DataFrame) + check(GB_DF.resample(M).aggregate(df2series), DataFrame) + check(GB_DF.resample(M).aggregate(df2scalar), DataFrame) + + # asfreq + check(assert_type(GB_DF.resample(M).asfreq(-1.0), DataFrame), DataFrame) + + # getattr + check( + assert_type(GB_DF.resample(M).col1, "_ResamplerGroupBy[DataFrame]"), + DatetimeIndexResamplerGroupby, + ) + + # getitem + check( + assert_type(GB_DF.resample(M)["col1"], "_ResamplerGroupBy[DataFrame]"), + DatetimeIndexResamplerGroupby, + ) + check( + assert_type( + GB_DF.resample(M)[["col1", "col2"]], "_ResamplerGroupBy[DataFrame]" + ), + DatetimeIndexResamplerGroupby, + ) + + # interpolate + check(assert_type(GB_DF.resample(M).interpolate(), DataFrame), DataFrame) + check( + assert_type(GB_DF.resample(M).interpolate(method="linear"), DataFrame), + DataFrame, + ) + check(assert_type(GB_DF.resample(M).interpolate(inplace=True), None), type(None)) + + # pipe + def g(val: Resampler[DataFrame]) -> DataFrame: + assert isinstance(val, Resampler) + return val.mean() + + check(assert_type(GB_DF.resample(M).pipe(g), DataFrame), DataFrame) + + def h(val: Resampler[DataFrame]) -> Series: + assert isinstance(val, Resampler) + return val.mean().mean() + + check(assert_type(GB_DF.resample(M).pipe(h), Series), Series) + + def i(val: Resampler[DataFrame]) -> float: + assert isinstance(val, Resampler) + return float(val.mean().mean().mean()) + + check(assert_type(GB_DF.resample(M).pipe(i), float), float) + + # transform + def j(val: Series) -> Series: + return -1 * val + + check(assert_type(GB_DF.resample(M).transform(j), DataFrame), DataFrame) + + +def test_series_groupby_resample() -> None: + # basic + check( + assert_type(GB_S.resample(M), "_ResamplerGroupBy[Series[float]]"), + DatetimeIndexResamplerGroupby, + Series, + ) + + # props + check(assert_type(GB_S.resample(M).obj, "Series[float]"), Series, float) + check(assert_type(GB_S.resample(M).ax, Index), DatetimeIndex) + + # agg funcs + check(assert_type(GB_S.resample(M).sum(), "Series[float]"), Series, float) + check(assert_type(GB_S.resample(M).prod(), "Series[float]"), Series, float) + check(assert_type(GB_S.resample(M).min(), "Series[float]"), Series, float) + check(assert_type(GB_S.resample(M).max(), "Series[float]"), Series, float) + check(assert_type(GB_S.resample(M).first(), "Series[float]"), Series, float) + check(assert_type(GB_S.resample(M).last(), "Series[float]"), Series, float) + check(assert_type(GB_S.resample(M).mean(), "Series[float]"), Series, float) + check(assert_type(GB_S.resample(M).sum(), "Series[float]"), Series, float) + check(assert_type(GB_S.resample(M).median(), "Series[float]"), Series, float) + check(assert_type(GB_S.resample(M).ohlc(), DataFrame), DataFrame) + check(assert_type(GB_S.resample(M).nunique(), "Series[int]"), Series, np.integer) + + # quantile + check(assert_type(GB_S.resample(M).quantile(0.5), "Series[float]"), Series, float) + check( + assert_type(GB_S.resample(M).quantile([0.5, 0.7]), "Series[float]"), + Series, + float, + ) + check( + assert_type(GB_S.resample(M).quantile(np.array([0.5, 0.7])), "Series[float]"), + Series, + ) + + # std / var + check(assert_type(GB_S.resample(M).std(), "Series[float]"), Series, float) + check(assert_type(GB_S.resample(M).var(2), "Series[float]"), Series, float) + + # size / count + check(assert_type(GB_S.resample(M).size(), "Series[int]"), Series, np.integer) + check(assert_type(GB_S.resample(M).count(), "Series[int]"), Series, np.integer) + + # filling + check(assert_type(GB_S.resample(M).ffill(), "Series[float]"), Series, float) + check(assert_type(GB_S.resample(M).nearest(), "Series[float]"), Series, float) + check(assert_type(GB_S.resample(M).bfill(), "Series[float]"), Series, float) + + # fillna (deprecated) + if TYPE_CHECKING_INVALID_USAGE: + GB_S.resample(M).fillna("ffill") # type: ignore[operator] # pyright: ignore + + # aggregate + with pytest_warns_bounded( + FutureWarning, + r"The provided callable is currently using ", + lower="2.0.99", + ): + check( + assert_type(GB_S.resample(M).aggregate(np.sum), Union[DataFrame, Series]), + Series, + ) + check( + assert_type(GB_S.resample(M).agg(np.sum), Union[DataFrame, Series]), Series + ) + check( + assert_type(GB_S.resample(M).apply(np.sum), Union[DataFrame, Series]), + Series, + ) + check( + assert_type( + GB_S.resample(M).aggregate([np.sum, np.mean]), Union[DataFrame, Series] + ), + DataFrame, + ) + check( + assert_type( + GB_S.resample(M).aggregate(["sum", np.mean]), Union[DataFrame, Series] + ), + DataFrame, + ) + check( + assert_type( + GB_S.resample(M).aggregate({"col1": "sum", "col2": np.mean}), + Union[DataFrame, Series], + ), + DataFrame, + ) + + def f(val: Series) -> float: + return val.mean() + + check(assert_type(GB_S.resample(M).aggregate(f), Union[DataFrame, Series]), Series) + + # asfreq + check(assert_type(GB_S.resample(M).asfreq(-1.0), "Series[float]"), Series, float) + + # interpolate + check(assert_type(GB_S.resample(M).interpolate(), "Series[float]"), Series, float) + check(assert_type(GB_S.resample(M).interpolate(inplace=True), None), type(None)) + + # pipe + def g(val: Resampler[Series]) -> float: + assert isinstance(val, Resampler) + return float(val.mean().mean()) + + check(assert_type(GB_S.resample(M).pipe(g), float), float) + + # transform + def h(val: Series) -> Series: + return -1 * val + + check(assert_type(GB_S.resample(M).transform(h), Series), Series) + + # aggregate combinations + def s2series(val: Series) -> Series: + return Series(val) + + def s2scalar(val: Series) -> float: + return float(val.mean()) + + with pytest_warns_bounded( + FutureWarning, + r"The provided callable is currently using ", + lower="2.0.99", + ): + check(GB_S.resample(M).aggregate(np.sum), Series) + check(GB_S.resample(M).aggregate([np.mean]), DataFrame) + check(GB_S.resample(M).aggregate(["sum", np.mean]), DataFrame) + check(GB_S.resample(M).aggregate({"sum": np.sum}), DataFrame) + check(GB_S.resample(M).aggregate({"sum": np.sum, "mean": np.mean}), DataFrame) + check(GB_S.resample(M).aggregate("sum"), Series) + check(GB_S.resample(M).aggregate(s2series), Series) + check(GB_S.resample(M).aggregate(s2scalar), Series) + + +def test_frame_groupby_rolling() -> None: + # basic + check( + assert_type(GB_DF.rolling(1), "RollingGroupby[DataFrame]"), + RollingGroupby, + DataFrame, + ) + + # props + check(assert_type(GB_DF.rolling(1).obj, DataFrame), DataFrame) + check(assert_type(GB_DF.rolling(1).on, Union[str, Index, None]), type(None)) + check(assert_type(GB_DF.rolling(1).method, Literal["single", "table"]), str) + check(assert_type(GB_DF.rolling(1).axis, int), int) + + # agg funcs + check(assert_type(GB_DF.rolling(1).sum(), DataFrame), DataFrame) + check(assert_type(GB_DF.rolling(1).min(), DataFrame), DataFrame) + check(assert_type(GB_DF.rolling(1).max(), DataFrame), DataFrame) + check(assert_type(GB_DF.rolling(1).mean(), DataFrame), DataFrame) + check(assert_type(GB_DF.rolling(1).sum(), DataFrame), DataFrame) + check(assert_type(GB_DF.rolling(1).median(), DataFrame), DataFrame) + + # quantile / std / var / count + check(assert_type(GB_DF.rolling(1).quantile(0.5), DataFrame), DataFrame) + check(assert_type(GB_DF.rolling(1).std(), DataFrame), DataFrame) + check(assert_type(GB_DF.rolling(1).var(2), DataFrame), DataFrame) + check(assert_type(GB_DF.rolling(1).count(), DataFrame), DataFrame) + + # aggregate / apply + with pytest_warns_bounded( + FutureWarning, + r"The provided callable is currently using ", + lower="2.0.99", + ): + check(assert_type(GB_DF.rolling(1).aggregate(np.sum), DataFrame), DataFrame) + check(assert_type(GB_DF.rolling(1).agg(np.sum), DataFrame), DataFrame) + check(assert_type(GB_DF.rolling(1).apply(np.sum), DataFrame), DataFrame) + check( + assert_type(GB_DF.rolling(1).aggregate([np.sum, np.mean]), DataFrame), + DataFrame, + ) + check( + assert_type(GB_DF.rolling(1).aggregate(["sum", np.mean]), DataFrame), + DataFrame, + ) + check( + assert_type( + GB_DF.rolling(1).aggregate({"col1": "sum", "col2": np.mean}), + DataFrame, + ), + DataFrame, + ) + check( + assert_type( + GB_DF.rolling(1).aggregate({"col1": ["sum", np.mean], "col2": np.mean}), + DataFrame, + ), + DataFrame, + ) + + def f(val: DataFrame) -> Series: + return val.mean() + + check(assert_type(GB_DF.rolling(1).aggregate(f), DataFrame), DataFrame) + + # aggregate combinations + def df2series(val: DataFrame) -> Series: + assert isinstance(val, Series) + return val.mean() + + def df2scalar(val: DataFrame) -> float: + return float(val.mean().mean()) + + with pytest_warns_bounded( + FutureWarning, + r"The provided callable is currently using ", + lower="2.0.99", + ): + check(GB_DF.rolling(1).aggregate(np.sum), DataFrame) + check(GB_DF.rolling(1).aggregate([np.mean]), DataFrame) + check(GB_DF.rolling(1).aggregate(["sum", np.mean]), DataFrame) + check(GB_DF.rolling(1).aggregate({"col1": np.sum}), DataFrame) + check( + GB_DF.rolling(1).aggregate({"col1": np.sum, "col2": np.mean}), + DataFrame, + ) + check( + GB_DF.rolling(1).aggregate({"col1": [np.sum], "col2": ["sum", np.mean]}), + DataFrame, + ) + check( + GB_DF.rolling(1).aggregate({"col1": np.sum, "col2": ["sum", np.mean]}), + DataFrame, + ) + check( + GB_DF.rolling(1).aggregate({"col1": "sum", "col2": [np.mean]}), + DataFrame, + ) + check(GB_DF.rolling(1).aggregate("sum"), DataFrame) + check(GB_DF.rolling(1).aggregate(df2series), DataFrame) + check(GB_DF.rolling(1).aggregate(df2scalar), DataFrame) + + # getattr + check( + assert_type(GB_DF.rolling(1).col1, "RollingGroupby[DataFrame]"), + RollingGroupby, + ) + + # getitem + check( + assert_type(GB_DF.rolling(1)["col1"], "RollingGroupby[DataFrame]"), + RollingGroupby, + ) + check( + assert_type(GB_DF.rolling(1)[["col1", "col2"]], "RollingGroupby[DataFrame]"), + RollingGroupby, + ) + + # iter + iterator = iter(GB_DF.rolling(1)) + check(assert_type(iterator, Iterator[DataFrame]), Iterator) + check(assert_type(next(iterator), DataFrame), DataFrame) + check(assert_type(list(GB_DF.rolling(1)), list[DataFrame]), list, DataFrame) + + +def test_series_groupby_rolling() -> None: + # basic + check( + assert_type(GB_S.rolling(1), "RollingGroupby[Series[float]]"), + RollingGroupby, + Series, + ) + + # props + check(assert_type(GB_S.rolling(1).obj, "Series[float]"), Series, float) + + # agg funcs + check(assert_type(GB_S.rolling(1).sum(), "Series[float]"), Series, float) + check(assert_type(GB_S.rolling(1).min(), "Series[float]"), Series, float) + check(assert_type(GB_S.rolling(1).max(), "Series[float]"), Series, float) + check(assert_type(GB_S.rolling(1).mean(), "Series[float]"), Series, float) + check(assert_type(GB_S.rolling(1).sum(), "Series[float]"), Series, float) + check(assert_type(GB_S.rolling(1).median(), "Series[float]"), Series, float) + + # quantile / std / var / count + check(assert_type(GB_S.rolling(1).quantile(0.5), "Series[float]"), Series, float) + check(assert_type(GB_S.rolling(1).std(), "Series[float]"), Series, float) + check(assert_type(GB_S.rolling(1).var(2), "Series[float]"), Series, float) + check(assert_type(GB_S.rolling(1).count(), "Series[float]"), Series, float) + + # aggregate + with pytest_warns_bounded( + FutureWarning, + r"The provided callable is currently using ", + lower="2.0.99", + ): + check(assert_type(GB_S.rolling(1).aggregate("sum"), Series), Series) + check(assert_type(GB_S.rolling(1).aggregate(np.sum), Series), Series) + check(assert_type(GB_S.rolling(1).agg(np.sum), Series), Series) + check( + assert_type(GB_S.rolling(1).apply(np.sum), "Series[float]"), Series, float + ) + check( + assert_type(GB_S.rolling(1).aggregate([np.sum, np.mean]), DataFrame), + DataFrame, + ) + check( + assert_type(GB_S.rolling(1).aggregate(["sum", np.mean]), DataFrame), + DataFrame, + ) + check( + assert_type( + GB_S.rolling(1).aggregate({"col1": "sum", "col2": np.mean}), DataFrame + ), + DataFrame, + ) + + def f(val: Series) -> float: + return val.mean() + + check(assert_type(GB_S.rolling(1).aggregate(f), Series), Series) + + def s2scalar(val: Series) -> float: + return float(val.mean()) + + check(assert_type(GB_S.rolling(1).aggregate(s2scalar), Series), Series) + + # iter + iterator = iter(GB_S.rolling(1)) + check(assert_type(iterator, "Iterator[Series[float]]"), Iterator) + check(assert_type(next(iterator), "Series[float]"), Series, float) + check(assert_type(list(GB_S.rolling(1)), "list[Series[float]]"), list, Series) + + +def test_frame_groupby_expanding() -> None: + # basic + check( + assert_type(GB_DF.expanding(1), "ExpandingGroupby[DataFrame]"), + ExpandingGroupby, + DataFrame, + ) + + # props + check(assert_type(GB_DF.expanding(1).obj, DataFrame), DataFrame) + check(assert_type(GB_DF.expanding(1).on, Union[str, Index, None]), type(None)) + check(assert_type(GB_DF.expanding(1).method, Literal["single", "table"]), str) + check(assert_type(GB_DF.expanding(1).axis, int), int) + + # agg funcs + check(assert_type(GB_DF.expanding(1).sum(), DataFrame), DataFrame) + check(assert_type(GB_DF.expanding(1).min(), DataFrame), DataFrame) + check(assert_type(GB_DF.expanding(1).max(), DataFrame), DataFrame) + check(assert_type(GB_DF.expanding(1).mean(), DataFrame), DataFrame) + check(assert_type(GB_DF.expanding(1).sum(), DataFrame), DataFrame) + check(assert_type(GB_DF.expanding(1).median(), DataFrame), DataFrame) + + # quantile / std / var / count + check(assert_type(GB_DF.expanding(1).quantile(0.5), DataFrame), DataFrame) + check(assert_type(GB_DF.expanding(1).std(), DataFrame), DataFrame) + check(assert_type(GB_DF.expanding(1).var(2), DataFrame), DataFrame) + check(assert_type(GB_DF.expanding(1).count(), DataFrame), DataFrame) + + # aggregate / apply + with pytest_warns_bounded( + FutureWarning, + r"The provided callable is currently using ", + lower="2.0.99", + ): + check(assert_type(GB_DF.expanding(1).aggregate(np.sum), DataFrame), DataFrame) + check(assert_type(GB_DF.expanding(1).agg(np.sum), DataFrame), DataFrame) + check(assert_type(GB_DF.expanding(1).apply(np.sum), DataFrame), DataFrame) + check( + assert_type(GB_DF.expanding(1).aggregate([np.sum, np.mean]), DataFrame), + DataFrame, + ) + check( + assert_type(GB_DF.expanding(1).aggregate(["sum", np.mean]), DataFrame), + DataFrame, + ) + check( + assert_type( + GB_DF.expanding(1).aggregate({"col1": "sum", "col2": np.mean}), + DataFrame, + ), + DataFrame, + ) + check( + assert_type( + GB_DF.expanding(1).aggregate( + {"col1": ["sum", np.mean], "col2": np.mean} + ), + DataFrame, + ), + DataFrame, + ) + + def f(val: DataFrame) -> Series: + return val.mean() + + check(assert_type(GB_DF.expanding(1).aggregate(f), DataFrame), DataFrame) + + # aggregate combinations + def df2series(val: DataFrame) -> Series: + assert isinstance(val, Series) + return val.mean() + + def df2scalar(val: DataFrame) -> float: + return float(val.mean().mean()) + + with pytest_warns_bounded( + FutureWarning, + r"The provided callable is currently using ", + lower="2.0.99", + ): + check(GB_DF.expanding(1).aggregate(np.sum), DataFrame) + check(GB_DF.expanding(1).aggregate([np.mean]), DataFrame) + check(GB_DF.expanding(1).aggregate(["sum", np.mean]), DataFrame) + check(GB_DF.expanding(1).aggregate({"col1": np.sum}), DataFrame) + check( + GB_DF.expanding(1).aggregate({"col1": np.sum, "col2": np.mean}), + DataFrame, + ) + check( + GB_DF.expanding(1).aggregate({"col1": [np.sum], "col2": ["sum", np.mean]}), + DataFrame, + ) + check( + GB_DF.expanding(1).aggregate({"col1": np.sum, "col2": ["sum", np.mean]}), + DataFrame, + ) + check( + GB_DF.expanding(1).aggregate({"col1": "sum", "col2": [np.mean]}), + DataFrame, + ) + check(GB_DF.expanding(1).aggregate("sum"), DataFrame) + check(GB_DF.expanding(1).aggregate(df2series), DataFrame) + check(GB_DF.expanding(1).aggregate(df2scalar), DataFrame) + + # getattr + check( + assert_type(GB_DF.expanding(1).col1, "ExpandingGroupby[DataFrame]"), + ExpandingGroupby, + ) + + # getitem + check( + assert_type(GB_DF.expanding(1)["col1"], "ExpandingGroupby[DataFrame]"), + ExpandingGroupby, + ) + check( + assert_type( + GB_DF.expanding(1)[["col1", "col2"]], "ExpandingGroupby[DataFrame]" + ), + ExpandingGroupby, + ) + + # iter + iterator = iter(GB_DF.expanding(1)) + check(assert_type(iterator, Iterator[DataFrame]), Iterator) + check(assert_type(next(iterator), DataFrame), DataFrame) + check(assert_type(list(GB_DF.expanding(1)), list[DataFrame]), list, DataFrame) + + +def test_series_groupby_expanding() -> None: + # basic + check( + assert_type(GB_S.expanding(1), "ExpandingGroupby[Series[float]]"), + ExpandingGroupby, + Series, + ) + + # props + check(assert_type(GB_S.expanding(1).obj, "Series[float]"), Series, float) + + # agg funcs + check(assert_type(GB_S.expanding(1).sum(), "Series[float]"), Series, float) + check(assert_type(GB_S.expanding(1).min(), "Series[float]"), Series, float) + check(assert_type(GB_S.expanding(1).max(), "Series[float]"), Series, float) + check(assert_type(GB_S.expanding(1).mean(), "Series[float]"), Series, float) + check(assert_type(GB_S.expanding(1).sum(), "Series[float]"), Series, float) + check(assert_type(GB_S.expanding(1).median(), "Series[float]"), Series, float) + + # quantile / std / var / count + check(assert_type(GB_S.expanding(1).quantile(0.5), "Series[float]"), Series, float) + check(assert_type(GB_S.expanding(1).std(), "Series[float]"), Series, float) + check(assert_type(GB_S.expanding(1).var(2), "Series[float]"), Series, float) + check(assert_type(GB_S.expanding(1).count(), "Series[float]"), Series, float) + + # aggregate + with pytest_warns_bounded( + FutureWarning, + r"The provided callable is currently using ", + lower="2.0.99", + ): + check(assert_type(GB_S.expanding(1).aggregate("sum"), Series), Series) + check(assert_type(GB_S.expanding(1).aggregate(np.sum), Series), Series) + check(assert_type(GB_S.expanding(1).agg(np.sum), Series), Series) + check( + assert_type(GB_S.expanding(1).apply(np.sum), "Series[float]"), Series, float + ) + check( + assert_type(GB_S.expanding(1).aggregate([np.sum, np.mean]), DataFrame), + DataFrame, + ) + check( + assert_type(GB_S.expanding(1).aggregate(["sum", np.mean]), DataFrame), + DataFrame, + ) + check( + assert_type( + GB_S.expanding(1).aggregate({"col1": "sum", "col2": np.mean}), DataFrame + ), + DataFrame, + ) + + def f(val: Series) -> float: + return val.mean() + + check(assert_type(GB_S.expanding(1).aggregate(f), Series), Series) + + def s2scalar(val: Series) -> float: + return float(val.mean()) + + check(assert_type(GB_S.expanding(1).aggregate(s2scalar), Series), Series) + + # iter + iterator = iter(GB_S.expanding(1)) + check(assert_type(iterator, "Iterator[Series[float]]"), Iterator) + check(assert_type(next(iterator), "Series[float]"), Series, float) + check(assert_type(list(GB_S.expanding(1)), "list[Series[float]]"), list, Series) + + +def test_frame_groupby_ewm() -> None: + # basic + check( + assert_type(GB_DF.ewm(1), "ExponentialMovingWindowGroupby[DataFrame]"), + ExponentialMovingWindowGroupby, + DataFrame, + ) + + # props + check(assert_type(GB_DF.ewm(1).obj, DataFrame), DataFrame) + check(assert_type(GB_DF.ewm(1).on, Union[str, Index, None]), type(None)) + check(assert_type(GB_DF.ewm(1).method, Literal["single", "table"]), str) + check(assert_type(GB_DF.ewm(1).axis, int), int) + + # agg funcs + check(assert_type(GB_DF.ewm(1).sum(), DataFrame), DataFrame) + check(assert_type(GB_DF.ewm(1).mean(), DataFrame), DataFrame) + check(assert_type(GB_DF.ewm(1).sum(), DataFrame), DataFrame) + + # std / var + check(assert_type(GB_DF.ewm(1).std(), DataFrame), DataFrame) + check(assert_type(GB_DF.ewm(1).var(), DataFrame), DataFrame) + + # aggregate + with pytest_warns_bounded( + FutureWarning, + r"The provided callable is currently using ", + lower="2.0.99", + ): + check(assert_type(GB_DF.ewm(1).aggregate(np.sum), DataFrame), DataFrame) + check(assert_type(GB_DF.ewm(1).agg(np.sum), DataFrame), DataFrame) + check( + assert_type(GB_DF.ewm(1).aggregate([np.sum, np.mean]), DataFrame), + DataFrame, + ) + check( + assert_type(GB_DF.ewm(1).aggregate(["sum", np.mean]), DataFrame), + DataFrame, + ) + check( + assert_type( + GB_DF.ewm(1).aggregate({"col1": "sum", "col2": np.mean}), + DataFrame, + ), + DataFrame, + ) + check( + assert_type( + GB_DF.ewm(1).aggregate({"col1": ["sum", np.mean], "col2": np.mean}), + DataFrame, + ), + DataFrame, + ) + + # aggregate combinations + with pytest_warns_bounded( + FutureWarning, + r"The provided callable is currently using ", + lower="2.0.99", + ): + check(GB_DF.ewm(1).aggregate(np.sum), DataFrame) + check(GB_DF.ewm(1).aggregate([np.mean]), DataFrame) + check(GB_DF.ewm(1).aggregate(["sum", np.mean]), DataFrame) + check(GB_DF.ewm(1).aggregate({"col1": np.sum}), DataFrame) + check( + GB_DF.ewm(1).aggregate({"col1": np.sum, "col2": np.mean}), + DataFrame, + ) + check( + GB_DF.ewm(1).aggregate({"col1": [np.sum], "col2": ["sum", np.mean]}), + DataFrame, + ) + check( + GB_DF.ewm(1).aggregate({"col1": np.sum, "col2": ["sum", np.mean]}), + DataFrame, + ) + check( + GB_DF.ewm(1).aggregate({"col1": "sum", "col2": [np.mean]}), + DataFrame, + ) + check(GB_DF.ewm(1).aggregate("sum"), DataFrame) + + # getattr + check( + assert_type(GB_DF.ewm(1).col1, "ExponentialMovingWindowGroupby[DataFrame]"), + ExponentialMovingWindowGroupby, + ) + + # getitem + check( + assert_type(GB_DF.ewm(1)["col1"], "ExponentialMovingWindowGroupby[DataFrame]"), + ExponentialMovingWindowGroupby, + ) + check( + assert_type( + GB_DF.ewm(1)[["col1", "col2"]], "ExponentialMovingWindowGroupby[DataFrame]" + ), + ExponentialMovingWindowGroupby, + ) + + # iter + iterator = iter(GB_DF.ewm(1)) + check(assert_type(iterator, Iterator[DataFrame]), Iterator) + check(assert_type(next(iterator), DataFrame), DataFrame) + check(assert_type(list(GB_DF.ewm(1)), list[DataFrame]), list, DataFrame) + + +def test_series_groupby_ewm() -> None: + # basic + check( + assert_type(GB_S.ewm(1), "ExponentialMovingWindowGroupby[Series[float]]"), + ExponentialMovingWindowGroupby, + Series, + ) + + # props + check(assert_type(GB_S.ewm(1).obj, "Series[float]"), Series, float) + + # agg funcs + check(assert_type(GB_S.ewm(1).sum(), "Series[float]"), Series, float) + check(assert_type(GB_S.ewm(1).mean(), "Series[float]"), Series, float) + check(assert_type(GB_S.ewm(1).sum(), "Series[float]"), Series, float) + + # std / var + check(assert_type(GB_S.ewm(1).std(), "Series[float]"), Series, float) + check(assert_type(GB_S.ewm(1).var(), "Series[float]"), Series, float) + + # aggregate + with pytest_warns_bounded( + FutureWarning, + r"The provided callable is currently using ", + lower="2.0.99", + ): + check(assert_type(GB_S.ewm(1).aggregate("sum"), Series), Series) + check(assert_type(GB_S.ewm(1).aggregate(np.sum), Series), Series) + check(assert_type(GB_S.ewm(1).agg(np.sum), Series), Series) + check( + assert_type(GB_S.ewm(1).aggregate([np.sum, np.mean]), DataFrame), + DataFrame, + ) + check( + assert_type(GB_S.ewm(1).aggregate(["sum", np.mean]), DataFrame), + DataFrame, + ) + check( + assert_type( + GB_S.ewm(1).aggregate({"col1": "sum", "col2": np.mean}), DataFrame + ), + DataFrame, + ) + + # iter + iterator = iter(GB_S.ewm(1)) + check(assert_type(iterator, "Iterator[Series[float]]"), Iterator) + check(assert_type(next(iterator), "Series[float]"), Series, float) + check(assert_type(list(GB_S.ewm(1)), "list[Series[float]]"), list, Series) + + +def test_engine() -> None: + if TYPE_CHECKING_INVALID_USAGE: + # See issue #810 + DataFrameGroupBy().aggregate( + "size", + "some", + "args", + engine=0, # type: ignore[call-overload] # pyright: ignore + engine_kwargs="not valid", # pyright: ignore + other_kwarg="", + ) + GB_DF.aggregate("size", engine="cython", engine_kwargs={}) diff --git a/tests/test_plotting.py b/tests/test_plotting.py index 53b213732..e6ebcec20 100644 --- a/tests/test_plotting.py +++ b/tests/test_plotting.py @@ -1,5 +1,9 @@ import io -from typing import Any +import itertools +from typing import ( + Any, + Union, +) from matplotlib.axes import Axes from matplotlib.figure import Figure @@ -8,6 +12,7 @@ import numpy as np import numpy.typing as npt import pandas as pd +from pandas import Series import pytest from typing_extensions import assert_type @@ -578,3 +583,78 @@ def test_plot_subplot_changes_150() -> None: ), np.ndarray, ) + + +def test_grouped_dataframe_boxplot(close_figures): + tuples = [t for t in itertools.product(range(10), range(2))] + index = pd.MultiIndex.from_tuples(tuples, names=["lvl0", "lvl1"]) + df = pd.DataFrame( + data=np.random.randn(len(index), 2), columns=["A", "B"], index=index + ) + grouped = df.groupby(level="lvl1") + + # subplots (default is subplots=True) + check(assert_type(grouped.boxplot(), Series), Series) + check(assert_type(grouped.boxplot(subplots=True), Series), Series) + + # a single plot + check( + assert_type( + grouped.boxplot( + subplots=False, rot=45, fontsize=12, figsize=(8, 10), vert=False + ), + Axes, + ), + Axes, + ) + + # not a literal bool + check(assert_type(grouped.boxplot(subplots=bool(0.5)), Union[Axes, Series]), Series) + + +def test_grouped_dataframe_hist(close_figures): + df = IRIS_DF.iloc[:50] + grouped = df.groupby("Name") + check(assert_type(grouped.hist(), Series), Series) + check( + assert_type( + grouped.hist( + column="PetalWidth", + by="PetalLength", + grid=False, + xlabelsize=2, + ylabelsize=1, + yrot=10.0, + sharex=True, + sharey=False, + figsize=(1.5, 1.5), + bins=4, + ), + Series, + ), + Series, + ) + + +def test_grouped_series_hist(close_figures): + multi_index = pd.MultiIndex.from_tuples([(0, 0), (0, 1), (1, 0)], names=["a", "b"]) + s = pd.Series([0, 1, 2], index=multi_index, dtype=int) + grouped = s.groupby(level=0) + check(assert_type(grouped.hist(), Series), Series) + check(assert_type(grouped.hist(by="a", grid=False), Series), Series) + check( + assert_type( + grouped.hist( + by=["a", "b"], + grid=False, + xlabelsize=2, + ylabelsize=1, + yrot=10.0, + figsize=(1.5, 1.5), + bins=4, + legend=True, + ), + Series, + ), + Series, + ) diff --git a/tests/test_resampler.py b/tests/test_resampler.py index 3129a247e..9c7fe0018 100644 --- a/tests/test_resampler.py +++ b/tests/test_resampler.py @@ -1,6 +1,6 @@ from collections.abc import ( - Generator, Hashable, + Iterator, ) from typing import Union @@ -13,14 +13,16 @@ Series, date_range, ) -from pandas.core.groupby.generic import SeriesGroupBy -from pandas.core.resample import Resampler +from pandas.core.groupby.generic import ( + DataFrameGroupBy, + SeriesGroupBy, +) +from pandas.core.resample import DatetimeIndexResampler from typing_extensions import assert_type -from pandas._typing import Scalar - from tests import ( PD_LTE_21, + TYPE_CHECKING_INVALID_USAGE, check, pytest_warns_bounded, ) @@ -30,6 +32,7 @@ S = DF_.iloc[:, 0] DF = DataFrame({"col1": S, "col2": S}) + _AggRetType = Union[DataFrame, Series] if PD_LTE_21: @@ -44,9 +47,7 @@ def test_props() -> None: def test_iter() -> None: - assert_type( - iter(DF.resample(MonthFreq)), Generator[tuple[Hashable, DataFrame], None, None] - ) + assert_type(iter(DF.resample(MonthFreq)), Iterator[tuple[Hashable, DataFrame]]) for v in DF.resample(MonthFreq): check(assert_type(v, tuple[Hashable, DataFrame]), tuple) @@ -82,7 +83,7 @@ def test_std_var() -> None: def test_size_count() -> None: - check(assert_type(DF.resample(MonthFreq).size(), Series), Series) + check(assert_type(DF.resample(MonthFreq).size(), "Series[int]"), Series, np.integer) check(assert_type(DF.resample(MonthFreq).count(), DataFrame), DataFrame) @@ -93,21 +94,9 @@ def test_filling() -> None: def test_fillna() -> None: - with pytest_warns_bounded( - FutureWarning, - "DatetimeIndexResampler.fillna is deprecated ", - lower="2.0.99", - ): - check(assert_type(DF.resample(MonthFreq).fillna("pad"), DataFrame), DataFrame) - check( - assert_type(DF.resample(MonthFreq).fillna("backfill"), DataFrame), DataFrame - ) - check(assert_type(DF.resample(MonthFreq).fillna("ffill"), DataFrame), DataFrame) - check(assert_type(DF.resample(MonthFreq).fillna("bfill"), DataFrame), DataFrame) - check( - assert_type(DF.resample(MonthFreq).fillna("nearest", limit=2), DataFrame), - DataFrame, - ) + # deprecated (and removed from stub) + if TYPE_CHECKING_INVALID_USAGE: + DF.resample(MonthFreq).fillna("pad") # type: ignore[operator] # pyright: ignore def test_aggregate() -> None: @@ -117,27 +106,22 @@ def test_aggregate() -> None: lower="2.0.99", ): check( - assert_type(DF.resample(MonthFreq).aggregate(np.sum), _AggRetType), - DataFrame, + assert_type(DF.resample(MonthFreq).aggregate(np.sum), DataFrame), DataFrame ) - check(assert_type(DF.resample(MonthFreq).agg(np.sum), _AggRetType), DataFrame) - check(assert_type(DF.resample(MonthFreq).apply(np.sum), _AggRetType), DataFrame) + check(assert_type(DF.resample(MonthFreq).agg(np.sum), DataFrame), DataFrame) + check(assert_type(DF.resample(MonthFreq).apply(np.sum), DataFrame), DataFrame) check( - assert_type( - DF.resample(MonthFreq).aggregate([np.sum, np.mean]), _AggRetType - ), + assert_type(DF.resample(MonthFreq).aggregate([np.sum, np.mean]), DataFrame), DataFrame, ) check( - assert_type( - DF.resample(MonthFreq).aggregate(["sum", np.mean]), _AggRetType - ), + assert_type(DF.resample(MonthFreq).aggregate(["sum", np.mean]), DataFrame), DataFrame, ) check( assert_type( DF.resample(MonthFreq).aggregate({"col1": "sum", "col2": np.mean}), - _AggRetType, + DataFrame, ), DataFrame, ) @@ -146,7 +130,7 @@ def test_aggregate() -> None: DF.resample(MonthFreq).aggregate( {"col1": ["sum", np.mean], "col2": np.mean} ), - _AggRetType, + DataFrame, ), DataFrame, ) @@ -154,7 +138,7 @@ def test_aggregate() -> None: def f(val: DataFrame) -> Series: return val.mean() - check(assert_type(DF.resample(MonthFreq).aggregate(f), _AggRetType), DataFrame) + check(assert_type(DF.resample(MonthFreq).aggregate(f), DataFrame), DataFrame) def test_asfreq() -> None: @@ -180,21 +164,112 @@ def test_interpolate_inplace() -> None: def test_pipe() -> None: - def f(val: DataFrame) -> DataFrame: + def f(val: "DatetimeIndexResampler[DataFrame]") -> DataFrame: + assert isinstance(val, DatetimeIndexResampler) return DataFrame(val) check(assert_type(DF.resample(MonthFreq).pipe(f), DataFrame), DataFrame) - def g(val: DataFrame) -> Series: + def g(val: "DatetimeIndexResampler[DataFrame]") -> DataFrame: + assert isinstance(val, DatetimeIndexResampler) return val.mean() check(assert_type(DF.resample(MonthFreq).pipe(g), DataFrame), DataFrame) - def h(val: DataFrame) -> float: + def h(val: "DatetimeIndexResampler[DataFrame]") -> Series: + assert isinstance(val, DatetimeIndexResampler) return val.mean().mean() check(assert_type(DF.resample(MonthFreq).pipe(h), Series), Series) + def i(val: "DatetimeIndexResampler[DataFrame]") -> float: + assert isinstance(val, DatetimeIndexResampler) + return float(val.mean().mean().mean()) + + check(assert_type(DF.resample(MonthFreq).pipe(i), float), float) + + def j( + res: "DatetimeIndexResampler[DataFrame]", + pos: int, + /, + arg1: list[float], + arg2: str, + *, + kw: tuple[int], + ) -> DataFrame: + assert isinstance(res, DatetimeIndexResampler) + return res.obj + + check( + assert_type( + DF.resample(MonthFreq).pipe(j, 1, [1.0], arg2="hi", kw=(1,)), DataFrame + ), + DataFrame, + ) + + if TYPE_CHECKING_INVALID_USAGE: + DF.resample(MonthFreq).pipe( + j, + "a", # type: ignore[arg-type] # pyright: ignore[reportGeneralTypeIssues] + [1.0, 2.0], + arg2="hi", + kw=(1,), + ) + DF.resample(MonthFreq).pipe( + j, + 1, + [1.0, "b"], # type: ignore[list-item] # pyright: ignore[reportGeneralTypeIssues] + arg2="hi", + kw=(1,), + ) + DF.resample(MonthFreq).pipe( + j, + 1, + [1.0], + arg2=11, # type: ignore[arg-type] # pyright: ignore[reportGeneralTypeIssues] + kw=(1,), + ) + DF.resample(MonthFreq).pipe( + j, + 1, + [1.0], + arg2="hi", + kw=(1, 2), # type: ignore[arg-type] # pyright: ignore[reportGeneralTypeIssues] + ) + DF.resample(MonthFreq).pipe( # type: ignore[call-arg] + j, + 1, + [1.0], + arg3="hi", # pyright: ignore[reportGeneralTypeIssues] + kw=(1,), + ) + DF.resample(MonthFreq).pipe( # type: ignore[misc] + j, + 1, + [1.0], + 11, # type: ignore[arg-type] + (1,), # pyright: ignore[reportGeneralTypeIssues] + ) + DF.resample(MonthFreq).pipe( # type: ignore[call-arg] + j, + pos=1, # pyright: ignore[reportGeneralTypeIssues] + arg1=[1.0], + arg2=11, # type: ignore[arg-type] + kw=(1,), + ) + + def k(x: int, t: "DatetimeIndexResampler[DataFrame]") -> DataFrame: + assert isinstance(x, int) + return t.obj + + check(assert_type(DF.resample(MonthFreq).pipe((k, "t"), 1), DataFrame), DataFrame) + + if TYPE_CHECKING_INVALID_USAGE: + DF.resample(MonthFreq).pipe( + (k, 1), # type: ignore[arg-type] # pyright: ignore[reportGeneralTypeIssues] + 1, + ) + def test_transform() -> None: def f(val: Series) -> Series: @@ -224,7 +299,9 @@ def test_agg_funcs_series() -> None: check(assert_type(S.resample(MonthFreq).sum(), Series), Series) check(assert_type(S.resample(MonthFreq).median(), Series), Series) check(assert_type(S.resample(MonthFreq).ohlc(), DataFrame), DataFrame) - check(assert_type(S.resample(MonthFreq).nunique(), Series), Series) + check( + assert_type(S.resample(MonthFreq).nunique(), "Series[int]"), Series, np.integer + ) def test_quantile_series() -> None: @@ -242,8 +319,8 @@ def test_std_var_series() -> None: def test_size_count_series() -> None: - check(assert_type(S.resample(MonthFreq).size(), Series), Series) - check(assert_type(S.resample(MonthFreq).count(), Series), Series) + check(assert_type(S.resample(MonthFreq).size(), "Series[int]"), Series, np.integer) + check(assert_type(S.resample(MonthFreq).count(), "Series[int]"), Series, np.integer) def test_filling_series() -> None: @@ -253,19 +330,9 @@ def test_filling_series() -> None: def test_fillna_series() -> None: - with pytest_warns_bounded( - FutureWarning, - "DatetimeIndexResampler.fillna is deprecated ", - lower="2.0.99", - ): - check(assert_type(S.resample(MonthFreq).fillna("pad"), Series), Series) - check(assert_type(S.resample(MonthFreq).fillna("backfill"), Series), Series) - check(assert_type(S.resample(MonthFreq).fillna("ffill"), Series), Series) - check(assert_type(S.resample(MonthFreq).fillna("bfill"), Series), Series) - check( - assert_type(S.resample(MonthFreq).fillna("nearest", limit=2), Series), - Series, - ) + # deprecated (and removed from stub) + if TYPE_CHECKING_INVALID_USAGE: + S.resample(MonthFreq).fillna("pad") # type: ignore[operator] # pyright: ignore def test_aggregate_series() -> None: @@ -317,17 +384,20 @@ def test_interpolate_inplace_series() -> None: def test_pipe_series() -> None: - def f(val: Series) -> Series: + def f(val: "DatetimeIndexResampler[Series]") -> Series: + assert isinstance(val, DatetimeIndexResampler) return Series(val) check(assert_type(S.resample(MonthFreq).pipe(f), Series), Series) - def g(val: Resampler) -> float: + def g(val: "DatetimeIndexResampler[Series]") -> float: + assert isinstance(val, DatetimeIndexResampler) return float(val.mean().mean()) - check(assert_type(S.resample(MonthFreq).pipe(g), Scalar), float) + check(assert_type(S.resample(MonthFreq).pipe(g), float), float) - def h(val: Series) -> DataFrame: + def h(val: "DatetimeIndexResampler[Series]") -> DataFrame: + assert isinstance(val, DatetimeIndexResampler) return DataFrame({0: val, 1: val}) check(assert_type(S.resample(MonthFreq).pipe(h), DataFrame), DataFrame) @@ -408,3 +478,11 @@ def df2scalar(val: DataFrame) -> float: check(DF.resample(MonthFreq).aggregate(df2frame), DataFrame) check(DF.resample(MonthFreq).aggregate(df2series), DataFrame) check(DF.resample(MonthFreq).aggregate(df2scalar), DataFrame) + + +def test_getitem() -> None: + check(assert_type(DF.resample(MonthFreq)["col1"], SeriesGroupBy), SeriesGroupBy) + check( + assert_type(DF.resample(MonthFreq)[["col1", "col2"]], DataFrameGroupBy), + DataFrameGroupBy, + ) diff --git a/tests/test_series.py b/tests/test_series.py index 673e4623d..1c7e8ff17 100644 --- a/tests/test_series.py +++ b/tests/test_series.py @@ -93,6 +93,11 @@ def test_types_init() -> None: pd.Series(data=[1, 2, 3, 4], index=None) pd.Series(data={"row1": [1, 2], "row2": [3, 4]}, index=None) + groupby = pd.Series(np.array([1, 2])).groupby(level=0) + resampler = pd.Series(np.array([1, 2]), index=dt).resample("1D") + pd.Series(data=groupby) + pd.Series(data=resampler) + def test_types_any() -> None: check(assert_type(pd.Series([False, False]).any(), bool), np.bool_) @@ -671,6 +676,15 @@ def test_groupby_result() -> None: check(assert_type(index3, tuple), tuple, int) check(assert_type(value3, "pd.Series[int]"), pd.Series, np.integer) + # Explicit by=None + iterator4 = s.groupby(None, level=0).__iter__() + assert_type(iterator4, Iterator[tuple[Scalar, "pd.Series[int]"]]) + index4, value4 = next(iterator4) + assert_type((index4, value4), tuple[Scalar, "pd.Series[int]"]) + + check(assert_type(index4, Scalar), int) + check(assert_type(value4, "pd.Series[int]"), pd.Series, np.integer) + # Want to make sure these cases are differentiated for (k1, k2), g in s.groupby(["a", "b"]): pass @@ -930,7 +944,7 @@ def test_types_agg() -> None: check(assert_type(s.agg("mean", axis=0), float), np.float64) with pytest_warns_bounded( FutureWarning, - r"The provided callable is currently using", + r"The provided callable <(built-in function (min|max|mean)|function mean at 0x\w+)> is currently using", lower="2.0.99", ): check(assert_type(s.agg(min), int), np.integer) @@ -1573,20 +1587,24 @@ def test_resample() -> None: N = 10 index = pd.date_range("1/1/2000", periods=N, freq="min") x = [x for x in range(N)] - df = pd.Series(x, index=index) - check(assert_type(df.resample("2min").std(), pd.Series), pd.Series) - check(assert_type(df.resample("2min").var(), pd.Series), pd.Series) - check(assert_type(df.resample("2min").quantile(), pd.Series), pd.Series) - check(assert_type(df.resample("2min").sum(), pd.Series), pd.Series) - check(assert_type(df.resample("2min").prod(), pd.Series), pd.Series) - check(assert_type(df.resample("2min").min(), pd.Series), pd.Series) - check(assert_type(df.resample("2min").max(), pd.Series), pd.Series) - check(assert_type(df.resample("2min").first(), pd.Series), pd.Series) - check(assert_type(df.resample("2min").last(), pd.Series), pd.Series) - check(assert_type(df.resample("2min").mean(), pd.Series), pd.Series) - check(assert_type(df.resample("2min").sem(), pd.Series), pd.Series) - check(assert_type(df.resample("2min").median(), pd.Series), pd.Series) - check(assert_type(df.resample("2min").ohlc(), pd.DataFrame), pd.DataFrame) + s = pd.Series(x, index=index, dtype=float) + check(assert_type(s.resample("2min").std(), "pd.Series[float]"), pd.Series, float) + check(assert_type(s.resample("2min").var(), "pd.Series[float]"), pd.Series, float) + check( + assert_type(s.resample("2min").quantile(), "pd.Series[float]"), pd.Series, float + ) + check(assert_type(s.resample("2min").sum(), "pd.Series[float]"), pd.Series, float) + check(assert_type(s.resample("2min").prod(), "pd.Series[float]"), pd.Series, float) + check(assert_type(s.resample("2min").min(), "pd.Series[float]"), pd.Series, float) + check(assert_type(s.resample("2min").max(), "pd.Series[float]"), pd.Series, float) + check(assert_type(s.resample("2min").first(), "pd.Series[float]"), pd.Series, float) + check(assert_type(s.resample("2min").last(), "pd.Series[float]"), pd.Series, float) + check(assert_type(s.resample("2min").mean(), "pd.Series[float]"), pd.Series, float) + check(assert_type(s.resample("2min").sem(), "pd.Series[float]"), pd.Series, float) + check( + assert_type(s.resample("2min").median(), "pd.Series[float]"), pd.Series, float + ) + check(assert_type(s.resample("2min").ohlc(), pd.DataFrame), pd.DataFrame) def test_to_xarray(): @@ -2755,8 +2773,12 @@ def test_to_json_mode() -> None: def test_groupby_diff() -> None: # GH 658 - s = pd.Series([1, 2, 3, np.nan]) - check(assert_type(s.groupby(level=0).diff(), pd.Series), pd.Series) + s = pd.Series([1.0, 2.0, 3.0, np.nan]) + check( + assert_type(s.groupby(level=0).diff(), "pd.Series[float]"), + pd.Series, + float, + ) def test_to_string() -> None: