From ac3cc90785b5776f3b709a5e6a8c50035e70e2a2 Mon Sep 17 00:00:00 2001 From: Anatoly Myachev Date: Mon, 13 May 2024 09:52:40 +0200 Subject: [PATCH] FEAT-#7252: Add type hints for `base.py` (#7253) Signed-off-by: Anatoly Myachev --- environment-dev.yml | 1 + modin/pandas/base.py | 454 ++++++++++++++---------- modin/pandas/dataframe.py | 2 +- modin/pandas/series.py | 2 +- requirements-dev.txt | 1 + requirements/env_hdk.yml | 1 + requirements/env_unidist_linux.yml | 1 + requirements/env_unidist_win.yml | 1 + requirements/requirements-no-engine.yml | 1 + 9 files changed, 268 insertions(+), 196 deletions(-) diff --git a/environment-dev.yml b/environment-dev.yml index 76d4f7a4d69..ba20f0a8ead 100644 --- a/environment-dev.yml +++ b/environment-dev.yml @@ -50,6 +50,7 @@ dependencies: - pytest-benchmark>=4.0.0 - pytest-cov>=4.0.0 - pytest-xdist>=3.2.0 + - typing_extensions # code linters - black>=24.1.0 diff --git a/modin/pandas/base.py b/modin/pandas/base.py index 9af67188806..7861ad165a6 100644 --- a/modin/pandas/base.py +++ b/modin/pandas/base.py @@ -15,11 +15,12 @@ from __future__ import annotations +import abc import pickle as pkl import re import warnings from functools import cached_property -from typing import TYPE_CHECKING, Any, Hashable, Literal, Optional, Sequence, Union +from typing import TYPE_CHECKING, Any, Callable, Hashable, Literal, Optional, Sequence import numpy as np import pandas @@ -36,7 +37,9 @@ IndexLabel, Level, RandomState, + Scalar, StorageOptions, + T, TimedeltaConvertibleTypes, TimestampConvertibleTypes, npt, @@ -71,8 +74,16 @@ from .utils import _doc_binary_op, is_full_grab_slice if TYPE_CHECKING: + from typing_extensions import Self + from modin.core.storage_formats import BaseQueryCompiler + from .dataframe import DataFrame + from .indexing import _iLocIndexer, _LocIndexer + from .resample import Resampler + from .series import Series + from .window import Expanding, Rolling, Window + # Similar to pandas, sentinel value to use as kwarg in place of None when None has # special meaning and needs to be distinguished from a user explicitly passing None. sentinel = object() @@ -181,6 +192,7 @@ class BasePandasDataset(ClassLogger): # but lives in "pandas" namespace. _pandas_class = pandas.core.generic.NDFrame _query_compiler: BaseQueryCompiler + _siblings: list[BasePandasDataset] @cached_property def _is_dataframe(self) -> bool: @@ -198,13 +210,16 @@ def _is_dataframe(self) -> bool: """ return issubclass(self._pandas_class, pandas.DataFrame) - def _create_or_update_from_compiler(self, new_query_compiler, inplace=False): + @abc.abstractmethod + def _create_or_update_from_compiler( + self, new_query_compiler: BaseQueryCompiler, inplace: bool = False + ) -> Self | None: """ Return or update a ``DataFrame`` or ``Series`` with given `new_query_compiler`. Parameters ---------- - new_query_compiler : PandasQueryCompiler + new_query_compiler : BaseQueryCompiler QueryCompiler to use to manage the data. inplace : bool, default: False Whether or not to perform update or creation inplace. @@ -214,9 +229,9 @@ def _create_or_update_from_compiler(self, new_query_compiler, inplace=False): DataFrame, Series or None None if update was done, ``DataFrame`` or ``Series`` otherwise. """ - raise NotImplementedError() + pass - def _add_sibling(self, sibling): + def _add_sibling(self, sibling: BasePandasDataset) -> None: """ Add a DataFrame or Series object to the list of siblings. @@ -233,7 +248,9 @@ def _add_sibling(self, sibling): for sib in self._siblings: sib._siblings += [sibling] - def _build_repr_df(self, num_rows, num_cols): + def _build_repr_df( + self, num_rows: int, num_cols: int + ) -> pandas.DataFrame | pandas.Series: """ Build pandas DataFrame for string representation. @@ -268,13 +285,13 @@ def _build_repr_df(self, num_rows, num_cols): indexer = row_indexer return self.iloc[indexer]._query_compiler.to_pandas() - def _update_inplace(self, new_query_compiler): + def _update_inplace(self, new_query_compiler: BaseQueryCompiler) -> None: """ Update the current DataFrame inplace. Parameters ---------- - new_query_compiler : query_compiler + new_query_compiler : BaseQueryCompiler The new QueryCompiler to use to manage the data. """ old_query_compiler = self._query_compiler @@ -308,7 +325,7 @@ def _validate_other( Returns ------- - modin.pandas.BasePandasDataset + BaseQueryCompiler or Any Other frame if it is determined to be valid. Raises @@ -387,7 +404,7 @@ def _validate_other( raise TypeError("Cannot do operation with improper dtypes") return result - def _validate_function(self, func, on_invalid=None): + def _validate_function(self, func, on_invalid=None) -> None: """ Check the validity of the function which is intended to be applied to the frame. @@ -431,7 +448,7 @@ def error_raiser(msg, exception=Exception): TypeError, ) - def _binary_op(self, op, other, **kwargs): + def _binary_op(self, op, other, **kwargs) -> Self: """ Do binary operation between two datasets. @@ -564,7 +581,7 @@ def _default_to_pandas(self, op, *args, reason: str = None, **kwargs): return result @classmethod - def _get_axis_number(cls, axis): + def _get_axis_number(cls, axis) -> int: """ Convert axis name or number to axis index. @@ -584,7 +601,7 @@ def _get_axis_number(cls, axis): return cls._pandas_class._get_axis_number(axis) if axis is not None else 0 @cached_property - def __constructor__(self) -> type[BasePandasDataset]: + def __constructor__(self) -> type[Self]: """ Construct DataFrame or Series object depending on self type. @@ -595,14 +612,14 @@ def __constructor__(self) -> type[BasePandasDataset]: """ return type(self) - def abs(self): # noqa: RT01, D200 + def abs(self) -> Self: # noqa: RT01, D200 """ Return a `BasePandasDataset` with absolute numeric value of each element. """ self._validate_dtypes(numeric_only=True) return self.__constructor__(query_compiler=self._query_compiler.abs()) - def _set_index(self, new_index): + def _set_index(self, new_index) -> None: """ Set the index for this DataFrame. @@ -613,7 +630,7 @@ def _set_index(self, new_index): """ self._query_compiler.index = new_index - def _get_index(self): + def _get_index(self) -> pandas.Index: """ Get the index for this DataFrame. @@ -624,9 +641,9 @@ def _get_index(self): """ return self._query_compiler.index - index = property(_get_index, _set_index) + index: pandas.Index = property(_get_index, _set_index) - def _get_axis(self, axis): + def _get_axis(self, axis) -> pandas.Index: """ Return index labels of the specified axis. @@ -644,7 +661,7 @@ def _get_axis(self, axis): def add( self, other, axis="columns", level=None, fill_value=None - ): # noqa: PR01, RT01, D200 + ) -> Self: # noqa: PR01, RT01, D200 """ Return addition of `BasePandasDataset` and `other`, element-wise (binary operator `add`). """ @@ -652,7 +669,9 @@ def add( "add", other, axis=axis, level=level, fill_value=fill_value ) - def aggregate(self, func=None, axis=0, *args, **kwargs): # noqa: PR01, RT01, D200 + def aggregate( + self, func=None, axis=0, *args, **kwargs + ) -> DataFrame | Series | Scalar: # noqa: PR01, RT01, D200 """ Aggregate using one or more operations over the specified axis. """ @@ -667,7 +686,7 @@ def aggregate(self, func=None, axis=0, *args, **kwargs): # noqa: PR01, RT01, D2 return self.apply(func, axis=axis, args=args, **kwargs) return result - agg = aggregate + agg: DataFrame | Series | Scalar = aggregate def _aggregate(self, func, *args, **kwargs): """ @@ -736,7 +755,7 @@ def _string_function(self, func, *args, **kwargs): return self._default_to_pandas("agg", func, *args, **kwargs) raise ValueError("{} is an unknown string function".format(func)) - def _get_dtypes(self): + def _get_dtypes(self) -> list: """ Get dtypes as list. @@ -763,7 +782,7 @@ def align( limit=lib.no_default, fill_axis=lib.no_default, broadcast_axis=lib.no_default, - ): # noqa: PR01, RT01, D200 + ) -> tuple[Self, Self]: # noqa: PR01, RT01, D200 """ Align two objects on their axes with the specified join method. """ @@ -822,9 +841,25 @@ def align( query_compiler=right ) + @abc.abstractmethod + def _reduce_dimension(self, query_compiler: BaseQueryCompiler) -> Series | Scalar: + """ + Reduce the dimension of data from the `query_compiler`. + + Parameters + ---------- + query_compiler : BaseQueryCompiler + Query compiler to retrieve the data. + + Returns + ------- + Series | Scalar + """ + pass + def all( self, axis=0, bool_only=False, skipna=True, **kwargs - ): # noqa: PR01, RT01, D200 + ) -> Self: # noqa: PR01, RT01, D200 """ Return whether all elements are True, potentially over an axis. """ @@ -869,7 +904,7 @@ def all( def any( self, *, axis=0, bool_only=False, skipna=True, **kwargs - ): # noqa: PR01, RT01, D200 + ) -> Self: # noqa: PR01, RT01, D200 """ Return whether any element is True, potentially over an axis. """ @@ -918,7 +953,7 @@ def apply( result_type, args, **kwds, - ): # noqa: PR01, RT01, D200 + ) -> BaseQueryCompiler: # noqa: PR01, RT01, D200 """ Apply a function along an axis of the `BasePandasDataset`. """ @@ -960,7 +995,7 @@ def error_raiser(msg, exception): def asfreq( self, freq, method=None, how=None, normalize=False, fill_value=None - ): # noqa: PR01, RT01, D200 + ) -> Self: # noqa: PR01, RT01, D200 """ Convert time series to specified frequency. """ @@ -974,7 +1009,7 @@ def asfreq( ) ) - def asof(self, where, subset=None): # noqa: PR01, RT01, D200 + def asof(self, where, subset=None) -> Self: # noqa: PR01, RT01, D200 """ Return the last row(s) without any NaNs before `where`. """ @@ -1002,7 +1037,9 @@ def asof(self, where, subset=None): # noqa: PR01, RT01, D200 result = result.squeeze() return result - def astype(self, dtype, copy=None, errors="raise"): # noqa: PR01, RT01, D200 + def astype( + self, dtype, copy=None, errors="raise" + ) -> Self: # noqa: PR01, RT01, D200 """ Cast a Modin object to a specified dtype `dtype`. """ @@ -1051,7 +1088,7 @@ def astype(self, dtype, copy=None, errors="raise"): # noqa: PR01, RT01, D200 return self @property - def at(self, axis=None): # noqa: PR01, RT01, D200 + def at(self, axis=None) -> _LocIndexer: # noqa: PR01, RT01, D200 """ Get a single value for a row/column label pair. """ @@ -1059,7 +1096,7 @@ def at(self, axis=None): # noqa: PR01, RT01, D200 return _LocIndexer(self) - def at_time(self, time, asof=False, axis=None): # noqa: PR01, RT01, D200 + def at_time(self, time, asof=False, axis=None) -> Self: # noqa: PR01, RT01, D200 """ Select values at particular time of day (e.g., 9:30AM). """ @@ -1074,12 +1111,12 @@ def at_time(self, time, asof=False, axis=None): # noqa: PR01, RT01, D200 pandas.DataFrame.between_time, apilink="pandas.DataFrame.between_time" ) def between_time( - self: "BasePandasDataset", + self, start_time, end_time, inclusive="both", axis=None, - ): # noqa: PR01, RT01, D200 + ) -> Self: # noqa: PR01, RT01, D200 return self._create_or_update_from_compiler( self._query_compiler.between_time( start_time=pandas.core.tools.times.to_time(start_time), @@ -1111,7 +1148,7 @@ def bfill( limit=None, limit_area=None, downcast=lib.no_default, - ): # noqa: PR01, RT01, D200 + ) -> Self: # noqa: PR01, RT01, D200 """ Synonym for `DataFrame.fillna` with ``method='bfill'``. """ @@ -1140,7 +1177,7 @@ def bfill( def backfill( self, *, axis=None, inplace=False, limit=None, downcast=lib.no_default - ): # noqa: PR01, RT01, D200 + ) -> Self: # noqa: PR01, RT01, D200 """ Synonym for `DataFrame.bfill`. """ @@ -1154,7 +1191,7 @@ def backfill( axis=axis, inplace=inplace, limit=limit, downcast=downcast ) - def bool(self): # noqa: RT01, D200 + def bool(self) -> bool: # noqa: RT01, D200 """ Return the bool of a single element `BasePandasDataset`. """ @@ -1177,7 +1214,7 @@ def bool(self): # noqa: RT01, D200 def clip( self, lower=None, upper=None, *, axis=None, inplace=False, **kwargs - ): # noqa: PR01, RT01, D200 + ) -> Self: # noqa: PR01, RT01, D200 """ Trim values at input threshold(s). """ @@ -1202,7 +1239,9 @@ def clip( ) return self._create_or_update_from_compiler(new_query_compiler, inplace) - def combine(self, other, func, fill_value=None, **kwargs): # noqa: PR01, RT01, D200 + def combine( + self, other, func, fill_value=None, **kwargs + ) -> Self: # noqa: PR01, RT01, D200 """ Perform combination of `BasePandasDataset`-s according to `func`. """ @@ -1210,13 +1249,13 @@ def combine(self, other, func, fill_value=None, **kwargs): # noqa: PR01, RT01, "combine", other, _axis=0, func=func, fill_value=fill_value, **kwargs ) - def combine_first(self, other): # noqa: PR01, RT01, D200 + def combine_first(self, other) -> Self: # noqa: PR01, RT01, D200 """ Update null elements with value in the same location in `other`. """ return self._binary_op("combine_first", other, _axis=0) - def copy(self, deep=True): # noqa: PR01, RT01, D200 + def copy(self, deep=True) -> Self: # noqa: PR01, RT01, D200 """ Make a copy of the object's metadata. """ @@ -1226,7 +1265,9 @@ def copy(self, deep=True): # noqa: PR01, RT01, D200 self._add_sibling(new_obj) return new_obj - def count(self, axis=0, numeric_only=False): # noqa: PR01, RT01, D200 + def count( + self, axis=0, numeric_only=False + ) -> Series | Scalar: # noqa: PR01, RT01, D200 """ Count non-NA cells for `BasePandasDataset`. """ @@ -1239,7 +1280,9 @@ def count(self, axis=0, numeric_only=False): # noqa: PR01, RT01, D200 frame._query_compiler.count(axis=axis, numeric_only=numeric_only) ) - def cummax(self, axis=None, skipna=True, *args, **kwargs): # noqa: PR01, RT01, D200 + def cummax( + self, axis=None, skipna=True, *args, **kwargs + ) -> Self: # noqa: PR01, RT01, D200 """ Return cumulative maximum over a `BasePandasDataset` axis. """ @@ -1254,7 +1297,9 @@ def cummax(self, axis=None, skipna=True, *args, **kwargs): # noqa: PR01, RT01, ) ) - def cummin(self, axis=None, skipna=True, *args, **kwargs): # noqa: PR01, RT01, D200 + def cummin( + self, axis=None, skipna=True, *args, **kwargs + ) -> Self: # noqa: PR01, RT01, D200 """ Return cumulative minimum over a `BasePandasDataset` axis. """ @@ -1271,7 +1316,7 @@ def cummin(self, axis=None, skipna=True, *args, **kwargs): # noqa: PR01, RT01, def cumprod( self, axis=None, skipna=True, *args, **kwargs - ): # noqa: PR01, RT01, D200 + ) -> Self: # noqa: PR01, RT01, D200 """ Return cumulative product over a `BasePandasDataset` axis. """ @@ -1285,7 +1330,9 @@ def cumprod( ) ) - def cumsum(self, axis=None, skipna=True, *args, **kwargs): # noqa: PR01, RT01, D200 + def cumsum( + self, axis=None, skipna=True, *args, **kwargs + ) -> Self: # noqa: PR01, RT01, D200 """ Return cumulative sum over a `BasePandasDataset` axis. """ @@ -1304,7 +1351,7 @@ def describe( percentiles=None, include=None, exclude=None, - ): # noqa: PR01, RT01, D200 + ) -> Self: # noqa: PR01, RT01, D200 """ Generate descriptive statistics. """ @@ -1337,7 +1384,7 @@ def describe( query_compiler=data._query_compiler.describe(percentiles=percentiles) ) - def diff(self, periods=1, axis=0): # noqa: PR01, RT01, D200 + def diff(self, periods=1, axis=0) -> Self: # noqa: PR01, RT01, D200 """ First discrete difference of element. """ @@ -1365,7 +1412,7 @@ def drop( level=None, inplace=False, errors="raise", - ): # noqa: PR01, RT01, D200 + ) -> Self: # noqa: PR01, RT01, D200 """ Drop specified labels from `BasePandasDataset`. """ @@ -1435,7 +1482,7 @@ def dropna( subset: IndexLabel = None, inplace: bool = False, ignore_index: bool = False, - ): # noqa: PR01, RT01, D200 + ) -> Self: # noqa: PR01, RT01, D200 """ Remove missing values. """ @@ -1469,7 +1516,7 @@ def dropna( ) return self._create_or_update_from_compiler(new_query_compiler, inplace) - def droplevel(self, level, axis=0): # noqa: PR01, RT01, D200 + def droplevel(self, level, axis=0) -> Self: # noqa: PR01, RT01, D200 """ Return `BasePandasDataset` with requested index / column level(s) removed. """ @@ -1505,7 +1552,7 @@ def droplevel(self, level, axis=0): # noqa: PR01, RT01, D200 def drop_duplicates( self, keep="first", inplace=False, **kwargs - ): # noqa: PR01, RT01, D200 + ) -> Self: # noqa: PR01, RT01, D200 """ Return `BasePandasDataset` with duplicate rows removed. """ @@ -1529,13 +1576,15 @@ def drop_duplicates( else: return result - def eq(self, other, axis="columns", level=None): # noqa: PR01, RT01, D200 + def eq(self, other, axis="columns", level=None) -> Self: # noqa: PR01, RT01, D200 """ Get equality of `BasePandasDataset` and `other`, element-wise (binary operator `eq`). """ return self._binary_op("eq", other, axis=axis, level=level, dtypes=np.bool_) - def explode(self, column, ignore_index: bool = False): # noqa: PR01, RT01, D200 + def explode( + self, column, ignore_index: bool = False + ) -> Self: # noqa: PR01, RT01, D200 """ Transform each element of a list-like to a row. """ @@ -1548,16 +1597,16 @@ def explode(self, column, ignore_index: bool = False): # noqa: PR01, RT01, D200 def ewm( self, - com: "float | None" = None, - span: "float | None" = None, - halflife: "float | TimedeltaConvertibleTypes | None" = None, - alpha: "float | None" = None, - min_periods: "int | None" = 0, + com: float | None = None, + span: float | None = None, + halflife: float | TimedeltaConvertibleTypes | None = None, + alpha: float | None = None, + min_periods: int | None = 0, adjust: bool = True, ignore_na: bool = False, - axis: "Axis" = lib.no_default, - times: "str | np.ndarray | BasePandasDataset | None" = None, - method: "str" = "single", + axis: Axis = lib.no_default, + times: str | np.ndarray | BasePandasDataset | None = None, + method: str = "single", ) -> pandas.core.window.ewm.ExponentialMovingWindow: # noqa: PR01, RT01, D200 """ Provide exponentially weighted (EW) calculations. @@ -1578,7 +1627,7 @@ def ewm( def expanding( self, min_periods=1, axis=lib.no_default, method="single" - ): # noqa: PR01, RT01, D200 + ) -> Expanding: # noqa: PR01, RT01, D200 """ Provide expanding window calculations. """ @@ -1619,7 +1668,7 @@ def ffill( limit=None, limit_area=None, downcast=lib.no_default, - ): # noqa: PR01, RT01, D200 + ) -> Self | None: # noqa: PR01, RT01, D200 """ Synonym for `DataFrame.fillna` with ``method='ffill'``. """ @@ -1648,7 +1697,7 @@ def ffill( def pad( self, *, axis=None, inplace=False, limit=None, downcast=lib.no_default - ): # noqa: PR01, RT01, D200 + ) -> Self | None: # noqa: PR01, RT01, D200 """ Synonym for `DataFrame.ffill`. """ @@ -1672,7 +1721,7 @@ def fillna( inplace=False, limit=None, downcast=lib.no_default, - ): + ) -> Self | None: """ Fill NA/NaN values using the specified method. @@ -1765,7 +1814,7 @@ def fillna( def filter( self, items=None, like=None, regex=None, axis=None - ): # noqa: PR01, RT01, D200 + ) -> Self: # noqa: PR01, RT01, D200 """ Subset the `BasePandasDataset` rows or columns according to the specified index labels. """ @@ -1801,7 +1850,7 @@ def f(x): return self[bool_arr] return self[self.columns[bool_arr]] - def first(self, offset): # noqa: PR01, RT01, D200 + def first(self, offset) -> Self | None: # noqa: PR01, RT01, D200 """ Select initial periods of time series data based on a date offset. """ @@ -1814,7 +1863,7 @@ def first(self, offset): # noqa: PR01, RT01, D200 self._query_compiler.first(offset=to_offset(offset)) ) - def first_valid_index(self): # noqa: RT01, D200 + def first_valid_index(self) -> int: # noqa: RT01, D200 """ Return index for first non-NA value or None, if no non-NA value is found. """ @@ -1822,7 +1871,7 @@ def first_valid_index(self): # noqa: RT01, D200 def floordiv( self, other, axis="columns", level=None, fill_value=None - ): # noqa: PR01, RT01, D200 + ) -> Self: # noqa: PR01, RT01, D200 """ Get integer division of `BasePandasDataset` and `other`, element-wise (binary operator `floordiv`). """ @@ -1830,13 +1879,15 @@ def floordiv( "floordiv", other, axis=axis, level=level, fill_value=fill_value ) - def ge(self, other, axis="columns", level=None): # noqa: PR01, RT01, D200 + def ge(self, other, axis="columns", level=None) -> Self: # noqa: PR01, RT01, D200 """ Get greater than or equal comparison of `BasePandasDataset` and `other`, element-wise (binary operator `ge`). """ return self._binary_op("ge", other, axis=axis, level=level, dtypes=np.bool_) - def get(self, key, default=None): # noqa: PR01, RT01, D200 + def get( + self, key, default=None + ) -> DataFrame | Series | Scalar: # noqa: PR01, RT01, D200 """ Get item from object for given key. """ @@ -1846,20 +1897,20 @@ def get(self, key, default=None): # noqa: PR01, RT01, D200 except (KeyError, ValueError, IndexError): return default - def gt(self, other, axis="columns", level=None): # noqa: PR01, RT01, D200 + def gt(self, other, axis="columns", level=None) -> Self: # noqa: PR01, RT01, D200 """ Get greater than comparison of `BasePandasDataset` and `other`, element-wise (binary operator `gt`). """ return self._binary_op("gt", other, axis=axis, level=level, dtypes=np.bool_) - def head(self, n=5): # noqa: PR01, RT01, D200 + def head(self, n=5) -> Self: # noqa: PR01, RT01, D200 """ Return the first `n` rows. """ return self.iloc[:n] @property - def iat(self, axis=None): # noqa: PR01, RT01, D200 + def iat(self, axis=None) -> _iLocIndexer: # noqa: PR01, RT01, D200 """ Get a single value for a row/column pair by integer position. """ @@ -1867,7 +1918,9 @@ def iat(self, axis=None): # noqa: PR01, RT01, D200 return _iLocIndexer(self) - def idxmax(self, axis=0, skipna=True, numeric_only=False): # noqa: PR01, RT01, D200 + def idxmax( + self, axis=0, skipna=True, numeric_only=False + ) -> Self: # noqa: PR01, RT01, D200 """ Return index of first occurrence of maximum over requested axis. """ @@ -1878,7 +1931,9 @@ def idxmax(self, axis=0, skipna=True, numeric_only=False): # noqa: PR01, RT01, ) ) - def idxmin(self, axis=0, skipna=True, numeric_only=False): # noqa: PR01, RT01, D200 + def idxmin( + self, axis=0, skipna=True, numeric_only=False + ) -> Self: # noqa: PR01, RT01, D200 """ Return index of first occurrence of minimum over requested axis. """ @@ -1889,7 +1944,7 @@ def idxmin(self, axis=0, skipna=True, numeric_only=False): # noqa: PR01, RT01, ) ) - def infer_objects(self, copy=None): # noqa: PR01, RT01, D200 + def infer_objects(self, copy=None) -> Self: # noqa: PR01, RT01, D200 """ Attempt to infer better dtypes for object columns. """ @@ -1906,7 +1961,7 @@ def convert_dtypes( convert_boolean: bool = True, convert_floating: bool = True, dtype_backend: DtypeBackend = "numpy_nullable", - ): # noqa: PR01, RT01, D200 + ) -> Self: # noqa: PR01, RT01, D200 """ Convert columns to best possible dtypes using dtypes supporting ``pd.NA``. """ @@ -1921,7 +1976,7 @@ def convert_dtypes( ) ) - def isin(self, values): # noqa: PR01, RT01, D200 + def isin(self, values) -> Self: # noqa: PR01, RT01, D200 """ Whether elements in `BasePandasDataset` are contained in `values`. """ @@ -1935,16 +1990,16 @@ def isin(self, values): # noqa: PR01, RT01, D200 ) ) - def isna(self): # noqa: RT01, D200 + def isna(self) -> Self: # noqa: RT01, D200 """ Detect missing values. """ return self.__constructor__(query_compiler=self._query_compiler.isna()) - isnull = isna + isnull: Self = isna @property - def iloc(self): # noqa: RT01, D200 + def iloc(self) -> _iLocIndexer: # noqa: RT01, D200 """ Purely integer-location based indexing for selection by position. """ @@ -1953,12 +2008,12 @@ def iloc(self): # noqa: RT01, D200 return _iLocIndexer(self) @_inherit_docstrings(pandas.DataFrame.kurt, apilink="pandas.DataFrame.kurt") - def kurt(self, axis=0, skipna=True, numeric_only=False, **kwargs): + def kurt(self, axis=0, skipna=True, numeric_only=False, **kwargs) -> Series | float: return self._stat_operation("kurt", axis, skipna, numeric_only, **kwargs) - kurtosis = kurt + kurtosis: Series | float = kurt - def last(self, offset): # noqa: PR01, RT01, D200 + def last(self, offset) -> Self: # noqa: PR01, RT01, D200 """ Select final periods of time series data based on a date offset. """ @@ -1972,26 +2027,26 @@ def last(self, offset): # noqa: PR01, RT01, D200 self._query_compiler.last(offset=to_offset(offset)) ) - def last_valid_index(self): # noqa: RT01, D200 + def last_valid_index(self) -> int: # noqa: RT01, D200 """ Return index for last non-NA value or None, if no non-NA value is found. """ return self._query_compiler.last_valid_index() - def le(self, other, axis="columns", level=None): # noqa: PR01, RT01, D200 + def le(self, other, axis="columns", level=None) -> Self: # noqa: PR01, RT01, D200 """ Get less than or equal comparison of `BasePandasDataset` and `other`, element-wise (binary operator `le`). """ return self._binary_op("le", other, axis=axis, level=level, dtypes=np.bool_) - def lt(self, other, axis="columns", level=None): # noqa: PR01, RT01, D200 + def lt(self, other, axis="columns", level=None) -> Self: # noqa: PR01, RT01, D200 """ Get less than comparison of `BasePandasDataset` and `other`, element-wise (binary operator `lt`). """ return self._binary_op("lt", other, axis=axis, level=level, dtypes=np.bool_) @property - def loc(self): # noqa: RT01, D200 + def loc(self) -> _LocIndexer: # noqa: RT01, D200 """ Get a group of rows and columns by label(s) or a boolean array. """ @@ -2007,7 +2062,7 @@ def mask( inplace: bool = False, axis: Optional[Axis] = None, level: Optional[Level] = None, - ): # noqa: PR01, RT01, D200 + ) -> Self | None: # noqa: PR01, RT01, D200 """ Replace values where the condition is True. """ @@ -2028,7 +2083,7 @@ def max( skipna=True, numeric_only=False, **kwargs, - ): # noqa: PR01, RT01, D200 + ) -> Series | None: # noqa: PR01, RT01, D200 """ Return the maximum of the values over the requested axis. """ @@ -2061,7 +2116,7 @@ def min( skipna: bool = True, numeric_only=False, **kwargs, - ): # noqa: PR01, RT01, D200 + ) -> Series | None: # noqa: PR01, RT01, D200 """ Return the minimum of the values over the requested axis. """ @@ -2091,7 +2146,7 @@ def min( def _stat_operation( self, op_name: str, - axis: Union[int, str], + axis: int | str, skipna: bool, numeric_only: Optional[bool] = False, **kwargs, @@ -2152,7 +2207,9 @@ def _stat_operation( else result_qc ) - def memory_usage(self, index=True, deep=False): # noqa: PR01, RT01, D200 + def memory_usage( + self, index=True, deep=False + ) -> Series | None: # noqa: PR01, RT01, D200 """ Return the memory usage of the `BasePandasDataset`. """ @@ -2162,7 +2219,7 @@ def memory_usage(self, index=True, deep=False): # noqa: PR01, RT01, D200 def mod( self, other, axis="columns", level=None, fill_value=None - ): # noqa: PR01, RT01, D200 + ) -> Self: # noqa: PR01, RT01, D200 """ Get modulo of `BasePandasDataset` and `other`, element-wise (binary operator `mod`). """ @@ -2170,7 +2227,9 @@ def mod( "mod", other, axis=axis, level=level, fill_value=fill_value ) - def mode(self, axis=0, numeric_only=False, dropna=True): # noqa: PR01, RT01, D200 + def mode( + self, axis=0, numeric_only=False, dropna=True + ) -> Self: # noqa: PR01, RT01, D200 """ Get the mode(s) of each element along the selected axis. """ @@ -2183,7 +2242,7 @@ def mode(self, axis=0, numeric_only=False, dropna=True): # noqa: PR01, RT01, D2 def mul( self, other, axis="columns", level=None, fill_value=None - ): # noqa: PR01, RT01, D200 + ) -> Self: # noqa: PR01, RT01, D200 """ Get multiplication of `BasePandasDataset` and `other`, element-wise (binary operator `mul`). """ @@ -2191,23 +2250,23 @@ def mul( "mul", other, axis=axis, level=level, fill_value=fill_value ) - multiply = mul + multiply: Self = mul - def ne(self, other, axis="columns", level=None): # noqa: PR01, RT01, D200 + def ne(self, other, axis="columns", level=None) -> Self: # noqa: PR01, RT01, D200 """ Get Not equal comparison of `BasePandasDataset` and `other`, element-wise (binary operator `ne`). """ return self._binary_op("ne", other, axis=axis, level=level, dtypes=np.bool_) - def notna(self): # noqa: RT01, D200 + def notna(self) -> Self: # noqa: RT01, D200 """ Detect existing (non-missing) values. """ return self.__constructor__(query_compiler=self._query_compiler.notna()) - notnull = notna + notnull: Self = notna - def nunique(self, axis=0, dropna=True): # noqa: PR01, RT01, D200 + def nunique(self, axis=0, dropna=True) -> Series | int: # noqa: PR01, RT01, D200 """ Return number of unique elements in the `BasePandasDataset`. """ @@ -2223,7 +2282,7 @@ def pct_change( limit=lib.no_default, freq=None, **kwargs, - ): # noqa: PR01, RT01, D200 + ) -> Self: # noqa: PR01, RT01, D200 """ Percentage change between the current and a prior element. """ @@ -2268,13 +2327,15 @@ def pct_change( ) ) - def pipe(self, func, *args, **kwargs): # noqa: PR01, RT01, D200 + def pipe( + self, func: Callable[..., T] | tuple[Callable[..., T], str], *args, **kwargs + ) -> T: # noqa: PR01, RT01, D200 """ Apply chainable functions that expect `BasePandasDataset`. """ return pipe(self, func, *args, **kwargs) - def pop(self, item): # noqa: PR01, RT01, D200 + def pop(self, item) -> Series | Scalar: # noqa: PR01, RT01, D200 """ Return item and drop from frame. Raise KeyError if not found. """ @@ -2284,7 +2345,7 @@ def pop(self, item): # noqa: PR01, RT01, D200 def pow( self, other, axis="columns", level=None, fill_value=None - ): # noqa: PR01, RT01, D200 + ) -> Self: # noqa: PR01, RT01, D200 """ Get exponential power of `BasePandasDataset` and `other`, element-wise (binary operator `pow`). """ @@ -2294,7 +2355,7 @@ def pow( def quantile( self, q, axis, numeric_only, interpolation, method - ): # noqa: PR01, RT01, D200 + ) -> DataFrame | Series | Scalar: # noqa: PR01, RT01, D200 """ Return values at the given quantile over requested axis. """ @@ -2366,7 +2427,7 @@ def rank( na_option: str = "keep", ascending: bool = True, pct: bool = False, - ): + ) -> Self: if axis is None: raise ValueError( f"No axis named None for object type {type(self).__name__}" @@ -2417,7 +2478,7 @@ def reindex( columns=None, copy=True, **kwargs, - ): # noqa: PR01, RT01, D200 + ) -> Self: # noqa: PR01, RT01, D200 """ Conform `BasePandasDataset` to new index with optional filling logic. """ @@ -2450,7 +2511,7 @@ def rename_axis( axis=0, copy=None, inplace=False, - ): # noqa: PR01, RT01, D200 + ) -> DataFrame | Series | None: # noqa: PR01, RT01, D200 """ Set the name of the axis for the index or columns. """ @@ -2507,7 +2568,7 @@ def f(x): if not inplace: return result - def reorder_levels(self, order, axis=0): # noqa: PR01, RT01, D200 + def reorder_levels(self, order, axis=0) -> Self: # noqa: PR01, RT01, D200 """ Rearrange index levels using input order. """ @@ -2525,10 +2586,10 @@ def resample( kind: Optional[str] = lib.no_default, on: Level = None, level: Level = None, - origin: Union[str, TimestampConvertibleTypes] = "start_day", + origin: str | TimestampConvertibleTypes = "start_day", offset: Optional[TimedeltaConvertibleTypes] = None, group_keys=False, - ): # noqa: PR01, RT01, D200 + ) -> Resampler: # noqa: PR01, RT01, D200 """ Resample time-series data. """ @@ -2576,7 +2637,7 @@ def reset_index( col_fill: Hashable = "", allow_duplicates=lib.no_default, names: Hashable | Sequence[Hashable] = None, - ): # noqa: PR01, RT01, D200 + ) -> DataFrame | Series | None: # noqa: PR01, RT01, D200 """ Reset the index, or a level of it. """ @@ -2603,7 +2664,7 @@ def reset_index( def radd( self, other, axis="columns", level=None, fill_value=None - ): # noqa: PR01, RT01, D200 + ) -> Self: # noqa: PR01, RT01, D200 """ Return addition of `BasePandasDataset` and `other`, element-wise (binary operator `radd`). """ @@ -2613,7 +2674,7 @@ def radd( def rfloordiv( self, other, axis="columns", level=None, fill_value=None - ): # noqa: PR01, RT01, D200 + ) -> Self: # noqa: PR01, RT01, D200 """ Get integer division of `BasePandasDataset` and `other`, element-wise (binary operator `rfloordiv`). """ @@ -2623,7 +2684,7 @@ def rfloordiv( def rmod( self, other, axis="columns", level=None, fill_value=None - ): # noqa: PR01, RT01, D200 + ) -> Self: # noqa: PR01, RT01, D200 """ Get modulo of `BasePandasDataset` and `other`, element-wise (binary operator `rmod`). """ @@ -2633,7 +2694,7 @@ def rmod( def rmul( self, other, axis="columns", level=None, fill_value=None - ): # noqa: PR01, RT01, D200 + ) -> Self: # noqa: PR01, RT01, D200 """ Get Multiplication of dataframe and other, element-wise (binary operator `rmul`). """ @@ -2652,7 +2713,7 @@ def rolling( closed: str | None = None, step: int | None = None, method: str = "single", - ): # noqa: PR01, RT01, D200 + ) -> Rolling | Window: # noqa: PR01, RT01, D200 """ Provide rolling window calculations. """ @@ -2706,7 +2767,7 @@ def rolling( method=method, ) - def round(self, decimals=0, *args, **kwargs): # noqa: PR01, RT01, D200 + def round(self, decimals=0, *args, **kwargs) -> Self: # noqa: PR01, RT01, D200 """ Round a `BasePandasDataset` to a variable number of decimal places. """ @@ -2718,7 +2779,7 @@ def round(self, decimals=0, *args, **kwargs): # noqa: PR01, RT01, D200 def rpow( self, other, axis="columns", level=None, fill_value=None - ): # noqa: PR01, RT01, D200 + ) -> Self: # noqa: PR01, RT01, D200 """ Get exponential power of `BasePandasDataset` and `other`, element-wise (binary operator `rpow`). """ @@ -2728,7 +2789,7 @@ def rpow( def rsub( self, other, axis="columns", level=None, fill_value=None - ): # noqa: PR01, RT01, D200 + ) -> Self: # noqa: PR01, RT01, D200 """ Get subtraction of `BasePandasDataset` and `other`, element-wise (binary operator `rsub`). """ @@ -2738,7 +2799,7 @@ def rsub( def rtruediv( self, other, axis="columns", level=None, fill_value=None - ): # noqa: PR01, RT01, D200 + ) -> Self: # noqa: PR01, RT01, D200 """ Get floating division of `BasePandasDataset` and `other`, element-wise (binary operator `rtruediv`). """ @@ -2746,7 +2807,7 @@ def rtruediv( "rtruediv", other, axis=axis, level=level, fill_value=fill_value ) - rdiv = rtruediv + rdiv: Self = rtruediv def sample( self, @@ -2757,7 +2818,7 @@ def sample( random_state: RandomState | None = None, axis: Axis | None = None, ignore_index: bool = False, - ): # noqa: PR01, RT01, D200 + ) -> Self: # noqa: PR01, RT01, D200 """ Return a random sample of items from an axis of object. """ @@ -2881,7 +2942,7 @@ def sem( ddof: int = 1, numeric_only=False, **kwargs, - ): # noqa: PR01, RT01, D200 + ) -> Series | float: # noqa: PR01, RT01, D200 """ Return unbiased standard error of the mean over requested axis. """ @@ -2895,7 +2956,7 @@ def mean( skipna=True, numeric_only=False, **kwargs, - ): # noqa: PR01, RT01, D200 + ) -> Series | float: # noqa: PR01, RT01, D200 """ Return the mean of the values over the requested axis. """ @@ -2907,7 +2968,7 @@ def median( skipna=True, numeric_only=False, **kwargs, - ): # noqa: PR01, RT01, D200 + ) -> Series | float: # noqa: PR01, RT01, D200 """ Return the mean of the values over the requested axis. """ @@ -2919,7 +2980,7 @@ def set_axis( *, axis: Axis = 0, copy=None, - ): # noqa: PR01, RT01, D200 + ) -> Self: # noqa: PR01, RT01, D200 """ Assign desired index to given axis. """ @@ -2931,7 +2992,7 @@ def set_axis( def set_flags( self, *, copy: bool = False, allows_duplicate_labels: Optional[bool] = None - ): # noqa: PR01, RT01, D200 + ) -> Self: # noqa: PR01, RT01, D200 """ Return a new `BasePandasDataset` with updated flags. """ @@ -2952,7 +3013,7 @@ def shift( axis: Axis = 0, fill_value: Hashable = lib.no_default, suffix=None, - ): # noqa: PR01, RT01, D200 + ) -> Self | DataFrame: # noqa: PR01, RT01, D200 """ Shift index by desired number of periods with an optional time `freq`. """ @@ -2989,7 +3050,7 @@ def skew( skipna: bool = True, numeric_only=False, **kwargs, - ): # noqa: PR01, RT01, D200 + ) -> Series | float: # noqa: PR01, RT01, D200 """ Return unbiased skew over requested axis. """ @@ -3007,7 +3068,7 @@ def sort_index( sort_remaining=True, ignore_index: bool = False, key: Optional[IndexKeyFunc] = None, - ): # noqa: PR01, RT01, D200 + ) -> Self | None: # noqa: PR01, RT01, D200 """ Sort object by labels (along an axis). """ @@ -3042,7 +3103,7 @@ def sort_values( na_position="last", ignore_index: bool = False, key: Optional[IndexKeyFunc] = None, - ): # noqa: PR01, RT01, D200 + ) -> Self | None: # noqa: PR01, RT01, D200 """ Sort by the values along either axis. """ @@ -3076,7 +3137,7 @@ def std( ddof: int = 1, numeric_only=False, **kwargs, - ): # noqa: PR01, RT01, D200 + ) -> Series | float: # noqa: PR01, RT01, D200 """ Return sample standard deviation over requested axis. """ @@ -3086,7 +3147,7 @@ def std( def sub( self, other, axis="columns", level=None, fill_value=None - ): # noqa: PR01, RT01, D200 + ) -> Self: # noqa: PR01, RT01, D200 """ Get subtraction of `BasePandasDataset` and `other`, element-wise (binary operator `sub`). """ @@ -3094,9 +3155,9 @@ def sub( "sub", other, axis=axis, level=level, fill_value=fill_value ) - subtract = sub + subtract: Self = sub - def swapaxes(self, axis1, axis2, copy=None): # noqa: PR01, RT01, D200 + def swapaxes(self, axis1, axis2, copy=None) -> Self: # noqa: PR01, RT01, D200 """ Interchange axes and swap values axes appropriately. """ @@ -3110,7 +3171,7 @@ def swapaxes(self, axis1, axis2, copy=None): # noqa: PR01, RT01, D200 return self.copy() return self - def swaplevel(self, i=-2, j=-1, axis=0): # noqa: PR01, RT01, D200 + def swaplevel(self, i=-2, j=-1, axis=0) -> Self: # noqa: PR01, RT01, D200 """ Swap levels `i` and `j` in a `MultiIndex`. """ @@ -3118,7 +3179,7 @@ def swaplevel(self, i=-2, j=-1, axis=0): # noqa: PR01, RT01, D200 idx = self.index if axis == 0 else self.columns return self.set_axis(idx.swaplevel(i, j), axis=axis) - def tail(self, n=5): # noqa: PR01, RT01, D200 + def tail(self, n=5) -> Self: # noqa: PR01, RT01, D200 """ Return the last `n` rows. """ @@ -3126,7 +3187,7 @@ def tail(self, n=5): # noqa: PR01, RT01, D200 return self.iloc[-n:] return self.iloc[len(self.index) :] - def take(self, indices, axis=0, **kwargs): # noqa: PR01, RT01, D200 + def take(self, indices, axis=0, **kwargs) -> Self: # noqa: PR01, RT01, D200 """ Return the elements in the given *positional* indices along an axis. """ @@ -3166,7 +3227,7 @@ def to_csv( decimal=".", errors: str = "strict", storage_options: StorageOptions = None, - ): # pragma: no cover + ) -> str | None: # pragma: no cover from modin.core.execution.dispatching.factories.dispatcher import ( FactoryDispatcher, ) @@ -3215,7 +3276,7 @@ def to_excel( freeze_panes=None, storage_options: StorageOptions = None, engine_kwargs=None, - ): # pragma: no cover # noqa: PR01, RT01, D200 + ) -> None: # pragma: no cover # noqa: PR01, RT01, D200 """ Write object to an Excel sheet. """ @@ -3239,7 +3300,7 @@ def to_excel( engine_kwargs=engine_kwargs, ) - def to_dict(self, orient="dict", into=dict, index=True): + def to_dict(self, orient="dict", into=dict, index=True) -> dict: return self._query_compiler.dataframe_to_dict(orient, into, index) @expanduser_path_arg("path_or_buf") @@ -3297,7 +3358,7 @@ def to_json( indent=None, storage_options: StorageOptions = None, mode="w", - ): # pragma: no cover # noqa: PR01, RT01, D200 + ) -> str | None: # pragma: no cover # noqa: PR01, RT01, D200 """ Convert the object to a JSON string. """ @@ -3346,7 +3407,7 @@ def to_latex( caption=None, label=None, position=None, - ): # pragma: no cover # noqa: PR01, RT01, D200 + ) -> str | None: # pragma: no cover # noqa: PR01, RT01, D200 """ Render object to a LaTeX tabular, longtable, or nested table. """ @@ -3383,7 +3444,7 @@ def to_markdown( index: bool = True, storage_options: StorageOptions = None, **kwargs, - ): # noqa: PR01, RT01, D200 + ) -> str: # noqa: PR01, RT01, D200 """ Print `BasePandasDataset` in Markdown-friendly format. """ @@ -3403,7 +3464,7 @@ def to_pickle( compression: CompressionOptions = "infer", protocol: int = pkl.HIGHEST_PROTOCOL, storage_options: StorageOptions = None, - ): # pragma: no cover # noqa: PR01, D200 + ) -> None: # pragma: no cover # noqa: PR01, D200 """ Pickle (serialize) object to file. """ @@ -3431,7 +3492,7 @@ def _to_bare_numpy( def to_numpy( self, dtype=None, copy=False, na_value=lib.no_default - ): # noqa: PR01, RT01, D200 + ) -> np.ndarray: # noqa: PR01, RT01, D200 """ Convert the `BasePandasDataset` to a NumPy array or a Modin wrapper for NumPy array. """ @@ -3451,7 +3512,7 @@ def to_numpy( # TODO(williamma12): When this gets implemented, have the series one call this. def to_period( self, freq=None, axis=0, copy=None - ): # pragma: no cover # noqa: PR01, RT01, D200 + ) -> Self: # pragma: no cover # noqa: PR01, RT01, D200 """ Convert `BasePandasDataset` from DatetimeIndex to PeriodIndex. """ @@ -3479,7 +3540,7 @@ def to_string( line_width=None, max_colwidth=None, encoding=None, - ): # noqa: PR01, RT01, D200 + ) -> str | None: # noqa: PR01, RT01, D200 """ Render a `BasePandasDataset` to a console-friendly tabular output. """ @@ -3516,7 +3577,7 @@ def to_sql( chunksize=None, dtype=None, method=None, - ): # noqa: PR01, D200 + ) -> int | None: # noqa: PR01, D200 """ Write records stored in a `BasePandasDataset` to a SQL database. """ @@ -3553,7 +3614,7 @@ def to_sql( # TODO(williamma12): When this gets implemented, have the series one call this. def to_timestamp( self, freq=None, how="start", axis=0, copy=None - ): # noqa: PR01, RT01, D200 + ) -> Self: # noqa: PR01, RT01, D200 """ Cast to DatetimeIndex of timestamps, at *beginning* of period. """ @@ -3569,7 +3630,7 @@ def to_xarray(self): # noqa: PR01, RT01, D200 def truediv( self, other, axis="columns", level=None, fill_value=None - ): # noqa: PR01, RT01, D200 + ) -> Self: # noqa: PR01, RT01, D200 """ Get floating division of `BasePandasDataset` and `other`, element-wise (binary operator `truediv`). """ @@ -3577,11 +3638,12 @@ def truediv( "truediv", other, axis=axis, level=level, fill_value=fill_value ) - div = divide = truediv + div: Self = truediv + divide: Self = truediv def truncate( self, before=None, after=None, axis=None, copy=None - ): # noqa: PR01, RT01, D200 + ) -> Self: # noqa: PR01, RT01, D200 """ Truncate a `BasePandasDataset` before and after some index value. """ @@ -3599,7 +3661,9 @@ def truncate( slice_obj = s if axis == 0 else (slice(None), s) return self.iloc[slice_obj] - def transform(self, func, axis=0, *args, **kwargs): # noqa: PR01, RT01, D200 + def transform( + self, func, axis=0, *args, **kwargs + ) -> Self: # noqa: PR01, RT01, D200 """ Call ``func`` on self producing a `BasePandasDataset` with the same axis shape as self. """ @@ -3618,7 +3682,9 @@ def transform(self, func, axis=0, *args, **kwargs): # noqa: PR01, RT01, D200 raise ValueError("Function did not transform") return result - def tz_convert(self, tz, axis=0, level=None, copy=None): # noqa: PR01, RT01, D200 + def tz_convert( + self, tz, axis=0, level=None, copy=None + ) -> Self: # noqa: PR01, RT01, D200 """ Convert tz-aware axis to target time zone. """ @@ -3633,7 +3699,7 @@ def tz_convert(self, tz, axis=0, level=None, copy=None): # noqa: PR01, RT01, D2 def tz_localize( self, tz, axis=0, level=None, copy=None, ambiguous="raise", nonexistent="raise" - ): # noqa: PR01, RT01, D200 + ) -> Self: # noqa: PR01, RT01, D200 """ Localize tz-naive index of a `BasePandasDataset` to target time zone. """ @@ -3662,7 +3728,7 @@ def interpolate( limit_area=None, downcast=lib.no_default, **kwargs, - ): # noqa: PR01, RT01, D200 + ) -> Self: # noqa: PR01, RT01, D200 if downcast is not lib.no_default: warnings.warn( f"The 'downcast' keyword in {type(self).__name__}.interpolate " @@ -3705,7 +3771,7 @@ def value_counts( sort: bool = True, ascending: bool = False, dropna: bool = True, - ): + ) -> Series: if subset is None: subset = self._query_compiler.columns with warnings.catch_warnings(): @@ -3738,7 +3804,7 @@ def var( ddof: int = 1, numeric_only=False, **kwargs, - ): # noqa: PR01, RT01, D200 + ) -> Series | float: # noqa: PR01, RT01, D200 """ Return unbiased variance over requested axis. """ @@ -3746,7 +3812,7 @@ def var( "var", axis, skipna, numeric_only, ddof=ddof, **kwargs ) - def __abs__(self): + def __abs__(self) -> Self: """ Return a `BasePandasDataset` with absolute numeric value of each element. @@ -3760,16 +3826,16 @@ def __abs__(self): @_doc_binary_op( operation="union", bin_op="and", right="other", **_doc_binary_op_kwargs ) - def __and__(self, other): + def __and__(self, other) -> Self: return self._binary_op("__and__", other, axis=0) @_doc_binary_op( operation="union", bin_op="rand", right="other", **_doc_binary_op_kwargs ) - def __rand__(self, other): + def __rand__(self, other) -> Self: return self._binary_op("__rand__", other, axis=0) - def __array__(self, dtype=None): + def __array__(self, dtype=None) -> np.ndarray: """ Return the values as a NumPy array. @@ -3785,7 +3851,7 @@ def __array__(self, dtype=None): """ return self._to_bare_numpy(dtype) - def __copy__(self, deep=True): + def __copy__(self, deep=True) -> Self: """ Return the copy of the `BasePandasDataset`. @@ -3800,7 +3866,7 @@ def __copy__(self, deep=True): """ return self.copy(deep=deep) - def __deepcopy__(self, memo=None): + def __deepcopy__(self, memo=None) -> Self: """ Return the deep copy of the `BasePandasDataset`. @@ -3821,10 +3887,10 @@ def __deepcopy__(self, memo=None): right="other", **_doc_binary_op_kwargs, ) - def __eq__(self, other): + def __eq__(self, other) -> Self: return self.eq(other) - def __finalize__(self, other, method=None, **kwargs): + def __finalize__(self, other, method=None, **kwargs) -> Self: """ Propagate metadata from `other` to `self`. @@ -3851,10 +3917,10 @@ def __finalize__(self, other, method=None, **kwargs): right="right", **_doc_binary_op_kwargs, ) - def __ge__(self, right): + def __ge__(self, right) -> Self: return self.ge(right) - def __getitem__(self, key): + def __getitem__(self, key) -> Self: """ Retrieve dataset according to `key`. @@ -3886,7 +3952,7 @@ def xs( axis=0, level=None, drop_level: bool = True, - ): # noqa: PR01, RT01, D200 + ) -> Self: # noqa: PR01, RT01, D200 """ Return cross-section from the Series/DataFrame. """ @@ -3968,7 +4034,7 @@ def xs( __hash__ = None - def _setitem_slice(self, key: slice, value): + def _setitem_slice(self, key: slice, value) -> None: """ Set rows specified by `key` slice with `value`. @@ -3982,7 +4048,7 @@ def _setitem_slice(self, key: slice, value): indexer = self.index._convert_slice_indexer(key, kind="getitem") self.iloc[indexer] = value - def _getitem_slice(self, key: slice): + def _getitem_slice(self, key: slice) -> Self: """ Get rows specified by `key` slice. @@ -4010,10 +4076,10 @@ def _getitem_slice(self, key: slice): right="right", **_doc_binary_op_kwargs, ) - def __gt__(self, right): + def __gt__(self, right) -> Self: return self.gt(right) - def __invert__(self): + def __invert__(self) -> Self: """ Apply bitwise inverse to each element of the `BasePandasDataset`. @@ -4040,10 +4106,10 @@ def __invert__(self): right="right", **_doc_binary_op_kwargs, ) - def __le__(self, right): + def __le__(self, right) -> Self: return self.le(right) - def __len__(self): + def __len__(self) -> int: """ Return length of info axis. @@ -4059,10 +4125,10 @@ def __len__(self): right="right", **_doc_binary_op_kwargs, ) - def __lt__(self, right): + def __lt__(self, right) -> Self: return self.lt(right) - def __matmul__(self, other): + def __matmul__(self, other) -> Self | np.ndarray | Scalar: """ Compute the matrix multiplication between the `BasePandasDataset` and `other`. @@ -4083,10 +4149,10 @@ def __matmul__(self, other): right="other", **_doc_binary_op_kwargs, ) - def __ne__(self, other): + def __ne__(self, other) -> Self: return self.ne(other) - def __neg__(self): + def __neg__(self) -> Self: """ Change the sign for every value of self. @@ -4119,7 +4185,7 @@ def __nonzero__(self): right="other", **_doc_binary_op_kwargs, ) - def __or__(self, other): + def __or__(self, other) -> Self: return self._binary_op("__or__", other, axis=0) @_doc_binary_op( @@ -4128,10 +4194,10 @@ def __or__(self, other): right="other", **_doc_binary_op_kwargs, ) - def __ror__(self, other): + def __ror__(self, other) -> Self: return self._binary_op("__ror__", other, axis=0) - def __sizeof__(self): + def __sizeof__(self) -> int: """ Generate the total memory usage for an `BasePandasDataset`. @@ -4141,7 +4207,7 @@ def __sizeof__(self): """ return self._query_compiler.sizeof() - def __str__(self): # pragma: no cover + def __str__(self) -> str: # pragma: no cover """ Return str(self). @@ -4157,7 +4223,7 @@ def __str__(self): # pragma: no cover right="other", **_doc_binary_op_kwargs, ) - def __xor__(self, other): + def __xor__(self, other) -> Self: return self._binary_op("__xor__", other, axis=0) @_doc_binary_op( @@ -4166,24 +4232,24 @@ def __xor__(self, other): right="other", **_doc_binary_op_kwargs, ) - def __rxor__(self, other): + def __rxor__(self, other) -> Self: return self._binary_op("__rxor__", other, axis=0) @property - def size(self): # noqa: RT01, D200 + def size(self) -> int: # noqa: RT01, D200 """ Return an int representing the number of elements in this `BasePandasDataset` object. """ return len(self._query_compiler.index) * len(self._query_compiler.columns) @property - def values(self): # noqa: RT01, D200 + def values(self) -> np.ndarray: # noqa: RT01, D200 """ Return a NumPy representation of the `BasePandasDataset`. """ return self.to_numpy() - def _repartition(self, axis: Optional[int] = None): + def _repartition(self, axis: Optional[int] = None) -> Self: """ Repartitioning Modin objects to get ideal partitions inside. @@ -4211,7 +4277,7 @@ def _repartition(self, axis: Optional[int] = None): ) @disable_logging - def __getattribute__(self, item): + def __getattribute__(self, item) -> Any: """ Return item from the `BasePandasDataset`. @@ -4239,7 +4305,7 @@ def default_handler(*args, **kwargs): def __array_ufunc__( self, ufunc: np.ufunc, method: str, *inputs: Any, **kwargs: Any - ): + ) -> DataFrame | Series | Any: """ Apply the `ufunc` to the `BasePandasDataset`. diff --git a/modin/pandas/dataframe.py b/modin/pandas/dataframe.py index b50d23f9654..1cf927a2c07 100644 --- a/modin/pandas/dataframe.py +++ b/modin/pandas/dataframe.py @@ -3157,7 +3157,7 @@ def _validate_eval_query(self, expr, **kwargs) -> None: "'Not' nodes are not implemented." ) # pragma: no cover - def _reduce_dimension(self, query_compiler) -> Series: + def _reduce_dimension(self, query_compiler: BaseQueryCompiler) -> Series: """ Reduce the dimension of data from the `query_compiler`. diff --git a/modin/pandas/series.py b/modin/pandas/series.py index 8abc4299f3d..31d59d57fcc 100644 --- a/modin/pandas/series.py +++ b/modin/pandas/series.py @@ -2395,7 +2395,7 @@ def _qcut(self, q, **kwargs): # noqa: PR01, RT01, D200 """ return self._default_to_pandas(pandas.qcut, q, **kwargs) - def _reduce_dimension(self, query_compiler) -> Union[Series, None]: + def _reduce_dimension(self, query_compiler) -> Series | Scalar: """ Try to reduce the dimension of data from the `query_compiler`. diff --git a/requirements-dev.txt b/requirements-dev.txt index df7183c4019..0d66b7fb985 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -52,6 +52,7 @@ pytest>=7.3.2 pytest-benchmark>=4.0.0 pytest-cov>=4.0.0 pytest-xdist>=3.2.0 +typing_extensions ## code linters black>=24.1.0 diff --git a/requirements/env_hdk.yml b/requirements/env_hdk.yml index 3c0d3ecc9f8..f36ddf03efe 100644 --- a/requirements/env_hdk.yml +++ b/requirements/env_hdk.yml @@ -34,6 +34,7 @@ dependencies: - pytest>=7.3.2 - pytest-cov>=4.0.0 - pytest-xdist>=3.2.0 + - typing_extensions # code linters - black>=24.1.0 diff --git a/requirements/env_unidist_linux.yml b/requirements/env_unidist_linux.yml index 34935770c60..00584a12b82 100644 --- a/requirements/env_unidist_linux.yml +++ b/requirements/env_unidist_linux.yml @@ -44,6 +44,7 @@ dependencies: - pytest>=7.3.2 - pytest-cov>=4.0.0 - pytest-xdist>=3.2.0 + - typing_extensions # code linters - black>=24.1.0 diff --git a/requirements/env_unidist_win.yml b/requirements/env_unidist_win.yml index 91c140502ed..02ac278eafd 100644 --- a/requirements/env_unidist_win.yml +++ b/requirements/env_unidist_win.yml @@ -44,6 +44,7 @@ dependencies: - pytest>=7.3.2 - pytest-cov>=4.0.0 - pytest-xdist>=3.2.0 + - typing_extensions # code linters - black>=24.1.0 diff --git a/requirements/requirements-no-engine.yml b/requirements/requirements-no-engine.yml index a5be97db62c..0f0afe74930 100644 --- a/requirements/requirements-no-engine.yml +++ b/requirements/requirements-no-engine.yml @@ -37,6 +37,7 @@ dependencies: - pytest>=7.3.2 - pytest-cov>=4.0.0 - pytest-xdist>=3.2.0 + - typing_extensions # code linters - black>=24.1.0