diff --git a/modin/config/__init__.py b/modin/config/__init__.py index 86ae3abcb17..da779d0c5c7 100644 --- a/modin/config/__init__.py +++ b/modin/config/__init__.py @@ -25,8 +25,6 @@ DocModule, Engine, EnvironmentVariable, - ExperimentalGroupbyImpl, - ExperimentalNumPyAPI, GithubCI, GpuCount, IsDebug, @@ -43,7 +41,6 @@ PersistentPickle, ProgressBar, RangePartitioning, - RangePartitioningGroupby, RayInitCustomResources, RayRedisAddress, RayRedisPassword, @@ -54,7 +51,6 @@ TestReadFromPostgres, TestReadFromSqlServer, TrackFileLeaks, - use_range_partitioning_groupby, ) from modin.config.pubsub import Parameter, ValueSource, context @@ -91,11 +87,7 @@ "BenchmarkMode", "PersistentPickle", "ModinNumpy", - "ExperimentalNumPyAPI", - "RangePartitioningGroupby", "RangePartitioning", - "use_range_partitioning_groupby", - "ExperimentalGroupbyImpl", "AsyncReadMode", "ReadSqlEngine", "IsExperimental", diff --git a/modin/config/envvars.py b/modin/config/envvars.py index 59f1ca265ed..6c50472ed71 100644 --- a/modin/config/envvars.py +++ b/modin/config/envvars.py @@ -683,40 +683,12 @@ class GithubCI(EnvironmentVariable, type=bool): default = False -class ModinNumpy(EnvWithSibilings, type=bool): +class ModinNumpy(EnvironmentVariable, type=bool): """Set to true to use Modin's implementation of NumPy API.""" varname = "MODIN_NUMPY" default = False - @classmethod - def _sibling(cls) -> type[EnvWithSibilings]: - """Get a parameter sibling.""" - return ExperimentalNumPyAPI - - -class ExperimentalNumPyAPI(EnvWithSibilings, type=bool): - """ - Set to true to use Modin's implementation of NumPy API. - - This parameter is deprecated. Use ``ModinNumpy`` instead. - """ - - varname = "MODIN_EXPERIMENTAL_NUMPY_API" - default = False - - @classmethod - def _sibling(cls) -> type[EnvWithSibilings]: - """Get a parameter sibling.""" - return ModinNumpy - - -# Let the parameter's handling logic know that this variable is deprecated and that -# we should raise respective warnings -ExperimentalNumPyAPI._deprecation_descriptor = DeprecationDescriptor( - ExperimentalNumPyAPI, ModinNumpy -) - class RangePartitioning(EnvironmentVariable, type=bool): """ @@ -730,72 +702,6 @@ class RangePartitioning(EnvironmentVariable, type=bool): default = False -class RangePartitioningGroupby(EnvWithSibilings, type=bool): - """ - Set to true to use Modin's range-partitioning group by implementation. - - This parameter is deprecated. Use ``RangePartitioning`` instead. - """ - - varname = "MODIN_RANGE_PARTITIONING_GROUPBY" - default = False - - @classmethod - def _sibling(cls) -> type[EnvWithSibilings]: - """Get a parameter sibling.""" - return ExperimentalGroupbyImpl - - -# Let the parameter's handling logic know that this variable is deprecated and that -# we should raise respective warnings -RangePartitioningGroupby._deprecation_descriptor = DeprecationDescriptor( - RangePartitioningGroupby, RangePartitioning -) - - -class ExperimentalGroupbyImpl(EnvWithSibilings, type=bool): - """ - Set to true to use Modin's range-partitioning group by implementation. - - This parameter is deprecated. Use ``RangePartitioning`` instead. - """ - - varname = "MODIN_EXPERIMENTAL_GROUPBY" - default = False - - @classmethod - def _sibling(cls) -> type[EnvWithSibilings]: - """Get a parameter sibling.""" - return RangePartitioningGroupby - - -# Let the parameter's handling logic know that this variable is deprecated and that -# we should raise respective warnings -ExperimentalGroupbyImpl._deprecation_descriptor = DeprecationDescriptor( - ExperimentalGroupbyImpl, RangePartitioningGroupby -) - - -def use_range_partitioning_groupby() -> bool: - """ - Determine whether range-partitioning implementation for groupby was enabled by a user. - - This is a temporary helper function that queries ``RangePartitioning`` and deprecated - ``RangePartitioningGroupby`` config variables in order to determine whether to range-part - impl for groupby is enabled. Eventially this function should be removed together with - ``RangePartitioningGroupby`` variable. - - Returns - ------- - bool - """ - with warnings.catch_warnings(): - # filter deprecation warning, it was already showed when a user set the variable - warnings.filterwarnings("ignore", category=FutureWarning) - old_range_part_var = RangePartitioningGroupby.get() - return RangePartitioning.get() or old_range_part_var - - class CIAWSSecretAccessKey(EnvironmentVariable, type=str): """Set to AWS_SECRET_ACCESS_KEY when running mock S3 tests for Modin in GitHub CI.""" diff --git a/modin/core/storage_formats/pandas/groupby.py b/modin/core/storage_formats/pandas/groupby.py index 55de645a898..c47ceeb5d9a 100644 --- a/modin/core/storage_formats/pandas/groupby.py +++ b/modin/core/storage_formats/pandas/groupby.py @@ -17,7 +17,7 @@ import pandas from pandas.core.dtypes.cast import find_common_type -from modin.config import use_range_partitioning_groupby +from modin.config import RangePartitioning from modin.core.dataframe.algebra import GroupByReduce from modin.error_message import ErrorMessage from modin.utils import hashable @@ -94,7 +94,7 @@ def build_qc_method(cls, agg_name, finalizer_fn=None): ) def method(query_compiler, *args, **kwargs): - if use_range_partitioning_groupby(): + if RangePartitioning.get(): try: if finalizer_fn is not None: raise NotImplementedError( diff --git a/modin/core/storage_formats/pandas/query_compiler.py b/modin/core/storage_formats/pandas/query_compiler.py index 40659ac5df8..5281e92d8c5 100644 --- a/modin/core/storage_formats/pandas/query_compiler.py +++ b/modin/core/storage_formats/pandas/query_compiler.py @@ -45,7 +45,7 @@ from pandas.core.indexing import check_bool_indexer from pandas.errors import DataError -from modin.config import CpuCount, RangePartitioning, use_range_partitioning_groupby +from modin.config import CpuCount, RangePartitioning from modin.core.dataframe.algebra import ( Binary, Fold, @@ -3625,7 +3625,7 @@ def groupby_nth( return result def groupby_mean(self, by, axis, groupby_kwargs, agg_args, agg_kwargs, drop=False): - if use_range_partitioning_groupby(): + if RangePartitioning.get(): try: return self._groupby_shuffle( by=by, @@ -3696,7 +3696,7 @@ def groupby_size( agg_kwargs, drop=False, ): - if use_range_partitioning_groupby(): + if RangePartitioning.get(): try: return self._groupby_shuffle( by=by, @@ -4117,7 +4117,7 @@ def groupby_agg( # 'group_wise' means 'groupby.apply()'. We're certain that range-partitioning groupby # always works better for '.apply()', so we're using it regardless of the 'RangePartitioning' # value - if how == "group_wise" or use_range_partitioning_groupby(): + if how == "group_wise" or RangePartitioning.get(): try: return self._groupby_shuffle( by=by, @@ -4138,7 +4138,7 @@ def groupby_agg( + "\nFalling back to a full-axis implementation." ) get_logger().info(message) - if use_range_partitioning_groupby(): + if RangePartitioning.get(): ErrorMessage.warn(message) if isinstance(agg_func, dict) and GroupbyReduceImpl.has_impl_for(agg_func): diff --git a/modin/pandas/io.py b/modin/pandas/io.py index fc4d13163c2..508d1b2a4d5 100644 --- a/modin/pandas/io.py +++ b/modin/pandas/io.py @@ -64,7 +64,7 @@ from pandas.io.parsers import TextFileReader from pandas.io.parsers.readers import _c_parser_defaults -from modin.config import ExperimentalNumPyAPI +from modin.config import ModinNumpy from modin.error_message import ErrorMessage from modin.logging import ClassLogger, enable_logging from modin.utils import ( @@ -1146,7 +1146,7 @@ def to_numpy( if isinstance(modin_obj, SupportsPrivateToNumPy): return modin_obj._to_numpy() array = modin_obj.to_numpy() - if ExperimentalNumPyAPI.get(): + if ModinNumpy.get(): array = array._to_numpy() return array diff --git a/modin/tests/config/test_envvars.py b/modin/tests/config/test_envvars.py index 4341a64d79d..4fc1b65140b 100644 --- a/modin/tests/config/test_envvars.py +++ b/modin/tests/config/test_envvars.py @@ -12,8 +12,6 @@ # governing permissions and limitations under the License. import os -import unittest.mock -import warnings import pytest @@ -121,154 +119,6 @@ def test_ray_cluster_resources(): assert ray.cluster_resources()["special_hardware"] == 1.0 -@pytest.mark.parametrize( - "deprecated_var, new_var", - [ - (cfg.ExperimentalGroupbyImpl, cfg.RangePartitioning), - (cfg.ExperimentalNumPyAPI, cfg.ModinNumpy), - (cfg.RangePartitioningGroupby, cfg.RangePartitioning), - ], -) -def test_deprecated_bool_vars_warnings(deprecated_var, new_var): - """Test that deprecated parameters are raising `FutureWarnings` and their replacements don't.""" - old_depr_val = deprecated_var.get() - old_new_var = new_var.get() - - try: - reset_vars(deprecated_var, new_var) - with pytest.warns(FutureWarning): - deprecated_var.get() - - with pytest.warns(FutureWarning): - deprecated_var.put(False) - - with unittest.mock.patch.dict(os.environ, {deprecated_var.varname: "1"}): - with pytest.warns(FutureWarning): - _check_vars() - - # check that the new var doesn't raise any warnings - reset_vars(deprecated_var, new_var) - with warnings.catch_warnings(): - warnings.simplefilter("error") - new_var.get() - new_var.put(False) - with unittest.mock.patch.dict(os.environ, {new_var.varname: "1"}): - _check_vars() - finally: - deprecated_var.put(old_depr_val) - new_var.put(old_new_var) - - -@pytest.mark.parametrize( - "deprecated_var, new_var", - [ - (cfg.ExperimentalGroupbyImpl, cfg.RangePartitioningGroupby), - (cfg.ExperimentalNumPyAPI, cfg.ModinNumpy), - ], -) -@pytest.mark.parametrize("get_depr_first", [True, False]) -def test_deprecated_bool_vars_equals(deprecated_var, new_var, get_depr_first): - """ - Test that deprecated parameters always have values equal to the new replacement parameters. - - Parameters - ---------- - deprecated_var : EnvironmentVariable - new_var : EnvironmentVariable - get_depr_first : bool - Defines an order in which the ``.get()`` method should be called when comparing values. - If ``True``: get deprecated value at first ``deprecated_var.get() == new_var.get() == value``. - If ``False``: get new value at first ``new_var.get() == deprecated_var.get() == value``. - The logic of the ``.get()`` method depends on which parameter was initialized first, - that's why it's worth testing both cases. - """ - old_depr_val = deprecated_var.get() - old_new_var = new_var.get() - - def get_values(): - return ( - (deprecated_var.get(), new_var.get()) - if get_depr_first - else (new_var.get(), deprecated_var.get()) - ) - - try: - # case1: initializing the value using 'deprecated_var' - reset_vars(deprecated_var, new_var) - deprecated_var.put(True) - val1, val2 = get_values() - assert val1 == val2 == True # noqa: E712 ('obj == True' comparison) - - new_var.put(False) - val1, val2 = get_values() - assert val1 == val2 == False # noqa: E712 ('obj == False' comparison) - - new_var.put(True) - val1, val2 = get_values() - assert val1 == val2 == True # noqa: E712 ('obj == True' comparison) - - deprecated_var.put(False) - val1, val2 = get_values() - assert val1 == val2 == False # noqa: E712 ('obj == False' comparison) - - # case2: initializing the value using 'new_var' - reset_vars(deprecated_var, new_var) - new_var.put(True) - val1, val2 = get_values() - assert val1 == val2 == True # noqa: E712 ('obj == True' comparison) - - deprecated_var.put(False) - val1, val2 = get_values() - assert val1 == val2 == False # noqa: E712 ('obj == False' comparison) - - deprecated_var.put(True) - val1, val2 = get_values() - assert val1 == val2 == True # noqa: E712 ('obj == True' comparison) - - new_var.put(False) - val1, val2 = get_values() - assert val1 == val2 == False # noqa: E712 ('obj == False' comparison) - - # case3: initializing the value using 'deprecated_var' with env variable - reset_vars(deprecated_var, new_var) - with unittest.mock.patch.dict(os.environ, {deprecated_var.varname: "True"}): - val1, val2 = get_values() - assert val1 == val2 == True # noqa: E712 ('obj == True' comparison) - - new_var.put(False) - val1, val2 = get_values() - assert val1 == val2 == False # noqa: E712 ('obj == False' comparison) - - new_var.put(True) - val1, val2 = get_values() - assert val1 == val2 == True # noqa: E712 ('obj == True' comparison) - - deprecated_var.put(False) - val1, val2 = get_values() - assert val1 == val2 == False # noqa: E712 ('obj == False' comparison) - - # case4: initializing the value using 'new_var' with env variable - reset_vars(deprecated_var, new_var) - with unittest.mock.patch.dict(os.environ, {new_var.varname: "True"}): - val1, val2 = get_values() - assert val1 == val2 == True # noqa: E712 ('obj == True' comparison) - - deprecated_var.put(False) - val1, val2 = get_values() - assert val1 == val2 == False # noqa: E712 ('obj == False' comparison) - - deprecated_var.put(True) - val1, val2 = get_values() - assert val1 == val2 == True # noqa: E712 ('obj == True' comparison) - - new_var.put(False) - val1, val2 = get_values() - assert val1 == val2 == False # noqa: E712 ('obj == False' comparison) - finally: - deprecated_var.put(old_depr_val) - new_var.put(old_new_var) - - @pytest.mark.parametrize( "modify_config", [{cfg.RangePartitioning: False, cfg.LazyExecution: "Auto"}], diff --git a/modin/tests/pandas/test_groupby.py b/modin/tests/pandas/test_groupby.py index 46779e0dfc4..beabd7ac335 100644 --- a/modin/tests/pandas/test_groupby.py +++ b/modin/tests/pandas/test_groupby.py @@ -21,13 +21,7 @@ import pytest import modin.pandas as pd -from modin.config import ( - IsRayCluster, - NPartitions, - RangePartitioning, - StorageFormat, - use_range_partitioning_groupby, -) +from modin.config import IsRayCluster, NPartitions, RangePartitioning, StorageFormat from modin.core.dataframe.algebra.default2pandas.groupby import GroupBy from modin.core.dataframe.pandas.partitioning.axis_partition import ( PandasDataframeAxisPartition, @@ -291,7 +285,7 @@ def test_mixed_dtypes_groupby(as_index): # This test though produces so much NaN values in the result, so it's impossible to sort, # using manual comparison of set of rows instead assert_set_of_rows_identical - if use_range_partitioning_groupby() + if RangePartitioning.get() else None ), ) @@ -363,7 +357,7 @@ def test_mixed_dtypes_groupby(as_index): eval_max(modin_groupby, pandas_groupby) eval_len(modin_groupby, pandas_groupby) eval_sum(modin_groupby, pandas_groupby) - if not use_range_partitioning_groupby(): + if not RangePartitioning.get(): # `.group` fails with experimental groupby # https://github.com/modin-project/modin/issues/6083 eval_ngroup(modin_groupby, pandas_groupby) @@ -1357,7 +1351,7 @@ def sort_if_experimental_groupby(*dfs): https://github.com/modin-project/modin/issues/5924 """ result = dfs - if use_range_partitioning_groupby(): + if RangePartitioning.get(): dfs = try_cast_to_pandas(dfs) result = [] for df in dfs: @@ -1635,7 +1629,7 @@ def test(grp): def eval_groups(modin_groupby, pandas_groupby): for k, v in modin_groupby.groups.items(): assert v.equals(pandas_groupby.groups[k]) - if use_range_partitioning_groupby(): + if RangePartitioning.get(): # `.get_group()` doesn't work correctly with experimental groupby: # https://github.com/modin-project/modin/issues/6093 return @@ -1947,8 +1941,7 @@ def test_agg_func_None_rename(by_and_agg_dict, as_index): pytest.param( False, marks=pytest.mark.skipif( - get_current_execution() == "BaseOnPython" - or use_range_partitioning_groupby(), + get_current_execution() == "BaseOnPython" or RangePartitioning.get(), reason="See Pandas issue #39103", ), ), diff --git a/modin/tests/pandas/utils.py b/modin/tests/pandas/utils.py index 2dd4346c814..ff40e5bb4c4 100644 --- a/modin/tests/pandas/utils.py +++ b/modin/tests/pandas/utils.py @@ -46,7 +46,6 @@ RangePartitioning, TestDatasetSize, TrackFileLeaks, - use_range_partitioning_groupby, ) from modin.pandas.io import to_pandas from modin.pandas.testing import ( @@ -702,7 +701,7 @@ def sort_if_range_partitioning(df1, df2, comparator=None, force=False): if comparator is None: comparator = df_equals - if force or (RangePartitioning.get() or use_range_partitioning_groupby()): + if force or RangePartitioning.get(): df1, df2 = sort_data(df1), sort_data(df2) comparator(df1, df2)