From c1b0942c22789da23c15eda60301c67c9415ea7d Mon Sep 17 00:00:00 2001 From: arunjose696 Date: Wed, 28 Aug 2024 17:36:46 +0200 Subject: [PATCH] creating function for create_test_df_in_defined_mode Signed-off-by: arunjose696 --- .../pandas/query_compiler_caster.py | 105 +++++++++--------- .../pandas/native_df_mode/test_binary.py | 22 +++- .../pandas/native_df_mode/test_default.py | 36 +++--- .../pandas/native_df_mode/test_indexing.py | 88 +++++++++------ .../tests/pandas/native_df_mode/test_iter.py | 21 ++-- .../pandas/native_df_mode/test_join_sort.py | 50 +++++---- .../native_df_mode/test_map_metadata.py | 34 +++--- .../pandas/native_df_mode/test_pickle.py | 9 +- .../pandas/native_df_mode/test_window.py | 19 ++-- modin/tests/pandas/native_df_mode/utils.py | 32 +++++- 10 files changed, 251 insertions(+), 165 deletions(-) diff --git a/modin/core/storage_formats/pandas/query_compiler_caster.py b/modin/core/storage_formats/pandas/query_compiler_caster.py index 8b7102c0103..211860a8427 100644 --- a/modin/core/storage_formats/pandas/query_compiler_caster.py +++ b/modin/core/storage_formats/pandas/query_compiler_caster.py @@ -52,7 +52,7 @@ def __init_subclass__( **kwargs : Additional keyword arguments """ super().__init_subclass__(**kwargs) - apply_argument_cast()(cls) + apply_argument_cast(cls) def cast_nested_args_to_current_qc_type(arguments, current_qc): @@ -100,61 +100,60 @@ def cast_arg_to_current_qc(arg): return arguments -def apply_argument_cast(): +def apply_argument_cast(obj: Fn) -> Fn: """ - Cast any of args that is a query compiler to the type of left query compiler. + Cast all arguments that are query compilers to the current query compiler. + + Parameters + ---------- + obj : function Returns ------- - func - A decorator function. + function + Returns decorated function which does argument casting. 
""" + if isinstance(obj, type): + all_attrs = dict(inspect.getmembers(obj)) + all_attrs.pop("__abstractmethods__") + + # This is required because inspect converts class methods to member functions + current_class_attrs = vars(obj) + for key in current_class_attrs: + all_attrs[key] = current_class_attrs[key] + + for attr_name, attr_value in all_attrs.items(): + if isinstance( + attr_value, (FunctionType, MethodType, classmethod, staticmethod) + ): + wrapped = apply_argument_cast(attr_value) + setattr(obj, attr_name, wrapped) + return obj # type: ignore [return-value] + elif isinstance(obj, classmethod): + return classmethod(apply_argument_cast(obj.__func__)) # type: ignore [return-value, arg-type] + elif isinstance(obj, staticmethod): + return staticmethod(apply_argument_cast(obj.__func__)) + + @functools.wraps(obj) + def cast_args(*args: Tuple, **kwargs: Dict) -> Any: + """ + Add casting for query compiler arguments. + + Parameters + ---------- + *args : tuple + The function arguments. + **kwargs : dict + The function keyword arguments. 
+ + Returns + ------- + Any + """ + current_qc = args[0] + if isinstance(current_qc, BaseQueryCompiler): + kwargs = cast_nested_args_to_current_qc_type(kwargs, current_qc) + args = cast_nested_args_to_current_qc_type(args, current_qc) + return obj(*args, **kwargs) - def decorator(obj: Fn) -> Fn: - """Cast all arguments that are query compilers to the current query compiler.""" - if isinstance(obj, type): - all_attrs = dict(inspect.getmembers(obj)) - all_attrs.pop("__abstractmethods__") - - # This is required because inspect converts class methods to member functions - current_class_attrs = vars(obj) - for key in current_class_attrs: - all_attrs[key] = current_class_attrs[key] - - for attr_name, attr_value in all_attrs.items(): - if isinstance( - attr_value, (FunctionType, MethodType, classmethod, staticmethod) - ): - wrapped = apply_argument_cast()(attr_value) - setattr(obj, attr_name, wrapped) - return obj # type: ignore [return-value] - elif isinstance(obj, classmethod): - return classmethod(decorator(obj.__func__)) # type: ignore [return-value, arg-type] - elif isinstance(obj, staticmethod): - return staticmethod(decorator(obj.__func__)) - - @functools.wraps(obj) - def cast_args(*args: Tuple, **kwargs: Dict) -> Any: - """ - Add casting for query compiler arguments. - - Parameters - ---------- - *args : tuple - The function arguments. - **kwargs : dict - The function keyword arguments. 
- - Returns - ------- - Any - """ - current_qc = args[0] - if isinstance(current_qc, BaseQueryCompiler): - kwargs = cast_nested_args_to_current_qc_type(kwargs, current_qc) - args = cast_nested_args_to_current_qc_type(args, current_qc) - return obj(*args, **kwargs) - - return cast_args - - return decorator + return cast_args diff --git a/modin/tests/pandas/native_df_mode/test_binary.py b/modin/tests/pandas/native_df_mode/test_binary.py index 0503b82460b..82c837b6416 100644 --- a/modin/tests/pandas/native_df_mode/test_binary.py +++ b/modin/tests/pandas/native_df_mode/test_binary.py @@ -17,9 +17,11 @@ import pytest from modin.config import NativeDataframeMode, NPartitions -from modin.tests.pandas.native_df_mode.utils import eval_general_interop +from modin.tests.pandas.native_df_mode.utils import ( + create_test_df_in_defined_mode, + eval_general_interop, +) from modin.tests.pandas.utils import ( - create_test_dfs, default_to_pandas_ignore_string, df_equals, test_data, @@ -153,8 +155,12 @@ def operation(df1, df2): "df_mode_pair", list(product(NativeDataframeMode.choices, repeat=2)) ) def test_equals(frame1_data, frame2_data, expected_pandas_equals, df_mode_pair): - modin_df1, pandas_df1 = create_test_dfs(frame1_data, df_mode=df_mode_pair[0]) - modin_df2, pandas_df2 = create_test_dfs(frame2_data, df_mode=df_mode_pair[1]) + modin_df1, pandas_df1 = create_test_df_in_defined_mode( + frame1_data, df_mode=df_mode_pair[0] + ) + modin_df2, pandas_df2 = create_test_df_in_defined_mode( + frame2_data, df_mode=df_mode_pair[1] + ) pandas_equals = pandas_df1.equals(pandas_df2) assert pandas_equals == expected_pandas_equals, ( @@ -172,8 +178,12 @@ def test_equals(frame1_data, frame2_data, expected_pandas_equals, df_mode_pair): "df_mode_pair", list(product(NativeDataframeMode.choices, repeat=2)) ) def test_empty_df(empty_operand, df_mode_pair): - modin_df, pandas_df = create_test_dfs([0, 1, 2, 0, 1, 2], df_mode=df_mode_pair[0]) - modin_df_empty, pandas_df_empty = 
create_test_dfs(df_mode=df_mode_pair[1]) + modin_df, pandas_df = create_test_df_in_defined_mode( + [0, 1, 2, 0, 1, 2], df_mode=df_mode_pair[0] + ) + modin_df_empty, pandas_df_empty = create_test_df_in_defined_mode( + df_mode=df_mode_pair[1] + ) if empty_operand == "right": modin_res = modin_df + modin_df_empty diff --git a/modin/tests/pandas/native_df_mode/test_default.py b/modin/tests/pandas/native_df_mode/test_default.py index 5e5257ed790..03d6d372fd4 100644 --- a/modin/tests/pandas/native_df_mode/test_default.py +++ b/modin/tests/pandas/native_df_mode/test_default.py @@ -23,10 +23,12 @@ import modin.pandas as pd from modin.config import NativeDataframeMode, NPartitions from modin.pandas.io import to_pandas -from modin.tests.pandas.native_df_mode.utils import eval_general_interop +from modin.tests.pandas.native_df_mode.utils import ( + create_test_df_in_defined_mode, + create_test_series_in_defined_mode, + eval_general_interop, +) from modin.tests.pandas.utils import ( - create_test_dfs, - create_test_series, default_to_pandas_ignore_string, df_equals, test_data, @@ -81,12 +83,12 @@ "df_mode_pair", list(product(NativeDataframeMode.choices, repeat=2)) ) def test_ops_defaulting_to_pandas(op, make_args, df_mode_pair): - modin_df1, _ = create_test_dfs( + modin_df1, _ = create_test_df_in_defined_mode( test_data_diff_dtype, post_fn=lambda df: df.drop(["str_col", "bool_col"], axis=1), df_mode=df_mode_pair[0], ) - modin_df2, _ = create_test_dfs( + modin_df2, _ = create_test_df_in_defined_mode( test_data_diff_dtype, post_fn=lambda df: df.drop(["str_col", "bool_col"], axis=1), df_mode=df_mode_pair[1], @@ -118,10 +120,10 @@ def test_to_numpy(data): ) def test_asfreq(df_mode_pair): index = pd.date_range("1/1/2000", periods=4, freq="min") - series, _ = create_test_series( + series, _ = create_test_series_in_defined_mode( [0.0, None, 2.0, 3.0], index=index, df_mode=df_mode_pair[0] ) - df, _ = create_test_dfs({"s": series}, df_mode=df_mode_pair[1]) + df, _ = 
create_test_df_in_defined_mode({"s": series}, df_mode=df_mode_pair[1]) with warns_that_defaulting_to_pandas(): # We are only testing that this defaults to pandas, so we will just check for # the warning @@ -152,9 +154,13 @@ def assign_multiple_columns(df1, df2): ) def test_combine_first(df_mode_pair): data1 = {"A": [None, 0], "B": [None, 4]} - modin_df1, pandas_df1 = create_test_dfs(data1, df_mode=df_mode_pair[0]) + modin_df1, pandas_df1 = create_test_df_in_defined_mode( + data1, df_mode=df_mode_pair[0] + ) data2 = {"A": [1, 1], "B": [3, 3]} - modin_df2, pandas_df2 = create_test_dfs(data2, df_mode=df_mode_pair[1]) + modin_df2, pandas_df2 = create_test_df_in_defined_mode( + data2, df_mode=df_mode_pair[1] + ) df_equals( modin_df1.combine_first(modin_df2), @@ -170,11 +176,11 @@ def test_combine_first(df_mode_pair): ) def test_dot(data, df_mode_pair): - modin_df, pandas_df = create_test_dfs(data, df_mode=df_mode_pair[0]) + modin_df, pandas_df = create_test_df_in_defined_mode(data, df_mode=df_mode_pair[0]) col_len = len(modin_df.columns) # Test series input - modin_series, pandas_series = create_test_series( + modin_series, pandas_series = create_test_series_in_defined_mode( np.arange(col_len), index=pandas_df.columns, df_mode=df_mode_pair[1], @@ -194,7 +200,7 @@ def dot_func(df1, df2): # Test when input series index doesn't line up with columns with pytest.raises(ValueError): - modin_series_without_index, _ = create_test_series( + modin_series_without_index, _ = create_test_series_in_defined_mode( np.arange(col_len), df_mode=df_mode_pair[1] ) modin_df.dot(modin_series_without_index) @@ -209,7 +215,7 @@ def dot_func(df1, df2): "df_mode_pair", list(product(NativeDataframeMode.choices, repeat=2)) ) def test_matmul(data, df_mode_pair): - modin_df, pandas_df = create_test_dfs(data, df_mode=df_mode_pair[0]) + modin_df, pandas_df = create_test_df_in_defined_mode(data, df_mode=df_mode_pair[0]) col_len = len(modin_df.columns) # Test list input @@ -223,7 +229,7 @@ def 
test_matmul(data, df_mode_pair): modin_df @ np.arange(col_len + 10) # Test series input - modin_series, pandas_series = create_test_series( + modin_series, pandas_series = create_test_series_in_defined_mode( np.arange(col_len), index=pandas_df.columns, df_mode=df_mode_pair[1], @@ -241,7 +247,7 @@ def matmul_func(df1, df2): # Test when input series index doesn't line up with columns with pytest.raises(ValueError): - modin_series_without_index, _ = create_test_series( + modin_series_without_index, _ = create_test_series_in_defined_mode( np.arange(col_len), df_mode=df_mode_pair[1] ) modin_df @ modin_series_without_index diff --git a/modin/tests/pandas/native_df_mode/test_indexing.py b/modin/tests/pandas/native_df_mode/test_indexing.py index b747313a56f..b434026394a 100644 --- a/modin/tests/pandas/native_df_mode/test_indexing.py +++ b/modin/tests/pandas/native_df_mode/test_indexing.py @@ -19,12 +19,14 @@ import modin.pandas as pd from modin.config import NativeDataframeMode, NPartitions -from modin.tests.pandas.native_df_mode.utils import eval_general_interop +from modin.tests.pandas.native_df_mode.utils import ( + create_test_df_in_defined_mode, + create_test_series_in_defined_mode, + eval_general_interop, +) from modin.tests.pandas.utils import ( RAND_HIGH, RAND_LOW, - create_test_dfs, - create_test_series, default_to_pandas_ignore_string, df_equals, eval_general, @@ -148,14 +150,14 @@ def test_set_index(data, key_func, drop_kwargs, request, df_mode_pair): "df_mode_pair", list(product(NativeDataframeMode.choices, repeat=2)) ) def test_loc(data, df_mode_pair): - modin_df, pandas_df = create_test_dfs(data, df_mode=df_mode_pair[0]) + modin_df, pandas_df = create_test_df_in_defined_mode(data, df_mode=df_mode_pair[0]) indices = [i % 3 == 0 for i in range(len(modin_df.index))] columns = [i % 5 == 0 for i in range(len(modin_df.columns))] # Key is a Modin or pandas series of booleans - series1, _ = create_test_series(indices, df_mode=df_mode_pair[0]) - series2, _ = 
create_test_series( + series1, _ = create_test_series_in_defined_mode(indices, df_mode=df_mode_pair[0]) + series2, _ = create_test_series_in_defined_mode( columns, index=modin_df.columns, df_mode=df_mode_pair[0] ) df_equals( @@ -197,13 +199,13 @@ def loc_iter_dfs_interop(request): df_mode_pair = request.param columns = ["col1", "col2", "col3"] index = ["row1", "row2", "row3"] - md_df1, pd_df1 = create_test_dfs( + md_df1, pd_df1 = create_test_df_in_defined_mode( {col: ([idx] * len(index)) for idx, col in enumerate(columns)}, columns=columns, index=index, df_mode=df_mode_pair[0], ) - md_df2, pd_df2 = create_test_dfs( + md_df2, pd_df2 = create_test_df_in_defined_mode( {col: ([idx] * len(index)) for idx, col in enumerate(columns)}, columns=columns, index=index, @@ -235,10 +237,10 @@ def test_loc_iter_assignment(loc_iter_dfs_interop, reverse_order, axis): "df_mode_pair", list(product(NativeDataframeMode.choices, repeat=2)) ) def test_loc_series(df_mode_pair): - md_df1, pd_df1 = create_test_dfs( + md_df1, pd_df1 = create_test_df_in_defined_mode( {"a": [1, 2], "b": [3, 4]}, df_mode=df_mode_pair[0] ) - md_df2, pd_df2 = create_test_dfs( + md_df2, pd_df2 = create_test_df_in_defined_mode( {"a": [1, 2], "b": [3, 4]}, df_mode=df_mode_pair[1] ) @@ -263,13 +265,13 @@ def test_reindex_like(df_mode_pair): new_data = [[28, "low"], [30, "low"], [35.1, "medium"]] new_columns = ["temp_celsius", "windspeed"] new_index = pd.DatetimeIndex(["2014-02-12", "2014-02-13", "2014-02-15"]) - modin_df1, pandas_df1 = create_test_dfs( + modin_df1, pandas_df1 = create_test_df_in_defined_mode( o_data, columns=o_columns, index=o_index, df_mode=df_mode_pair[0], ) - modin_df2, pandas_df2 = create_test_dfs( + modin_df2, pandas_df2 = create_test_df_in_defined_mode( new_data, columns=new_columns, index=new_index, @@ -289,10 +291,10 @@ def test_reindex_multiindex(df_mode_pair): pandas_midx = pandas.MultiIndex.from_product( [["Bank_1", "Bank_2"], ["AUD", "CAD", "EUR"]], names=["Bank", "Curency"] ) - 
modin_df1, pandas_df1 = create_test_dfs( + modin_df1, pandas_df1 = create_test_df_in_defined_mode( data=data1, index=index, columns=index, df_mode=df_mode_pair[0] ) - modin_df2, pandas_df2 = create_test_dfs( + modin_df2, pandas_df2 = create_test_df_in_defined_mode( data=data2, index=pandas_midx, df_mode=df_mode_pair[1] ) @@ -323,18 +325,24 @@ def test_getitem_empty_mask(df_mode_pair): modin_frames = [] pandas_frames = [] data1 = np.random.randint(0, 100, size=(100, 4)) - mdf1, pdf1 = create_test_dfs(data1, columns=list("ABCD"), df_mode=df_mode_pair[0]) + mdf1, pdf1 = create_test_df_in_defined_mode( + data1, columns=list("ABCD"), df_mode=df_mode_pair[0] + ) modin_frames.append(mdf1) pandas_frames.append(pdf1) data2 = np.random.randint(0, 100, size=(100, 4)) - mdf2, pdf2 = create_test_dfs(data2, columns=list("ABCD"), df_mode=df_mode_pair[1]) + mdf2, pdf2 = create_test_df_in_defined_mode( + data2, columns=list("ABCD"), df_mode=df_mode_pair[1] + ) modin_frames.append(mdf2) pandas_frames.append(pdf2) data3 = np.random.randint(0, 100, size=(100, 4)) - mdf3, pdf3 = create_test_dfs(data3, columns=list("ABCD"), df_mode=df_mode_pair[0]) + mdf3, pdf3 = create_test_df_in_defined_mode( + data3, columns=list("ABCD"), df_mode=df_mode_pair[0] + ) modin_frames.append(mdf3) pandas_frames.append(pdf3) @@ -352,8 +360,12 @@ def test_getitem_empty_mask(df_mode_pair): def test___setitem__mask(df_mode_pair): # DataFrame mask: data = test_data["int_data"] - modin_df1, pandas_df1 = create_test_dfs(data, df_mode=df_mode_pair[0]) - modin_df2, pandas_df2 = create_test_dfs(data, df_mode=df_mode_pair[0]) + modin_df1, pandas_df1 = create_test_df_in_defined_mode( + data, df_mode=df_mode_pair[0] + ) + modin_df2, pandas_df2 = create_test_df_in_defined_mode( + data, df_mode=df_mode_pair[0] + ) mean = int((RAND_HIGH + RAND_LOW) / 2) pandas_df1[pandas_df2 > mean] = -50 @@ -383,14 +395,16 @@ def test___setitem__mask(df_mode_pair): "df_mode_pair", list(product(NativeDataframeMode.choices, repeat=2)) ) 
def test_setitem_on_empty_df(data, value, convert_to_series, new_col_id, df_mode_pair): - modin_df, pandas_df = create_test_dfs(data, df_mode=df_mode_pair[0]) + modin_df, pandas_df = create_test_df_in_defined_mode(data, df_mode=df_mode_pair[0]) def applyier(df): if convert_to_series: converted_value = ( pandas.Series(value) if isinstance(df, pandas.DataFrame) - else create_test_series(value, df_mode=df_mode_pair[1])[1] + else create_test_series_in_defined_mode(value, df_mode=df_mode_pair[1])[ + 1 + ] ) else: converted_value = value @@ -424,8 +438,12 @@ def test_setitem_on_empty_df_4407(df_mode_pair): data = {} index = pd.date_range(end="1/1/2018", periods=0, freq="D") column = pd.date_range(end="1/1/2018", periods=1, freq="h")[0] - modin_df, pandas_df = create_test_dfs(data, columns=index, df_mode=df_mode_pair[0]) - modin_ser, pandas_ser = create_test_series([1], df_mode=df_mode_pair[1]) + modin_df, pandas_df = create_test_df_in_defined_mode( + data, columns=index, df_mode=df_mode_pair[0] + ) + modin_ser, pandas_ser = create_test_series_in_defined_mode( + [1], df_mode=df_mode_pair[1] + ) modin_df[column] = modin_ser pandas_df[column] = pandas_ser @@ -445,12 +463,12 @@ def build_value_picker(modin_value, pandas_value): else pandas_value ) - modin_df, pandas_df = create_test_dfs( + modin_df, pandas_df = create_test_df_in_defined_mode( test_data["int_data"], df_mode=df_mode_pair[0] ) # Easy case - key and value.columns are equal - modin_value, pandas_value = create_test_dfs( + modin_value, pandas_value = create_test_df_in_defined_mode( { "new_value1": np.arange(len(modin_df)), "new_value2": np.arange(len(modin_df)), @@ -507,10 +525,10 @@ def test(dfs, iloc): df1[cols1] = df2[cols2] return df1 - modin_df, pandas_df = create_test_dfs( + modin_df, pandas_df = create_test_df_in_defined_mode( test_data["int_data"], df_mode=df_mode_pair[0] ) - modin_df2, pandas_df2 = create_test_dfs( + modin_df2, pandas_df2 = create_test_df_in_defined_mode( test_data["int_data"], 
df_mode=df_mode_pair[1] ) modin_df2 *= 10 @@ -555,8 +573,12 @@ def test(dfs, iloc): def test___setitem__single_item_in_series(df_mode_pair): # Test assigning a single item in a Series for issue # https://github.com/modin-project/modin/issues/3860 - modin_series1, pandas_series1 = create_test_series(99, df_mode=df_mode_pair[0]) - modin_series2, pandas_series2 = create_test_series(100, df_mode=df_mode_pair[1]) + modin_series1, pandas_series1 = create_test_series_in_defined_mode( + 99, df_mode=df_mode_pair[0] + ) + modin_series2, pandas_series2 = create_test_series_in_defined_mode( + 100, df_mode=df_mode_pair[1] + ) modin_series1[:1] = modin_series2 pandas_series1[:1] = pandas_series2 df_equals(modin_series1, pandas_series1) @@ -578,7 +600,7 @@ def test___setitem__single_item_in_series(df_mode_pair): "df_mode_pair", list(product(NativeDataframeMode.choices, repeat=2)) ) def test_loc_boolean_assignment_scalar_dtypes(value, df_mode_pair): - modin_df, pandas_df = create_test_dfs( + modin_df, pandas_df = create_test_df_in_defined_mode( { "a": [1, 2, 3], "b": [3.0, 5.0, 6.0], @@ -589,7 +611,7 @@ def test_loc_boolean_assignment_scalar_dtypes(value, df_mode_pair): }, df_mode=df_mode_pair[1], ) - modin_idx, pandas_idx = create_test_series( + modin_idx, pandas_idx = create_test_series_in_defined_mode( [False, True, True], df_mode=df_mode_pair[1] ) @@ -628,12 +650,12 @@ def test_index_of_empty_frame(df_mode_pair): # Test on an empty frame produced by Modin's logic data = test_data_values[0] - md_df1, pd_df1 = create_test_dfs( + md_df1, pd_df1 = create_test_df_in_defined_mode( data, index=pandas.RangeIndex(len(next(iter(data.values()))), name="index name"), df_mode=df_mode_pair[0], ) - md_df2, pd_df2 = create_test_dfs( + md_df2, pd_df2 = create_test_df_in_defined_mode( data, index=pandas.RangeIndex(len(next(iter(data.values()))), name="index name"), df_mode=df_mode_pair[1], diff --git a/modin/tests/pandas/native_df_mode/test_iter.py 
b/modin/tests/pandas/native_df_mode/test_iter.py index 43057f385c7..a2e176d4372 100644 --- a/modin/tests/pandas/native_df_mode/test_iter.py +++ b/modin/tests/pandas/native_df_mode/test_iter.py @@ -20,12 +20,11 @@ import modin.pandas as pd from modin.config import NativeDataframeMode, NPartitions from modin.pandas.utils import SET_DATAFRAME_ATTRIBUTE_WARNING -from modin.tests.pandas.utils import ( - create_test_dfs, - create_test_series, - df_equals, - eval_general, +from modin.tests.pandas.native_df_mode.utils import ( + create_test_df_in_defined_mode, + create_test_series_in_defined_mode, ) +from modin.tests.pandas.utils import df_equals, eval_general NPartitions.put(4) @@ -38,13 +37,15 @@ ) def test___setattr__mutating_column(df_mode_pair): # Use case from issue #4577 - modin_df, pandas_df = create_test_dfs( + modin_df, pandas_df = create_test_df_in_defined_mode( [[1]], columns=["col0"], df_mode=df_mode_pair[0] ) # Replacing a column with a list should mutate the column in place. pandas_df.col0 = [3] modin_df.col0 = [3] - modin_ser, pandas_ser = create_test_series([3], df_mode=df_mode_pair[1]) + modin_ser, pandas_ser = create_test_series_in_defined_mode( + [3], df_mode=df_mode_pair[1] + ) df_equals(modin_df, pandas_df) # Check that the col0 attribute reflects the value update. 
df_equals(modin_df.col0, pandas_df.col0) @@ -95,10 +96,10 @@ def test___setattr__mutating_column(df_mode_pair): "df_mode_pair", list(product(NativeDataframeMode.choices, repeat=2)) ) def test_isin_with_modin_objects(df_mode_pair): - modin_df1, pandas_df1 = create_test_dfs( + modin_df1, pandas_df1 = create_test_df_in_defined_mode( {"a": [1, 2], "b": [3, 4]}, df_mode=df_mode_pair[0] ) - modin_series, pandas_series = create_test_series( + modin_series, pandas_series = create_test_series_in_defined_mode( [1, 4, 5, 6], df_mode=df_mode_pair[1] ) @@ -118,7 +119,7 @@ def test_isin_with_modin_objects(df_mode_pair): ) # Check case when indices are not matching - modin_df1, pandas_df1 = create_test_dfs( + modin_df1, pandas_df1 = create_test_df_in_defined_mode( {"a": [1, 2], "b": [3, 4]}, index=[10, 11], df_mode=df_mode_pair[0], diff --git a/modin/tests/pandas/native_df_mode/test_join_sort.py b/modin/tests/pandas/native_df_mode/test_join_sort.py index 46a3c50d7fa..62565dde382 100644 --- a/modin/tests/pandas/native_df_mode/test_join_sort.py +++ b/modin/tests/pandas/native_df_mode/test_join_sort.py @@ -21,10 +21,12 @@ import modin.pandas as pd from modin.config import NativeDataframeMode, NPartitions from modin.pandas.io import to_pandas -from modin.tests.pandas.native_df_mode.utils import eval_general_interop +from modin.tests.pandas.native_df_mode.utils import ( + create_test_df_in_defined_mode, + create_test_series_in_defined_mode, + eval_general_interop, +) from modin.tests.pandas.utils import ( - create_test_dfs, - create_test_series, default_to_pandas_ignore_string, df_equals, eval_general, @@ -59,8 +61,12 @@ def df_equals_and_sort(df1, df2): "df_mode_pair", list(product(NativeDataframeMode.choices, repeat=2)) ) def test_combine(data, df_mode_pair): - modin_df_1, pandas_df_1 = create_test_dfs(data, df_mode=df_mode_pair[0]) - modin_df_2, pandas_df_2 = create_test_dfs(data, df_mode=df_mode_pair[1]) + modin_df_1, pandas_df_1 = create_test_df_in_defined_mode( + data, 
df_mode=df_mode_pair[0] + ) + modin_df_2, pandas_df_2 = create_test_df_in_defined_mode( + data, df_mode=df_mode_pair[1] + ) modin_df_1.combine( modin_df_2 + 1, lambda s1, s2: s1 if s1.count() < s2.count() else s2 ) @@ -94,13 +100,13 @@ def test_combine(data, df_mode_pair): "df_mode_pair", list(product(NativeDataframeMode.choices, repeat=2)) ) def test_join(test_data, test_data2, df_mode_pair): - modin_df, pandas_df = create_test_dfs( + modin_df, pandas_df = create_test_df_in_defined_mode( test_data, columns=["col{}".format(i) for i in range(test_data.shape[1])], index=pd.Index([i for i in range(1, test_data.shape[0] + 1)], name="key"), df_mode=df_mode_pair[0], ) - modin_df2, pandas_df2 = create_test_dfs( + modin_df2, pandas_df2 = create_test_df_in_defined_mode( test_data2, columns=["col{}".format(i) for i in range(test_data2.shape[1])], index=pd.Index([i for i in range(1, test_data2.shape[0] + 1)], name="key"), @@ -174,10 +180,10 @@ def test_join(test_data, test_data2, df_mode_pair): ) def test_join_cross_6786(df_mode_pair): data = [[7, 8, 9], [10, 11, 12]] - modin_df_1, pandas_df_1 = create_test_dfs( + modin_df_1, pandas_df_1 = create_test_df_in_defined_mode( data, columns=["x", "y", "z"], df_mode=df_mode_pair[0] ) - modin_df_2, pandas_df_2 = create_test_dfs( + modin_df_2, pandas_df_2 = create_test_df_in_defined_mode( data, columns=["x", "y", "z"], df_mode=df_mode_pair[1] ) modin_join = modin_df_1.join( @@ -214,13 +220,13 @@ def test_join_cross_6786(df_mode_pair): "df_mode_pair", list(product(NativeDataframeMode.choices, repeat=2)) ) def test_merge(test_data, test_data2, df_mode_pair): - modin_df, pandas_df = create_test_dfs( + modin_df, pandas_df = create_test_df_in_defined_mode( test_data, columns=["col{}".format(i) for i in range(test_data.shape[1])], index=pd.Index([i for i in range(1, test_data.shape[0] + 1)], name="key"), df_mode=df_mode_pair[0], ) - modin_df2, pandas_df2 = create_test_dfs( + modin_df2, pandas_df2 = create_test_df_in_defined_mode( 
test_data2, columns=["col{}".format(i) for i in range(test_data2.shape[1])], index=pd.Index([i for i in range(1, test_data2.shape[0] + 1)], name="key"), @@ -280,7 +286,7 @@ def test_merge_empty( "df_mode_pair", list(product(NativeDataframeMode.choices, repeat=2)) ) def test_merge_with_mi_columns(df_mode_pair): - modin_df1, pandas_df1 = create_test_dfs( + modin_df1, pandas_df1 = create_test_df_in_defined_mode( { ("col0", "a"): [1, 2, 3, 4], ("col0", "b"): [2, 3, 4, 5], @@ -289,7 +295,7 @@ def test_merge_with_mi_columns(df_mode_pair): df_mode=df_mode_pair[0], ) - modin_df2, pandas_df2 = create_test_dfs( + modin_df2, pandas_df2 = create_test_df_in_defined_mode( { ("col0", "a"): [1, 2, 3, 4], ("col0", "c"): [2, 3, 4, 5], @@ -312,10 +318,10 @@ def test_where(df_mode_pair): columns = list("abcdefghij") frame_data = random_state.randn(100, 10) - modin_df_1, pandas_df_1 = create_test_dfs( + modin_df_1, pandas_df_1 = create_test_df_in_defined_mode( frame_data, columns=columns, df_mode=df_mode_pair[0] ) - modin_df_2, pandas_df_2 = create_test_dfs( + modin_df_2, pandas_df_2 = create_test_df_in_defined_mode( frame_data, columns=columns, df_mode=df_mode_pair[1] ) pandas_cond_df = pandas_df_2 % 5 < 2 @@ -327,7 +333,9 @@ def test_where(df_mode_pair): # test case when other is Series other_data = random_state.randn(len(pandas_df_1)) - modin_other, pandas_other = create_test_series(other_data, df_mode=df_mode_pair[0]) + modin_other, pandas_other = create_test_series_in_defined_mode( + other_data, df_mode=df_mode_pair[0] + ) pandas_result = pandas_df_1.where(pandas_cond_df, pandas_other, axis=0) modin_result = modin_df_1.where(modin_cond_df, modin_other, axis=0) df_equals(modin_result, pandas_result) @@ -335,7 +343,7 @@ def test_where(df_mode_pair): # Test that we choose the right values to replace when `other` == `True` # everywhere. 
other_data = np.full(shape=pandas_df_1.shape, fill_value=True) - modin_other, pandas_other = create_test_dfs( + modin_other, pandas_other = create_test_df_in_defined_mode( other_data, columns=columns, df_mode=df_mode_pair[0] ) pandas_result = pandas_df_1.where(pandas_cond_df, pandas_other) @@ -371,10 +379,10 @@ def test_compare(align_axis, keep_shape, keep_equal, df_mode_pair): } frame_data1 = random_state.randn(100, 10) frame_data2 = random_state.randn(100, 10) - modin_df, pandas_df = create_test_dfs( + modin_df, pandas_df = create_test_df_in_defined_mode( frame_data1, columns=list("abcdefghij"), df_mode=df_mode_pair[0] ) - modin_df2, pandas_df2 = create_test_dfs( + modin_df2, pandas_df2 = create_test_df_in_defined_mode( frame_data2, columns=list("abcdefghij"), df_mode=df_mode_pair[0] ) modin_result = modin_df.compare(modin_df2, **kwargs) @@ -387,10 +395,10 @@ def test_compare(align_axis, keep_shape, keep_equal, df_mode_pair): series_data1 = ["a", "b", "c", "d", "e"] series_data2 = ["a", "a", "c", "b", "e"] - modin_series1, pandas_series1 = create_test_series( + modin_series1, pandas_series1 = create_test_series_in_defined_mode( series_data1, df_mode=df_mode_pair[0] ) - modin_series2, pandas_series2 = create_test_series( + modin_series2, pandas_series2 = create_test_series_in_defined_mode( series_data2, df_mode=df_mode_pair[1] ) diff --git a/modin/tests/pandas/native_df_mode/test_map_metadata.py b/modin/tests/pandas/native_df_mode/test_map_metadata.py index 43243db2196..e9e460ffbc8 100644 --- a/modin/tests/pandas/native_df_mode/test_map_metadata.py +++ b/modin/tests/pandas/native_df_mode/test_map_metadata.py @@ -21,13 +21,15 @@ import modin.pandas as pd from modin.config import NativeDataframeMode, NPartitions, StorageFormat +from modin.tests.pandas.native_df_mode.utils import ( + create_test_df_in_defined_mode, + create_test_series_in_defined_mode, +) from modin.tests.pandas.utils import ( RAND_HIGH, RAND_LOW, axis_keys, axis_values, - create_test_dfs, - 
create_test_series, default_to_pandas_ignore_string, df_equals, eval_general, @@ -69,8 +71,10 @@ def eval_insert(modin_df, pandas_df, **kwargs): "df_mode_pair", list(product(NativeDataframeMode.choices, repeat=2)) ) def test_empty_df(df_mode_pair): - modin_df, pd_df = create_test_dfs(None, df_mode=df_mode_pair[0]) - md_series, pd_series = create_test_series([1, 2, 3, 4, 5], df_mode=df_mode_pair[1]) + modin_df, pd_df = create_test_df_in_defined_mode(None, df_mode=df_mode_pair[0]) + md_series, pd_series = create_test_series_in_defined_mode( + [1, 2, 3, 4, 5], df_mode=df_mode_pair[1] + ) modin_df["a"] = md_series pd_df["a"] = pd_series df_equals(modin_df, pd_df) @@ -81,7 +85,7 @@ def test_empty_df(df_mode_pair): ) def test_astype(df_mode_pair): td = pandas.DataFrame(test_data["int_data"])[["col1", "index", "col3", "col4"]] - modin_df, pandas_df = create_test_dfs( + modin_df, pandas_df = create_test_df_in_defined_mode( td.values, index=td.index, columns=td.columns, @@ -89,7 +93,7 @@ def test_astype(df_mode_pair): ) def astype_func(df): - md_ser, pd_ser = create_test_series( + md_ser, pd_ser = create_test_series_in_defined_mode( [str, str], index=["col1", "col1"], df_mode=df_mode_pair[1] ) if isinstance(df, pd.DataFrame): @@ -115,10 +119,10 @@ def astype_func(df): "df_mode_pair", list(product(NativeDataframeMode.choices, repeat=2)) ) def test_convert_dtypes_5653(df_mode_pair): - modin_part1, _ = create_test_dfs( + modin_part1, _ = create_test_df_in_defined_mode( {"col1": ["a", "b", "c", "d"]}, df_mode=df_mode_pair[0] ) - modin_part2, _ = create_test_dfs( + modin_part2, _ = create_test_df_in_defined_mode( {"col1": [None, None, None, None]}, df_mode=df_mode_pair[1] ) modin_df = pd.concat([modin_part1, modin_part2]) @@ -137,7 +141,7 @@ def test_convert_dtypes_5653(df_mode_pair): "df_mode_pair", list(product(NativeDataframeMode.choices, repeat=2)) ) def test_clip(request, data, axis, bound_type, df_mode_pair): - modin_df, pandas_df = create_test_dfs(data, 
df_mode=df_mode_pair[0]) + modin_df, pandas_df = create_test_df_in_defined_mode(data, df_mode=df_mode_pair[0]) if name_contains(request.node.name, numeric_dfs): ind_len = ( @@ -150,10 +154,10 @@ def test_clip(request, data, axis, bound_type, df_mode_pair): upper = random_state.randint(RAND_LOW, RAND_HIGH, ind_len) if bound_type == "series": - modin_lower, pandas_lower = create_test_series( + modin_lower, pandas_lower = create_test_series_in_defined_mode( lower, df_mode=df_mode_pair[1] ) - modin_upper, pandas_upper = create_test_series( + modin_upper, pandas_upper = create_test_series_in_defined_mode( upper, df_mode=df_mode_pair[0] ) else: @@ -191,8 +195,8 @@ def test_clip(request, data, axis, bound_type, df_mode_pair): "df_mode_pair", list(product(NativeDataframeMode.choices, repeat=2)) ) def test_update(data, other_data, errors, df_mode_pair): - modin_df, pandas_df = create_test_dfs(data, df_mode=df_mode_pair[0]) - other_modin_df, other_pandas_df = create_test_dfs( + modin_df, pandas_df = create_test_df_in_defined_mode(data, df_mode=df_mode_pair[0]) + other_modin_df, other_pandas_df = create_test_df_in_defined_mode( other_data, df_mode=df_mode_pair[1] ) expected_exception = None @@ -235,7 +239,9 @@ def test_update(data, other_data, errors, df_mode_pair): "df_mode_pair", list(product(NativeDataframeMode.choices, repeat=2)) ) def test_constructor_from_modin_series(get_index, get_columns, dtype, df_mode_pair): - modin_df, pandas_df = create_test_dfs(test_data_values[0], df_mode=df_mode_pair[0]) + modin_df, pandas_df = create_test_df_in_defined_mode( + test_data_values[0], df_mode=df_mode_pair[0] + ) modin_data = {f"new_col{i}": modin_df.iloc[:, i] for i in range(modin_df.shape[1])} pandas_data = { diff --git a/modin/tests/pandas/native_df_mode/test_pickle.py b/modin/tests/pandas/native_df_mode/test_pickle.py index f42d5863843..cf9b4dfcb9c 100644 --- a/modin/tests/pandas/native_df_mode/test_pickle.py +++ b/modin/tests/pandas/native_df_mode/test_pickle.py @@ -18,7 
+18,8 @@ import modin.pandas as pd from modin.config import NativeDataframeMode, PersistentPickle -from modin.tests.pandas.utils import create_test_dfs, df_equals +from modin.tests.pandas.native_df_mode.utils import create_test_df_in_defined_mode +from modin.tests.pandas.utils import df_equals @pytest.fixture @@ -46,7 +47,7 @@ def test__reduce__(df_mode_pair): # `DataFrame.__reduce__` will be called implicitly when lambda expressions are # pre-processed for the distributed engine. dataframe_data = ["Major League Baseball", "National Basketball Association"] - abbr_md, abbr_pd = create_test_dfs( + abbr_md, abbr_pd = create_test_df_in_defined_mode( dataframe_data, index=["MLB", "NBA"], df_mode=df_mode_pair[0] ) @@ -54,7 +55,9 @@ def test__reduce__(df_mode_pair): "name": ["Mariners", "Lakers"] * 500, "league_abbreviation": ["MLB", "NBA"] * 500, } - teams_md, teams_pd = create_test_dfs(dataframe_data, df_mode=df_mode_pair[1]) + teams_md, teams_pd = create_test_df_in_defined_mode( + dataframe_data, df_mode=df_mode_pair[1] + ) result_md = ( teams_md.set_index("name") diff --git a/modin/tests/pandas/native_df_mode/test_window.py b/modin/tests/pandas/native_df_mode/test_window.py index e2e095bfc9d..7e8e5da9342 100644 --- a/modin/tests/pandas/native_df_mode/test_window.py +++ b/modin/tests/pandas/native_df_mode/test_window.py @@ -20,7 +20,8 @@ import modin.pandas as pd from modin.config import NativeDataframeMode, NPartitions -from modin.tests.pandas.utils import create_test_dfs, df_equals +from modin.tests.pandas.native_df_mode.utils import create_test_df_in_defined_mode +from modin.tests.pandas.utils import df_equals NPartitions.put(4) @@ -32,12 +33,12 @@ "df_mode_pair", list(product(NativeDataframeMode.choices, repeat=2)) ) def test_fillna_4660(df_mode_pair): - modin_df_1, pandas_df_1 = create_test_dfs( + modin_df_1, pandas_df_1 = create_test_df_in_defined_mode( {"a": ["a"], "b": ["b"], "c": [pd.NA]}, index=["row1"], df_mode=df_mode_pair[0], ) - modin_df_2, pandas_df_2 = 
create_test_dfs( + modin_df_2, pandas_df_2 = create_test_df_in_defined_mode( {"a": ["a"], "b": ["b"], "c": [pd.NA]}, index=["row1"], df_mode=df_mode_pair[1], @@ -58,8 +59,12 @@ def test_fillna_dict_series(df_mode_pair): } df = pandas.DataFrame(frame_data) modin_df = pd.DataFrame(frame_data) - modin_df_1, pandas_df_1 = create_test_dfs(frame_data, df_mode=df_mode_pair[0]) - modin_df_2, pandas_df_2 = create_test_dfs(frame_data, df_mode=df_mode_pair[1]) + modin_df_1, pandas_df_1 = create_test_df_in_defined_mode( + frame_data, df_mode=df_mode_pair[0] + ) + modin_df_2, pandas_df_2 = create_test_df_in_defined_mode( + frame_data, df_mode=df_mode_pair[1] + ) df_equals(modin_df.fillna({"a": 0, "b": 5}), df.fillna({"a": 0, "b": 5})) @@ -83,10 +88,10 @@ def test_fillna_dataframe(df_mode_pair): "b": [1, 2, 3, np.nan, np.nan], "c": [np.nan, 1, 2, 3, 4], } - modin_df_1, pandas_df_1 = create_test_dfs( + modin_df_1, pandas_df_1 = create_test_df_in_defined_mode( frame_data, index=list("VWXYZ"), df_mode=df_mode_pair[0] ) - modin_df_2, pandas_df_2 = create_test_dfs( + modin_df_2, pandas_df_2 = create_test_df_in_defined_mode( {"a": [np.nan, 10, 20, 30, 40], "b": [50, 60, 70, 80, 90], "foo": ["bar"] * 5}, index=list("VWXuZ"), df_mode=df_mode_pair[1], diff --git a/modin/tests/pandas/native_df_mode/utils.py b/modin/tests/pandas/native_df_mode/utils.py index 18039a98943..247a1ea61fa 100644 --- a/modin/tests/pandas/native_df_mode/utils.py +++ b/modin/tests/pandas/native_df_mode/utils.py @@ -11,10 +11,32 @@ # ANY KIND, either express or implied. See the License for the specific language # governing permissions and limitations under the License. 
from modin.config import Engine
-from modin.tests.pandas.utils import NoModinException, create_test_dfs, df_equals
+from modin.config.pubsub import context
+from modin.tests.pandas.utils import (
+    NoModinException,
+    create_test_dfs,
+    create_test_series,
+    df_equals,
+)
 from modin.utils import try_cast_to_pandas


+def create_test_df_in_defined_mode(
+    *args, post_fn=None, backend=None, df_mode=None, **kwargs
+):
+    with context(NativeDataframeMode=df_mode):  # build the pair under `df_mode`
+        return create_test_dfs(
+            *args, post_fn=post_fn, backend=backend, df_mode=None, **kwargs
+        )
+
+
+def create_test_series_in_defined_mode(
+    vals, sort=False, backend=None, df_mode=None, **kwargs
+):
+    with context(NativeDataframeMode=df_mode):  # build the pair under `df_mode`
+        return create_test_series(vals, sort=sort, backend=backend, **kwargs)
+
+
 def eval_general_interop(
     data,
     backend,
@@ -29,8 +51,12 @@ def eval_general_interop(
     **kwargs,
 ):
     df_mode1, df_mode2 = df_mode_pair
-    modin_df1, pandas_df1 = create_test_dfs(data, backend=backend, df_mode=df_mode1)
-    modin_df2, pandas_df2 = create_test_dfs(data, backend=backend, df_mode=df_mode2)
+    modin_df1, pandas_df1 = create_test_df_in_defined_mode(
+        data, backend=backend, df_mode=df_mode1
+    )
+    modin_df2, pandas_df2 = create_test_df_in_defined_mode(
+        data, backend=backend, df_mode=df_mode2
+    )
     md_kwargs, pd_kwargs = {}, {}

     def execute_callable(fn, inplace=False, md_kwargs={}, pd_kwargs={}):