From 8d331fca9d504f1b378f95385fee2ebb73dcd882 Mon Sep 17 00:00:00 2001 From: Jonathan Shi Date: Thu, 12 Sep 2024 16:32:29 -0700 Subject: [PATCH] FIX-#7327: Use sort parameter of DataFrame.stack Signed-off-by: Jonathan Shi --- modin/core/storage_formats/base/query_compiler.py | 7 +++++-- modin/core/storage_formats/pandas/query_compiler.py | 6 ++++-- modin/pandas/dataframe.py | 4 ++-- modin/tests/pandas/dataframe/test_default.py | 10 ++++++++++ 4 files changed, 21 insertions(+), 6 deletions(-) diff --git a/modin/core/storage_formats/base/query_compiler.py b/modin/core/storage_formats/base/query_compiler.py index 965fb98efb3..c3fbd9f210f 100644 --- a/modin/core/storage_formats/base/query_compiler.py +++ b/modin/core/storage_formats/base/query_compiler.py @@ -1956,7 +1956,7 @@ def searchsorted(self, **kwargs): # noqa: PR02 # END Abstract map partitions operations @doc_utils.add_refer_to("DataFrame.stack") - def stack(self, level, dropna): + def stack(self, level, dropna, sort): """ Stack the prescribed level(s) from columns to index. @@ -1970,7 +1970,10 @@ def stack(self, level, dropna): BaseQueryCompiler """ return DataFrameDefault.register(pandas.DataFrame.stack)( - self, level=level, dropna=dropna + self, + level=level, + dropna=dropna, + sort=sort, ) # Abstract map partitions across select indices diff --git a/modin/core/storage_formats/pandas/query_compiler.py b/modin/core/storage_formats/pandas/query_compiler.py index 655d427de1d..c96c289ac5a 100644 --- a/modin/core/storage_formats/pandas/query_compiler.py +++ b/modin/core/storage_formats/pandas/query_compiler.py @@ -1944,7 +1944,7 @@ def get_unique_level_values(index): result = result.reindex(0, new_index) return result - def stack(self, level, dropna): + def stack(self, level, dropna, sort): if not isinstance(self.columns, pandas.MultiIndex) or ( isinstance(self.columns, pandas.MultiIndex) and is_list_like(level) @@ -1956,7 +1956,9 @@ def stack(self, level, dropna): new_modin_frame = self._modin_frame.apply_full_axis( 1, - lambda df: pandas.DataFrame(df.stack(level=level, dropna=dropna)), + lambda df: pandas.DataFrame( + df.stack(level=level, dropna=dropna, sort=sort) + ), new_columns=new_columns, ) return self.__constructor__(new_modin_frame) diff --git a/modin/pandas/dataframe.py b/modin/pandas/dataframe.py index de96ea0ab26..f11f94923cb 100644 --- a/modin/pandas/dataframe.py +++ b/modin/pandas/dataframe.py @@ -2113,11 +2113,11 @@ def stack( is_multiindex and is_list_like(level) and len(level) == self.columns.nlevels ): return self._reduce_dimension( - query_compiler=self._query_compiler.stack(level, dropna) + query_compiler=self._query_compiler.stack(level, dropna, sort) ) else: return self.__constructor__( - query_compiler=self._query_compiler.stack(level, dropna) + query_compiler=self._query_compiler.stack(level, dropna, sort) ) def sub( diff --git a/modin/tests/pandas/dataframe/test_default.py b/modin/tests/pandas/dataframe/test_default.py index 71f49924c94..d40a30e38e9 100644 --- a/modin/tests/pandas/dataframe/test_default.py +++ b/modin/tests/pandas/dataframe/test_default.py @@ -1181,6 +1181,16 @@ def test_stack(data, is_multi_idx, is_multi_col): df_equals(modin_df.stack(level=[0, 1, 2]), pandas_df.stack(level=[0, 1, 2])) +@pytest.mark.parametrize("sort", [True, False]) +def test_stack_sort(sort): + # Example frame slightly modified from pandas docs to be unsorted + cols = pd.MultiIndex.from_tuples([("weight", "pounds"), ("weight", "kg")]) + modin_df, pandas_df = create_test_dfs( + [[1, 2], [2, 4]], index=["cat", "dog"], columns=cols + ) + df_equals(modin_df.stack(sort=sort), pandas_df.stack(sort=sort)) + + @pytest.mark.parametrize("data", test_data_values, ids=test_data_keys) @pytest.mark.parametrize("axis1", [0, 1]) @pytest.mark.parametrize("axis2", [0, 1])