diff --git a/modin/core/dataframe/pandas/dataframe/utils.py b/modin/core/dataframe/pandas/dataframe/utils.py index 4397f0ce232..f8f6b125ff9 100644 --- a/modin/core/dataframe/pandas/dataframe/utils.py +++ b/modin/core/dataframe/pandas/dataframe/utils.py @@ -868,8 +868,8 @@ def add_missing_categories_to_groupby( ### At this stage we want to get a fill_value for missing categorical values if is_udf_agg and isinstance(total_index, pandas.MultiIndex): # if grouping on multiple columns and aggregating with an UDF, then the - # fill value is always `np.NaN` - missing_values = pandas.DataFrame({0: [np.NaN]}) + # fill value is always `np.nan` + missing_values = pandas.DataFrame({0: [np.nan]}) else: # In case of a udf aggregation we're forced to run the operator against each # missing category, as in theory it can return different results for each @@ -903,8 +903,8 @@ def add_missing_categories_to_groupby( ).columns else: # HACK: If the aggregation has failed, the result would be empty. Assuming the - # fill value to be `np.NaN` here (this may not always be correct!!!) - fill_value = np.NaN if len(missing_values) == 0 else missing_values.iloc[0, 0] + # fill value to be `np.nan` here (this may not always be correct!!!) + fill_value = np.nan if len(missing_values) == 0 else missing_values.iloc[0, 0] missing_values = pandas.DataFrame( fill_value, index=missing_index, columns=combined_cols ) diff --git a/modin/core/storage_formats/pandas/query_compiler.py b/modin/core/storage_formats/pandas/query_compiler.py index 5281e92d8c5..2cb419a2e21 100644 --- a/modin/core/storage_formats/pandas/query_compiler.py +++ b/modin/core/storage_formats/pandas/query_compiler.py @@ -1105,7 +1105,7 @@ def map_func(df, resample_kwargs=resample_kwargs): # pragma: no cover resample_kwargs = resample_kwargs.copy() resample_kwargs["level"] = None filler = pandas.DataFrame( - np.NaN, index=pandas.Index(timestamps), columns=df.columns + np.nan, index=pandas.Index(timestamps), columns=df.columns ) df = pandas.concat([df, filler], copy=False) if df_op is not None: diff --git a/modin/numpy/arr.py b/modin/numpy/arr.py index 8f8880a381b..ab40ecab2cc 100644 --- a/modin/numpy/arr.py +++ b/modin/numpy/arr.py @@ -1622,7 +1622,7 @@ def floor_divide( # the output. result = ( result.replace(numpy.inf, 0) - .replace(numpy.NINF, 0) + .replace(-numpy.inf, 0) .where(self._query_compiler.ne(0), 0) ) return fix_dtypes_and_determine_return( @@ -1644,7 +1644,7 @@ def floor_divide( # the output. result = ( result.replace(numpy.inf, 0) - .replace(numpy.NINF, 0) + .replace(-numpy.inf, 0) .where(callee.ne(0), 0) ) return fix_dtypes_and_determine_return(result, new_ndim, dtype, out, where) @@ -1902,7 +1902,7 @@ def remainder( if x2 == 0 and numpy.issubdtype(out_dtype, numpy.integer): # NumPy's remainder by 0 works differently from pandas', so we need to fix # the output. - result = result.replace(numpy.NaN, 0) + result = result.replace(numpy.nan, 0) return fix_dtypes_and_determine_return( result, self._ndim, dtype, out, where ) @@ -1920,7 +1920,7 @@ def remainder( if callee.eq(0).any() and numpy.issubdtype(out_dtype, numpy.integer): # NumPy's floor_divide by 0 works differently from pandas', so we need to fix # the output. - result = result.replace(numpy.NaN, 0) + result = result.replace(numpy.nan, 0) return fix_dtypes_and_determine_return(result, new_ndim, dtype, out, where) __mod__ = remainder diff --git a/modin/pandas/indexing.py b/modin/pandas/indexing.py index d901b6dac99..eb1808f0235 100644 --- a/modin/pandas/indexing.py +++ b/modin/pandas/indexing.py @@ -844,7 +844,7 @@ def _setitem_with_new_columns(self, row_loc, col_loc, item): for i in range(len(common_label_loc)): if not common_label_loc[i]: columns = columns.insert(len(columns), col_loc[i]) - self.qc = self.qc.reindex(labels=columns, axis=1, fill_value=np.NaN) + self.qc = self.qc.reindex(labels=columns, axis=1, fill_value=np.nan) self.df._update_inplace(new_query_compiler=self.qc) self._set_item_existing_loc(row_loc, np.array(col_loc), item) diff --git a/modin/tests/numpy/test_array.py b/modin/tests/numpy/test_array.py index 8291f0c4e36..f0849976546 100644 --- a/modin/tests/numpy/test_array.py +++ b/modin/tests/numpy/test_array.py @@ -275,7 +275,7 @@ def test_array_where(): @pytest.mark.parametrize("method", ["argmax", "argmin"]) def test_argmax_argmin(method): - numpy_arr = numpy.array([[1, 2, 3], [4, 5, np.NaN]]) + numpy_arr = numpy.array([[1, 2, 3], [4, 5, np.nan]]) modin_arr = np.array(numpy_arr) assert_scalar_or_array_equal( getattr(np, method)(modin_arr, axis=1), diff --git a/modin/tests/pandas/dataframe/test_binary.py b/modin/tests/pandas/dataframe/test_binary.py index 3e17962a6cd..e153f9f892f 100644 --- a/modin/tests/pandas/dataframe/test_binary.py +++ b/modin/tests/pandas/dataframe/test_binary.py @@ -254,7 +254,7 @@ def test_multi_level_comparison(data, op): pytest.param({}, {}, True, id="two_empty_dataframes"), pytest.param([[1]], [[0]], False, id="single_unequal_values"), pytest.param([[None]], [[None]], True, id="single_none_values"), - pytest.param([[np.NaN]], [[np.NaN]], True, id="single_nan_values"), + pytest.param([[np.nan]], [[np.nan]], True, id="single_nan_values"), pytest.param({1: [10]}, {1.0: [10]}, True, id="different_column_types"), pytest.param({1: [10]}, {2: [10]}, False, id="different_columns"), pytest.param( diff --git a/modin/tests/pandas/dataframe/test_join_sort.py b/modin/tests/pandas/dataframe/test_join_sort.py index 4e2c6517bb2..579c8e32212 100644 --- a/modin/tests/pandas/dataframe/test_join_sort.py +++ b/modin/tests/pandas/dataframe/test_join_sort.py @@ -879,7 +879,7 @@ def test_sort_values_with_only_one_non_na_row_in_partition(ascending, na_positio np.random.rand(1000, 100), columns=[f"col {i}" for i in range(100)] ) # Need to ensure that one of the partitions has all NA values except for one row - pandas_df.iloc[340:] = np.NaN + pandas_df.iloc[340:] = np.nan pandas_df.iloc[-1] = -4.0 modin_df = pd.DataFrame(pandas_df) eval_general( diff --git a/modin/tests/pandas/test_groupby.py b/modin/tests/pandas/test_groupby.py index beabd7ac335..82e0317fb5d 100644 --- a/modin/tests/pandas/test_groupby.py +++ b/modin/tests/pandas/test_groupby.py @@ -479,8 +479,8 @@ def test_simple_row_groupby(by, as_index, col1_category): pandas_df = pandas.DataFrame( { "col1": [0, 1, 2, 3], - "col2": [4, 5, np.NaN, 7], - "col3": [np.NaN, np.NaN, 12, 10], + "col2": [4, 5, np.nan, 7], + "col3": [np.nan, np.nan, 12, 10], "col4": [17, 13, 16, 15], "col5": [-4, -5, -6, -7], } @@ -1850,8 +1850,8 @@ def test_shift_freq(groupby_axis, shift_axis, groupby_sort): pandas_df = pandas.DataFrame( { "col1": [1, 0, 2, 3], - "col2": [4, 5, np.NaN, 7], - "col3": [np.NaN, np.NaN, 12, 10], + "col2": [4, 5, np.nan, 7], + "col3": [np.nan, np.nan, 12, 10], "col4": [17, 13, 16, 15], } ) diff --git a/modin/tests/pandas/test_series.py b/modin/tests/pandas/test_series.py index b7be43c62dc..0485dcc4d10 100644 --- a/modin/tests/pandas/test_series.py +++ b/modin/tests/pandas/test_series.py @@ -4008,11 +4008,11 @@ def test_str___getitem__(data, key): # Test str operations @pytest.mark.parametrize( "others", - [["abC|DeF,Hik", "gSaf,qWer|Gre", "asd3,4sad|", np.NaN], None], + [["abC|DeF,Hik", "gSaf,qWer|Gre", "asd3,4sad|", np.nan], None], ids=["list", "None"], ) def test_str_cat(others): - data = ["abC|DeF,Hik", "gSaf,qWer|Gre", "asd3,4sad|", np.NaN] + data = ["abC|DeF,Hik", "gSaf,qWer|Gre", "asd3,4sad|", np.nan] eval_general(*create_test_series(data), lambda s: s.str.cat(others=others)) @@ -4644,7 +4644,7 @@ def str_encode_decode_test_data() -> list[str]: "234,3245.67", "gSaf,qWer|Gre", "asd3,4sad|", - np.NaN, + np.nan, None, # add a string that we can't encode in ascii, and whose utf-8 encoding # we cannot decode in ascii diff --git a/modin/tests/pandas/utils.py b/modin/tests/pandas/utils.py index ff40e5bb4c4..3204ef3bb61 100644 --- a/modin/tests/pandas/utils.py +++ b/modin/tests/pandas/utils.py @@ -88,7 +88,7 @@ ( x if (j % 4 == 0 and i > NCOLS // 2) or (j != i and i <= NCOLS // 2) - else np.NaN + else np.nan ) for j, x in enumerate( random_state.uniform(RAND_LOW, RAND_HIGH, size=(NROWS)) @@ -161,7 +161,7 @@ for col in test_data["float_nan_data"]: for row in range(NROWS // 2): if row % 16 == 0: - test_data["float_nan_data"][col][row] = np.NaN + test_data["float_nan_data"][col][row] = np.nan test_data_values = list(test_data.values()) test_data_keys = list(test_data.keys()) @@ -226,8 +226,8 @@ test_data_diff_dtype = { "int_col": [-5, 2, 7, 16], - "float_col": [np.NaN, -9.4, 10.1, np.NaN], - "str_col": ["a", np.NaN, "c", "d"], + "float_col": [np.nan, -9.4, 10.1, np.nan], + "str_col": ["a", np.nan, "c", "d"], "bool_col": [False, True, True, False], } @@ -272,7 +272,7 @@ "234,3245.67", "gSaf,qWer|Gre", "asd3,4sad|", - np.NaN, + np.nan, ] } @@ -290,7 +290,7 @@ string_sep_values = list(string_seperators.values()) string_sep_keys = list(string_seperators.keys()) -string_na_rep = {"None na_rep": None, "- na_rep": "-", "nan na_rep": np.NaN} +string_na_rep = {"None na_rep": None, "- na_rep": "-", "nan na_rep": np.nan} string_na_rep_values = list(string_na_rep.values()) string_na_rep_keys = list(string_na_rep.keys())