Skip to content

Commit

Permalink
FIX-#7292: Prepare Modin code to NumPy 2.0 (#7293)
Browse files Browse the repository at this point in the history
Signed-off-by: Anatoly Myachev <[email protected]>
  • Loading branch information
anmyachev authored May 29, 2024
1 parent 31771d7 commit 2006292
Show file tree
Hide file tree
Showing 10 changed files with 26 additions and 26 deletions.
8 changes: 4 additions & 4 deletions modin/core/dataframe/pandas/dataframe/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -868,8 +868,8 @@ def add_missing_categories_to_groupby(
### At this stage we want to get a fill_value for missing categorical values
if is_udf_agg and isinstance(total_index, pandas.MultiIndex):
# if grouping on multiple columns and aggregating with a UDF, then the
# fill value is always `np.NaN`
missing_values = pandas.DataFrame({0: [np.NaN]})
# fill value is always `np.nan`
missing_values = pandas.DataFrame({0: [np.nan]})
else:
# In case of a udf aggregation we're forced to run the operator against each
# missing category, as in theory it can return different results for each
Expand Down Expand Up @@ -903,8 +903,8 @@ def add_missing_categories_to_groupby(
).columns
else:
# HACK: If the aggregation has failed, the result would be empty. Assuming the
# fill value to be `np.NaN` here (this may not always be correct!!!)
fill_value = np.NaN if len(missing_values) == 0 else missing_values.iloc[0, 0]
# fill value to be `np.nan` here (this may not always be correct!!!)
fill_value = np.nan if len(missing_values) == 0 else missing_values.iloc[0, 0]
missing_values = pandas.DataFrame(
fill_value, index=missing_index, columns=combined_cols
)
Expand Down
2 changes: 1 addition & 1 deletion modin/core/storage_formats/pandas/query_compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -1105,7 +1105,7 @@ def map_func(df, resample_kwargs=resample_kwargs): # pragma: no cover
resample_kwargs = resample_kwargs.copy()
resample_kwargs["level"] = None
filler = pandas.DataFrame(
np.NaN, index=pandas.Index(timestamps), columns=df.columns
np.nan, index=pandas.Index(timestamps), columns=df.columns
)
df = pandas.concat([df, filler], copy=False)
if df_op is not None:
Expand Down
8 changes: 4 additions & 4 deletions modin/numpy/arr.py
Original file line number Diff line number Diff line change
Expand Up @@ -1622,7 +1622,7 @@ def floor_divide(
# the output.
result = (
result.replace(numpy.inf, 0)
.replace(numpy.NINF, 0)
.replace(-numpy.inf, 0)
.where(self._query_compiler.ne(0), 0)
)
return fix_dtypes_and_determine_return(
Expand All @@ -1644,7 +1644,7 @@ def floor_divide(
# the output.
result = (
result.replace(numpy.inf, 0)
.replace(numpy.NINF, 0)
.replace(-numpy.inf, 0)
.where(callee.ne(0), 0)
)
return fix_dtypes_and_determine_return(result, new_ndim, dtype, out, where)
Expand Down Expand Up @@ -1902,7 +1902,7 @@ def remainder(
if x2 == 0 and numpy.issubdtype(out_dtype, numpy.integer):
# NumPy's remainder by 0 works differently from pandas', so we need to fix
# the output.
result = result.replace(numpy.NaN, 0)
result = result.replace(numpy.nan, 0)
return fix_dtypes_and_determine_return(
result, self._ndim, dtype, out, where
)
Expand All @@ -1920,7 +1920,7 @@ def remainder(
if callee.eq(0).any() and numpy.issubdtype(out_dtype, numpy.integer):
# NumPy's remainder by 0 works differently from pandas', so we need to fix
# the output.
result = result.replace(numpy.NaN, 0)
result = result.replace(numpy.nan, 0)
return fix_dtypes_and_determine_return(result, new_ndim, dtype, out, where)

__mod__ = remainder
Expand Down
2 changes: 1 addition & 1 deletion modin/pandas/indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -844,7 +844,7 @@ def _setitem_with_new_columns(self, row_loc, col_loc, item):
for i in range(len(common_label_loc)):
if not common_label_loc[i]:
columns = columns.insert(len(columns), col_loc[i])
self.qc = self.qc.reindex(labels=columns, axis=1, fill_value=np.NaN)
self.qc = self.qc.reindex(labels=columns, axis=1, fill_value=np.nan)
self.df._update_inplace(new_query_compiler=self.qc)
self._set_item_existing_loc(row_loc, np.array(col_loc), item)

Expand Down
2 changes: 1 addition & 1 deletion modin/tests/numpy/test_array.py
Original file line number Diff line number Diff line change
Expand Up @@ -275,7 +275,7 @@ def test_array_where():

@pytest.mark.parametrize("method", ["argmax", "argmin"])
def test_argmax_argmin(method):
numpy_arr = numpy.array([[1, 2, 3], [4, 5, np.NaN]])
numpy_arr = numpy.array([[1, 2, 3], [4, 5, np.nan]])
modin_arr = np.array(numpy_arr)
assert_scalar_or_array_equal(
getattr(np, method)(modin_arr, axis=1),
Expand Down
2 changes: 1 addition & 1 deletion modin/tests/pandas/dataframe/test_binary.py
Original file line number Diff line number Diff line change
Expand Up @@ -254,7 +254,7 @@ def test_multi_level_comparison(data, op):
pytest.param({}, {}, True, id="two_empty_dataframes"),
pytest.param([[1]], [[0]], False, id="single_unequal_values"),
pytest.param([[None]], [[None]], True, id="single_none_values"),
pytest.param([[np.NaN]], [[np.NaN]], True, id="single_nan_values"),
pytest.param([[np.nan]], [[np.nan]], True, id="single_nan_values"),
pytest.param({1: [10]}, {1.0: [10]}, True, id="different_column_types"),
pytest.param({1: [10]}, {2: [10]}, False, id="different_columns"),
pytest.param(
Expand Down
2 changes: 1 addition & 1 deletion modin/tests/pandas/dataframe/test_join_sort.py
Original file line number Diff line number Diff line change
Expand Up @@ -879,7 +879,7 @@ def test_sort_values_with_only_one_non_na_row_in_partition(ascending, na_positio
np.random.rand(1000, 100), columns=[f"col {i}" for i in range(100)]
)
# Need to ensure that one of the partitions has all NA values except for one row
pandas_df.iloc[340:] = np.NaN
pandas_df.iloc[340:] = np.nan
pandas_df.iloc[-1] = -4.0
modin_df = pd.DataFrame(pandas_df)
eval_general(
Expand Down
8 changes: 4 additions & 4 deletions modin/tests/pandas/test_groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -479,8 +479,8 @@ def test_simple_row_groupby(by, as_index, col1_category):
pandas_df = pandas.DataFrame(
{
"col1": [0, 1, 2, 3],
"col2": [4, 5, np.NaN, 7],
"col3": [np.NaN, np.NaN, 12, 10],
"col2": [4, 5, np.nan, 7],
"col3": [np.nan, np.nan, 12, 10],
"col4": [17, 13, 16, 15],
"col5": [-4, -5, -6, -7],
}
Expand Down Expand Up @@ -1850,8 +1850,8 @@ def test_shift_freq(groupby_axis, shift_axis, groupby_sort):
pandas_df = pandas.DataFrame(
{
"col1": [1, 0, 2, 3],
"col2": [4, 5, np.NaN, 7],
"col3": [np.NaN, np.NaN, 12, 10],
"col2": [4, 5, np.nan, 7],
"col3": [np.nan, np.nan, 12, 10],
"col4": [17, 13, 16, 15],
}
)
Expand Down
6 changes: 3 additions & 3 deletions modin/tests/pandas/test_series.py
Original file line number Diff line number Diff line change
Expand Up @@ -4008,11 +4008,11 @@ def test_str___getitem__(data, key):
# Test str operations
@pytest.mark.parametrize(
"others",
[["abC|DeF,Hik", "gSaf,qWer|Gre", "asd3,4sad|", np.NaN], None],
[["abC|DeF,Hik", "gSaf,qWer|Gre", "asd3,4sad|", np.nan], None],
ids=["list", "None"],
)
def test_str_cat(others):
data = ["abC|DeF,Hik", "gSaf,qWer|Gre", "asd3,4sad|", np.NaN]
data = ["abC|DeF,Hik", "gSaf,qWer|Gre", "asd3,4sad|", np.nan]
eval_general(*create_test_series(data), lambda s: s.str.cat(others=others))


Expand Down Expand Up @@ -4644,7 +4644,7 @@ def str_encode_decode_test_data() -> list[str]:
"234,3245.67",
"gSaf,qWer|Gre",
"asd3,4sad|",
np.NaN,
np.nan,
None,
# add a string that we can't encode in ascii, and whose utf-8 encoding
# we cannot decode in ascii
Expand Down
12 changes: 6 additions & 6 deletions modin/tests/pandas/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@
(
x
if (j % 4 == 0 and i > NCOLS // 2) or (j != i and i <= NCOLS // 2)
else np.NaN
else np.nan
)
for j, x in enumerate(
random_state.uniform(RAND_LOW, RAND_HIGH, size=(NROWS))
Expand Down Expand Up @@ -161,7 +161,7 @@
for col in test_data["float_nan_data"]:
for row in range(NROWS // 2):
if row % 16 == 0:
test_data["float_nan_data"][col][row] = np.NaN
test_data["float_nan_data"][col][row] = np.nan

test_data_values = list(test_data.values())
test_data_keys = list(test_data.keys())
Expand Down Expand Up @@ -226,8 +226,8 @@

test_data_diff_dtype = {
"int_col": [-5, 2, 7, 16],
"float_col": [np.NaN, -9.4, 10.1, np.NaN],
"str_col": ["a", np.NaN, "c", "d"],
"float_col": [np.nan, -9.4, 10.1, np.nan],
"str_col": ["a", np.nan, "c", "d"],
"bool_col": [False, True, True, False],
}

Expand Down Expand Up @@ -272,7 +272,7 @@
"234,3245.67",
"gSaf,qWer|Gre",
"asd3,4sad|",
np.NaN,
np.nan,
]
}

Expand All @@ -290,7 +290,7 @@
string_sep_values = list(string_seperators.values())
string_sep_keys = list(string_seperators.keys())

string_na_rep = {"None na_rep": None, "- na_rep": "-", "nan na_rep": np.NaN}
string_na_rep = {"None na_rep": None, "- na_rep": "-", "nan na_rep": np.nan}

string_na_rep_values = list(string_na_rep.values())
string_na_rep_keys = list(string_na_rep.keys())
Expand Down

0 comments on commit 2006292

Please sign in to comment.