diff --git a/docs/release_notes/release_notes-0.16.0.rst b/docs/release_notes/release_notes-0.16.0.rst index 52435bcc418..82bc8177faa 100644 --- a/docs/release_notes/release_notes-0.16.0.rst +++ b/docs/release_notes/release_notes-0.16.0.rst @@ -80,6 +80,7 @@ Key Features and Updates * PERF-#4920: Avoid index and cache computations in `take_2d_labels_or_positional` unless they are needed (#4921) * PERF-#4999: don't call `apply` in virtual partition' `drain_call_queue` if `call_queue` is empty (#4975) * PERF-#4268: Implement partition-parallel __getitem__ for bool Series masks (#4753) + * PERF-#5017: `reset_index` shouldn't trigger index materialization if possible (#5018) * PERF-#4963: Use partition `width/length` methods instead of `_compute_axis_labels_and_lengths` if index is already known (#4964) * PERF-#4940: Optimize categorical dtype check in `concatenate` (#4953) * Benchmarking enhancements diff --git a/modin/core/storage_formats/pandas/query_compiler.py b/modin/core/storage_formats/pandas/query_compiler.py index 8bdd614577c..f70630fd3e6 100644 --- a/modin/core/storage_formats/pandas/query_compiler.py +++ b/modin/core/storage_formats/pandas/query_compiler.py @@ -550,7 +550,7 @@ def reset_index(self, **kwargs): if len(level) < self.index.nlevels else pandas.RangeIndex(len(self.index)) ) - else: + elif not drop: uniq_sorted_level = list(range(self.index.nlevels)) if not drop: @@ -627,7 +627,8 @@ def reset_index(self, **kwargs): else: new_self = self.copy() new_self.index = ( - pandas.RangeIndex(len(new_self.index)) + # Cheaper to compute row lengths than index + pandas.RangeIndex(sum(new_self._modin_frame._row_lengths)) if new_index is None else new_index )