modin-project · YarShev · Sep 23, 2022 · Sep 21, 2022 · YarShev · Sep 22, 2022
@@ -80,6 +80,7 @@ Key Features and Updates
   * PERF-#4920: Avoid index and cache computations in `take_2d_labels_or_positional` unless they are needed (#4921)
   * PERF-#4999: don't call `apply` in virtual partition's `drain_call_queue` if `call_queue` is empty (#4975)
   * PERF-#4268: Implement partition-parallel __getitem__ for bool Series masks (#4753)
+  * PERF-#5017: `reset_index` shouldn't trigger index materialization if possible (#5018)
   * PERF-#4963: Use partition `width/length` methods instead of `_compute_axis_labels_and_lengths` if index is already known (#4964)
   * PERF-#4940: Optimize categorical dtype check in `concatenate` (#4953)
 * Benchmarking enhancements

@@ -550,7 +550,7 @@ def reset_index(self, **kwargs):
                     if len(level) < self.index.nlevels
                     else pandas.RangeIndex(len(self.index))
                 )
-        else:
+        elif not drop:
             uniq_sorted_level = list(range(self.index.nlevels))
 
         if not drop:
@@ -627,7 +627,8 @@ def reset_index(self, **kwargs):
         else:
             new_self = self.copy()
             new_self.index = (
-                pandas.RangeIndex(len(new_self.index))
+                # Cheaper to compute row lengths than index
+                pandas.RangeIndex(sum(new_self._modin_frame._row_lengths))
                 if new_index is None
                 else new_index
             )