From 6a805e82d498392e38f5aff1a63f9ad73505e422 Mon Sep 17 00:00:00 2001
From: Jiaming Yuan <jm.yuan@outlook.com>
Date: Sat, 8 Feb 2025 18:25:54 +0800
Subject: [PATCH] [doc] Update the iterator demo.

---
 demo/guide-python/external_memory.py        |  4 ++
 demo/guide-python/quantile_data_iterator.py | 51 ++++++++++++---------
 ops/script/lint_python.py                   |  1 +
 3 files changed, 35 insertions(+), 21 deletions(-)

diff --git a/demo/guide-python/external_memory.py b/demo/guide-python/external_memory.py
index 388050f88730..8973cd6ebaaf 100644
--- a/demo/guide-python/external_memory.py
+++ b/demo/guide-python/external_memory.py
@@ -25,6 +25,10 @@
 - rmm
 - python-cuda
 
+.. seealso::
+
+  :ref:`sphx_glr_python_examples_distributed_extmem_basic.py`
+
 """
 
 import argparse
diff --git a/demo/guide-python/quantile_data_iterator.py b/demo/guide-python/quantile_data_iterator.py
index ac68bad119cc..4753d5c5083a 100644
--- a/demo/guide-python/quantile_data_iterator.py
+++ b/demo/guide-python/quantile_data_iterator.py
@@ -5,18 +5,24 @@
     .. versionadded:: 1.2.0
 
 The demo that defines a customized iterator for passing batches of data into
-:py:class:`xgboost.QuantileDMatrix` and use this ``QuantileDMatrix`` for
-training.  The feature is used primarily designed to reduce the required GPU
-memory for training on distributed environment.
+:py:class:`xgboost.QuantileDMatrix` and use this ``QuantileDMatrix`` for training.  The
+feature is primarily designed to reduce the required GPU memory for training on
+distributed environment.
 
-Aftering going through the demo, one might ask why don't we use more native
-Python iterator?  That's because XGBoost requires a `reset` function, while
-using `itertools.tee` might incur significant memory usage according to:
+Aftering going through the demo, one might ask why don't we use more native Python
+iterator?  That's because XGBoost requires a `reset` function, while using
+`itertools.tee` might incur significant memory usage according to:
 
   https://docs.python.org/3/library/itertools.html#itertools.tee.
 
+.. seealso::
+
+  :ref:`sphx_glr_python_examples_external_memory.py`
+
 """
 
+from typing import Callable
+
 import cupy
 import numpy
 
@@ -35,7 +41,7 @@ class IterForDMatrixDemo(xgboost.core.DataIter):
 
     """
 
-    def __init__(self):
+    def __init__(self) -> None:
         """Generate some random data for demostration.
 
         Actual data can be anything that is currently supported by XGBoost.
@@ -50,41 +56,44 @@ def __init__(self):
         self.it = 0  # set iterator to 0
         super().__init__()
 
-    def as_array(self):
+    def as_array(self) -> cupy.ndarray:
         return cupy.concatenate(self._data)
 
-    def as_array_labels(self):
+    def as_array_labels(self) -> cupy.ndarray:
         return cupy.concatenate(self._labels)
 
-    def as_array_weights(self):
+    def as_array_weights(self) -> cupy.ndarray:
         return cupy.concatenate(self._weights)
 
-    def data(self):
+    def data(self) -> cupy.ndarray:
         """Utility function for obtaining current batch of data."""
         return self._data[self.it]
 
-    def labels(self):
+    def labels(self) -> cupy.ndarray:
         """Utility function for obtaining current batch of label."""
         return self._labels[self.it]
 
-    def weights(self):
+    def weights(self) -> cupy.ndarray:
         return self._weights[self.it]
 
-    def reset(self):
+    def reset(self) -> None:
         """Reset the iterator"""
         self.it = 0
 
-    def next(self, input_data):
-        """Yield next batch of data."""
+    def next(self, input_data: Callable) -> bool:
+        """Yield the next batch of data."""
         if self.it == len(self._data):
-            # Return 0 when there's no more batch.
-            return 0
+            # Return False to let XGBoost know this is the end of iteration
+            return False
+
+        # input_data is a keyword-only function passed in by XGBoost and has the similar
+        # signature to the ``DMatrix`` constructor.
         input_data(data=self.data(), label=self.labels(), weight=self.weights())
         self.it += 1
-        return 1
+        return True
 
 
-def main():
+def main() -> None:
     rounds = 100
     it = IterForDMatrixDemo()
 
@@ -103,7 +112,7 @@ def main():
 
     assert m_with_it.num_col() == m.num_col()
     assert m_with_it.num_row() == m.num_row()
-    # Tree meethod must be `hist`.
+    # Tree method must be `hist`.
     reg_with_it = xgboost.train(
         {"tree_method": "hist", "device": "cuda"},
         m_with_it,
diff --git a/ops/script/lint_python.py b/ops/script/lint_python.py
index 8c30d261b520..0545c4b55644 100644
--- a/ops/script/lint_python.py
+++ b/ops/script/lint_python.py
@@ -122,6 +122,7 @@ class LintersPaths:
         "demo/guide-python/model_parser.py",
         "demo/guide-python/individual_trees.py",
         "demo/guide-python/quantile_regression.py",
+        "demo/guide-python/quantile_data_iterator.py",
         "demo/guide-python/multioutput_regression.py",
         "demo/guide-python/learning_to_rank.py",
         "demo/aft_survival/aft_survival_viz_demo.py",