WIP: Documentation & Typos #1629

Draft · wants to merge 36 commits into master

Commits (36), all authored by jneuendorf:
9c9b8f9  typos (Mar 21, 2024)
e2a3bd3  WIP: refactoring (_init_transform_groups -> TransformGroups.create) (Mar 25, 2024)
f9936db  typo (Mar 25, 2024)
471935b  revert e2a3bd35, enable transform groups kwargs for as_avalanche_data… (Mar 25, 2024)
fb59bf3  Merge branch 'documentation' of github.com:jneuendorf/avalanche into … (Mar 25, 2024)
f407b7a  typos, fix benchmark example code (May 12, 2024)
d1f171c  export MNIST default transforms for composing to them (May 21, 2024)
a5bb5bc  typos (May 25, 2024)
f1a4d0f  [docs] replace dataset with experience for dynamic moduels (May 25, 2024)
d7ad34b  defaults for _init_transform_groups (implementation covers these cases) (May 25, 2024)
f7d1b4e  typos (May 27, 2024)
6916ef0  typo (May 30, 2024)
b062b96  split_validation_random: no default value for required 'dataset' argu… (May 30, 2024)
46c98ec  Merge branch 'master' into documentation (May 30, 2024)
4410aa6  Merge branch 'documentation' into documentation2 (May 30, 2024)
a6fcb39  typo (May 30, 2024)
68cd80b  [typing] use Self instead TypeVar, organize imports (May 30, 2024)
b94bed2  [benchmark] typing (May 31, 2024)
3021e90  [benchmark] typo (May 31, 2024)
8c819b7  [benchmark] enhance split_validation_random typing (May 31, 2024)
393ba88  [models] fix incorrectly merged optimizer (Jun 1, 2024)
13a33f5  [training] fix imports/exports, typo (Jun 2, 2024)
ebc2aef  [notebooks] fix Avalanche dataset contructor (Jun 2, 2024)
38173c1  [benchmarks] prefer make_avalanche_dataset over constructor for singl… (Jun 3, 2024)
61f3253  Merge branch 'master' into documentation (Jun 3, 2024)
15efc3c  [models] dynamic module typos (Jun 3, 2024)
74d3aa3  Merge branch 'documentation2' of github.com:jneuendorf/avalanche into… (Jun 3, 2024)
8b7b73e  Merge branch 'documentation2' into documentation (Jun 3, 2024)
a8995fb  [benchmarks] typos, doc notes (Jun 6, 2024)
1486573  [benchmarks] typing (Jun 6, 2024)
7cc252f  [benchmarks] typing (Jun 7, 2024)
65ba3d4  [training] fix typing and typo in SCR (Jun 11, 2024)
c19558b  [evaluation] typo, [training] better error message (Jun 14, 2024)
d08c642  [training] typing (optional evaluator, remove redundant generic + pro… (Jun 14, 2024)
c5c635d  typo, documentation (Jun 20, 2024)
f7a4934  [training] add debug prints to contrastive learning (Sep 12, 2024)
25 changes: 15 additions & 10 deletions avalanche/benchmarks/classic/cmnist.py
@@ -33,9 +33,8 @@
)
from avalanche.benchmarks.utils.data import make_avalanche_dataset

-_default_mnist_train_transform = Compose([Normalize((0.1307,), (0.3081,))])
-
-_default_mnist_eval_transform = Compose([Normalize((0.1307,), (0.3081,))])
+default_mnist_train_transform = Compose([Normalize((0.1307,), (0.3081,))])
+default_mnist_eval_transform = Compose([Normalize((0.1307,), (0.3081,))])


class PixelsPermutation(object):
@@ -83,8 +82,8 @@ def SplitMNIST(
shuffle: bool = True,
class_ids_from_zero_in_each_exp: bool = False,
class_ids_from_zero_from_first_exp: bool = False,
-train_transform: Optional[Any] = _default_mnist_train_transform,
-eval_transform: Optional[Any] = _default_mnist_eval_transform,
+train_transform: Optional[Any] = default_mnist_train_transform,
+eval_transform: Optional[Any] = default_mnist_eval_transform,
dataset_root: Optional[Union[str, Path]] = None
):
"""
@@ -170,8 +169,8 @@ def PermutedMNIST(
*,
return_task_id=False,
seed: Optional[int] = None,
-train_transform: Optional[Any] = _default_mnist_train_transform,
-eval_transform: Optional[Any] = _default_mnist_eval_transform,
+train_transform: Optional[Any] = default_mnist_train_transform,
+eval_transform: Optional[Any] = default_mnist_eval_transform,
dataset_root: Optional[Union[str, Path]] = None
) -> NCScenario:
"""
@@ -268,8 +267,8 @@ def RotatedMNIST(
return_task_id: bool = False,
seed: Optional[int] = None,
rotations_list: Optional[Sequence[int]] = None,
-train_transform: Optional[Any] = _default_mnist_train_transform,
-eval_transform: Optional[Any] = _default_mnist_eval_transform,
+train_transform: Optional[Any] = default_mnist_train_transform,
+eval_transform: Optional[Any] = default_mnist_eval_transform,
dataset_root: Optional[Union[str, Path]] = None
) -> NCScenario:
"""Creates a Rotated MNIST benchmark.
@@ -379,7 +378,13 @@ def RotatedMNIST(
)


__all__ = ["SplitMNIST", "PermutedMNIST", "RotatedMNIST"]
__all__ = [
"SplitMNIST",
"PermutedMNIST",
"RotatedMNIST",
"default_mnist_train_transform",
"default_mnist_eval_transform",
]


if __name__ == "__main__":
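Note: with the leading underscore dropped and the names exported, callers can compose extra transforms with the defaults instead of re-declaring the MNIST normalization statistics (cf. commit d1f171c). A minimal sketch; the RandomRotation augmentation is purely illustrative::

    from torchvision.transforms import Compose, RandomRotation

    from avalanche.benchmarks.classic.cmnist import (
        SplitMNIST,
        default_mnist_train_transform,
    )

    # extend the exported default instead of copying (0.1307,), (0.3081,)
    train_transform = Compose([
        RandomRotation(10),
        default_mnist_train_transform,
    ])

    benchmark = SplitMNIST(n_experiences=5, train_transform=train_transform)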
2 changes: 1 addition & 1 deletion avalanche/benchmarks/datasets/dataset_utils.py
@@ -48,7 +48,7 @@ def load_config_file():


def maybe_init_config_file():
"""Initialize Avalanche user's config file, if it does not exists yet.
"""Initialize Avalanche user's config file, if it does not exist yet.

The file is located in `~/.avalanche/config.json`
"""
1 change: 1 addition & 0 deletions avalanche/benchmarks/scenarios/__init__.py
@@ -1,6 +1,7 @@
from .generic_scenario import *
from .deprecated.dataset_scenario import *
from .deprecated.classification_scenario import *
+from .deprecated.generic_benchmark_creation import *
from .deprecated.new_classes import *
from .deprecated.new_instances import *

16 changes: 7 additions & 9 deletions avalanche/benchmarks/scenarios/dataset_scenario.py
@@ -73,13 +73,12 @@ def __init__(
self, *, dataset: TCLDataset, current_experience: Optional[int] = None
):
super().__init__(current_experience=current_experience, origin_stream=None)
-self._dataset: AvalancheDataset = dataset
+self._dataset = dataset

@property
-def dataset(self) -> AvalancheDataset:
+def dataset(self) -> TCLDataset:
# dataset is a read-only property
-data = self._dataset
-return data
+return self._dataset


def _split_dataset_by_attribute(
@@ -101,9 +100,9 @@ def _split_dataset_by_attribute(
def split_validation_random(
validation_size: Union[int, float],
shuffle: bool,
+dataset: TCLDataset,
seed: Optional[int] = None,
-dataset: Optional[AvalancheDataset] = None,
-) -> Tuple[AvalancheDataset, AvalancheDataset]:
+) -> Tuple[TCLDataset, TCLDataset]:
"""Splits an `AvalancheDataset` in two splits.

The default splitting strategy used by
Expand All @@ -119,7 +118,7 @@ def split_validation_random(
a single parameter: the experience. Consider wrapping your custom
splitting strategy with `partial` if more parameters are needed.

-You can use this split strategy with methdos that require a custom
+You can use this split strategy with methods that require a custom
split strategy such as :func:`benchmark_with_validation_stream`to split
a benchmark with::

@@ -133,11 +132,10 @@
Otherwise, the first instances will be allocated to the training
dataset by leaving the last ones to the validation dataset.
:param dataset: The dataset to split.
+:param seed: The random seed for shuffling the dataset.
:return: A tuple containing 2 elements: the new training and validation
datasets.
"""
-if dataset is None:
-    raise ValueError("dataset must be provided")
exp_indices = list(range(len(dataset)))

if seed is None:
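Note: `dataset` is now a required positional parameter rather than an `Optional` keyword defaulting to `None`, which makes the removed runtime check unnecessary. A sketch of the updated call, where `experience` stands for a hypothetical element of a train stream::

    from avalanche.benchmarks.scenarios.dataset_scenario import (
        split_validation_random,
    )

    train_part, valid_part = split_validation_random(
        0.2,                 # validation_size: 20% of the samples
        True,                # shuffle before splitting
        experience.dataset,  # the dataset to split (now required)
        seed=42,             # reproducible shuffling
    )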
1 change: 1 addition & 0 deletions avalanche/benchmarks/scenarios/deprecated/generators.py
@@ -72,6 +72,7 @@
TCLDataset = TypeVar("TCLDataset", bound="AvalancheDataset")


+# TODO: Nomenclature: experience vs task
def nc_benchmark(
train_dataset: Union[Sequence[SupportedDataset], SupportedDataset],
test_dataset: Union[Sequence[SupportedDataset], SupportedDataset],
avalanche/benchmarks/scenarios/deprecated/generic_benchmark_creation.py
@@ -31,7 +31,7 @@

from avalanche.benchmarks.utils.classification_dataset import (
_make_taskaware_tensor_classification_dataset,
-_make_taskaware_classification_dataset,
+_make_taskaware_classification_dataset, ClassificationDataset,
)

from avalanche.benchmarks.utils import (
@@ -197,7 +197,7 @@ class LazyStreamDefinition(NamedTuple):
This class is a named tuple containing the fields required for defining
a lazily-created benchmark.

-- exps_generator: The experiences generator. Can be a "yield"-based
+- exps_generator: The experience's generator. Can be a "yield"-based
generator, a custom sequence, a standard list or any kind of
iterable returning :class:`AvalancheDataset`.
- stream_length: The number of experiences in the stream. Must match the
@@ -207,7 +207,7 @@ class LazyStreamDefinition(NamedTuple):
can be used.
"""

-exps_generator: Iterable[TaskAwareClassificationDataset]
+exps_generator: Iterable[ClassificationDataset]
"""
The experiences generator. Can be a "yield"-based generator, a custom
sequence, a standard list or any kind of iterable returning
avalanche/benchmarks/scenarios/deprecated/new_classes/nc_scenario.py
@@ -556,7 +556,7 @@ class NCExperience(ClassificationExperience[TaskAwareSupervisedClassificationDat
def __init__(self, origin_stream: NCStream, current_experience: int):
"""
Creates a ``NCExperience`` instance given the stream from this
-experience was taken and and the current experience ID.
+experience was taken and the current experience ID.

:param origin_stream: The stream from which this experience was
obtained.
15 changes: 11 additions & 4 deletions avalanche/benchmarks/scenarios/generic_scenario.py
@@ -36,7 +36,7 @@
slice_alike_object_to_indices,
)


# Typing
T = TypeVar("T")
TCov = TypeVar("TCov", covariant=True)
@@ -275,6 +274,7 @@ def _check_unset_attribute(attribute_name: str, attribute_value: Any):
)


+# TODO: itertools.cycle?
class GeneratorMemo(Generic[T]):
def __init__(self, generator: Generator[T, None, None]):
self._generator: Optional[Generator[T, None, None]] = generator
@@ -386,7 +386,7 @@ def __len__(self) -> int:

:return: The number of experiences in this stream.
"""
-pass
+...


class SequenceCLStream(SizedCLStream[TCLExperience], Sequence[TCLExperience], ABC):
@@ -590,6 +590,13 @@ class CLScenario(Generic[TCLStream]):
provide access to past, current, and future data.
"""

+# Define usual empty streams for typing
+# TODO: If regarded unnecessary, the constructor magic should be removed
+# and `scenario.streams['train']` yields the correct type
+train_stream = CLStream('train', [])
+test_stream = CLStream('test', [])
+valid_stream = CLStream('valid', [])

def __init__(self, streams: Iterable[TCLStream]):
"""Creates an instance of a Continual Learning benchmark.

@@ -603,7 +610,7 @@ def __init__(self, streams: Iterable[TCLStream]):

@property
def streams(self):
-# we don't want in-place modifications so we return a copy
+# we don't want in-place modifications, so we return a copy
return copy(self._streams)


@@ -612,7 +619,7 @@ def make_stream(name: str, exps: Iterable[CLExperience]) -> CLStream:

Uses the correct class for generators, sized generators, and lists.

-:param new_name: The name of the new stream.
+:param name: The name of the new stream.
:param exps: sequence of experiences.
"""
s_wrapped: CLStream
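Note: `make_stream` chooses the concrete stream class from the type of `exps`. A sketch, assuming the function is importable from this module and `experiences` is a hypothetical list of `CLExperience` objects::

    from avalanche.benchmarks.scenarios.generic_scenario import make_stream

    # a list is a sized sequence, so this yields an eager stream
    train_stream = make_stream("train", experiences)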
1 change: 1 addition & 0 deletions avalanche/benchmarks/scenarios/online.py
@@ -46,6 +46,7 @@ class CyclicSampler(Sampler):
"""Samples elements from [0,..,len(dataset)-1] in a cyclic manner."""

def __init__(self, n_samples, shuffle=True, rng=None):
+super().__init__()
self.n_samples = n_samples
self.rng = rng
self.shuffle = shuffle
24 changes: 10 additions & 14 deletions avalanche/benchmarks/scenarios/supervised.py
@@ -11,6 +11,7 @@

"""High-level benchmark generators for supervised scenarios such as class-incremental."""
import warnings
+from collections.abc import Collection
from copy import copy
from typing import (
Iterable,
Expand All @@ -22,6 +23,7 @@
)

import torch
+from typing_extensions import Self

from avalanche.benchmarks.utils.classification_dataset import (
ClassificationDataset,
@@ -30,7 +32,7 @@
from avalanche.benchmarks.utils.data import AvalancheDataset
from avalanche.benchmarks.utils.data_attribute import DataAttribute
from .dataset_scenario import _split_dataset_by_attribute, DatasetExperience
-from .generic_scenario import CLScenario, CLStream, EagerCLStream
+from .generic_scenario import CLScenario, CLStream, EagerCLStream, CLExperience


def class_incremental_benchmark(
@@ -40,7 +42,7 @@ def class_incremental_benchmark(
num_experiences: Optional[int] = None,
num_classes_per_exp: Optional[Sequence[int]] = None,
seed: Optional[int] = None,
-) -> CLScenario:
+) -> CLScenario[EagerCLStream[DatasetExperience[ClassificationDataset]]]:
"""Splits datasets according to a class-incremental scenario.

Each dataset will create a stream with the same class order.
@@ -103,14 +105,14 @@ def class_incremental_benchmark(
classes_exp_assignment = []
if num_experiences is not None:
assert num_classes_per_exp is None, "BUG: num_classes_per_exp must be None"
-curr_classess_per_exp: int = num_classes // num_experiences
+curr_classes_per_exp: int = num_classes // num_experiences
for eid in range(num_experiences):
if eid == 0:
-classes_exp_assignment.append(class_order[:curr_classess_per_exp])
+classes_exp_assignment.append(class_order[:curr_classes_per_exp])
else:
# final exp will take reminder of classes if they don't divide equally
-start_idx = curr_classess_per_exp * eid
-end_idx = start_idx + curr_classess_per_exp
+start_idx = curr_classes_per_exp * eid
+end_idx = start_idx + curr_classes_per_exp
classes_exp_assignment.append(class_order[start_idx:end_idx])
elif num_classes_per_exp is not None:
num_curr = 0
@@ -120,7 +122,7 @@ def class_incremental_benchmark(
num_curr += num_classes

# create the streams using class_order to split the data
-streams = []
+streams: List[EagerCLStream[DatasetExperience[ClassificationDataset]]] = []
for name, dd in datasets_dict.items():
curr_stream = []
data_by_class = _split_dataset_by_attribute(dd, "targets")
@@ -339,12 +341,6 @@ def new_instances_benchmark(
return CLScenario(streams=[train_stream, test_stream])


-__all__ = [
-    "class_incremental_benchmark",
-    "new_instances_benchmark",
-]


class ClassesTimeline(Protocol):
"""Experience decorator that provides info about classes occurrence over time."""

@@ -381,7 +377,7 @@ def _decorate_benchmark(obj: CLScenario):
new_streams.append(_decorate_stream(s))
return CLScenario(new_streams)

-def _decorate_stream(obj: CLStream):
+def _decorate_stream(obj: CLStream[DatasetExperience[ClassificationDataset]]):
# TODO: support stream generators. Should return a new generators which applies
# foo_decorate_exp every time a new experience is generated.
new_stream = []
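Note: the sharper return annotation documents that the generator yields eager streams of dataset experiences. A usage sketch; `train_set` and `test_set` are hypothetical `ClassificationDataset` objects with `targets` populated::

    from avalanche.benchmarks.scenarios.supervised import (
        class_incremental_benchmark,
    )

    benchmark = class_incremental_benchmark(
        {"train": train_set, "test": test_set},
        num_experiences=5,  # the final experience takes any remainder classes
        seed=1234,          # fixes the random class order
    )
    train_stream = benchmark.streams["train"]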
2 changes: 1 addition & 1 deletion avalanche/benchmarks/scenarios/task_aware.py
@@ -124,7 +124,7 @@ def task_incremental_benchmark(bm: CLScenario, reset_task_labels=False) -> CLSce

with_task_labels(benchmark_from_datasets(**dataset_streams)

-:param **dataset_streams: keys are stream names, values are list of datasets.
+:param dataset_streams: keys are stream names, values are list of datasets.
:param reset_task_labels: whether existing task labels should be ignored.
If False (default) if any dataset has task labels the function will raise
a ValueError. If `True`, it will reset task labels.
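Note: the corrected `:param` name matches the actual `dataset_streams` argument. The call pattern quoted in the docstring, sketched with hypothetical datasets::

    from avalanche.benchmarks.scenarios.dataset_scenario import (
        benchmark_from_datasets,
    )
    from avalanche.benchmarks.scenarios.task_aware import with_task_labels

    # attach task labels to every experience of the wrapped benchmark
    bm = with_task_labels(
        benchmark_from_datasets(
            train=[train_d1, train_d2],
            test=[test_d1, test_d2],
        )
    )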
2 changes: 1 addition & 1 deletion avalanche/benchmarks/scenarios/validation_scenario.py
@@ -70,7 +70,7 @@ def benchmark_with_validation_stream(
# functools.partial is a more compact option
# However, MyPy does not understand what a partial is -_-
def random_validation_split_strategy_wrapper(data):
-return split_validation_random(validation_size, shuffle, seed, data)
+return split_validation_random(validation_size, shuffle, data, seed)

split_strategy = random_validation_split_strategy_wrapper
else:
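Note: the wrapper now forwards the arguments in the order of the corrected `split_validation_random` signature (`dataset` before `seed`). A sketch of the call site that exercises it; `benchmark` is a hypothetical scenario with a train stream, and the keyword names follow the variables visible in the hunk above::

    from avalanche.benchmarks.scenarios.validation_scenario import (
        benchmark_with_validation_stream,
    )

    bm_with_valid = benchmark_with_validation_stream(
        benchmark,
        validation_size=0.2,
        shuffle=True,
        seed=42,
    )
    valid_stream = bm_with_valid.streams["valid"]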