diff --git a/.flake8 b/.flake8 new file mode 100644 index 000000000..b61e46d48 --- /dev/null +++ b/.flake8 @@ -0,0 +1,5 @@ +[flake8] +ignore = E203, E266, E501, W503, F403, F401 +max-line-length = 89 +max-complexity = 18 +select = B,C,E,F,W,T4,B9 \ No newline at end of file diff --git a/.github/workflows/black.yaml b/.github/workflows/black.yaml new file mode 100644 index 000000000..9065b5e02 --- /dev/null +++ b/.github/workflows/black.yaml @@ -0,0 +1,10 @@ +name: Lint + +on: [push, pull_request] + +jobs: + lint: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - uses: psf/black@stable diff --git a/.github/workflows/pep8.yml b/.github/workflows/pep8.yml deleted file mode 100644 index 8008d1311..000000000 --- a/.github/workflows/pep8.yml +++ /dev/null @@ -1,67 +0,0 @@ -################################################################################ -# Copyright (c) 2021 ContinualAI. # -# Copyrights licensed under the MIT License. # -# See the accompanying LICENSE file for terms. # -# # -# Date: 28-01-2021 # -# Author(s): Gabriele Graffieti # -# E-mail: contact@continualai.org # -# Website: avalanche.continualai.org # -################################################################################ - - -name: PEP8 checking -# This workflow is triggered on pushes to the repository. -on: - push: - paths: - - '**.py' - - '.github/workflows/pep8.yml' - - '.pylintrc' - pull_request_target: - branches: - - master - paths: - - '**.py' - - '.github/workflows/pep8.yml' - - '.pylintrc' - -jobs: - check: - name: check pep8 - runs-on: ubuntu-latest - container: python:3.8 - steps: - - name: check out the repo (push) - if: github.event_name == 'push' - uses: actions/checkout@v3 - with: - token: ${{ secrets.GITHUB_TOKEN }} - - name: check out the repo (PR) - if: github.event_name == 'pull_request_target' - uses: actions/checkout@v3 - with: - ref: ${{github.event.pull_request.head.ref}} - repository: ${{github.event.pull_request.head.repo.full_name}} - - name: install pycodestyle - run: pip install pycodestyle - - name: check pep8 in source files - run: | - pycodestyle avalanche tests examples --exclude "examples/tvdetection/**" > pep8_report.txt - cat pep8_report.txt - - name: send PR message - if: failure() && github.event_name == 'pull_request_target' && github.repository == 'ContinualAI/avalanche' - uses: actions/github-script@v3 - with: - github-token: ${{ secrets.BOT_TOKEN }} - script: | - var message = "Oh no! It seems there are some PEP8 errors! 😕\nDon't worry, you can fix them! 
💪\nHere's a report about the errors and where you can find them:\n\n```\n" - var fs = require("fs"); - var text = fs.readFileSync("pep8_report.txt", "utf-8"); - var text_complete = message.concat(text, "\n```") - github.issues.createComment({ - issue_number: context.issue.number, - owner: context.repo.owner, - repo: context.repo.repo, - body: text_complete - }) diff --git a/.gitignore b/.gitignore index 42af821b3..08f6a6435 100644 --- a/.gitignore +++ b/.gitignore @@ -14,4 +14,4 @@ csvlogs/ docs/generated/ .fleet pip-wheel-metadata -**/.DS_Store \ No newline at end of file +**/.DS_Store diff --git a/avalanche/_annotations.py b/avalanche/_annotations.py index c2d6b7eb9..7a6b12df6 100644 --- a/avalanche/_annotations.py +++ b/avalanche/_annotations.py @@ -34,7 +34,7 @@ def shining_new_method(): def decorator(func): if func.__doc__ is None: - func.__doc__ = '' + func.__doc__ = "" else: func.__doc__ += "\n\n" @@ -59,6 +59,7 @@ def deprecated(version: float, reason: str): alternative :return: """ + def decorator(func): if inspect.isclass(func): msg_prefix = "Call to deprecated class {name}" @@ -69,21 +70,23 @@ def decorator(func): msg = msg_prefix + msg_suffix if func.__doc__ is None: - func.__doc__ = '' + func.__doc__ = "" else: func.__doc__ += "\n\n" func.__doc__ += "Warning: Deprecated" + msg_suffix.format( - name=func.__name__, version=version, reason=reason) + name=func.__name__, version=version, reason=reason + ) @functools.wraps(func) def wrapper(*args, **kwargs): - warnings.simplefilter('always', DeprecationWarning) - warnings.warn(msg.format(name=func.__name__, version=version, - reason=reason), - category=DeprecationWarning, - stacklevel=2) - warnings.simplefilter('default', DeprecationWarning) + warnings.simplefilter("always", DeprecationWarning) + warnings.warn( + msg.format(name=func.__name__, version=version, reason=reason), + category=DeprecationWarning, + stacklevel=2, + ) + warnings.simplefilter("default", DeprecationWarning) return func(*args, **kwargs) return wrapper diff --git a/avalanche/benchmarks/classic/ccifar10.py b/avalanche/benchmarks/classic/ccifar10.py index 1721c5b02..a96d6fb6b 100644 --- a/avalanche/benchmarks/classic/ccifar10.py +++ b/avalanche/benchmarks/classic/ccifar10.py @@ -18,26 +18,21 @@ check_vision_benchmark, ) -from avalanche.benchmarks.datasets.external_datasets.cifar import \ - get_cifar10_dataset +from avalanche.benchmarks.datasets.external_datasets.cifar import get_cifar10_dataset _default_cifar10_train_transform = transforms.Compose( [ transforms.RandomCrop(32, padding=4), transforms.RandomHorizontalFlip(), transforms.ToTensor(), - transforms.Normalize( - (0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010) - ), + transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)), ] ) _default_cifar10_eval_transform = transforms.Compose( [ transforms.ToTensor(), - transforms.Normalize( - (0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010) - ), + transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)), ] ) @@ -158,6 +153,4 @@ class "34" will be mapped to "1", class "11" to "2" and so on. 
check_vision_benchmark(benchmark_instance) sys.exit(0) -__all__ = [ - "SplitCIFAR10" -] +__all__ = ["SplitCIFAR10"] diff --git a/avalanche/benchmarks/classic/ccifar100.py b/avalanche/benchmarks/classic/ccifar100.py index 85f6bc4cf..6f08af93d 100644 --- a/avalanche/benchmarks/classic/ccifar100.py +++ b/avalanche/benchmarks/classic/ccifar100.py @@ -19,10 +19,13 @@ check_vision_benchmark, ) -from avalanche.benchmarks.datasets.external_datasets.cifar import \ - get_cifar100_dataset, get_cifar10_dataset -from avalanche.benchmarks.utils.classification_dataset import \ - concat_classification_datasets_sequentially +from avalanche.benchmarks.datasets.external_datasets.cifar import ( + get_cifar100_dataset, + get_cifar10_dataset, +) +from avalanche.benchmarks.utils.classification_dataset import ( + concat_classification_datasets_sequentially, +) from avalanche.benchmarks import nc_benchmark, NCScenario @@ -31,18 +34,14 @@ transforms.RandomCrop(32, padding=4), transforms.RandomHorizontalFlip(), transforms.ToTensor(), - transforms.Normalize( - (0.5071, 0.4865, 0.4409), (0.2673, 0.2564, 0.2762) - ), + transforms.Normalize((0.5071, 0.4865, 0.4409), (0.2673, 0.2564, 0.2762)), ] ) _default_cifar100_eval_transform = transforms.Compose( [ transforms.ToTensor(), - transforms.Normalize( - (0.5071, 0.4865, 0.4409), (0.2673, 0.2564, 0.2762) - ), + transforms.Normalize((0.5071, 0.4865, 0.4409), (0.2673, 0.2564, 0.2762)), ] ) @@ -242,10 +241,13 @@ class "34" will be mapped to "1", class "11" to "2" and so on. cifar10_train, cifar10_test = get_cifar10_dataset(dataset_root_cifar10) cifar100_train, cifar100_test = get_cifar100_dataset(dataset_root_cifar100) - cifar_10_100_train, cifar_10_100_test, _ = \ - concat_classification_datasets_sequentially( - [cifar10_train, cifar100_train], [cifar10_test, cifar100_test] - ) + ( + cifar_10_100_train, + cifar_10_100_test, + _, + ) = concat_classification_datasets_sequentially( + [cifar10_train, cifar100_train], [cifar10_test, cifar100_test] + ) # cifar10 classes class_order = [_ for _ in range(10)] # if a class order is defined (for cifar100) the given class labels are @@ -288,7 +290,4 @@ class "34" will be mapped to "1", class "11" to "2" and so on. 
sys.exit(0) -__all__ = [ - "SplitCIFAR100", - "SplitCIFAR110" -] +__all__ = ["SplitCIFAR100", "SplitCIFAR110"] diff --git a/avalanche/benchmarks/classic/ccub200.py b/avalanche/benchmarks/classic/ccub200.py index 31cab1a54..3a6764289 100644 --- a/avalanche/benchmarks/classic/ccub200.py +++ b/avalanche/benchmarks/classic/ccub200.py @@ -26,18 +26,14 @@ [ transforms.RandomHorizontalFlip(), transforms.ToTensor(), - transforms.Normalize( - (0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010) - ), + transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)), ] ) _default_eval_transform = transforms.Compose( [ transforms.ToTensor(), - transforms.Normalize( - (0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010) - ), + transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)), ] ) diff --git a/avalanche/benchmarks/classic/cfashion_mnist.py b/avalanche/benchmarks/classic/cfashion_mnist.py index 190f3beea..b2827fd26 100644 --- a/avalanche/benchmarks/classic/cfashion_mnist.py +++ b/avalanche/benchmarks/classic/cfashion_mnist.py @@ -19,8 +19,7 @@ from avalanche.benchmarks.classic.classic_benchmarks_utils import ( check_vision_benchmark, ) -from avalanche.benchmarks.datasets.external_datasets.fmnist import \ - get_fmnist_dataset +from avalanche.benchmarks.datasets.external_datasets.fmnist import get_fmnist_dataset _default_fmnist_train_transform = transforms.Compose( [transforms.ToTensor(), transforms.Normalize((0.2860,), (0.3530,))] diff --git a/avalanche/benchmarks/classic/cimagenet.py b/avalanche/benchmarks/classic/cimagenet.py index c9aaca0aa..f0ff5377f 100644 --- a/avalanche/benchmarks/classic/cimagenet.py +++ b/avalanche/benchmarks/classic/cimagenet.py @@ -19,9 +19,7 @@ from torchvision import transforms -normalize = transforms.Normalize( - mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225] -) +normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) _default_train_transform = transforms.Compose( [ diff --git a/avalanche/benchmarks/classic/cinaturalist.py b/avalanche/benchmarks/classic/cinaturalist.py index 9e809da91..33b5ecfc3 100644 --- a/avalanche/benchmarks/classic/cinaturalist.py +++ b/avalanche/benchmarks/classic/cinaturalist.py @@ -22,9 +22,7 @@ from torchvision import transforms -normalize = transforms.Normalize( - mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225] -) +normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) _default_train_transform = transforms.Compose( [ @@ -181,16 +179,10 @@ def _get_inaturalist_dataset(dataset_root, super_categories, download): dataset_root = default_dataset_location("inatuarlist2018") train_set = INATURALIST2018( - str(dataset_root), - split="train", - supcats=super_categories, - download=download + str(dataset_root), split="train", supcats=super_categories, download=download ) test_set = INATURALIST2018( - str(dataset_root), - split="val", - supcats=super_categories, - download=download + str(dataset_root), split="val", supcats=super_categories, download=download ) return train_set, test_set diff --git a/avalanche/benchmarks/classic/classic_benchmarks_utils.py b/avalanche/benchmarks/classic/classic_benchmarks_utils.py index aef55f998..ffc336dcf 100644 --- a/avalanche/benchmarks/classic/classic_benchmarks_utils.py +++ b/avalanche/benchmarks/classic/classic_benchmarks_utils.py @@ -4,14 +4,13 @@ DatasetStream, ) -from avalanche.benchmarks.utils.classification_dataset import \ - ClassificationDataset +from avalanche.benchmarks.utils.classification_dataset import 
ClassificationDataset from avalanche.benchmarks.utils.data import AvalancheDataset def check_vision_benchmark( - benchmark_instance: DatasetScenario, - show_without_transforms=True): + benchmark_instance: DatasetScenario, show_without_transforms=True +): from matplotlib import pyplot as plt from torch.utils.data.dataloader import DataLoader diff --git a/avalanche/benchmarks/classic/clear.py b/avalanche/benchmarks/classic/clear.py index ec71941c2..541716c15 100644 --- a/avalanche/benchmarks/classic/clear.py +++ b/avalanche/benchmarks/classic/clear.py @@ -118,8 +118,7 @@ def CLEAR( if evaluation_protocol == "streaming": assert seed is None, ( - "Seed for train/test split is not required " - "under streaming protocol" + "Seed for train/test split is not required " "under streaming protocol" ) train_split = "all" test_split = "all" @@ -300,7 +299,7 @@ def backward_transfer(self, matrix): seed_list = [None] else: seed_list = SEED_LIST - + for f in [None] + CLEAR_FEATURE_TYPES[data_name]: t = transform if f is None else None for seed in seed_list: diff --git a/avalanche/benchmarks/classic/cmnist.py b/avalanche/benchmarks/classic/cmnist.py index d53365dba..54f58d774 100644 --- a/avalanche/benchmarks/classic/cmnist.py +++ b/avalanche/benchmarks/classic/cmnist.py @@ -26,21 +26,16 @@ from avalanche.benchmarks.classic.classic_benchmarks_utils import ( check_vision_benchmark, ) -from avalanche.benchmarks.datasets.external_datasets.mnist import \ - get_mnist_dataset +from avalanche.benchmarks.datasets.external_datasets.mnist import get_mnist_dataset from avalanche.benchmarks.utils import ( make_classification_dataset, DefaultTransformGroups, ) from avalanche.benchmarks.utils.data import make_avalanche_dataset -_default_mnist_train_transform = Compose( - [Normalize((0.1307,), (0.3081,))] -) +_default_mnist_train_transform = Compose([Normalize((0.1307,), (0.3081,))]) -_default_mnist_eval_transform = Compose( - [Normalize((0.1307,), (0.3081,))] -) +_default_mnist_eval_transform = Compose([Normalize((0.1307,), (0.3081,))]) class PixelsPermutation(object): @@ -60,7 +55,7 @@ def __call__(self, img: Union[Image, Tensor]): is_image = isinstance(img, Image) if (not is_image) and (not isinstance(img, Tensor)): raise ValueError("Invalid input: must be a PIL image or a Tensor") - + image_as_tensor: Tensor if is_image: image_as_tensor = self._to_tensor(img) @@ -68,7 +63,8 @@ def __call__(self, img: Union[Image, Tensor]): image_as_tensor = img image_as_tensor = image_as_tensor.view(-1)[self.permutation].view( - *image_as_tensor.shape) + *image_as_tensor.shape + ) if is_image: img = self._to_image(image_as_tensor) @@ -235,9 +231,7 @@ def PermutedMNIST( # for every incremental experience for _ in range(n_experiences): # choose a random permutation of the pixels in the image - idx_permute = torch.from_numpy(rng_permute.permutation(784)).type( - torch.int64 - ) + idx_permute = torch.from_numpy(rng_permute.permutation(784)).type(torch.int64) permutation = PixelsPermutation(idx_permute) @@ -392,21 +386,15 @@ def RotatedMNIST( import sys print("Split MNIST") - benchmark_instance = SplitMNIST( - 5, train_transform=None, eval_transform=None - ) + benchmark_instance = SplitMNIST(5, train_transform=None, eval_transform=None) check_vision_benchmark(benchmark_instance) print("Permuted MNIST") - benchmark_instance = PermutedMNIST( - 5, train_transform=None, eval_transform=None - ) + benchmark_instance = PermutedMNIST(5, train_transform=None, eval_transform=None) check_vision_benchmark(benchmark_instance) print("Rotated MNIST") - 
benchmark_instance = RotatedMNIST( - 5, train_transform=None, eval_transform=None - ) + benchmark_instance = RotatedMNIST(5, train_transform=None, eval_transform=None) check_vision_benchmark(benchmark_instance) sys.exit(0) diff --git a/avalanche/benchmarks/classic/comniglot.py b/avalanche/benchmarks/classic/comniglot.py index 7d42d52cf..925ec37af 100644 --- a/avalanche/benchmarks/classic/comniglot.py +++ b/avalanche/benchmarks/classic/comniglot.py @@ -58,7 +58,8 @@ def __call__(self, img: Union[Image, Tensor]): image_as_tensor = img image_as_tensor = image_as_tensor.view(-1)[self.permutation].view( - *image_as_tensor.shape) + *image_as_tensor.shape + ) if is_image: img = self._to_image(image_as_tensor) @@ -268,9 +269,7 @@ def _get_omniglot_dataset(dataset_root): import sys print("Split Omniglot") - benchmark_instance = SplitOmniglot( - 4, train_transform=None, eval_transform=None - ) + benchmark_instance = SplitOmniglot(4, train_transform=None, eval_transform=None) check_vision_benchmark(benchmark_instance) sys.exit(0) diff --git a/avalanche/benchmarks/classic/core50.py b/avalanche/benchmarks/classic/core50.py index f7e39b5b5..70809cd9b 100644 --- a/avalanche/benchmarks/classic/core50.py +++ b/avalanche/benchmarks/classic/core50.py @@ -52,9 +52,7 @@ normalize = Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) -_default_train_transform = Compose( - [ToTensor(), RandomHorizontalFlip(), normalize] -) +_default_train_transform = Compose([ToTensor(), RandomHorizontalFlip(), normalize]) _default_eval_transform = Compose([ToTensor(), normalize]) @@ -113,8 +111,7 @@ def CORe50( """ assert 0 <= run <= 9, ( - "Pre-defined run of CORe50 are only 10. Indicate " - "a number between 0 and 9." + "Pre-defined run of CORe50 are only 10. Indicate " "a number between 0 and 9." 
) assert scenario in nbatch.keys(), ( "The selected scenario is note " diff --git a/avalanche/benchmarks/classic/ctiny_imagenet.py b/avalanche/benchmarks/classic/ctiny_imagenet.py index e7de15137..934846e4b 100644 --- a/avalanche/benchmarks/classic/ctiny_imagenet.py +++ b/avalanche/benchmarks/classic/ctiny_imagenet.py @@ -24,18 +24,14 @@ [ transforms.RandomHorizontalFlip(), transforms.ToTensor(), - transforms.Normalize( - (0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010) - ), + transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)), ] ) _default_eval_transform = transforms.Compose( [ transforms.ToTensor(), - transforms.Normalize( - (0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010) - ), + transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)), ] ) diff --git a/avalanche/benchmarks/classic/ctrl.py b/avalanche/benchmarks/classic/ctrl.py index b896ccf48..e42d880c7 100644 --- a/avalanche/benchmarks/classic/ctrl.py +++ b/avalanche/benchmarks/classic/ctrl.py @@ -102,23 +102,21 @@ def CTrL( files.append((sample_path, label.item())) common_root, exp_paths_list = common_paths_root(files) - paths_dataset: PathsDataset[Image, int] = \ - PathsDataset(common_root, exp_paths_list) - dataset: SupervisedClassificationDataset = \ - make_classification_dataset( - paths_dataset, - task_labels=task_labels, - transform=transforms.Compose( - [transforms.ToTensor(), trans] - ), - ) + paths_dataset: PathsDataset[Image, int] = PathsDataset( + common_root, exp_paths_list + ) + dataset: SupervisedClassificationDataset = make_classification_dataset( + paths_dataset, + task_labels=task_labels, + transform=transforms.Compose([transforms.ToTensor(), trans]), + ) else: dataset = make_tensor_classification_dataset( samples, labels.squeeze(1), task_labels=task_labels, transform=trans, - targets=1 # Use the 2nd tensor as targets + targets=1, # Use the 2nd tensor as targets ) exp.append(dataset) if stream_name == "s_long": diff --git a/avalanche/benchmarks/classic/endless_cl_sim.py b/avalanche/benchmarks/classic/endless_cl_sim.py index 367b082ee..860d940fd 100644 --- a/avalanche/benchmarks/classic/endless_cl_sim.py +++ b/avalanche/benchmarks/classic/endless_cl_sim.py @@ -136,14 +136,10 @@ def EndlessCLSim( eval_data.transform = eval_transform train_datasets.append( - make_classification_dataset( - dataset=train_data, task_labels=task_order[i] - ) + make_classification_dataset(dataset=train_data, task_labels=task_order[i]) ) eval_datasets.append( - make_classification_dataset( - dataset=eval_data, task_labels=task_order[i] - ) + make_classification_dataset(dataset=eval_data, task_labels=task_order[i]) ) scenario_obj = dataset_benchmark(train_datasets, eval_datasets) diff --git a/avalanche/benchmarks/classic/ex_model.py b/avalanche/benchmarks/classic/ex_model.py index e1bf9d837..138a5f72b 100644 --- a/avalanche/benchmarks/classic/ex_model.py +++ b/avalanche/benchmarks/classic/ex_model.py @@ -125,9 +125,7 @@ def __init__(self, scenario="split", run_id=0): CURR_SEED = SEED_BENCHMARK_RUNS[run_id] - transforms = Compose( - [Resize(32), Normalize((0.1307,), (0.3081,))] - ) + transforms = Compose([Resize(32), Normalize((0.1307,), (0.3081,))]) if scenario == "split": benchmark = SplitMNIST( n_experiences=5, @@ -149,9 +147,7 @@ def __init__(self, scenario="split", run_id=0): ll = len(benchmark.train_stream) base_model = LeNet5(10, 1) - experts = _load_expert_models( - f"{scenario}_mnist", base_model, run_id, ll - ) + experts = _load_expert_models(f"{scenario}_mnist", base_model, run_id, ll) 
super().__init__(benchmark, experts) @@ -209,12 +205,8 @@ def __init__(self, scenario="ni", run_id=0): ) elif scenario == "joint": core50nc = CORe50(scenario="nc") - train_cat = concat_datasets( - [e.dataset for e in core50nc.train_stream] - ) - test_cat = concat_datasets( - [e.dataset for e in core50nc.test_stream] - ) + train_cat = concat_datasets([e.dataset for e in core50nc.train_stream]) + test_cat = concat_datasets([e.dataset for e in core50nc.test_stream]) benchmark = nc_benchmark( train_cat, test_cat, n_experiences=1, task_labels=False ) @@ -227,9 +219,7 @@ def __init__(self, scenario="ni", run_id=0): nn.Dropout(0.2), nn.Linear(base_model.last_channel, 50), ) - experts = _load_expert_models( - f"{scenario}_core50", base_model, run_id, ll - ) + experts = _load_expert_models(f"{scenario}_core50", base_model, run_id, ll) super().__init__(benchmark, experts) @@ -271,9 +261,7 @@ def __init__(self, scenario="split", run_id=0): ll = len(benchmark.train_stream) base_model = SlimResNet18(10) - experts = _load_expert_models( - f"{scenario}_cifar10", base_model, run_id, ll - ) + experts = _load_expert_models(f"{scenario}_cifar10", base_model, run_id, ll) super().__init__(benchmark, experts) diff --git a/avalanche/benchmarks/classic/openloris.py b/avalanche/benchmarks/classic/openloris.py index 38648efa9..2e560bd8b 100644 --- a/avalanche/benchmarks/classic/openloris.py +++ b/avalanche/benchmarks/classic/openloris.py @@ -112,9 +112,7 @@ def OpenLORIS( train_failists_paths = [] for i in range(nbatch[factor]): train_failists_paths.append( - dataset_root - / filelists_bp - / ("train_batch_" + str(i).zfill(2) + ".txt") + dataset_root / filelists_bp / ("train_batch_" + str(i).zfill(2) + ".txt") ) factor_obj = create_generic_benchmark_from_filelists( diff --git a/avalanche/benchmarks/classic/stream51.py b/avalanche/benchmarks/classic/stream51.py index 970e9b00f..18044ee9e 100644 --- a/avalanche/benchmarks/classic/stream51.py +++ b/avalanche/benchmarks/classic/stream51.py @@ -15,7 +15,8 @@ from avalanche.benchmarks.datasets import Stream51 from avalanche.benchmarks.scenarios.generic_benchmark_creation import ( - create_generic_benchmark_from_paths, FileAndLabel + create_generic_benchmark_from_paths, + FileAndLabel, ) from torchvision import transforms import math @@ -131,9 +132,7 @@ def CLStream51( # get train and test sets and order them by benchmark train_set = Stream51(root=dataset_root, train=True, download=download) test_set = Stream51(root=dataset_root, train=False, download=download) - samples = Stream51.make_dataset( - train_set.samples, ordering=scenario, seed=seed - ) + samples = Stream51.make_dataset(train_set.samples, ordering=scenario, seed=seed) dataset_root = train_set.root # set appropriate train parameters @@ -143,9 +142,7 @@ def CLStream51( # compute number of tasks if eval_num is None and scenario == "instance": eval_num = 30000 - num_tasks = math.ceil( - len(train_set) / eval_num - ) # evaluate every 30000 samples + num_tasks = math.ceil(len(train_set) / eval_num) # evaluate every 30000 samples elif eval_num is None and scenario == "class_instance": eval_num = 10 num_tasks = math.ceil(51 / eval_num) # evaluate every 10 classes @@ -158,9 +155,7 @@ def CLStream51( test_filelists_paths: List[List[FileAndLabel]] = [] train_filelists_paths: List[List[FileAndLabel]] = [] - test_ood_filelists_paths: Optional[ - List[List[FileAndLabel]] - ] = [] + test_ood_filelists_paths: Optional[List[List[FileAndLabel]]] = [] if scenario == "instance": # break files into task lists based on eval_num 
samples train_filelists_paths = [] @@ -185,16 +180,18 @@ def CLStream51( start = end # use all test data for instance ordering - test_filelists_paths = [[ - ( - os.path.join(dataset_root, test_set.samples[j][-1]), - test_set.samples[j][0], - _adjust_bbox( - test_set.samples[j][-3], test_set.samples[j][-2], ratio - ), - ) - for j in range(len(test_set)) - ]] + test_filelists_paths = [ + [ + ( + os.path.join(dataset_root, test_set.samples[j][-1]), + test_set.samples[j][0], + _adjust_bbox( + test_set.samples[j][-3], test_set.samples[j][-2], ratio + ), + ) + for j in range(len(test_set)) + ] + ] test_ood_filelists_paths = None # no ood testing for instance ordering elif scenario == "class_instance": # break files into task lists based on classes @@ -274,9 +271,7 @@ def CLStream51( train_filelists_paths = [ [(j[0], j[1]) for j in i] for i in train_filelists_paths ] - test_filelists_paths = [ - [(j[0], j[1]) for j in i] for i in test_filelists_paths - ] + test_filelists_paths = [[(j[0], j[1]) for j in i] for i in test_filelists_paths] if scenario == "class_instance": assert test_ood_filelists_paths is not None test_ood_filelists_paths = [ diff --git a/avalanche/benchmarks/datasets/clear/clear.py b/avalanche/benchmarks/datasets/clear/clear.py index 2ed1e9347..03e723733 100644 --- a/avalanche/benchmarks/datasets/clear/clear.py +++ b/avalanche/benchmarks/datasets/clear/clear.py @@ -26,14 +26,12 @@ from avalanche.benchmarks.utils import default_flist_reader from avalanche.benchmarks.datasets.clear import clear_data -_CLEAR_DATA_SPLITS = {"clear10", "clear100", - "clear10_neurips2021", "clear100_cvpr2022"} +_CLEAR_DATA_SPLITS = {"clear10", "clear100", "clear10_neurips2021", "clear100_cvpr2022"} CLEAR_FEATURE_TYPES = { "clear10": ["moco_b0"], "clear100": ["moco_b0"], - "clear10_neurips2021": ["moco_b0", - "moco_imagenet", "byol_imagenet", "imagenet"], + "clear10_neurips2021": ["moco_b0", "moco_imagenet", "byol_imagenet", "imagenet"], "clear100_cvpr2022": ["moco_b0"], } @@ -80,9 +78,7 @@ def __init__( self.module = clear_data self._paths_and_targets: List[List[Tuple[str, int]]] = [] - super(CLEARDataset, self).__init__( - root, download=download, verbose=True - ) + super(CLEARDataset, self).__init__(root, download=download, verbose=True) self._load_dataset() def _download_dataset(self) -> None: @@ -93,30 +89,25 @@ def _download_dataset(self) -> None: print("Downloading " + name + "...") url = os.path.join(base_url, name) self._download_and_extract_archive( - url=url, - file_name=name, - checksum=None, - remove_archive=True + url=url, file_name=name, checksum=None, remove_archive=True ) def _load_metadata(self) -> bool: - if '_' in self.data_name: + if "_" in self.data_name: return self._load_metadata_old() else: return self._load_metadata_new() def _load_metadata_old(self) -> bool: """ - Load Metadata for clear10_neurips2021 and clear100_cvpr2022 + Load Metadata for clear10_neurips2021 and clear100_cvpr2022 """ train_folder_path = self.root / "training_folder" if not train_folder_path.exists(): print(f"{train_folder_path} does not exist. ") return False - self.bucket_indices = _load_json( - train_folder_path / "bucket_indices.json" - ) + self.bucket_indices = _load_json(train_folder_path / "bucket_indices.json") class_names_file = self.root / "class_names.txt" self.class_names = class_names_file.read_text().split("\n") @@ -153,7 +144,7 @@ def _load_metadata_new(self) -> bool: if not train_folder_path.exists(): print(f"{train_folder_path} does not exist. 
") return False - + self.labeled_metadata = _load_json( train_folder_path / "labeled_metadata.json" ) @@ -275,7 +266,7 @@ def __init__( ) def _load_metadata(self) -> bool: - if '_' in self.data_name: + if "_" in self.data_name: return self._load_metadata_old() else: return self._load_metadata_new() @@ -319,7 +310,7 @@ def _load_metadata_new(self) -> bool: if not super(_CLEARImage, self)._load_metadata_new(): print("CLEAR has not yet been downloaded") return False - + self.paths = [] self.targets = [] self._paths_and_targets = [] @@ -329,7 +320,7 @@ def _load_metadata_new(self) -> bool: if not train_folder_path.exists(): print(f"{train_folder_path} does not exist. ") return False - + self.labeled_metadata = _load_json( train_folder_path / "labeled_metadata.json" ) @@ -358,8 +349,9 @@ def _load_metadata_new(self) -> bool: self.targets.append(target) return True - def get_paths_and_targets(self, root_appended=True) -> \ - Sequence[Sequence[Tuple[Union[str, Path], int]]]: + def get_paths_and_targets( + self, root_appended=True + ) -> Sequence[Sequence[Tuple[Union[str, Path], int]]]: """Return self._paths_and_targets with root appended or not""" if not root_appended: return self._paths_and_targets @@ -446,15 +438,14 @@ def __init__( assert feature_type in CLEAR_FEATURE_TYPES[data_name] self.target_transform = target_transform - self.tensors_and_targets: List[Tuple[List[torch.Tensor], - List[int]]] = [] + self.tensors_and_targets: List[Tuple[List[torch.Tensor], List[int]]] = [] super(_CLEARFeature, self).__init__( root, data_name=data_name, download=download, verbose=True ) def _load_metadata(self) -> bool: - if '_' in self.data_name: + if "_" in self.data_name: return self._load_metadata_old() else: return self._load_metadata_new() @@ -483,9 +474,7 @@ def _load_metadata_old(self) -> bool: try: tensors, targets = torch.load(f_path) if split_name: - indices_json = ( - split_folder_path / str(bucket_index) / split_name - ) + indices_json = split_folder_path / str(bucket_index) / split_name chosen_indices = _load_json(indices_json) tensors = [tensors[i] for i in chosen_indices] targets = [targets[i] for i in chosen_indices] @@ -507,7 +496,7 @@ def _load_metadata_new(self) -> bool: if not super(_CLEARFeature, self)._load_metadata_new(): print("CLEAR has not yet been downloaded") return False - + self.tensors_and_targets = [] splits = ["test", "train"] if self.split == "all" else [self.split] for split in splits: @@ -558,7 +547,6 @@ def __len__(self): if __name__ == "__main__": - # this little example script can be used to visualize the first image # loaded from the dataset. 
from torch.utils.data.dataloader import DataLoader @@ -577,8 +565,7 @@ def __len__(self): normalize, ] ) - data_names = ["clear10_neurips2021", "clear100_cvpr2022", - "clear10", "clear100"] + data_names = ["clear10_neurips2021", "clear100_cvpr2022", "clear10", "clear100"] for data_name in data_names: root = f"../avalanche_datasets/{data_name}" print(root) @@ -620,9 +607,8 @@ def __len__(self): split="train", seed=0, ) - print("clear10 size (train features): ", - len(clear_dataset_train_feature)) - if '_' in data_name: + print("clear10 size (train features): ", len(clear_dataset_train_feature)) + if "_" in data_name: clear_dataset_all_feature = _CLEARFeature( root=root, data_name=data_name, @@ -639,10 +625,10 @@ def __len__(self): split="test", seed=0, ) - print(f"{data_name} size (test features): ", - len(clear_dataset_test_feature)) - print(f"{data_name} size (all features): ", - len(clear_dataset_all_feature)) + print( + f"{data_name} size (test features): ", len(clear_dataset_test_feature) + ) + print(f"{data_name} size (all features): ", len(clear_dataset_all_feature)) print("Classes are: ") for i, name in enumerate(clear_dataset_test.class_names): print(f"{i} : {name}") diff --git a/avalanche/benchmarks/datasets/clear/clear_data.py b/avalanche/benchmarks/datasets/clear/clear_data.py index 2067785ed..a1af74f03 100644 --- a/avalanche/benchmarks/datasets/clear/clear_data.py +++ b/avalanche/benchmarks/datasets/clear/clear_data.py @@ -18,7 +18,7 @@ ( "clear10-test.zip", # name "https://clear-challenge.s3.us-east-2.amazonaws.com", - ) + ), ] clear100 = [ ( @@ -28,7 +28,7 @@ ( "clear100-test.zip", # name "https://clear-challenge.s3.us-east-2.amazonaws.com", - ) + ), ] clear10_neurips2021 = [ ( diff --git a/avalanche/benchmarks/datasets/core50/core50.py b/avalanche/benchmarks/datasets/core50/core50.py index 8df0e94d0..cefce9fb5 100644 --- a/avalanche/benchmarks/datasets/core50/core50.py +++ b/avalanche/benchmarks/datasets/core50/core50.py @@ -43,7 +43,6 @@ def __init__( mini=False, object_level=True, ): - """Creates an instance of the CORe50 dataset. :param root: root for the datasets data. Defaults to None, which @@ -65,9 +64,7 @@ def __init__( if root is None: root = default_dataset_location("core50") - super(CORe50Dataset, self).__init__( - root, download=download, verbose=True - ) + super(CORe50Dataset, self).__init__(root, download=download, verbose=True) self.train = train # training set or test set self.transform = transform @@ -151,9 +148,7 @@ def _load_metadata(self) -> bool: self.all_targets = pkl.load(f) self.train_test_targets = [] for i in range(self._nbatch + 1): - self.train_test_targets += self.all_targets[self._scen][ - self._run - ][i] + self.train_test_targets += self.all_targets[self._scen][self._run][i] if self.verbose: print("Loading LUP...") @@ -237,9 +232,7 @@ def _objlab2cat(self, label, scen, run): based on the scenario.""" if scen == "nc": - return core50_data.name2cat[ - self.labels2names["nc"][run][label][:-1] - ] + return core50_data.name2cat[self.labels2names["nc"][run][label][:-1]] else: return int(label) // 5 @@ -255,7 +248,6 @@ def CORe50(*args, **kwargs): if __name__ == "__main__": - # this litte example script can be used to visualize the first image # leaded from the dataset. 
from torch.utils.data.dataloader import DataLoader diff --git a/avalanche/benchmarks/datasets/core50/core50_data.py b/avalanche/benchmarks/datasets/core50/core50_data.py index 2d65f534c..99aa023b0 100644 --- a/avalanche/benchmarks/datasets/core50/core50_data.py +++ b/avalanche/benchmarks/datasets/core50/core50_data.py @@ -53,7 +53,7 @@ ( "core50_imgs.npz", "http://bias.csr.unibo.it/maltoni/download/core50/core50_imgs.npz", - "3689d65d0a1c760b87821b114c8c4c6c" + "3689d65d0a1c760b87821b114c8c4c6c", ), ( "core50_32x32.zip", diff --git a/avalanche/benchmarks/datasets/cub200/cub200.py b/avalanche/benchmarks/datasets/cub200/cub200.py index 8ba45b316..4fb21966f 100644 --- a/avalanche/benchmarks/datasets/cub200/cub200.py +++ b/avalanche/benchmarks/datasets/cub200/cub200.py @@ -41,12 +41,10 @@ class CUB200(PathsDataset, DownloadableDataset): images_folder = "CUB_200_2011/images" official_url = ( - "http://www.vision.caltech.edu/visipedia-data/CUB-200-2011/" - "CUB_200_2011.tgz" + "http://www.vision.caltech.edu/visipedia-data/CUB-200-2011/" "CUB_200_2011.tgz" ) gdrive_url = ( - "https://drive.google.com/u/0/uc?id=" - "1hbzc_P1FuxMkcabkgn9ZKinBwW683j45" + "https://drive.google.com/u/0/uc?id=" "1hbzc_P1FuxMkcabkgn9ZKinBwW683j45" ) filename = "CUB_200_2011.tgz" tgz_md5 = "97eceeb196236b17998738112f37df78" @@ -88,9 +86,7 @@ def __init__( self.root: Path = Path(root) self._images: List[Tuple[str, int]] - DownloadableDataset.__init__( - self, root, download=download, verbose=True - ) + DownloadableDataset.__init__(self, root, download=download, verbose=True) self._load_dataset() PathsDataset.__init__( @@ -183,7 +179,6 @@ def _load_metadata(self): if __name__ == "__main__": - """Simple test that will start if you run this script directly""" import matplotlib.pyplot as plt diff --git a/avalanche/benchmarks/datasets/dataset_utils.py b/avalanche/benchmarks/datasets/dataset_utils.py index 7db22e4bd..575093176 100644 --- a/avalanche/benchmarks/datasets/dataset_utils.py +++ b/avalanche/benchmarks/datasets/dataset_utils.py @@ -55,9 +55,7 @@ def maybe_init_config_file(): if os.path.exists(AVALANCHE_CONFIG_FILENAME): return os.makedirs(AVALANCHE_CONFIG_ROOT, exist_ok=True) - default_config = { - "dataset_location": os.path.expanduser("~/.avalanche/data") - } + default_config = {"dataset_location": os.path.expanduser("~/.avalanche/data")} with open(AVALANCHE_CONFIG_FILENAME, "w") as f: json.dump(default_config, f, indent=4) diff --git a/avalanche/benchmarks/datasets/downloadable_dataset.py b/avalanche/benchmarks/datasets/downloadable_dataset.py index 5054cc6ab..da6560b8a 100644 --- a/avalanche/benchmarks/datasets/downloadable_dataset.py +++ b/avalanche/benchmarks/datasets/downloadable_dataset.py @@ -26,7 +26,7 @@ from avalanche.benchmarks.datasets.dataset_utils import default_dataset_location -T_co = TypeVar('T_co', covariant=True) +T_co = TypeVar("T_co", covariant=True) class DownloadableDataset(Dataset[T_co], ABC): @@ -215,9 +215,7 @@ def _cleanup_dataset_root(self): shutil.rmtree(self.root) self.root.mkdir(parents=True, exist_ok=True) - def _download_file( - self, url: str, file_name: str, checksum: Optional[str] - ) -> Path: + def _download_file(self, url: str, file_name: str, checksum: Optional[str]) -> Path: """ Utility method that can be used to download and verify a file. @@ -310,8 +308,10 @@ def _download_and_extract_archive( remove_finished=remove_archive, ) except BaseException: - print('Error while downloading the dataset archive. 
' - 'The partially downloaded archive will be removed.') + print( + "Error while downloading the dataset archive. " + "The partially downloaded archive will be removed." + ) attempt_fpath = self.root / file_name attempt_fpath.unlink(missing_ok=True) raise diff --git a/avalanche/benchmarks/datasets/endless_cl_sim/endless_cl_sim.py b/avalanche/benchmarks/datasets/endless_cl_sim/endless_cl_sim.py index cb751b015..78ec4b765 100644 --- a/avalanche/benchmarks/datasets/endless_cl_sim/endless_cl_sim.py +++ b/avalanche/benchmarks/datasets/endless_cl_sim/endless_cl_sim.py @@ -160,9 +160,7 @@ def __init__( self.classmap = self._load_classmap(classmap_file=self.classmap_file) # Init labelmap - self.labelmap = self._load_labelmap( - labelmap_file=self.segmentation_file - ) + self.labelmap = self._load_labelmap(labelmap_file=self.segmentation_file) return def _pil_loader(self, file_path, is_target=False): @@ -319,9 +317,7 @@ def __init__( if scenario is None and download: raise ValueError("No scenario defined to download!") - super(EndlessCLSimDataset, self).__init__( - root, download=download, verbose=True - ) + super(EndlessCLSimDataset, self).__init__(root, download=download, verbose=True) self.scenario = scenario self.patch_size = patch_size @@ -479,17 +475,13 @@ def _prepare_video_subsequence_datasets(self, path) -> bool: dir_name = data_content.split(os.path.sep)[-1] if "Color" == dir_name: # Extend color path - color_path = ( - data_content + os.path.sep + "0" + os.path.sep - ) + color_path = data_content + os.path.sep + "0" + os.path.sep # Get all files for file_name in sorted(os.listdir(color_path)): image_paths.append(color_path + file_name) elif "Seg" == dir_name: # Extend seg path - seg_path = ( - data_content + os.path.sep + "0" + os.path.sep - ) + seg_path = data_content + os.path.sep + "0" + os.path.sep # Get all files for file_name in sorted(os.listdir(seg_path)): target_paths.append(seg_path + file_name) @@ -515,9 +507,7 @@ def _prepare_video_subsequence_datasets(self, path) -> bool: if self.verbose: print("All metadata checks complete!") - sequence_indices = self._load_sequence_indices( - sequence_file=sequence_file - ) + sequence_indices = self._load_sequence_indices(sequence_file=sequence_file) if self.verbose: print("Sequence file loaded..") @@ -532,9 +522,7 @@ def _prepare_video_subsequence_datasets(self, path) -> bool: image_subsequence_paths = image_paths[last_index:next_index] target_subsequence_paths = target_paths[last_index:next_index] - assert len(image_subsequence_paths) == len( - target_subsequence_paths - ) + assert len(image_subsequence_paths) == len(target_subsequence_paths) # Create subsequence dataset subsequence_dataset = VideoSubSequence( @@ -587,14 +575,10 @@ def _download_dataset(self) -> None: extract_root_file_list = glob.glob(str(extract_root) + "/*") for file_name in extract_root_file_list: sub_file_name = file_name.split("/")[-1] - extract_subsubdir = ( - extract_subdir + "/" + sub_file_name.split(".")[0] - ) + extract_subsubdir = extract_subdir + "/" + sub_file_name.split(".")[0] if self.verbose: print(f"Extracting: {sub_file_name} to {extract_subdir}") - self._extract_archive( - file_name, extract_subdir, remove_archive=True - ) + self._extract_archive(file_name, extract_subdir, remove_archive=True) if self.verbose: print("Extraction complete!") @@ -626,9 +610,7 @@ def _load_metadata(self) -> bool: if not self.semseg: is_subsequence_preparation_done = ( - self._prepare_classification_subsequence_datasets( - match_path - ) + 
self._prepare_classification_subsequence_datasets(match_path) ) else: is_subsequence_preparation_done = ( @@ -644,13 +626,11 @@ def _load_metadata(self) -> bool: # If a 'generic'-endless-cl-sim-scenario has been selected if not self.semseg: is_subsequence_preparation_done = ( - self._prepare_classification_subsequence_datasets( - str(self.root) - ) + self._prepare_classification_subsequence_datasets(str(self.root)) ) else: - is_subsequence_preparation_done = ( - self._prepare_video_subsequence_datasets(str(self.root)) + is_subsequence_preparation_done = self._prepare_video_subsequence_datasets( + str(self.root) ) if is_subsequence_preparation_done and self.verbose: diff --git a/avalanche/benchmarks/datasets/external_datasets/cifar.py b/avalanche/benchmarks/datasets/external_datasets/cifar.py index bb103cf74..93c344dc6 100644 --- a/avalanche/benchmarks/datasets/external_datasets/cifar.py +++ b/avalanche/benchmarks/datasets/external_datasets/cifar.py @@ -25,30 +25,33 @@ def get_cifar100_dataset(dataset_root): def load_CIFAR100(root, train, transform, target_transform): - return CIFAR100(root=root, train=train, transform=transform, - target_transform=target_transform) + return CIFAR100( + root=root, train=train, transform=transform, target_transform=target_transform + ) @dill.register(CIFAR100) def save_CIFAR100(pickler, obj: CIFAR100): - pickler.save_reduce(load_CIFAR100, - (obj.root, obj.train, obj.transform, - obj.target_transform), obj=obj) + pickler.save_reduce( + load_CIFAR100, + (obj.root, obj.train, obj.transform, obj.target_transform), + obj=obj, + ) def load_CIFAR10(root, train, transform, target_transform): - return CIFAR10(root=root, train=train, transform=transform, - target_transform=target_transform) + return CIFAR10( + root=root, train=train, transform=transform, target_transform=target_transform + ) @dill.register(CIFAR10) def save_CIFAR10(pickler, obj: CIFAR10): - pickler.save_reduce(load_CIFAR10, - (obj.root, obj.train, obj.transform, - obj.target_transform), obj=obj) + pickler.save_reduce( + load_CIFAR10, + (obj.root, obj.train, obj.transform, obj.target_transform), + obj=obj, + ) -__all__ = [ - 'get_cifar10_dataset', - 'get_cifar100_dataset' -] +__all__ = ["get_cifar10_dataset", "get_cifar100_dataset"] diff --git a/avalanche/benchmarks/datasets/external_datasets/fmnist.py b/avalanche/benchmarks/datasets/external_datasets/fmnist.py index 90a164b1a..413e78562 100644 --- a/avalanche/benchmarks/datasets/external_datasets/fmnist.py +++ b/avalanche/benchmarks/datasets/external_datasets/fmnist.py @@ -14,17 +14,18 @@ def get_fmnist_dataset(dataset_root): def load_FashionMNIST(root, train, transform, target_transform): - return FashionMNIST(root=root, train=train, transform=transform, - target_transform=target_transform) + return FashionMNIST( + root=root, train=train, transform=transform, target_transform=target_transform + ) @dill.register(FashionMNIST) def save_FashionMNIST(pickler, obj: FashionMNIST): - pickler.save_reduce(load_FashionMNIST, - (obj.root, obj.train, obj.transform, - obj.target_transform), obj=obj) + pickler.save_reduce( + load_FashionMNIST, + (obj.root, obj.train, obj.transform, obj.target_transform), + obj=obj, + ) -__all__ = [ - 'get_fmnist_dataset' -] +__all__ = ["get_fmnist_dataset"] diff --git a/avalanche/benchmarks/datasets/external_datasets/mnist.py b/avalanche/benchmarks/datasets/external_datasets/mnist.py index 5e8e1939b..91bbc1d13 100644 --- a/avalanche/benchmarks/datasets/external_datasets/mnist.py +++ 
b/avalanche/benchmarks/datasets/external_datasets/mnist.py @@ -12,7 +12,7 @@ def __getitem__(self, index: int): Returns: tuple: (image, target) where target is index of the target class. """ - img = self.data[index].float().unsqueeze(0) / 255. + img = self.data[index].float().unsqueeze(0) / 255.0 target = int(self.targets[index]) if self.transform is not None: @@ -36,17 +36,16 @@ def get_mnist_dataset(dataset_root): def load_MNIST(root, train, transform, target_transform): - return TensorMNIST(root=root, train=train, transform=transform, - target_transform=target_transform) + return TensorMNIST( + root=root, train=train, transform=transform, target_transform=target_transform + ) @dill.register(TensorMNIST) def save_MNIST(pickler, obj: TensorMNIST): - pickler.save_reduce(load_MNIST, - (obj.root, obj.train, obj.transform, - obj.target_transform), obj=obj) + pickler.save_reduce( + load_MNIST, (obj.root, obj.train, obj.transform, obj.target_transform), obj=obj + ) -__all__ = [ - 'get_mnist_dataset' -] +__all__ = ["get_mnist_dataset"] diff --git a/avalanche/benchmarks/datasets/imagenet_data.py b/avalanche/benchmarks/datasets/imagenet_data.py index 4601afdf5..c5161e004 100644 --- a/avalanche/benchmarks/datasets/imagenet_data.py +++ b/avalanche/benchmarks/datasets/imagenet_data.py @@ -2201,9 +2201,7 @@ ] IMAGENET_TORCHVISION_CLASS_TO_IDX: Dict[str, int] = { - cls: idx - for idx, clss in enumerate(IMAGENET_TORCHVISION_CLASSES) - for cls in clss + cls: idx for idx, clss in enumerate(IMAGENET_TORCHVISION_CLASSES) for cls in clss } __all__ = [ diff --git a/avalanche/benchmarks/datasets/inaturalist/inaturalist.py b/avalanche/benchmarks/datasets/inaturalist/inaturalist.py index a4fb9566f..78d78b54c 100644 --- a/avalanche/benchmarks/datasets/inaturalist/inaturalist.py +++ b/avalanche/benchmarks/datasets/inaturalist/inaturalist.py @@ -138,7 +138,6 @@ def __init__( supcat = cat["supercategory"] # Is parent directory if self.supcats is None or supcat in self.supcats: # Made selection - # Add category to supercategory if supcat not in self.cats_per_supcat: self.cats_per_supcat[supcat] = set() diff --git a/avalanche/benchmarks/datasets/inaturalist/inaturalist_data.py b/avalanche/benchmarks/datasets/inaturalist/inaturalist_data.py index 44d235e34..b84b830ca 100644 --- a/avalanche/benchmarks/datasets/inaturalist/inaturalist_data.py +++ b/avalanche/benchmarks/datasets/inaturalist/inaturalist_data.py @@ -141,9 +141,7 @@ def __init__(self, data_folder="data/", trainval=True): if os.path.isabs(data_folder): self.data_folder = data_folder else: - self.data_folder = os.path.join( - os.path.dirname(__file__), data_folder - ) + self.data_folder = os.path.join(os.path.dirname(__file__), data_folder) try: # Create target Directory for INATURALIST data @@ -172,8 +170,13 @@ def download_inaturalist(self): self.log.info("Downloading " + name[1] + "...") save_name = os.path.join(self.data_folder, name[0]) if not os.path.exists(save_name): - with TqdmUpTo(unit='B', unit_scale=True, unit_divisor=1024, - miniters=1, desc=name[0]) as t: + with TqdmUpTo( + unit="B", + unit_scale=True, + unit_divisor=1024, + miniters=1, + desc=name[0], + ) as t: urlretrieve(name[1], save_name, reporthook=t.update_to) else: self.log.info("Skipping download, exists: ", save_name) diff --git a/avalanche/benchmarks/datasets/lvis_dataset/lvis_dataset.py b/avalanche/benchmarks/datasets/lvis_dataset/lvis_dataset.py index a63696d7f..93b3b43d4 100644 --- a/avalanche/benchmarks/datasets/lvis_dataset/lvis_dataset.py +++ 
b/avalanche/benchmarks/datasets/lvis_dataset/lvis_dataset.py @@ -125,9 +125,7 @@ def _load_metadata(self) -> bool: # Try loading an image if len(self.img_ids) > 0: img_id = self.img_ids[0] - img_dict: LVISImgEntry = self.lvis_api.load_imgs(ids=[img_id])[ - 0 - ] + img_dict: LVISImgEntry = self.lvis_api.load_imgs(ids=[img_id])[0] assert self._load_img(img_dict) is not None except BaseException: if must_load_api: @@ -244,10 +242,7 @@ class LVISImgTargets(TypedDict): class LVISDetectionTargets(Sequence[List[LVISImgTargets]]): - def __init__( - self, - lvis_api: LVIS, - img_ids: Optional[List[int]] = None): + def __init__(self, lvis_api: LVIS, img_ids: Optional[List[int]] = None): super(LVISDetectionTargets, self).__init__() self.lvis_api = lvis_api if img_ids is None: @@ -269,21 +264,21 @@ def __getitem__(self, index): category_tensor = torch.empty((n_annotations,), dtype=torch.long) target_dict: LVISImgTargets = { - 'bbox': torch.empty((n_annotations, 4), dtype=torch.float32), - 'category_id': category_tensor, - 'id': torch.empty((n_annotations,), dtype=torch.long), - 'area': torch.empty((n_annotations,), dtype=torch.float32), - 'image_id': torch.full((1,), img_id, dtype=torch.long), - 'segmentation': [], - 'labels': category_tensor # Alias of category_id + "bbox": torch.empty((n_annotations, 4), dtype=torch.float32), + "category_id": category_tensor, + "id": torch.empty((n_annotations,), dtype=torch.long), + "area": torch.empty((n_annotations,), dtype=torch.float32), + "image_id": torch.full((1,), img_id, dtype=torch.long), + "segmentation": [], + "labels": category_tensor, # Alias of category_id } for ann_idx, annotation in enumerate(annotation_dicts): - target_dict['bbox'][ann_idx] = torch.as_tensor(annotation['bbox']) - target_dict['category_id'][ann_idx] = annotation['category_id'] - target_dict['id'][ann_idx] = annotation['id'] - target_dict['area'][ann_idx] = annotation['area'] - target_dict['segmentation'].append(annotation['segmentation']) + target_dict["bbox"][ann_idx] = torch.as_tensor(annotation["bbox"]) + target_dict["category_id"][ann_idx] = annotation["category_id"] + target_dict["id"][ann_idx] = annotation["id"] + target_dict["area"][ann_idx] = annotation["area"] + target_dict["segmentation"].append(annotation["segmentation"]) return target_dict @@ -327,9 +322,7 @@ def _plot_detection_sample(img: Image.Image, target): test_data = LvisDataset(transform=_test_to_tensor, train=False) print("train size: ", len(train_data)) print("Test size: ", len(test_data)) - dataloader = DataLoader( - train_data, batch_size=1, collate_fn=_detection_collate_fn - ) + dataloader = DataLoader(train_data, batch_size=1, collate_fn=_detection_collate_fn) n_to_show = 5 for instance_idx, batch_data in enumerate(dataloader): diff --git a/avalanche/benchmarks/datasets/mini_imagenet/mini_imagenet.py b/avalanche/benchmarks/datasets/mini_imagenet/mini_imagenet.py index 44f5441a4..d2d5803c8 100644 --- a/avalanche/benchmarks/datasets/mini_imagenet/mini_imagenet.py +++ b/avalanche/benchmarks/datasets/mini_imagenet/mini_imagenet.py @@ -156,9 +156,7 @@ def __init__( """ # TODO: the original loader from yaoyao-liu uses cv2.INTER_AREA - self._transform = Resize( - self.resize_to, interpolation=PIL.Image.BILINEAR - ) + self._transform = Resize(self.resize_to, interpolation=PIL.Image.BILINEAR) # The following fields are filled by self.prepare_dataset() self.image_paths: List[str] = [] @@ -264,24 +262,16 @@ def prepare_dataset(self): for cls in images.keys(): cls_numerical_label = self.wnid_to_idx[cls] lst_files = [] 
- for file in glob.glob( - str(self.imagenet_path / cls / ("*" + cls + "*")) - ): + for file in glob.glob(str(self.imagenet_path / cls / ("*" + cls + "*"))): lst_files.append(file) - lst_index = [ - int(i[i.rfind("_") + 1 : i.rfind(".")]) for i in lst_files - ] - index_sorted = sorted( - range(len(lst_index)), key=lst_index.__getitem__ - ) + lst_index = [int(i[i.rfind("_") + 1 : i.rfind(".")]) for i in lst_files] + index_sorted = sorted(range(len(lst_index)), key=lst_index.__getitem__) index_selected = [ int(i[i.index(".") - 4 : i.index(".")]) for i in images[cls] ] - selected_images = np.array(index_sorted)[ - np.array(index_selected) - 1 - ] + selected_images = np.array(index_sorted)[np.array(index_selected) - 1] for i in np.arange(len(selected_images)): self.image_paths.append(lst_files[selected_images[i]]) self.targets.append(cls_numerical_label) @@ -301,9 +291,7 @@ def __getitem__(self, item): import matplotlib.pyplot as plt print("Creating training dataset") - train_dataset = MiniImageNetDataset( - "/ssd2/datasets/imagenet", split="train" - ) + train_dataset = MiniImageNetDataset("/ssd2/datasets/imagenet", split="train") print("Creating validation dataset") val_dataset = MiniImageNetDataset("/ssd2/datasets/imagenet", split="val") print("Creating test dataset") @@ -325,9 +313,7 @@ def __getitem__(self, item): plt.show() print(img) print(label) - class_to_idx = train_dataset.class_to_idx[ - train_dataset.classes[label][0] - ] + class_to_idx = train_dataset.class_to_idx[train_dataset.classes[label][0]] assert class_to_idx == label if img_idx == 2: break @@ -357,9 +343,7 @@ def __getitem__(self, item): plt.show() print(img) print(label) - class_to_idx = test_dataset.class_to_idx[ - train_dataset.classes[label][0] - ] + class_to_idx = test_dataset.class_to_idx[train_dataset.classes[label][0]] assert class_to_idx == label if img_idx == 2: break diff --git a/avalanche/benchmarks/datasets/openloris/openloris.py b/avalanche/benchmarks/datasets/openloris/openloris.py index 49076dc8f..3c6dc8542 100644 --- a/avalanche/benchmarks/datasets/openloris/openloris.py +++ b/avalanche/benchmarks/datasets/openloris/openloris.py @@ -123,9 +123,7 @@ def _load_metadata(self) -> bool: def _download_error_message(self) -> str: base_url = openloris_data.base_gdrive_url - all_urls = [ - base_url + name_url[1] for name_url in openloris_data.avl_vps_data - ] + all_urls = [base_url + name_url[1] for name_url in openloris_data.avl_vps_data] base_msg = ( "[OpenLoris] Direct download may no longer be supported!\n" @@ -170,7 +168,6 @@ def __len__(self): if __name__ == "__main__": - # this little example script can be used to visualize the first image # loaded from the dataset. 
from torch.utils.data.dataloader import DataLoader diff --git a/avalanche/benchmarks/datasets/penn_fudan/penn_fudan_dataset.py b/avalanche/benchmarks/datasets/penn_fudan/penn_fudan_dataset.py index 53bfc5ef4..184ec9c34 100644 --- a/avalanche/benchmarks/datasets/penn_fudan/penn_fudan_dataset.py +++ b/avalanche/benchmarks/datasets/penn_fudan/penn_fudan_dataset.py @@ -169,8 +169,9 @@ def make_targets(self, idx): masks = torch.as_tensor(masks, dtype=torch.uint8) image_id = torch.tensor([idx]) - area = (boxes_as_tensor[:, 3] - boxes_as_tensor[:, 1]) * \ - (boxes_as_tensor[:, 2] - boxes_as_tensor[:, 0]) + area = (boxes_as_tensor[:, 3] - boxes_as_tensor[:, 1]) * ( + boxes_as_tensor[:, 2] - boxes_as_tensor[:, 0] + ) # suppose all instances are not crowd iscrowd = torch.zeros((num_objs,), dtype=torch.int64) diff --git a/avalanche/benchmarks/datasets/stream51/stream51.py b/avalanche/benchmarks/datasets/stream51/stream51.py index f7f37cceb..6490d7821 100644 --- a/avalanche/benchmarks/datasets/stream51/stream51.py +++ b/avalanche/benchmarks/datasets/stream51/stream51.py @@ -31,7 +31,7 @@ from avalanche.benchmarks.datasets.stream51 import stream51_data -TSequence = TypeVar('TSequence', bound=Sequence) +TSequence = TypeVar("TSequence", bound=Sequence) class Stream51(DownloadableDataset): @@ -100,9 +100,7 @@ def _download_dataset(self) -> None: if "json" in filename: target = open(str(self.root / filename), "wb") else: - dest_folder = os.path.join( - *(member.split(os.path.sep)[1:-1]) - ) + dest_folder = os.path.join(*(member.split(os.path.sep)[1:-1])) dest_folder_path = self.root / dest_folder dest_folder_path.mkdir(exist_ok=True, parents=True) @@ -114,13 +112,9 @@ def _download_dataset(self) -> None: def _load_metadata(self) -> bool: if self.train: - data_list = json.load( - open(str(self.root / "Stream-51_meta_train.json")) - ) + data_list = json.load(open(str(self.root / "Stream-51_meta_train.json"))) else: - data_list = json.load( - open(str(self.root / "Stream-51_meta_test.json")) - ) + data_list = json.load(open(str(self.root / "Stream-51_meta_test.json"))) self.samples = data_list self.targets = [s[0] for s in data_list] @@ -140,9 +134,7 @@ def _download_error_message(self) -> str: ) @staticmethod - def _instance_ordering( - data_list: Sequence[TSequence], - seed) -> List[TSequence]: + def _instance_ordering(data_list: Sequence[TSequence], seed) -> List[TSequence]: # organize data by video total_videos = 0 new_data_list = [] @@ -178,9 +170,7 @@ def _class_ordering(data_list, class_type, seed): random.shuffle(class_data_list) else: # shuffle clips within class - class_data_list = Stream51._instance_ordering( - class_data_list, seed - ) + class_data_list = Stream51._instance_ordering(class_data_list, seed) new_data_list.append(class_data_list) # shuffle classes random.seed(seed) @@ -262,15 +252,12 @@ def __repr__(self): tmp = " Target Transforms (if any): " fmt_str += "{0}{1}".format( tmp, - self.target_transform.__repr__().replace( - "\n", "\n" + " " * len(tmp) - ), + self.target_transform.__repr__().replace("\n", "\n" + " " * len(tmp)), ) return fmt_str if __name__ == "__main__": - # this little example script can be used to visualize the first image # loaded from the dataset. 
from torch.utils.data.dataloader import DataLoader diff --git a/avalanche/benchmarks/datasets/tiny_imagenet/tiny_imagenet.py b/avalanche/benchmarks/datasets/tiny_imagenet/tiny_imagenet.py index d1969c35a..ef00a5836 100644 --- a/avalanche/benchmarks/datasets/tiny_imagenet/tiny_imagenet.py +++ b/avalanche/benchmarks/datasets/tiny_imagenet/tiny_imagenet.py @@ -74,9 +74,7 @@ def __init__( def _load_metadata(self) -> bool: self.data_folder = self.root / "tiny-imagenet-200" - self.label2id, self.id2label = TinyImagenet.labels2dict( - self.data_folder - ) + self.label2id, self.id2label = TinyImagenet.labels2dict(self.data_folder) self.data, self.targets = self.load_data() return True @@ -94,7 +92,6 @@ def labels2dict(data_folder: Path): id2label = {} with open(str(data_folder / "wnids.txt"), "r") as f: - reader = csv.reader(f) curr_idx = 0 for ll in reader: @@ -139,8 +136,7 @@ def get_train_images_paths(self, class_name) -> List[Path]: collected. :returns img_paths: list of strings (paths) """ - train_img_folder: Path = \ - self.data_folder / "train" / class_name / "images" + train_img_folder: Path = self.data_folder / "train" / class_name / "images" img_paths = [f for f in train_img_folder.iterdir() if f.is_file()] @@ -155,16 +151,13 @@ def get_test_images_paths(self, class_name) -> List[Path]: :returns img_paths: list of strings (paths) """ - val_img_folder: Path = \ - self.data_folder / "val" / "images" - annotations_file: Path = \ - self.data_folder / "val" / "val_annotations.txt" + val_img_folder: Path = self.data_folder / "val" / "images" + annotations_file: Path = self.data_folder / "val" / "val_annotations.txt" valid_names = [] # filter validation images by class using appropriate file with open(str(annotations_file), "r") as f: - reader = csv.reader(f, dialect="excel-tab") for ll in reader: if ll[1] == class_name: @@ -197,7 +190,6 @@ def __getitem__(self, index): if __name__ == "__main__": - # this little example script can be used to visualize the first image # loaded from the dataset. 
from torch.utils.data.dataloader import DataLoader diff --git a/avalanche/benchmarks/datasets/torchvision_wrapper.py b/avalanche/benchmarks/datasets/torchvision_wrapper.py index c49fe0a58..566e244c9 100644 --- a/avalanche/benchmarks/datasets/torchvision_wrapper.py +++ b/avalanche/benchmarks/datasets/torchvision_wrapper.py @@ -155,7 +155,6 @@ def CelebA(*args, **kwargs): if __name__ == "__main__": - mnist = MNIST(".", download=True) diff --git a/avalanche/benchmarks/generators/benchmark_generators.py b/avalanche/benchmarks/generators/benchmark_generators.py index a5465d883..04ef698c3 100644 --- a/avalanche/benchmarks/generators/benchmark_generators.py +++ b/avalanche/benchmarks/generators/benchmark_generators.py @@ -32,8 +32,9 @@ ) import torch -from avalanche.benchmarks.scenarios.classification_scenario import \ - ClassificationScenario +from avalanche.benchmarks.scenarios.classification_scenario import ( + ClassificationScenario, +) from avalanche.benchmarks.scenarios.dataset_scenario import ( DatasetScenario, @@ -62,26 +63,16 @@ SupportedDataset, as_supervised_classification_dataset, make_classification_dataset, - concat_classification_datasets_sequentially + concat_classification_datasets_sequentially, ) from avalanche.benchmarks.utils.data import AvalancheDataset -TDatasetScenario = TypeVar( - 'TDatasetScenario', - bound='DatasetScenario') -TCLStream = TypeVar( - 'TCLStream', - bound='CLStream') -TSizedCLStream = TypeVar( - 'TSizedCLStream', - bound='SizedCLStream') -TDatasetExperience = TypeVar( - 'TDatasetExperience', - bound='DatasetExperience') -TCLDataset = TypeVar( - 'TCLDataset', - bound='AvalancheDataset') +TDatasetScenario = TypeVar("TDatasetScenario", bound="DatasetScenario") +TCLStream = TypeVar("TCLStream", bound="CLStream") +TSizedCLStream = TypeVar("TSizedCLStream", bound="SizedCLStream") +TDatasetExperience = TypeVar("TDatasetExperience", bound="DatasetExperience") +TCLDataset = TypeVar("TCLDataset", bound="AvalancheDataset") def nc_benchmark( @@ -99,7 +90,7 @@ def nc_benchmark( one_dataset_per_exp: bool = False, train_transform=None, eval_transform=None, - reproducibility_data: Optional[Dict[str, Any]] = None + reproducibility_data: Optional[Dict[str, Any]] = None, ) -> NCScenario: """ This is the high-level benchmark instances generator for the @@ -230,14 +221,15 @@ class "34" will be mapped to "1", class "11" to "2" and so on. train_dataset_sup = list( map(as_supervised_classification_dataset, train_dataset) ) - test_dataset_sup = list( - map(as_supervised_classification_dataset, test_dataset) + test_dataset_sup = list(map(as_supervised_classification_dataset, test_dataset)) + + ( + seq_train_dataset, + seq_test_dataset, + mapping, + ) = concat_classification_datasets_sequentially( + train_dataset_sup, test_dataset_sup ) - - seq_train_dataset, seq_test_dataset, mapping = \ - concat_classification_datasets_sequentially( - train_dataset_sup, test_dataset_sup - ) if one_dataset_per_exp: # If one_dataset_per_exp is True, each dataset will be treated as @@ -259,9 +251,7 @@ class "34" will be mapped to "1", class "11" to "2" and so on. 
seq_train_dataset = as_supervised_classification_dataset(train_dataset) seq_test_dataset = as_supervised_classification_dataset(test_dataset) - transform_groups = dict( - train=(train_transform, None), eval=(eval_transform, None) - ) + transform_groups = dict(train=(train_transform, None), eval=(eval_transform, None)) # Set transformation groups final_train_dataset = as_supervised_classification_dataset( @@ -287,7 +277,7 @@ class "34" will be mapped to "1", class "11" to "2" and so on. per_experience_classes=per_exp_classes, class_ids_from_zero_from_first_exp=class_ids_from_zero_from_first_exp, class_ids_from_zero_in_each_exp=class_ids_from_zero_in_each_exp, - reproducibility_data=reproducibility_data + reproducibility_data=reproducibility_data, ) @@ -368,7 +358,7 @@ def ni_benchmark( :return: A properly initialized :class:`NIScenario` instance. """ - + seq_train_dataset, seq_test_dataset = train_dataset, test_dataset if isinstance(train_dataset, (list, tuple)): if not isinstance(test_dataset, (list, tuple)): @@ -376,7 +366,7 @@ def ni_benchmark( "If a list is passed for train_dataset, " "then test_dataset must be a list, too." ) - + if len(train_dataset) != len(test_dataset): raise ValueError( "Train/test dataset lists must contain the " @@ -386,21 +376,20 @@ def ni_benchmark( train_dataset_sup = list( map(as_supervised_classification_dataset, train_dataset) ) - test_dataset_sup = list( - map(as_supervised_classification_dataset, test_dataset) + test_dataset_sup = list(map(as_supervised_classification_dataset, test_dataset)) + + ( + seq_train_dataset, + seq_test_dataset, + _, + ) = concat_classification_datasets_sequentially( + train_dataset_sup, test_dataset_sup ) - - seq_train_dataset, seq_test_dataset, _ = \ - concat_classification_datasets_sequentially( - train_dataset_sup, test_dataset_sup - ) else: seq_train_dataset = as_supervised_classification_dataset(train_dataset) seq_test_dataset = as_supervised_classification_dataset(test_dataset) - transform_groups = dict( - train=(train_transform, None), eval=(eval_transform, None) - ) + transform_groups = dict(train=(train_transform, None), eval=(eval_transform, None)) # Set transformation groups final_train_dataset = make_classification_dataset( @@ -425,7 +414,7 @@ def ni_benchmark( balance_experiences=balance_experiences, min_class_patterns_in_exp=min_class_patterns_in_exp, fixed_exp_assignment=fixed_exp_assignment, - reproducibility_data=reproducibility_data + reproducibility_data=reproducibility_data, ) @@ -478,7 +467,7 @@ def fixed_size_experience_split_strategy( experience_size: int, shuffle: bool, drop_last: bool, - experience: DatasetExperience[TCLDataset] + experience: DatasetExperience[TCLDataset], ) -> Sequence[TCLDataset]: """ The default splitting strategy used by :func:`data_incremental_benchmark`. 
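# Example (hedged sketch): a typical invocation of the nc_benchmark / ni_benchmark
# generators reformatted above. The MNIST datasets, transform and seed are
# assumptions made only for illustration; the generator names and the
# n_experiences / task_labels / seed arguments come from this module.
from torchvision.datasets import MNIST
from torchvision.transforms import ToTensor

from avalanche.benchmarks.generators import nc_benchmark, ni_benchmark

train_set = MNIST("./data", train=True, download=True, transform=ToTensor())
test_set = MNIST("./data", train=False, download=True, transform=ToTensor())

# New-Classes setting: 5 experiences, each introducing previously unseen classes.
nc_scenario = nc_benchmark(
    train_set, test_set, n_experiences=5, task_labels=False, seed=1234
)

# New-Instances setting: all classes in every experience, new samples each time.
ni_scenario = ni_benchmark(train_set, test_set, n_experiences=5, seed=1234)

for experience in nc_scenario.train_stream:
    print(experience.current_experience, experience.classes_in_this_experience)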
@@ -524,51 +513,42 @@ def fixed_size_experience_split_strategy( final_idx = len(exp_indices) - result_datasets.append( - exp_dataset.subset(exp_indices[init_idx:final_idx]) - ) + result_datasets.append(exp_dataset.subset(exp_indices[init_idx:final_idx])) init_idx = final_idx return result_datasets -TDatasetStream = TypeVar( - 'TDatasetStream', - bound='DatasetStream' -) +TDatasetStream = TypeVar("TDatasetStream", bound="DatasetStream") def _make_plain_experience( - stream: DatasetStream[DatasetExperience[TCLDataset]], - experience_idx: int + stream: DatasetStream[DatasetExperience[TCLDataset]], experience_idx: int ) -> DatasetExperience[TCLDataset]: - dataset = stream.benchmark.stream_definitions[ - stream.name - ].exps_data[experience_idx] + dataset = stream.benchmark.stream_definitions[stream.name].exps_data[experience_idx] return DatasetExperience( current_experience=experience_idx, origin_stream=stream, benchmark=stream.benchmark, - dataset=dataset + dataset=dataset, ) def _smart_benchmark_factory( original_benchmark: DatasetScenario, new_streams_definitions: TStreamsUserDict, - complete_test_set_only: bool + complete_test_set_only: bool, ) -> DatasetScenario: - if isinstance(original_benchmark, ClassificationScenario): return ClassificationScenario( stream_definitions=new_streams_definitions, - complete_test_set_only=complete_test_set_only + complete_test_set_only=complete_test_set_only, ) elif isinstance(original_benchmark, DetectionScenario): return DetectionScenario( stream_definitions=new_streams_definitions, - complete_test_set_only=complete_test_set_only + complete_test_set_only=complete_test_set_only, ) else: # Generic scenario @@ -581,39 +561,40 @@ def _smart_benchmark_factory( def data_incremental_benchmark( - benchmark_instance: DatasetScenario[TDatasetStream, - TDatasetExperience, - TCLDataset], + benchmark_instance: DatasetScenario[TDatasetStream, TDatasetExperience, TCLDataset], experience_size: int, shuffle: bool = False, drop_last: bool = False, split_streams: Sequence[str] = ("train",), - custom_split_strategy: Optional[Callable[ - [DatasetExperience[TCLDataset]], - Sequence[TCLDataset] - ]] = None, + custom_split_strategy: Optional[ + Callable[[DatasetExperience[TCLDataset]], Sequence[TCLDataset]] + ] = None, *, - benchmark_factory: Optional[Callable[ - [ - DatasetScenario[TDatasetStream, - TDatasetExperience, - TCLDataset], - TStreamsUserDict, - bool - ], DatasetScenario[ - DatasetStream[DatasetExperience[TCLDataset]], - DatasetExperience[TCLDataset], - TCLDataset] + benchmark_factory: Optional[ + Callable[ + [ + DatasetScenario[TDatasetStream, TDatasetExperience, TCLDataset], + TStreamsUserDict, + bool, + ], + DatasetScenario[ + DatasetStream[DatasetExperience[TCLDataset]], + DatasetExperience[TCLDataset], + TCLDataset, + ], ] ] = _smart_benchmark_factory, - experience_factory: Optional[Callable[ - [DatasetStream[DatasetExperience[TCLDataset]], int], - DatasetExperience[TCLDataset] - ]] = _make_plain_experience, + experience_factory: Optional[ + Callable[ + [DatasetStream[DatasetExperience[TCLDataset]], int], + DatasetExperience[TCLDataset], + ] + ] = _make_plain_experience, ) -> DatasetScenario[ - DatasetStream[DatasetExperience[TCLDataset]], - DatasetExperience[TCLDataset], - TCLDataset]: + DatasetStream[DatasetExperience[TCLDataset]], + DatasetExperience[TCLDataset], + TCLDataset, +]: """ High-level benchmark generator for a Data Incremental setup. 
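# Example (hedged sketch): the shape of a custom_split_strategy accepted by
# data_incremental_benchmark above. It receives a single experience and must
# return the sequence of sub-datasets that become the new, smaller experiences.
# The 50/50 split below is an arbitrary choice made only for illustration; the
# .subset() call mirrors the one used by the default strategy in this file.
def halve_experience(experience):
    """Split the experience's dataset into two equally sized chunks."""
    dataset = experience.dataset
    midpoint = len(dataset) // 2
    return [
        dataset.subset(list(range(0, midpoint))),
        dataset.subset(list(range(midpoint, len(dataset)))),
    ]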
@@ -660,7 +641,7 @@ def data_incremental_benchmark( A good starting to understand the mechanism is to look at the implementation of the standard splitting function :func:`fixed_size_experience_split_strategy`. - :param benchmark_factory: The scenario factory. Defaults to + :param benchmark_factory: The scenario factory. Defaults to `_smart_experience_factory`, which will try to create a benchmark of the same class of the originating one. Can be None, in which case a generic :class:`DatasetScenario` will be used coupled with the factory defined @@ -671,19 +652,13 @@ def data_incremental_benchmark( :return: The Data Incremental benchmark instance. """ - split_strategy: Callable[ - [DatasetExperience[TCLDataset]], - Sequence[TCLDataset] - ] + split_strategy: Callable[[DatasetExperience[TCLDataset]], Sequence[TCLDataset]] if custom_split_strategy is None: # functools.partial is a more compact option # However, MyPy does not understand what a partial is -_- def fixed_size_experience_split_strategy_wrapper(exp): return fixed_size_experience_split_strategy( - experience_size, - shuffle, - drop_last, - exp + experience_size, shuffle, drop_last, exp ) split_strategy = fixed_size_experience_split_strategy_wrapper @@ -697,13 +672,10 @@ def fixed_size_experience_split_strategy_wrapper(exp): for stream_name in split_streams: if stream_name not in stream_definitions: raise ValueError( - f"Stream {stream_name} could not be found in the " - f"benchmark instance" + f"Stream {stream_name} could not be found in the " f"benchmark instance" ) - stream: TDatasetStream = getattr( - benchmark_instance, - f"{stream_name}_stream") + stream: TDatasetStream = getattr(benchmark_instance, f"{stream_name}_stream") split_datasets: List[TCLDataset] = [] split_task_labels: List[Set[int]] = [] @@ -719,7 +691,7 @@ def fixed_size_experience_split_strategy_wrapper(exp): LazyDatasetSequence(split_datasets, len(split_datasets)), split_task_labels, stream_definitions[stream_name].origin_dataset, - False + False, ) stream_def.exps_data.load_all_experiences() @@ -734,9 +706,7 @@ def fixed_size_experience_split_strategy_wrapper(exp): # Try to create a benchmark of the same class of the # initial benchmark. return benchmark_factory( - benchmark_instance, - stream_definitions, - complete_test_set_only + benchmark_instance, stream_definitions, complete_test_set_only ) # Generic benchmark class @@ -866,13 +836,8 @@ def class_balanced_split_strategy( def _gen_split( - split_generator: Iterable[ - Tuple[TCLDataset, TCLDataset] - ] -) -> Tuple[ - Generator[TCLDataset, None, None], - Generator[TCLDataset, None, None], -]: + split_generator: Iterable[Tuple[TCLDataset, TCLDataset]] +) -> Tuple[Generator[TCLDataset, None, None], Generator[TCLDataset, None, None],]: """ Internal utility function to split the train-validation generator into two distinct generators (one for the train stream and another one @@ -894,9 +859,7 @@ def _lazy_train_val_split( Tuple[TCLDataset, TCLDataset], ], experiences: Iterable[DatasetExperience[TCLDataset]], -) -> Generator[ - Tuple[TCLDataset, TCLDataset], None, None -]: +) -> Generator[Tuple[TCLDataset, TCLDataset], None, None]: """ Creates a generator operating around the split strategy and the experiences stream. 
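# Example (hedged sketch): wrapping an existing benchmark with the
# data_incremental_benchmark generator documented above. The source scenario
# and the experience_size value are assumptions made only for illustration.
from avalanche.benchmarks.generators import data_incremental_benchmark

data_incremental_scenario = data_incremental_benchmark(
    nc_scenario,           # any DatasetScenario, e.g. one built with nc_benchmark
    experience_size=1000,  # each resulting experience holds at most 1000 samples
    shuffle=True,
    drop_last=False,
    # custom_split_strategy=halve_experience,  # optional, see the sketch above
)
print(len(data_incremental_scenario.train_stream))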
@@ -912,39 +875,44 @@ def _lazy_train_val_split( def benchmark_with_validation_stream( - benchmark_instance: DatasetScenario[TDatasetStream, - TDatasetExperience, - TCLDataset], + benchmark_instance: DatasetScenario[TDatasetStream, TDatasetExperience, TCLDataset], validation_size: Union[int, float] = 0.5, shuffle: bool = False, input_stream: str = "train", output_stream: str = "valid", - custom_split_strategy: Optional[Callable[ - [DatasetExperience[TCLDataset]], - Tuple[TCLDataset, TCLDataset], - ]] = None, + custom_split_strategy: Optional[ + Callable[ + [DatasetExperience[TCLDataset]], + Tuple[TCLDataset, TCLDataset], + ] + ] = None, *, - benchmark_factory: Optional[Callable[ - [ - DatasetScenario[TDatasetStream, - TDatasetExperience, - TCLDataset], - TStreamsUserDict, - bool - ], DatasetScenario[ + benchmark_factory: Optional[ + Callable[ + [ + DatasetScenario[TDatasetStream, TDatasetExperience, TCLDataset], + TStreamsUserDict, + bool, + ], + DatasetScenario[ DatasetStream[DatasetExperience[TCLDataset]], DatasetExperience[TCLDataset], - TCLDataset]] + TCLDataset, + ], + ] ] = _smart_benchmark_factory, - experience_factory: Optional[Callable[ - [DatasetStream[DatasetExperience[TCLDataset]], int], - DatasetExperience[TCLDataset] - ]] = _make_plain_experience, - lazy_splitting: Optional[bool] = None + experience_factory: Optional[ + Callable[ + [DatasetStream[DatasetExperience[TCLDataset]], int], + DatasetExperience[TCLDataset], + ] + ] = _make_plain_experience, + lazy_splitting: Optional[bool] = None, ) -> DatasetScenario[ - DatasetStream[DatasetExperience[TCLDataset]], - DatasetExperience[TCLDataset], - TCLDataset]: + DatasetStream[DatasetExperience[TCLDataset]], + DatasetExperience[TCLDataset], + TCLDataset, +]: """ Helper that can be used to obtain a benchmark with a validation stream. @@ -1000,7 +968,7 @@ def benchmark_with_validation_stream( A good starting to understand the mechanism is to look at the implementation of the standard splitting function :func:`random_validation_split_strategy`. - :param benchmark_factory: The scenario factory. Defaults to + :param benchmark_factory: The scenario factory. Defaults to `_smart_experience_factory`, which will try to create a benchmark of the same class of the originating one. 
Can be None, in which case a generic :class:`DatasetScenario` will be used coupled with the factory defined @@ -1023,30 +991,25 @@ def benchmark_with_validation_stream( # functools.partial is a more compact option # However, MyPy does not understand what a partial is -_- def random_validation_split_strategy_wrapper(exp): - return random_validation_split_strategy( - validation_size, - shuffle, - exp - ) + return random_validation_split_strategy(validation_size, shuffle, exp) split_strategy = random_validation_split_strategy_wrapper else: split_strategy = custom_split_strategy - original_stream_definitions: Dict[str, StreamDef[TCLDataset]] = \ - benchmark_instance.stream_definitions + original_stream_definitions: Dict[ + str, StreamDef[TCLDataset] + ] = benchmark_instance.stream_definitions streams = benchmark_instance.streams if input_stream not in streams: raise ValueError( - f"Stream {input_stream} could not be found in the " - f"benchmark instance" + f"Stream {input_stream} could not be found in the " f"benchmark instance" ) if output_stream in streams: raise ValueError( - f"Stream {output_stream} already exists in the " - f"benchmark instance" + f"Stream {output_stream} already exists in the " f"benchmark instance" ) stream: TDatasetStream = streams[input_stream] @@ -1056,14 +1019,10 @@ def random_validation_split_strategy_wrapper(exp): else: split_lazily = lazy_splitting - exps_tasks_labels = list( - original_stream_definitions[input_stream].exps_task_labels - ) + exps_tasks_labels = list(original_stream_definitions[input_stream].exps_task_labels) - train_exps_source: Union[Iterable[TCLDataset], - Tuple[Iterable[TCLDataset], int]] - valid_exps_source: Union[Iterable[TCLDataset], - Tuple[Iterable[TCLDataset], int]] + train_exps_source: Union[Iterable[TCLDataset], Tuple[Iterable[TCLDataset], int]] + valid_exps_source: Union[Iterable[TCLDataset], Tuple[Iterable[TCLDataset], int]] if not split_lazily: # Classic static splitting train_exps_source = [] @@ -1080,10 +1039,10 @@ def random_validation_split_strategy_wrapper(exp): train_exps_gen, valid_exps_gen = _gen_split(split_generator) train_exps_source = (train_exps_gen, len(stream)) valid_exps_source = (valid_exps_gen, len(stream)) - - stream_definitions: Dict[str, Union[StreamUserDef[TCLDataset], - StreamDef[TCLDataset]]] = \ - dict(original_stream_definitions) + + stream_definitions: Dict[ + str, Union[StreamUserDef[TCLDataset], StreamDef[TCLDataset]] + ] = dict(original_stream_definitions) train_stream_def: StreamUserDef[TCLDataset] = StreamUserDef( train_exps_source, @@ -1108,9 +1067,7 @@ def random_validation_split_strategy_wrapper(exp): # Try to create a benchmark of the same class of the # initial benchmark. 
return benchmark_factory( - benchmark_instance, - stream_definitions, - complete_test_set_only + benchmark_instance, stream_definitions, complete_test_set_only ) # Generic benchmark class diff --git a/avalanche/benchmarks/generators/scenario_generators.py b/avalanche/benchmarks/generators/scenario_generators.py index b3049595e..5d95d5760 100644 --- a/avalanche/benchmarks/generators/scenario_generators.py +++ b/avalanche/benchmarks/generators/scenario_generators.py @@ -37,9 +37,7 @@ from avalanche.benchmarks.scenarios.generic_scenario_creation import * from avalanche.benchmarks.scenarios.new_classes.nc_scenario import NCScenario from avalanche.benchmarks.scenarios.new_instances.ni_scenario import NIScenario -from avalanche.benchmarks.utils.classification_dataset import ( - SupportedDataset -) +from avalanche.benchmarks.utils.classification_dataset import SupportedDataset def nc_scenario( @@ -156,7 +154,7 @@ class "34" will be mapped to "1", class "11" to "2" and so on. class_ids_from_zero_from_first_exp=class_ids_from_zero_from_first_exp, class_ids_from_zero_in_each_exp=class_ids_from_zero_in_each_exp, one_dataset_per_exp=one_dataset_per_exp, - reproducibility_data=reproducibility_data + reproducibility_data=reproducibility_data, ) @@ -243,7 +241,7 @@ def ni_scenario( balance_experiences=balance_experiences, min_class_patterns_in_exp=min_class_patterns_in_exp, fixed_exp_assignment=fixed_exp_assignment, - reproducibility_data=reproducibility_data + reproducibility_data=reproducibility_data, ) @@ -394,9 +392,7 @@ def filelist_scenario( def paths_scenario( train_list_of_files: Sequence[Sequence[FileAndLabel]], - test_list_of_files: Union[ - Sequence[FileAndLabel], Sequence[Sequence[FileAndLabel]] - ], + test_list_of_files: Union[Sequence[FileAndLabel], Sequence[Sequence[FileAndLabel]]], task_labels: Sequence[int], *, complete_test_set_only: bool = False, @@ -669,8 +665,7 @@ def tensor_scenario( if len(train_data_x) != len(train_data_y): raise ValueError( - "train_data_x and train_data_y must contain" - " the same amount of elements" + "train_data_x and train_data_y must contain" " the same amount of elements" ) exp_train_first_structure = [] diff --git a/avalanche/benchmarks/scenarios/benchmark_wrapper_utils.py b/avalanche/benchmarks/scenarios/benchmark_wrapper_utils.py index 39ef94d88..1ebace1c2 100644 --- a/avalanche/benchmarks/scenarios/benchmark_wrapper_utils.py +++ b/avalanche/benchmarks/scenarios/benchmark_wrapper_utils.py @@ -15,102 +15,82 @@ SizedCLStream, ) -TCLExperience = TypeVar( - 'TCLExperience', - bound='CLExperience') -TCLStreamWrapper = TypeVar( - 'TCLStreamWrapper', - bound='CLStreamWrapper') -TSizedCLStreamWrapper = TypeVar( - 'TSizedCLStreamWrapper', - bound='SizedCLStreamWrapper') +TCLExperience = TypeVar("TCLExperience", bound="CLExperience") +TCLStreamWrapper = TypeVar("TCLStreamWrapper", bound="CLStreamWrapper") +TSizedCLStreamWrapper = TypeVar("TSizedCLStreamWrapper", bound="SizedCLStreamWrapper") TSequenceStreamWrapper = TypeVar( - 'TSequenceStreamWrapper', - bound='SequenceStreamWrapper') + "TSequenceStreamWrapper", bound="SequenceStreamWrapper" +) -class ExperienceWrapper( - CLExperience, - Generic[TCLExperience]): +class ExperienceWrapper(CLExperience, Generic[TCLExperience]): """ Utility class used to wrap an experience. Instances of this class will allow to get attrbitues setted - in the original experience, but the `origin_stream` and + in the original experience, but the `origin_stream` and `current_experience` attributes will be overridden. 
""" + def __init__( - self, - base_exp: TCLExperience, - current_experience: int, - origin_stream: CLStream): + self, base_exp: TCLExperience, current_experience: int, origin_stream: CLStream + ): self.wrapped_exp: TCLExperience = base_exp super().__init__( - current_experience=current_experience, - origin_stream=origin_stream + current_experience=current_experience, origin_stream=origin_stream ) - + def __getattr__(self, attr): - if attr == 'wrapped_exp' and attr not in self.__dict__: + if attr == "wrapped_exp" and attr not in self.__dict__: # Happens when using copy.copy or copy.deepcopy raise AttributeError(attr) - + if attr in self.__dict__: return self.__dict__[attr] return getattr(self.wrapped_exp, attr) - + @property def task_labels(self) -> List[int]: - return getattr(self.wrapped_exp, 'task_labels') + return getattr(self.wrapped_exp, "task_labels") -class CLStreamWrapper( - CLStream[ - ExperienceWrapper[ - TCLExperience]]): +class CLStreamWrapper(CLStream[ExperienceWrapper[TCLExperience]]): """ Utility class used to wrap a stream. Objects of this class will return experiences wrapped using class:`ExperienceWrapper`. """ + def __init__( - self, - name: str, - benchmark: CLScenario, - wrapped_stream: CLStream[TCLExperience]): - - self._wrapped_stream: CLStream[TCLExperience] = \ - wrapped_stream + self, name: str, benchmark: CLScenario, wrapped_stream: CLStream[TCLExperience] + ): + self._wrapped_stream: CLStream[TCLExperience] = wrapped_stream """ A reference to the wrapped stream. """ - + super().__init__( name=name, exps_iter=None, # type: ignore benchmark=benchmark, - set_stream_info=True) + set_stream_info=True, + ) def __getattr__(self, attr): if attr in self.__dict__: return getattr(self, attr) return getattr(self._wrapped_exp, attr) - def __iter__(self) -> \ - Iterator[ - ExperienceWrapper[ - TCLExperience]]: + def __iter__(self) -> Iterator[ExperienceWrapper[TCLExperience]]: exp: TCLExperience for i, exp in enumerate(self._wrapped_stream): exp_wrapped = ExperienceWrapper(exp, i, self) yield exp_wrapped -class SizedCLStreamWrapper( - CLStreamWrapper[ - TCLExperience]): +class SizedCLStreamWrapper(CLStreamWrapper[TCLExperience]): """ Utility class used to wrap a sized stream. @@ -119,25 +99,20 @@ class SizedCLStreamWrapper( """ def __init__( - self, - name: str, - benchmark: CLScenario, - wrapped_stream: SizedCLStream[TCLExperience]): - + self, + name: str, + benchmark: CLScenario, + wrapped_stream: SizedCLStream[TCLExperience], + ): self._wrapped_stream: SizedCLStream[TCLExperience] = wrapped_stream - - super().__init__( - name=name, - benchmark=benchmark, - wrapped_stream=wrapped_stream) + + super().__init__(name=name, benchmark=benchmark, wrapped_stream=wrapped_stream) def __len__(self): return len(self._wrapped_stream) -class SequenceStreamWrapper( - SequenceCLStream[ - ExperienceWrapper[TCLExperience]]): +class SequenceStreamWrapper(SequenceCLStream[ExperienceWrapper[TCLExperience]]): """ Utility class used to wrap a sequence stream. 
@@ -146,28 +121,23 @@ class SequenceStreamWrapper( """ def __init__( - self, - name: str, - benchmark: CLScenario, - wrapped_stream: SequenceCLStream[TCLExperience], - slice_ids: Optional[Iterable[int]] = None): + self, + name: str, + benchmark: CLScenario, + wrapped_stream: SequenceCLStream[TCLExperience], + slice_ids: Optional[Iterable[int]] = None, + ): self._wrapped_stream: SequenceCLStream[TCLExperience] = wrapped_stream - - super().__init__( - name, - benchmark, - set_stream_info=True, - slice_ids=slice_ids) - + + super().__init__(name, benchmark, set_stream_info=True, slice_ids=slice_ids) + def _full_length(self) -> int: """ Gets the number of experiences in the wrapped stream. """ return len(self._wrapped_stream) - def _make_experience( - self, - experience_idx: int) -> ExperienceWrapper[TCLExperience]: + def _make_experience(self, experience_idx: int) -> ExperienceWrapper[TCLExperience]: """ Obtain the experience at the given position in the wrapped stream. """ @@ -177,9 +147,7 @@ def _make_experience( def wrap_stream( - new_name: str, - new_benchmark: CLScenario, - wrapped_stream: CLStream + new_name: str, new_benchmark: CLScenario, wrapped_stream: CLStream ) -> CLStream: """ Internal utility used to wrap a stream by keeping @@ -193,24 +161,19 @@ def wrap_stream( if isinstance(wrapped_stream, SequenceCLStream): # Maintain indexing/slicing functionalities s_wrapped = SequenceStreamWrapper( - name=new_name, - benchmark=new_benchmark, - wrapped_stream=wrapped_stream) + name=new_name, benchmark=new_benchmark, wrapped_stream=wrapped_stream + ) elif isinstance(wrapped_stream, SizedCLStream): # Sized stream s_wrapped = SizedCLStreamWrapper( - name=new_name, - benchmark=new_benchmark, - wrapped_stream=wrapped_stream) + name=new_name, benchmark=new_benchmark, wrapped_stream=wrapped_stream + ) else: # Plain iter-based stream s_wrapped = CLStreamWrapper( - name=new_name, - benchmark=new_benchmark, - wrapped_stream=wrapped_stream) + name=new_name, benchmark=new_benchmark, wrapped_stream=wrapped_stream + ) return s_wrapped -__all__ = [ - 'wrap_stream' -] +__all__ = ["wrap_stream"] diff --git a/avalanche/benchmarks/scenarios/classification_scenario.py b/avalanche/benchmarks/scenarios/classification_scenario.py index cfebe300a..114658c0d 100644 --- a/avalanche/benchmarks/scenarios/classification_scenario.py +++ b/avalanche/benchmarks/scenarios/classification_scenario.py @@ -22,89 +22,74 @@ DatasetScenario, ClassesTimelineCLScenario, FactoryBasedStream, - TStreamsUserDict + TStreamsUserDict, ) -from avalanche.benchmarks.utils import ( - AvalancheDataset -) +from avalanche.benchmarks.utils import AvalancheDataset from avalanche.benchmarks.utils.classification_dataset import ( ClassificationDataset, ) -from avalanche.benchmarks.utils.dataset_utils import \ - manage_advanced_indexing +from avalanche.benchmarks.utils.dataset_utils import manage_advanced_indexing # --- Dataset --- # From utils: TClassificationDataset = TypeVar( - 'TClassificationDataset', - bound='ClassificationDataset') + "TClassificationDataset", bound="ClassificationDataset" +) # --- Scenario --- # From dataset_scenario: -TDatasetScenario = TypeVar( - 'TDatasetScenario', - bound='DatasetScenario' -) +TDatasetScenario = TypeVar("TDatasetScenario", bound="DatasetScenario") TClassificationScenario = TypeVar( - 'TClassificationScenario', - bound='ClassificationScenario') + "TClassificationScenario", bound="ClassificationScenario" +) # --- Stream --- # Defined here: -TClassificationStream = TypeVar( - 'TClassificationStream', - 
bound='ClassificationStream' -) +TClassificationStream = TypeVar("TClassificationStream", bound="ClassificationStream") # --- Experience --- TClassificationExperience = TypeVar( - 'TClassificationExperience', - bound='ClassificationExperience') + "TClassificationExperience", bound="ClassificationExperience" +) def _default_classification_stream_factory( - stream_name: str, - benchmark: 'ClassificationScenario'): - return ClassificationStream( - name=stream_name, - benchmark=benchmark - ) + stream_name: str, benchmark: "ClassificationScenario" +): + return ClassificationStream(name=stream_name, benchmark=benchmark) def _default_classification_experience_factory( - stream: 'ClassificationStream', - experience_idx: int): + stream: "ClassificationStream", experience_idx: int +): return ClassificationExperience( - origin_stream=stream, - current_experience=experience_idx + origin_stream=stream, current_experience=experience_idx ) class ClassificationScenario( ClassesTimelineCLScenario[ - TClassificationStream, - TClassificationExperience, - TClassificationDataset]): + TClassificationStream, TClassificationExperience, TClassificationDataset + ] +): """ Base implementation of a Continual Learning classification benchmark. For more info, please refer to the base class :class:`DatasetScenario`. """ - + def __init__( self: TClassificationScenario, *, stream_definitions: TStreamsUserDict, stream_factory: Callable[ - [str, TClassificationScenario], - TClassificationStream - ] = _default_classification_stream_factory, + [str, TClassificationScenario], TClassificationStream + ] = _default_classification_stream_factory, experience_factory: Callable[ - [TClassificationStream, int], - TClassificationExperience - ] = _default_classification_experience_factory, + [TClassificationStream, int], TClassificationExperience + ] = _default_classification_experience_factory, complete_test_set_only: bool = False ): """ @@ -132,7 +117,8 @@ def __init__( stream_definitions=stream_definitions, stream_factory=stream_factory, experience_factory=experience_factory, - complete_test_set_only=complete_test_set_only) + complete_test_set_only=complete_test_set_only, + ) @property def classes_in_experience(self): @@ -142,11 +128,7 @@ def classes_in_experience(self): GenericCLScenario = ClassificationScenario -class ClassificationStream( - FactoryBasedStream[ - TClassificationExperience - ] -): +class ClassificationStream(FactoryBasedStream[TClassificationExperience]): def __init__( self, name: str, @@ -160,14 +142,11 @@ def __init__( name=name, benchmark=benchmark, slice_ids=slice_ids, - set_stream_info=set_stream_info) + set_stream_info=set_stream_info, + ) -class ClassificationExperience( - AbstractClassTimelineExperience[ - TClassificationDataset - ] -): +class ClassificationExperience(AbstractClassTimelineExperience[TClassificationDataset]): """ Definition of a learning experience based on a :class:`GenericCLScenario` instance. 
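# Example (hedged sketch): how the stream_factory / experience_factory hooks of
# ClassificationScenario above can be customised. The print statement is an
# assumption made only for illustration; the constructor call mirrors the
# default experience factory defined in this file.
from avalanche.benchmarks.scenarios.classification_scenario import (
    ClassificationExperience,
    ClassificationScenario,
)


def verbose_experience_factory(stream, experience_idx):
    print(f"building experience {experience_idx} of stream '{stream.name}'")
    return ClassificationExperience(
        origin_stream=stream, current_experience=experience_idx
    )


# scenario = ClassificationScenario(
#     stream_definitions=stream_definitions,  # a valid TStreamsUserDict
#     experience_factory=verbose_experience_factory,
# )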
@@ -179,10 +158,8 @@ class ClassificationExperience( def __init__( self: TClassificationExperience, - origin_stream: ClassificationStream[ - TClassificationExperience - ], - current_experience: int + origin_stream: ClassificationStream[TClassificationExperience], + current_experience: int, ): """ Creates an instance of a generic experience given the stream from this @@ -195,11 +172,9 @@ def __init__( self._benchmark: ClassificationScenario = origin_stream.benchmark - dataset: TClassificationDataset = ( - origin_stream.benchmark.stream_definitions[ - origin_stream.name - ].exps_data[current_experience] - ) + dataset: TClassificationDataset = origin_stream.benchmark.stream_definitions[ + origin_stream.name + ].exps_data[current_experience] ( classes_in_this_exp, @@ -223,9 +198,7 @@ def __init__( @property # type: ignore[override] def benchmark(self) -> ClassificationScenario: bench = self._benchmark - ClassificationExperience._check_unset_attribute( - 'benchmark', bench - ) + ClassificationExperience._check_unset_attribute("benchmark", bench) return bench @benchmark.setter @@ -245,13 +218,10 @@ def task_labels(self) -> List[int]: GenericClassificationExperience = ClassificationExperience -class _LazyStreamClassesInClassificationExps( - Mapping[str, - Sequence[Set[int]]]): +class _LazyStreamClassesInClassificationExps(Mapping[str, Sequence[Set[int]]]): def __init__(self, benchmark: GenericCLScenario): self._benchmark = benchmark - self._default_lcie = _LazyClassesInClassificationExps( - benchmark, stream="train") + self._default_lcie = _LazyClassesInClassificationExps(benchmark, stream="train") def __len__(self): return len(self._benchmark.stream_definitions) @@ -288,25 +258,20 @@ def __len__(self) -> int: @overload def __getitem__(self, exp_id: int) -> Optional[Set[int]]: ... - + @overload def __getitem__(self, exp_id: slice) -> Tuple[Optional[Set[int]], ...]: ... 
- + def __getitem__(self, exp_id: Union[int, slice]) -> LazyClassesInExpsRet: indexing_collate = _LazyClassesInClassificationExps._slice_collate result = manage_advanced_indexing( - exp_id, - self._get_single_exp_classes, - len(self), - indexing_collate + exp_id, self._get_single_exp_classes, len(self), indexing_collate ) return result def __str__(self): - return ( - "[" + ", ".join([str(self[idx]) for idx in range(len(self))]) + "]" - ) + return "[" + ", ".join([str(self[idx]) for idx in range(len(self))]) + "]" def _get_single_exp_classes(self, exp_id) -> Optional[Set[int]]: b = self._benchmark.stream_definitions[self._stream] @@ -315,12 +280,13 @@ def _get_single_exp_classes(self, exp_id) -> Optional[Set[int]]: targets = b.exps_data.targets_field_sequence[exp_id] if targets is None: return None - + return set(targets) @staticmethod - def _slice_collate(classes_in_exps: Iterable[Optional[Iterable[int]]]) -> \ - Optional[Tuple[Set[int], ...]]: + def _slice_collate( + classes_in_exps: Iterable[Optional[Iterable[int]]], + ) -> Optional[Tuple[Set[int], ...]]: result: List[Set[int]] = [] for x in classes_in_exps: if x is None: diff --git a/avalanche/benchmarks/scenarios/dataset_scenario.py b/avalanche/benchmarks/scenarios/dataset_scenario.py index cfc1c2f56..c098e4187 100644 --- a/avalanche/benchmarks/scenarios/dataset_scenario.py +++ b/avalanche/benchmarks/scenarios/dataset_scenario.py @@ -27,50 +27,31 @@ SequenceCLStream, ) -from avalanche.benchmarks.scenarios.lazy_dataset_sequence import ( - LazyDatasetSequence -) +from avalanche.benchmarks.scenarios.lazy_dataset_sequence import LazyDatasetSequence -from avalanche.benchmarks.utils import ( - AvalancheDataset -) +from avalanche.benchmarks.utils import AvalancheDataset from torch.utils.data.dataset import Dataset # --- Dataset --- # From utils: -TCLDataset = TypeVar( - 'TCLDataset', - bound='AvalancheDataset', - covariant=True) +TCLDataset = TypeVar("TCLDataset", bound="AvalancheDataset", covariant=True) # --- Scenario --- # From generic_scenario: -TCLScenario = TypeVar( - 'TCLScenario', - bound='CLScenario', - covariant=True) +TCLScenario = TypeVar("TCLScenario", bound="CLScenario", covariant=True) # Defined here: -TDatasetScenario = TypeVar( - 'TDatasetScenario', - bound='DatasetScenario' -) +TDatasetScenario = TypeVar("TDatasetScenario", bound="DatasetScenario") TClassesTimelineCLScenario = TypeVar( - 'TClassesTimelineCLScenario', - bound='ClassesTimelineCLScenario') + "TClassesTimelineCLScenario", bound="ClassesTimelineCLScenario" +) # --- Stream --- # From generic_scenario: -TCLStream = TypeVar( - 'TCLStream', - bound='CLStream') +TCLStream = TypeVar("TCLStream", bound="CLStream") # --- Experience --- -TCLExperience = TypeVar( - 'TCLExperience', - bound='CLExperience') -TDatasetExperience = TypeVar( - 'TDatasetExperience', - bound='DatasetExperience') +TCLExperience = TypeVar("TCLExperience", bound="CLExperience") +TDatasetExperience = TypeVar("TDatasetExperience", bound="DatasetExperience") # Definitions (stream) @@ -88,8 +69,7 @@ # mandate setting task labels and the origin dataset @dataclass class StreamUserDef(Generic[TCLDataset]): - exps_data: Union[TCLDataset, Iterable[TCLDataset], - Tuple[Iterable[TCLDataset], int]] + exps_data: Union[TCLDataset, Iterable[TCLDataset], Tuple[Iterable[TCLDataset], int]] exps_task_labels: TStreamTaskLabels = None origin_dataset: TOriginDataset = None is_lazy: Optional[bool] = None @@ -118,8 +98,8 @@ class StreamDef(Generic[TCLDataset]): class DatasetScenario( - CLScenario[TCLStream], - 
Generic[TCLStream, TDatasetExperience, TCLDataset]): + CLScenario[TCLStream], Generic[TCLStream, TDatasetExperience, TCLDataset] +): """ Base implementation of a Continual Learning benchmark instance. A Continual Learning benchmark instance is defined by a set of streams of @@ -208,22 +188,21 @@ def __init__( [str, TDatasetScenario], TCLStream ] = stream_factory - self.stream_definitions: Dict[str, StreamDef[TCLDataset]] = \ - DatasetScenario._check_stream_definitions( - stream_definitions - ) + self.stream_definitions: Dict[ + str, StreamDef[TCLDataset] + ] = DatasetScenario._check_stream_definitions(stream_definitions) """ A structure containing the definition of the streams. """ - self.original_train_dataset: Optional[ - TOriginDataset - ] = self.stream_definitions["train"].origin_dataset + self.original_train_dataset: Optional[TOriginDataset] = self.stream_definitions[ + "train" + ].origin_dataset """ The original training set. May be None. """ - self.original_test_dataset: Optional[ - TOriginDataset - ] = self.stream_definitions["test"].origin_dataset + self.original_test_dataset: Optional[TOriginDataset] = self.stream_definitions[ + "test" + ].origin_dataset """ The original test set. May be None. """ self.train_stream: TCLStream = self.stream_factory("train", self) @@ -265,8 +244,10 @@ def __init__( self._make_stream_fields() super().__init__( - [getattr(self, f"{stream_name}_stream") for - stream_name in self.stream_definitions.keys()] + [ + getattr(self, f"{stream_name}_stream") + for stream_name in self.stream_definitions.keys() + ] ) @property @@ -364,10 +345,10 @@ def _check_stream_name(stream_name: Any): @staticmethod def _check_and_adapt_user_stream_def( - stream_def: Union[StreamDef[TCLDataset], - StreamUserDef[TCLDataset], - TStreamUserDef], - stream_name: str + stream_def: Union[ + StreamDef[TCLDataset], StreamUserDef[TCLDataset], TStreamUserDef + ], + stream_name: str, ) -> StreamDef[TCLDataset]: if isinstance(stream_def, StreamDef): return stream_def @@ -377,7 +358,8 @@ def _check_and_adapt_user_stream_def( stream_def.exps_data, stream_def.exps_task_labels, stream_def.origin_dataset, - stream_def.is_lazy) + stream_def.is_lazy, + ) exp_data: TStreamDataOrigin = stream_def[0] task_labels: TStreamTaskLabels = None @@ -385,7 +367,7 @@ def _check_and_adapt_user_stream_def( is_lazy: Optional[bool] = None if exp_data is None: - raise ValueError('Experience data can\'t be None') + raise ValueError("Experience data can't be None") if len(stream_def) > 1: task_labels = stream_def[1] # type: ignore @@ -403,18 +385,17 @@ def _check_and_adapt_user_stream_def( # per se (only if is_lazy==True, otherwise is treated as a # standard Sequence) if not isinstance(exp_data, LazyDatasetSequence): - if (not isinstance(exp_data, tuple)) or ( - not len(exp_data) == 2 - ): + if (not isinstance(exp_data, tuple)) or (not len(exp_data) == 2): raise ValueError( f"The stream {stream_name} was flagged as " f"lazy-generated but its definition is not a " f"2-elements tuple (generator and stream length)." 
) else: - if (not isinstance(exp_data, Sequence)) or \ - (not len(exp_data) == 2) or ( - not isinstance(exp_data[1], int) + if ( + (not isinstance(exp_data, Sequence)) + or (not len(exp_data) == 2) + or (not isinstance(exp_data[1], int)) ): raise ValueError( f"The stream {stream_name} was detected " @@ -461,7 +442,8 @@ def _check_and_adapt_user_stream_def( exp_dataset: AvalancheDataset for i, exp_dataset in enumerate(exp_data): # type: ignore task_labels_list.append( - set(exp_dataset.targets_task_labels)) # type: ignore + set(exp_dataset.targets_task_labels) + ) # type: ignore else: # Standardize task labels structure for t_l in task_labels: @@ -483,30 +465,21 @@ def _check_and_adapt_user_stream_def( if isinstance(exp_data, LazyDatasetSequence): lazy_sequence = exp_data # type: ignore else: - lazy_sequence = \ - LazyDatasetSequence( - exp_data[0], # type: ignore - stream_length) # type: ignore + lazy_sequence = LazyDatasetSequence( + exp_data[0], stream_length # type: ignore + ) # type: ignore else: - lazy_sequence = \ - LazyDatasetSequence( - exp_data, # type: ignore - stream_length) # type: ignore + lazy_sequence = LazyDatasetSequence( + exp_data, stream_length # type: ignore + ) # type: ignore lazy_sequence.load_all_experiences() - return StreamDef( - lazy_sequence, - task_labels_list, - origin_dataset, - is_lazy) + return StreamDef(lazy_sequence, task_labels_list, origin_dataset, is_lazy) class ClassesTimelineCLScenario( - DatasetScenario[ - TCLStream, - TDatasetExperience, - TCLDataset], - ABC): + DatasetScenario[TCLStream, TDatasetExperience, TCLDataset], ABC +): @property @abstractmethod def classes_in_experience( @@ -528,10 +501,11 @@ def classes_in_experience( def get_classes_timeline( self, current_experience: int, stream: str = "train" ) -> Tuple[ - Optional[List[int]], - Optional[List[int]], - Optional[List[int]], - Optional[List[int]]]: + Optional[List[int]], + Optional[List[int]], + Optional[List[int]], + Optional[List[int]], + ]: """ Returns the classes timeline given the ID of a experience. @@ -554,9 +528,7 @@ def get_classes_timeline( experiences. Beware that each of these elements can be None when the benchmark is initialized by using a lazy generator. """ - class_set_current_exp = self.classes_in_experience[stream][ - current_experience - ] + class_set_current_exp = self.classes_in_experience[stream][current_experience] if class_set_current_exp is not None: # May be None in lazy benchmarks @@ -579,13 +551,8 @@ def get_classes_timeline( else: previous_classes = None - if ( - class_set_current_exp is not None - and prev_exps_not_none - ): - classes_seen_so_far = list( - class_set_current_exp.union(class_set_prev_exps) - ) + if class_set_current_exp is not None and prev_exps_not_none: + classes_seen_so_far = list(class_set_current_exp.union(class_set_prev_exps)) else: classes_seen_so_far = None @@ -612,9 +579,7 @@ def get_classes_timeline( ) -class DatasetStream( - SequenceCLStream[TDatasetExperience] -): +class DatasetStream(SequenceCLStream[TDatasetExperience]): """ Base class for all streams connected to a :class:`DatasetScenario`. @@ -622,21 +587,23 @@ class DatasetStream( the `drop_previous_experiences` method, which can be used to drop references to already processed datasets. 
""" + def __init__( self, name: str, benchmark: DatasetScenario, *, slice_ids: Optional[List[int]] = None, - set_stream_info: bool = True + set_stream_info: bool = True, ): self.benchmark: DatasetScenario = benchmark - + super().__init__( name=name, benchmark=benchmark, slice_ids=slice_ids, - set_stream_info=set_stream_info) + set_stream_info=set_stream_info, + ) def drop_previous_experiences(self, to_exp: int) -> None: """ @@ -673,30 +640,27 @@ def drop_previous_experiences(self, to_exp: int) -> None: ].exps_data.drop_previous_experiences(to_exp) -class FactoryBasedStream( - DatasetStream[TDatasetExperience] -): +class FactoryBasedStream(DatasetStream[TDatasetExperience]): def __init__( self, name: str, benchmark: DatasetScenario, *, slice_ids: Optional[List[int]] = None, - set_stream_info: bool = True + set_stream_info: bool = True, ): super().__init__( name=name, benchmark=benchmark, slice_ids=slice_ids, - set_stream_info=set_stream_info) + set_stream_info=set_stream_info, + ) def _full_length(self) -> int: return len(self.benchmark.stream_definitions[self.name].exps_data) def _make_experience(self, experience_idx: int) -> TDatasetExperience: - a = self.benchmark.experience_factory( - self, # type: ignore - experience_idx) + a = self.benchmark.experience_factory(self, experience_idx) # type: ignore return a @@ -707,5 +671,5 @@ def _make_experience(self, experience_idx: int) -> TDatasetExperience: "StreamDef", "DatasetScenario", "ClassesTimelineCLScenario", - "FactoryBasedStream" + "FactoryBasedStream", ] diff --git a/avalanche/benchmarks/scenarios/detection_scenario.py b/avalanche/benchmarks/scenarios/detection_scenario.py index bd33d50cd..f88ac1f4f 100644 --- a/avalanche/benchmarks/scenarios/detection_scenario.py +++ b/avalanche/benchmarks/scenarios/detection_scenario.py @@ -43,57 +43,35 @@ # --- Dataset --- # From utils: -TCLDataset = TypeVar( - 'TCLDataset', - bound='AvalancheDataset', - covariant=True) +TCLDataset = TypeVar("TCLDataset", bound="AvalancheDataset", covariant=True) # --- Scenario --- # From dataset_scenario: -TDatasetScenario = TypeVar( - 'TDatasetScenario', - bound='DatasetScenario') -TDetectionScenario = TypeVar( - 'TDetectionScenario', - bound='DetectionScenario') +TDatasetScenario = TypeVar("TDatasetScenario", bound="DatasetScenario") +TDetectionScenario = TypeVar("TDetectionScenario", bound="DetectionScenario") # --- Stream --- # Defined here: -TDetectionStream = TypeVar( - 'TDetectionStream', - bound='DetectionStream' -) +TDetectionStream = TypeVar("TDetectionStream", bound="DetectionStream") # --- Experience --- # From generic_scenario: -TDetectionExperience = TypeVar( - 'TDetectionExperience', - bound='DetectionExperience') +TDetectionExperience = TypeVar("TDetectionExperience", bound="DetectionExperience") -def _default_detection_stream_factory( - stream_name: str, - benchmark: 'DetectionScenario'): - return DetectionStream( - name=stream_name, - benchmark=benchmark - ) +def _default_detection_stream_factory(stream_name: str, benchmark: "DetectionScenario"): + return DetectionStream(name=stream_name, benchmark=benchmark) def _default_detection_experience_factory( - stream: 'DetectionStream', - experience_idx: int): - return DetectionExperience( - origin_stream=stream, - current_experience=experience_idx - ) + stream: "DetectionStream", experience_idx: int +): + return DetectionExperience(origin_stream=stream, current_experience=experience_idx) class DetectionScenario( - ClassesTimelineCLScenario[ - TDetectionStream, - TDetectionExperience, - 
DetectionDataset]): + ClassesTimelineCLScenario[TDetectionStream, TDetectionExperience, DetectionDataset] +): """ Base implementation of a Continual Learning object detection benchmark. @@ -105,14 +83,12 @@ def __init__( stream_definitions: TStreamsUserDict, n_classes: Optional[int] = None, stream_factory: Callable[ - [str, TDetectionScenario], - TDetectionStream - ] = _default_detection_stream_factory, + [str, TDetectionScenario], TDetectionStream + ] = _default_detection_stream_factory, experience_factory: Callable[ - [TDetectionStream, int], - TDetectionExperience - ] = _default_detection_experience_factory, - complete_test_set_only: bool = False + [TDetectionStream, int], TDetectionExperience + ] = _default_detection_experience_factory, + complete_test_set_only: bool = False, ): """ Creates an instance a Continual Learning object detection benchmark. @@ -138,7 +114,8 @@ def __init__( stream_definitions=stream_definitions, stream_factory=stream_factory, experience_factory=experience_factory, - complete_test_set_only=complete_test_set_only) + complete_test_set_only=complete_test_set_only, + ) self.n_classes: Optional[int] = n_classes """ @@ -150,16 +127,12 @@ def __init__( @property def classes_in_experience(self): return _LazyStreamClassesInDetectionExps(self) - + DetectionCLScenario = DetectionScenario -class DetectionStream( - FactoryBasedStream[ - TDetectionExperience - ] -): +class DetectionStream(FactoryBasedStream[TDetectionExperience]): def __init__( self, name: str, @@ -173,14 +146,11 @@ def __init__( name=name, benchmark=benchmark, slice_ids=slice_ids, - set_stream_info=set_stream_info) + set_stream_info=set_stream_info, + ) -class DetectionExperience( - AbstractClassTimelineExperience[ - DetectionDataset - ] -): +class DetectionExperience(AbstractClassTimelineExperience[DetectionDataset]): """ Definition of a learning experience based on a :class:`DetectionScenario` instance. 
@@ -192,10 +162,8 @@ class DetectionExperience( def __init__( self: TDetectionExperience, - origin_stream: DetectionStream[ - TDetectionExperience - ], - current_experience: int + origin_stream: DetectionStream[TDetectionExperience], + current_experience: int, ): """ Creates an instance of an experience given the stream from this @@ -208,11 +176,9 @@ def __init__( self._benchmark: DetectionScenario = origin_stream.benchmark - dataset: DetectionDataset = ( - origin_stream.benchmark.stream_definitions[ - origin_stream.name - ].exps_data[current_experience] - ) + dataset: DetectionDataset = origin_stream.benchmark.stream_definitions[ + origin_stream.name + ].exps_data[current_experience] ( classes_in_this_exp, @@ -236,9 +202,7 @@ def __init__( @property # type: ignore[override] def benchmark(self) -> DetectionScenario: bench = self._benchmark - DetectionExperience._check_unset_attribute( - 'benchmark', bench - ) + DetectionExperience._check_unset_attribute("benchmark", bench) return bench @benchmark.setter @@ -257,13 +221,10 @@ def task_labels(self) -> List[int]: GenericDetectionExperience = DetectionExperience -class _LazyStreamClassesInDetectionExps( - Mapping[str, - Sequence[Optional[Set[int]]]]): +class _LazyStreamClassesInDetectionExps(Mapping[str, Sequence[Optional[Set[int]]]]): def __init__(self, benchmark: DetectionScenario): self._benchmark = benchmark - self._default_lcie = _LazyClassesInDetectionExps( - benchmark, stream="train") + self._default_lcie = _LazyClassesInDetectionExps(benchmark, stream="train") def __len__(self): return len(self._benchmark.stream_definitions) @@ -296,11 +257,11 @@ def __init__(self, benchmark: DetectionScenario, stream: str = "train"): def __len__(self): return len(self._benchmark.streams[self._stream]) - + @overload def __getitem__(self, exp_id: int) -> Optional[Set[int]]: ... - + @overload def __getitem__(self, exp_id: slice) -> Tuple[Optional[Set[int]], ...]: ... 
@@ -308,17 +269,12 @@ def __getitem__(self, exp_id: slice) -> Tuple[Optional[Set[int]], ...]: def __getitem__(self, exp_id: Union[int, slice]) -> LazyClassesInExpsRet: indexing_collate = _LazyClassesInDetectionExps._slice_collate result = manage_advanced_indexing( - exp_id, - self._get_single_exp_classes, - len(self), - indexing_collate + exp_id, self._get_single_exp_classes, len(self), indexing_collate ) return result def __str__(self): - return ( - "[" + ", ".join([str(self[idx]) for idx in range(len(self))]) + "]" - ) + return "[" + ", ".join([str(self[idx]) for idx in range(len(self))]) + "]" def _get_single_exp_classes(self, exp_id) -> Optional[Set[int]]: b = self._benchmark.stream_definitions[self._stream] @@ -330,19 +286,20 @@ def _get_single_exp_classes(self, exp_id) -> Optional[Set[int]]: classes_in_exp = set() for target in targets: - for label in target['labels']: + for label in target["labels"]: classes_in_exp.add(int(label)) return classes_in_exp @staticmethod - def _slice_collate(classes_in_exps: Iterable[Optional[Iterable[int]]]) -> \ - Optional[Tuple[Set[int], ...]]: + def _slice_collate( + classes_in_exps: Iterable[Optional[Iterable[int]]], + ) -> Optional[Tuple[Set[int], ...]]: result: List[Set[int]] = [] for x in classes_in_exps: if x is None: return None result.append(set(x)) - + return tuple(result) @@ -351,5 +308,5 @@ def _slice_collate(classes_in_exps: Iterable[Optional[Iterable[int]]]) -> \ "DetectionCLScenario", "DetectionStream", "GenericDetectionExperience", - "DetectionExperience" + "DetectionExperience", ] diff --git a/avalanche/benchmarks/scenarios/exmodel_scenario.py b/avalanche/benchmarks/scenarios/exmodel_scenario.py index 6e7d7c5c5..39445c6d5 100644 --- a/avalanche/benchmarks/scenarios/exmodel_scenario.py +++ b/avalanche/benchmarks/scenarios/exmodel_scenario.py @@ -14,7 +14,7 @@ from . import CLScenario, CLExperience, CLStream -TExModelExperience = TypeVar('TExModelExperience', bound='ExModelExperience') +TExModelExperience = TypeVar("TExModelExperience", bound="ExModelExperience") class ExModelExperience(CLExperience): @@ -32,8 +32,7 @@ def __init__( classes_in_this_experience=None, ): super().__init__( - current_experience=current_experience, - origin_stream=origin_stream + current_experience=current_experience, origin_stream=origin_stream ) self.expert_model: Module = expert_model self.classes_in_this_experience = classes_in_this_experience @@ -53,11 +52,7 @@ class ExModelCLScenario(CLScenario[CLStream[TExModelExperience]]): https://arxiv.org/abs/2112.06511 """ - def __init__( - self, - original_benchmark: CLScenario, - expert_models: List[Module] - ): + def __init__(self, original_benchmark: CLScenario, expert_models: List[Module]): """Init. :param original_benchmark: a reference to the original benchmark @@ -67,26 +62,23 @@ def __init__( `original_benchmark`. 
""" expert_models_l: List[TExModelExperience] = [] - for i, (m, e) in enumerate(zip( - expert_models, - original_benchmark.train_stream)): # type: ignore + for i, (m, e) in enumerate( + zip(expert_models, original_benchmark.train_stream) + ): # type: ignore cine = e.classes_in_this_experience expert_models_l.append( ExModelExperience( expert_model=m, current_experience=i, origin_stream=None, # type: ignore - classes_in_this_experience=cine) + classes_in_this_experience=cine, ) - - expert_stream: CLStream[TExModelExperience] = \ - CLStream( - name="expert_models", - exps_iter=expert_models_l, - benchmark=self ) - streams: List[CLStream[TExModelExperience]] = \ - [expert_stream] + + expert_stream: CLStream[TExModelExperience] = CLStream( + name="expert_models", exps_iter=expert_models_l, benchmark=self + ) + streams: List[CLStream[TExModelExperience]] = [expert_stream] self.original_benchmark = original_benchmark # for s in original_benchmark.streams.values(): @@ -96,7 +88,4 @@ def __init__( super().__init__(streams) -__all__ = [ - 'ExModelExperience', - 'ExModelCLScenario' -] +__all__ = ["ExModelExperience", "ExModelCLScenario"] diff --git a/avalanche/benchmarks/scenarios/generic_benchmark_creation.py b/avalanche/benchmarks/scenarios/generic_benchmark_creation.py index f29aab947..613ba205d 100644 --- a/avalanche/benchmarks/scenarios/generic_benchmark_creation.py +++ b/avalanche/benchmarks/scenarios/generic_benchmark_creation.py @@ -47,15 +47,13 @@ def create_multi_dataset_generic_benchmark( train_datasets: Sequence[SupportedDataset], test_datasets: Sequence[SupportedDataset], *, - other_streams_datasets: Optional[ - Mapping[str, Sequence[SupportedDataset]]] = None, + other_streams_datasets: Optional[Mapping[str, Sequence[SupportedDataset]]] = None, complete_test_set_only: bool = False, train_transform=None, train_target_transform=None, eval_transform=None, eval_target_transform=None, - other_streams_transforms: Optional[ - Mapping[str, Tuple[Any, Any]]] = None + other_streams_transforms: Optional[Mapping[str, Tuple[Any, Any]]] = None ) -> GenericCLScenario: """ Creates a benchmark instance given a list of datasets. Each dataset will be @@ -74,7 +72,7 @@ def create_multi_dataset_generic_benchmark( :param train_datasets: A list of training datasets. :param test_datasets: A list of test datasets. - :param other_streams_datasets: A dictionary describing the content of + :param other_streams_datasets: A dictionary describing the content of custom streams. Keys must be valid stream names (letters and numbers, not starting with a number) while the value must be a list of dataset. 
If this dictionary contains the definition for "train" or "test" @@ -140,8 +138,7 @@ def create_multi_dataset_generic_benchmark( "complete_test_set_only is True" ) - stream_definitions: Dict[str, Tuple[Iterable[ClassificationDataset]]] = \ - dict() + stream_definitions: Dict[str, Tuple[Iterable[ClassificationDataset]]] = dict() for stream_name, dataset_list in input_streams.items(): initial_transform_group = "train" @@ -328,15 +325,16 @@ def create_lazy_generic_benchmark( "Test stream must contain one experience when" "complete_test_set_only is True" ) - + stream_definitions: Dict[ - str, Tuple[ + str, + Tuple[ # Dataset generator + stream length Tuple[Generator[ClassificationDataset, None, None], int], # Task label(s) for each experience - Iterable[Union[int, Iterable[int]]] - ] - ] = dict() + Iterable[Union[int, Iterable[int]]], + ], + ] = dict() for stream_name, ( generator, @@ -369,16 +367,14 @@ def create_generic_benchmark_from_filelists( train_file_lists: Sequence[Union[str, Path]], test_file_lists: Sequence[Union[str, Path]], *, - other_streams_file_lists: Optional[ - Dict[str, Sequence[Union[str, Path]]]] = None, + other_streams_file_lists: Optional[Dict[str, Sequence[Union[str, Path]]]] = None, task_labels: Sequence[int], complete_test_set_only: bool = False, train_transform=None, train_target_transform=None, eval_transform=None, eval_target_transform=None, - other_streams_transforms: Optional[ - Dict[str, Tuple[Any, Any]]] = None + other_streams_transforms: Optional[Dict[str, Tuple[Any, Any]]] = None ) -> GenericCLScenario: """ Creates a benchmark instance given a list of filelists and the respective @@ -466,7 +462,6 @@ def create_generic_benchmark_from_filelists( for stream_name, file_lists in input_streams.items(): stream_datasets: List[ClassificationDataset] = [] for exp_id, f_list in enumerate(file_lists): - f_list_dataset = FilelistDataset(root, f_list) stream_datasets.append( make_classification_dataset( @@ -498,9 +493,9 @@ def create_generic_benchmark_from_paths( train_lists_of_files: Sequence[Sequence[FileAndLabel]], test_lists_of_files: Sequence[Sequence[FileAndLabel]], *, - other_streams_lists_of_files: Optional[Dict[ - str, Sequence[Sequence[FileAndLabel]] - ]] = None, + other_streams_lists_of_files: Optional[ + Dict[str, Sequence[Sequence[FileAndLabel]]] + ] = None, task_labels: Sequence[int], complete_test_set_only: bool = False, train_transform=None, @@ -594,8 +589,9 @@ def create_generic_benchmark_from_paths( stream_datasets: List[ClassificationDataset] = [] for exp_id, list_of_files in enumerate(lists_of_files): common_root, exp_paths_list = common_paths_root(list_of_files) - paths_dataset: PathsDataset[Any, int] = \ - PathsDataset(common_root, exp_paths_list) + paths_dataset: PathsDataset[Any, int] = PathsDataset( + common_root, exp_paths_list + ) stream_datasets.append( make_classification_dataset( paths_dataset, task_labels=task_labels[exp_id] diff --git a/avalanche/benchmarks/scenarios/generic_scenario.py b/avalanche/benchmarks/scenarios/generic_scenario.py index c92feebb4..dd7fa06fe 100644 --- a/avalanche/benchmarks/scenarios/generic_scenario.py +++ b/avalanche/benchmarks/scenarios/generic_scenario.py @@ -38,48 +38,39 @@ slice_alike_object_to_indices, ) -T = TypeVar('T') -TCov = TypeVar('TCov', covariant=True) -E = TypeVar('E') +T = TypeVar("T") +TCov = TypeVar("TCov", covariant=True) +E = TypeVar("E") # Dataset TCLDataset = TypeVar( - 'TCLDataset', - bound='AvalancheDataset') # Implementation, defined in utils + "TCLDataset", bound="AvalancheDataset" +) # 
Implementation, defined in utils TCLDatasetCov = TypeVar( - 'TCLDatasetCov', - bound='AvalancheDataset', - covariant=True) # Implementation, defined in utils + "TCLDatasetCov", bound="AvalancheDataset", covariant=True +) # Implementation, defined in utils # Scenario -TCLScenario = TypeVar( - 'TCLScenario', - bound='CLScenario') # Implementation, defined here +TCLScenario = TypeVar("TCLScenario", bound="CLScenario") # Implementation, defined here TCLScenarioCov = TypeVar( - 'TCLScenarioCov', - bound="CLScenario", - covariant=True) # Implementation, defined here + "TCLScenarioCov", bound="CLScenario", covariant=True +) # Implementation, defined here # Stream -TCLStream = TypeVar( - 'TCLStream', - bound='CLStream') # Implementation, defined here +TCLStream = TypeVar("TCLStream", bound="CLStream") # Implementation, defined here TCLStreamCov = TypeVar( - 'TCLStreamCov', - bound='CLStream', - covariant=True) # Implementation, defined here -TSequenceCLStream = TypeVar( - 'TSequenceCLStream', - bound='SequenceCLStream') + "TCLStreamCov", bound="CLStream", covariant=True +) # Implementation, defined here +TSequenceCLStream = TypeVar("TSequenceCLStream", bound="SequenceCLStream") # Experience TCLExperience = TypeVar( - 'TCLExperience', - bound='CLExperience') # Implementation, defined here + "TCLExperience", bound="CLExperience" +) # Implementation, defined here TDatasetExperience = TypeVar( - 'TDatasetExperience', - bound='DatasetExperience') # Implementation, defined here + "TDatasetExperience", bound="DatasetExperience" +) # Implementation, defined here class MaskedAttributeError(ValueError): @@ -146,14 +137,15 @@ class CLExperience: """ def __init__( - self: TCLExperience, - current_experience: int, - origin_stream: 'CLStream[TCLExperience]'): + self: TCLExperience, + current_experience: int, + origin_stream: "CLStream[TCLExperience]", + ): super().__init__() self._current_experience: int = current_experience """Experience identifier (the position in the origin_stream).""" - self._origin_stream: 'CLStream[TCLExperience]' = origin_stream + self._origin_stream: "CLStream[TCLExperience]" = origin_stream """Stream containing the experience.""" self._exp_mode: ExperienceMode = ExperienceMode.LOGGING @@ -162,12 +154,12 @@ def __init__( self._unmask_context_depth = 0 - self._as_attributes('_current_experience') + self._as_attributes("_current_experience") @property def current_experience(self) -> int: curr_exp = self._current_experience - CLExperience._check_unset_attribute('current_experience', curr_exp) + CLExperience._check_unset_attribute("current_experience", curr_exp) return curr_exp @current_experience.setter @@ -175,13 +167,13 @@ def current_experience(self, id: int): self._current_experience = id @property - def origin_stream(self: TCLExperience) -> 'CLStream[TCLExperience]': + def origin_stream(self: TCLExperience) -> "CLStream[TCLExperience]": orig_stream = self._origin_stream - CLExperience._check_unset_attribute('origin_stream', orig_stream) + CLExperience._check_unset_attribute("origin_stream", orig_stream) return orig_stream @origin_stream.setter - def origin_stream(self: TCLExperience, stream: 'CLStream[TCLExperience]'): + def origin_stream(self: TCLExperience, stream: "CLStream[TCLExperience]"): self._origin_stream = stream @contextmanager @@ -214,11 +206,7 @@ def __getattribute__(self, item): elif self._exp_mode == ExperienceMode.LOGGING: return v.value else: - mode = ( - "train" - if self._exp_mode == ExperienceMode.TRAIN - else "eval" - ) + mode = "train" if self._exp_mode == 
ExperienceMode.TRAIN else "eval" se = ( f"Attribute {item} is not available for the experience " f"in {mode} mode." @@ -226,7 +214,7 @@ def __getattribute__(self, item): raise MaskedAttributeError(se) else: return v - + def __setattr__(self, name, value): try: v = self.__dict__[name] @@ -241,11 +229,7 @@ def __setattr__(self, name, value): else: return super().__setattr__(name, value) - def _as_attributes( - self, - *fields: str, - use_in_train=False, - use_in_eval=False): + def _as_attributes(self, *fields: str, use_in_train=False, use_in_eval=False): """ Internal method used to transform plain object fields to ExperienceAttribute(s). @@ -259,23 +243,25 @@ def _as_attributes( if isinstance(v, ExperienceAttribute): if v.use_in_train != use_in_train: raise RuntimeError( - f'Experience attribute {field} redefined with ' - f'incongruent use_in_train field. Was ' - f'{v.use_in_train}, overridden with {use_in_train}.' + f"Experience attribute {field} redefined with " + f"incongruent use_in_train field. Was " + f"{v.use_in_train}, overridden with {use_in_train}." ) - + if v.use_in_eval != use_in_eval: raise RuntimeError( - f'Experience attribute {field} redefined with ' - f'incongruent use_in_eval field. Was ' - f'{v.use_in_eval}, overridden with {use_in_train}.' + f"Experience attribute {field} redefined with " + f"incongruent use_in_eval field. Was " + f"{v.use_in_eval}, overridden with {use_in_train}." ) else: - setattr(self, field, ExperienceAttribute( - value=v, - use_in_train=use_in_train, - use_in_eval=use_in_eval - )) + setattr( + self, + field, + ExperienceAttribute( + value=v, use_in_train=use_in_train, use_in_eval=use_in_eval + ), + ) def train(self: TCLExperience) -> TCLExperience: """Return training experience. @@ -309,15 +295,14 @@ def logging(self: TCLExperience) -> TCLExperience: @staticmethod def _check_unset_attribute(attribute_name: str, attribute_value: Any): - assert attribute_value is not None, f'Attribute {attribute_name} ' + \ - 'not set. This is an unexpected and usually liked to errors ' + \ - 'in the implementation of the stream\'s experience factory.' + assert attribute_value is not None, ( + f"Attribute {attribute_name} " + + "not set. This is an unexpected and usually liked to errors " + + "in the implementation of the stream's experience factory." + ) -class DatasetExperience( - CLExperience, - Generic[TCLDataset], - ABC): +class DatasetExperience(CLExperience, Generic[TCLDataset], ABC): """Base Experience. 
Experiences have an index which track the experience's position @@ -325,36 +310,33 @@ class DatasetExperience( """ def __init__( - self: TDatasetExperience, - current_experience: int, - origin_stream: 'CLStream[TDatasetExperience]', - benchmark: 'CLScenario', - dataset: TCLDataset): + self: TDatasetExperience, + current_experience: int, + origin_stream: "CLStream[TDatasetExperience]", + benchmark: "CLScenario", + dataset: TCLDataset, + ): super().__init__( - current_experience=current_experience, - origin_stream=origin_stream) + current_experience=current_experience, origin_stream=origin_stream + ) self._benchmark: CLScenario = benchmark self._dataset: TCLDataset = dataset - + @property - def benchmark(self) -> 'CLScenario': + def benchmark(self) -> "CLScenario": bench = self._benchmark - CLExperience._check_unset_attribute( - 'benchmark', bench - ) + CLExperience._check_unset_attribute("benchmark", bench) return bench @benchmark.setter - def benchmark(self, bench: 'CLScenario'): + def benchmark(self, bench: "CLScenario"): self._benchmark = bench @property def dataset(self) -> TCLDataset: data = self._dataset - CLExperience._check_unset_attribute( - 'dataset', data - ) + CLExperience._check_unset_attribute("dataset", data) return data @dataset.setter @@ -362,7 +344,7 @@ def dataset(self, d: TCLDataset): self._dataset = d @property - def scenario(self) -> 'CLScenario': + def scenario(self) -> "CLScenario": """This property is DEPRECATED.""" warnings.warn( "Using self.scenario is deprecated in Experience. " @@ -391,24 +373,19 @@ def task_label(self) -> int: @property def task_labels(self) -> List[int]: - task_labels = getattr( - self.dataset, - 'targets_task_labels', - None) - - assert task_labels is not None, \ - ('In its default implementation, DatasetExperience will use the ' - 'the dataset `targets_task_labels` field to compute the ' - 'content of the `task_label(s)` field. The given does not ' - 'contain such field.') + task_labels = getattr(self.dataset, "targets_task_labels", None) + + assert task_labels is not None, ( + "In its default implementation, DatasetExperience will use the " + "the dataset `targets_task_labels` field to compute the " + "content of the `task_label(s)` field. The given does not " + "contain such field." + ) return list(set(task_labels)) -class AbstractClassTimelineExperience( - DatasetExperience[TCLDataset], - ABC -): +class AbstractClassTimelineExperience(DatasetExperience[TCLDataset], ABC): """ Definition of a learning experience. A learning experience contains a set of patterns which has become available at a particular time instant. The @@ -423,7 +400,7 @@ class AbstractClassTimelineExperience( def __init__( self: TDatasetExperience, - origin_stream: 'CLStream[TDatasetExperience]', + origin_stream: "CLStream[TDatasetExperience]", dataset: TCLDataset, current_experience: int, classes_in_this_exp: Optional[Sequence[int]], @@ -445,27 +422,23 @@ def __init__( :param future_classes: The list of classes of next experiences. 
""" - self.classes_in_this_experience: Optional[Sequence[int]] = \ - classes_in_this_exp + self.classes_in_this_experience: Optional[Sequence[int]] = classes_in_this_exp """ The list of classes in this experience """ - self.previous_classes: Optional[Sequence[int]] = \ - previous_classes + self.previous_classes: Optional[Sequence[int]] = previous_classes """ The list of classes in previous experiences """ - self.classes_seen_so_far: Optional[Sequence[int]] = \ - classes_seen_so_far + self.classes_seen_so_far: Optional[Sequence[int]] = classes_seen_so_far """ List of classes of current and previous experiences """ - self.future_classes: Optional[Sequence[int]] = \ - future_classes + self.future_classes: Optional[Sequence[int]] = future_classes """ The list of classes of next experiences """ super().__init__( current_experience=current_experience, origin_stream=origin_stream, benchmark=origin_stream.benchmark, # type: ignore - dataset=dataset + dataset=dataset, ) @@ -473,13 +446,13 @@ class GeneratorMemo(Generic[T]): def __init__(self, generator: Generator[T, None, None]): self._generator: Optional[Generator[T, None, None]] = generator self._already_generated: List[T] = [] - + def __iter__(self): idx = 0 while True: if idx < len(self._already_generated): yield self._already_generated[idx] - else: + else: if self._generator is None: break try: @@ -490,7 +463,7 @@ def __iter__(self): self._already_generated.append(next_item) yield next_item idx += 1 - + class CLStream(Generic[TCLExperience]): """A CL stream is a named iterator of experiences. @@ -506,16 +479,16 @@ def __init__( self: TCLStream, name: str, exps_iter: Iterable[TCLExperience], - benchmark: 'CLScenario[TCLStream]', + benchmark: "CLScenario[TCLStream]", set_stream_info: bool = True, - ): + ): """ Creates an instance of a experience stream. :param name: The name of the stream. :param exps_iter: The iterable from which experiences will be obtained. :param benchmark: The benchmarks defining this stream. - :param set_stream_info: If True, will set the `current_experience` and + :param set_stream_info: If True, will set the `current_experience` and `origin_stream` fields on experience objects before returning them. Defaults to True. """ @@ -529,7 +502,7 @@ def __init__( The iterable from which experiences will be obtained. """ - self.benchmark: 'CLScenario[TCLStream]' = benchmark + self.benchmark: "CLScenario[TCLStream]" = benchmark """ A reference to the benchmark. """ @@ -543,7 +516,7 @@ def __init__( if isinstance(self.exps_iter, GeneratorType): # Prevent issues when iterating the stream more than once self.exps_iter = GeneratorMemo(self.exps_iter) - + def __iter__(self) -> Iterator[TCLExperience]: exp: TCLExperience for i, exp in enumerate(self.exps_iter): @@ -563,14 +536,15 @@ def __init__( self: TCLStream, name: str, exps_iter: Iterable[TCLExperience], - benchmark: 'CLScenario[TCLStream]', + benchmark: "CLScenario[TCLStream]", set_stream_info: bool = True, ): super().__init__( name=name, exps_iter=exps_iter, benchmark=benchmark, - set_stream_info=set_stream_info) + set_stream_info=set_stream_info, + ) @abstractmethod def __len__(self) -> int: @@ -582,26 +556,25 @@ def __len__(self) -> int: pass -class SequenceCLStream( - SizedCLStream[TCLExperience], - Sequence[TCLExperience], - ABC): +class SequenceCLStream(SizedCLStream[TCLExperience], Sequence[TCLExperience], ABC): """ Defines a stream that behaves like a :class:`Sequence`. 
- + This is the most common base class for streams in Avalanche as it implements the basic indexing and slicing functionalities for streams. """ + def __init__( self, name: str, - benchmark: 'CLScenario', + benchmark: "CLScenario", set_stream_info: bool = True, - slice_ids: Optional[Iterable[int]] = None + slice_ids: Optional[Iterable[int]] = None, ): - self.slice_ids: Optional[List[int]] = \ + self.slice_ids: Optional[List[int]] = ( list(slice_ids) if slice_ids is not None else None + ) """ Describes which experiences are contained in the current stream slice. Can be None, which means that this object is the original stream. @@ -611,7 +584,8 @@ def __init__( name=name, exps_iter=self, benchmark=benchmark, - set_stream_info=set_stream_info) + set_stream_info=set_stream_info, + ) def __iter__(self) -> Iterator[TCLExperience]: exp: TCLExperience @@ -624,20 +598,18 @@ def __getitem__(self, item: int) -> TCLExperience: ... @overload - def __getitem__(self: TSequenceCLStream, item: slice) -> \ - TSequenceCLStream: + def __getitem__(self: TSequenceCLStream, item: slice) -> TSequenceCLStream: ... - + @final - def __getitem__(self: TSequenceCLStream, item: Union[int, slice]) -> \ - Union[TSequenceCLStream, TCLExperience]: + def __getitem__( + self: TSequenceCLStream, item: Union[int, slice] + ) -> Union[TSequenceCLStream, TCLExperience]: # This check allows CL streams slicing if isinstance(item, (int, np.integer)): item = int(item) if item >= len(self): - raise IndexError( - "Experience index out of bounds" + str(int(item)) - ) + raise IndexError("Experience index out of bounds" + str(int(item))) curr_exp = item if self.slice_ids is None else self.slice_ids[item] @@ -661,8 +633,9 @@ def __len__(self) -> int: else: return self._full_length() - def _forward_slice(self, *slices: Union[None, slice, Iterable[int]]) -> \ - Optional[Iterable[int]]: + def _forward_slice( + self, *slices: Union[None, slice, Iterable[int]] + ) -> Optional[Iterable[int]]: any_slice = False indices = list(range(self._full_length())) for sl in slices: @@ -671,8 +644,7 @@ def _forward_slice(self, *slices: Union[None, slice, Iterable[int]]) -> \ any_slice = True slice_indices = slice_alike_object_to_indices( - slice_alike_object=sl, - max_length=len(indices) + slice_alike_object=sl, max_length=len(indices) ) new_indices = [indices[x] for x in slice_indices] @@ -692,8 +664,7 @@ def _full_length(self) -> int: pass @abstractmethod - def _make_experience(self, experience_idx: int) -> \ - TCLExperience: + def _make_experience(self, experience_idx: int) -> TCLExperience: """ Obtain the experience at the given position in the originating stream (that is, the non-sliced stream). @@ -701,18 +672,19 @@ def _make_experience(self, experience_idx: int) -> \ pass def _make_slice( - self: TSequenceCLStream, - experience_slice: Optional[Iterable[int]]) -> TSequenceCLStream: + self: TSequenceCLStream, experience_slice: Optional[Iterable[int]] + ) -> TSequenceCLStream: """ Obtain a sub-stream given a list of indices of the experiences to include. - + Experience ids are the ones of the originating stream (that is, the non-sliced stream). 
""" stream_copy = copy(self) - stream_copy.slice_ids = list(experience_slice) if \ - experience_slice is not None else None + stream_copy.slice_ids = ( + list(experience_slice) if experience_slice is not None else None + ) return stream_copy @@ -728,9 +700,9 @@ def __init__( self, name: str, exps: Sequence[TCLExperience], - benchmark: 'CLScenario', + benchmark: "CLScenario", set_stream_info: bool = True, - slice_ids: Optional[Iterable[int]] = None + slice_ids: Optional[Iterable[int]] = None, ): """Create a CL stream given a list of experiences. :param name: name of the stream. @@ -747,18 +719,20 @@ def __init__( name=name, benchmark=benchmark, set_stream_info=set_stream_info, - slice_ids=slice_ids) + slice_ids=slice_ids, + ) if self.set_stream_info: - slice_ids_enum = self.slice_ids if \ - self.slice_ids is not None else range(len(self._exps)) + slice_ids_enum = ( + self.slice_ids if self.slice_ids is not None else range(len(self._exps)) + ) for i in slice_ids_enum: exp = self._exps[i] exp.current_experience = i exp.origin_stream = self # type: ignore - + self.set_stream_info = False - + @property def exps(self) -> Tuple[TCLExperience, ...]: return tuple(self.exps_iter) @@ -767,7 +741,7 @@ def _full_length(self) -> int: return len(self._exps) def _make_experience(self, experience_idx: int) -> TCLExperience: - return self._exps[experience_idx] + return self._exps[experience_idx] class CLScenario(Generic[TCLStream]): @@ -812,5 +786,5 @@ def streams(self): "CLStream", "SequenceCLStream", "EagerCLStream", - "CLScenario" + "CLScenario", ] diff --git a/avalanche/benchmarks/scenarios/generic_scenario_creation.py b/avalanche/benchmarks/scenarios/generic_scenario_creation.py index 410dda14f..29ee97daf 100644 --- a/avalanche/benchmarks/scenarios/generic_scenario_creation.py +++ b/avalanche/benchmarks/scenarios/generic_scenario_creation.py @@ -105,14 +105,12 @@ def create_multi_dataset_generic_scenario( if complete_test_set_only: if len(test_dataset_list) != 1: raise ValueError( - "Test must contain 1 element when" - "complete_test_set_only is True" + "Test must contain 1 element when" "complete_test_set_only is True" ) else: if len(test_dataset_list) != len(train_dataset_list): raise ValueError( - "Train and test lists must define the same " - " amount of experiences" + "Train and test lists must define the same " " amount of experiences" ) train_t_labels = [] @@ -120,14 +118,14 @@ def create_multi_dataset_generic_scenario( for dataset_idx, dataset in enumerate(train_dataset_list): dataset = train_dataset_list[dataset_idx] train_t_labels.append(task_labels[dataset_idx]) - train_dataset_list_avl.append(make_classification_dataset( - dataset, - task_labels=ConstantSequence( - task_labels[dataset_idx], len(dataset) - ), - transform_groups=transform_groups, - initial_transform_group="train", - )) + train_dataset_list_avl.append( + make_classification_dataset( + dataset, + task_labels=ConstantSequence(task_labels[dataset_idx], len(dataset)), + transform_groups=transform_groups, + initial_transform_group="train", + ) + ) test_t_labels = [] test_dataset_list_avl = [] @@ -140,12 +138,14 @@ def create_multi_dataset_generic_scenario( test_t_labels.append(test_t_label) - test_dataset_list_avl.append(make_classification_dataset( - dataset, - task_labels=ConstantSequence(test_t_label, len(dataset)), - transform_groups=transform_groups, - initial_transform_group="eval", - )) + test_dataset_list_avl.append( + make_classification_dataset( + dataset, + task_labels=ConstantSequence(test_t_label, len(dataset)), + 
transform_groups=transform_groups, + initial_transform_group="eval", + ) + ) return GenericCLScenario( stream_definitions={ @@ -251,9 +251,7 @@ def create_generic_scenario_from_filelists( def create_generic_scenario_from_paths( train_list_of_files: Sequence[Sequence[FileAndLabel]], - test_list_of_files: Union[ - Sequence[FileAndLabel], Sequence[Sequence[FileAndLabel]] - ], + test_list_of_files: Union[Sequence[FileAndLabel], Sequence[Sequence[FileAndLabel]]], task_labels: Sequence[int], complete_test_set_only: bool = False, train_transform=None, @@ -434,8 +432,7 @@ def create_generic_scenario_from_tensor_lists( ] test_datasets = [ - make_tensor_classification_dataset(*exp_tensors) - for exp_tensors in test_tensors + make_tensor_classification_dataset(*exp_tensors) for exp_tensors in test_tensors ] return create_multi_dataset_generic_scenario( @@ -533,14 +530,11 @@ def create_generic_scenario_from_tensors( if len(train_data_x) != len(train_data_y): raise ValueError( - "train_data_x and train_data_y must contain" - " the same amount of elements" + "train_data_x and train_data_y must contain" " the same amount of elements" ) if type(test_data_x) != type(test_data_y): - raise ValueError( - "test_data_x and test_data_y must be of" " the same type" - ) + raise ValueError("test_data_x and test_data_y must be of" " the same type") if isinstance(test_data_x, Tensor): test_data_x = [test_data_x] diff --git a/avalanche/benchmarks/scenarios/lazy_dataset_sequence.py b/avalanche/benchmarks/scenarios/lazy_dataset_sequence.py index e6221978a..2165aeaf7 100644 --- a/avalanche/benchmarks/scenarios/lazy_dataset_sequence.py +++ b/avalanche/benchmarks/scenarios/lazy_dataset_sequence.py @@ -45,9 +45,7 @@ def __init__( experience_generator: Iterable[TCLDataset], stream_length: int, ): - self._exp_source: Optional[ - Iterable[TCLDataset] - ] = experience_generator + self._exp_source: Optional[Iterable[TCLDataset]] = experience_generator """ The source of the experiences stream, as an Iterable. @@ -64,9 +62,7 @@ def __init__( The ID of the next experience that will be generated. """ - self._loaded_experiences: Dict[ - int, TCLDataset - ] = dict() + self._loaded_experiences: Dict[int, TCLDataset] = dict() """ The sequence of experiences obtained from the generator. """ @@ -76,9 +72,7 @@ def __init__( The length of the stream. """ try: - self._exp_generator: Optional[ - Iterator[TCLDataset] - ] = iter(self._exp_source) + self._exp_generator: Optional[Iterator[TCLDataset]] = iter(self._exp_source) except TypeError as e: if callable(self._exp_source): # https://stackoverflow.com/a/17092033 @@ -95,9 +89,9 @@ def __init__( This field is None when if all the experiences have been loaded. """ - self.targets_field_sequence: Dict[ - int, Optional[Sequence] - ] = defaultdict(lambda: None) + self.targets_field_sequence: Dict[int, Optional[Sequence]] = defaultdict( + lambda: None + ) """ A dictionary mapping each experience to its `targets` field. @@ -131,8 +125,9 @@ def __getitem__(self, exp_idx: int) -> TCLDataset: def __getitem__(self, exp_idx: slice) -> Sequence[TCLDataset]: ... - def __getitem__(self, exp_idx: Union[int, slice]) -> \ - Union[TCLDataset, Sequence[TCLDataset]]: + def __getitem__( + self, exp_idx: Union[int, slice] + ) -> Union[TCLDataset, Sequence[TCLDataset]]: """ Gets the dataset associated to an experience. 
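The lazy-stream machinery touched just above builds experience datasets on demand from a generator and caches what has already been produced. A minimal sketch of that behaviour, under stated assumptions: the class name LazyDatasetSequence and the helper make_tensor_classification_dataset match the modules shown in this diff, while the toy generator, tensor shapes and the two-experience stream are invented for illustration.

import torch
from avalanche.benchmarks.utils.classification_dataset import (
    make_tensor_classification_dataset,
)
from avalanche.benchmarks.scenarios.lazy_dataset_sequence import LazyDatasetSequence

def experience_generator():
    # Every yielded item must be an AvalancheDataset subclass,
    # as enforced by load_all_experiences().
    for _ in range(2):
        x = torch.rand(10, 3, 32, 32)
        y = torch.randint(0, 5, (10,))
        # targets=1 points at the label tensor; task label 0 for all samples.
        yield make_tensor_classification_dataset(x, y, targets=1, task_labels=0)

seq = LazyDatasetSequence(experience_generator(), stream_length=2)
first = seq[0]                            # generates experiences lazily, up to index 0
print(seq.get_experience_if_loaded(1))    # None: experience 1 has not been generated yet
seq.load_all_experiences()                # exhausts the generator for the whole stream
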
@@ -141,19 +136,17 @@ def __getitem__(self, exp_idx: Union[int, slice]) -> \ """ # A lot of unuseful lines needed for MyPy -_- indexing_collate: Callable[ - [Iterable[TCLDataset]], - Sequence[TCLDataset]] = lambda x: list(x) + [Iterable[TCLDataset]], Sequence[TCLDataset] + ] = lambda x: list(x) result = manage_advanced_indexing( exp_idx, self._get_experience_and_load_if_needed, len(self), - indexing_collate + indexing_collate, ) return result - def _get_experience_and_load_if_needed( - self, exp_idx: int - ) -> TCLDataset: + def _get_experience_and_load_if_needed(self, exp_idx: int) -> TCLDataset: """ Gets the dataset associated to an experience. @@ -166,9 +159,7 @@ def _get_experience_and_load_if_needed( raise RuntimeError(f"Experience {exp_idx} has been dropped") return self._loaded_experiences[exp_idx] - def get_experience_if_loaded( - self, exp_idx: int - ) -> Optional[TCLDataset]: + def get_experience_if_loaded(self, exp_idx: int) -> Optional[TCLDataset]: """ Gets the dataset associated to an experience. @@ -181,9 +172,7 @@ def get_experience_if_loaded( """ exp_idx = int(exp_idx) # Handle single element tensors if exp_idx >= len(self): - raise IndexError( - f"The stream doesn't contain {exp_idx+1}" f"experiences" - ) + raise IndexError(f"The stream doesn't contain {exp_idx+1}" f"experiences") return self._loaded_experiences.get(exp_idx, None) @@ -230,9 +219,7 @@ def load_all_experiences(self, to_exp: Optional[int] = None) -> None: to_exp = int(to_exp) # Handle single element tensors if to_exp >= len(self): - raise IndexError( - f"The stream doesn't contain {to_exp+1}" f"experiences" - ) + raise IndexError(f"The stream doesn't contain {to_exp+1}" f"experiences") if self._next_exp_id > to_exp: # Nothing to do @@ -253,16 +240,14 @@ def load_all_experiences(self, to_exp: Optional[int] = None) -> None: if not isinstance(generated_exp, AvalancheDataset): raise ValueError( - "All experience datasets must be subclasses of" - " AvalancheDataset" + "All experience datasets must be subclasses of" " AvalancheDataset" ) self._loaded_experiences[exp_id] = generated_exp - self.targets_field_sequence[exp_id] = \ - getattr(generated_exp, 'targets') - self.task_labels_field_sequence[ - exp_id - ] = getattr(generated_exp, 'targets_task_labels') + self.targets_field_sequence[exp_id] = getattr(generated_exp, "targets") + self.task_labels_field_sequence[exp_id] = getattr( + generated_exp, "targets_task_labels" + ) self._next_exp_id += 1 if self._next_exp_id >= len(self): diff --git a/avalanche/benchmarks/scenarios/new_classes/nc_scenario.py b/avalanche/benchmarks/scenarios/new_classes/nc_scenario.py index fdb75916a..c3b47b204 100644 --- a/avalanche/benchmarks/scenarios/new_classes/nc_scenario.py +++ b/avalanche/benchmarks/scenarios/new_classes/nc_scenario.py @@ -19,17 +19,17 @@ ClassificationExperience, ) from avalanche.benchmarks.utils import classification_subset -from avalanche.benchmarks.utils.classification_dataset import \ - ClassificationDataset, SupervisedClassificationDataset +from avalanche.benchmarks.utils.classification_dataset import ( + ClassificationDataset, + SupervisedClassificationDataset, +) from avalanche.benchmarks.utils.flat_data import ConstantSequence class NCScenario( - ClassificationScenario[ - 'NCStream', - 'NCExperience', - SupervisedClassificationDataset]): + ClassificationScenario["NCStream", "NCExperience", SupervisedClassificationDataset] +): """ This class defines a "New Classes" scenario. 
Once created, an instance @@ -128,11 +128,8 @@ class "34" will be mapped to "1", class "11" to "2" and so on. train_dataset = SupervisedClassificationDataset(train_dataset) if not isinstance(test_dataset, SupervisedClassificationDataset): test_dataset = SupervisedClassificationDataset(test_dataset) - - if ( - class_ids_from_zero_from_first_exp - and class_ids_from_zero_in_each_exp - ): + + if class_ids_from_zero_from_first_exp and class_ids_from_zero_in_each_exp: raise ValueError( "Invalid mutually exclusive options " "class_ids_from_zero_from_first_exp and " @@ -187,9 +184,7 @@ class "34" will be mapped to "1", class "11" to "2" and so on. ) """ If True the class IDs have been remapped to start from zero. """ - self.class_ids_from_zero_in_each_exp: bool = ( - class_ids_from_zero_in_each_exp - ) + self.class_ids_from_zero_in_each_exp: bool = class_ids_from_zero_in_each_exp """ If True the class IDs have been remapped to start from zero in each experience """ @@ -209,9 +204,7 @@ class "34" will be mapped to "1", class "11" to "2" and so on. elif fixed_class_order is not None: # User defined class order -> just use it if len( - set(self.classes_order_original_ids).union( - set(fixed_class_order) - ) + set(self.classes_order_original_ids).union(set(fixed_class_order)) ) != len(self.classes_order_original_ids): raise ValueError("Invalid classes defined in fixed_class_order") @@ -313,9 +306,7 @@ class "34" will be mapped to "1", class "11" to "2" and so on. f"dataset ({self.n_classes}) cannot be divided by " f"n_experiences ({n_experiences})" ) - self.n_classes_per_exp = [ - self.n_classes // n_experiences - ] * n_experiences + self.n_classes_per_exp = [self.n_classes // n_experiences] * n_experiences # Before populating the classes_in_experience list, # define the remapped class IDs. @@ -373,11 +364,7 @@ class "34" will be mapped to "1", class "11" to "2" and so on. set(self.classes_order[classes_start_idx:classes_end_idx]) ) self.original_classes_in_exp.append( - set( - self.classes_order_original_ids[ - classes_start_idx:classes_end_idx - ] - ) + set(self.classes_order_original_ids[classes_start_idx:classes_end_idx]) ) # Finally, create the experience -> patterns assignment. @@ -388,9 +375,7 @@ class "34" will be mapped to "1", class "11" to "2" and so on. self._has_task_labels = task_labels if reproducibility_data is not None: - self._has_task_labels = bool( - reproducibility_data["has_task_labels"] - ) + self._has_task_labels = bool(reproducibility_data["has_task_labels"]) pattern_train_task_labels: Sequence[int] pattern_test_task_labels: Sequence[int] @@ -477,10 +462,8 @@ class "34" will be mapped to "1", class "11" to "2" and so on. test_task_labels.append(t_id) else: test_task_labels.append(0) - - exp_task_labels = ConstantSequence( - test_task_labels[-1], len(test_dataset) - ) + + exp_task_labels = ConstantSequence(test_task_labels[-1], len(test_dataset)) test_experiences.append( classification_subset( test_dataset, indices=exp_def, task_labels=exp_task_labels @@ -493,7 +476,7 @@ class "34" will be mapped to "1", class "11" to "2" and so on. 
"test": (test_experiences, test_task_labels, test_dataset), }, stream_factory=NCStream, - experience_factory=NCExperience + experience_factory=NCExperience, ) def get_reproducibility_data(self): @@ -537,28 +520,27 @@ def classes_in_exp_range( return [ item - for sublist in self.classes_in_experience["train"][ - exp_start:exp_end - ] + for sublist in self.classes_in_experience["train"][exp_start:exp_end] for item in sublist ] -class NCStream(ClassificationStream['NCExperience']): +class NCStream(ClassificationStream["NCExperience"]): def __init__( self, name: str, benchmark: NCScenario, *, slice_ids: Optional[List[int]] = None, - set_stream_info: bool = True + set_stream_info: bool = True, ): self.benchmark: NCScenario = benchmark super().__init__( name=name, benchmark=benchmark, slice_ids=slice_ids, - set_stream_info=set_stream_info) + set_stream_info=set_stream_info, + ) class NCExperience(ClassificationExperience[SupervisedClassificationDataset]): @@ -568,11 +550,7 @@ class NCExperience(ClassificationExperience[SupervisedClassificationDataset]): stream from which this experience was taken. """ - def __init__( - self, - origin_stream: NCStream, - current_experience: int - ): + def __init__(self, origin_stream: NCStream, current_experience: int): """ Creates a ``NCExperience`` instance given the stream from this experience was taken and and the current experience ID. @@ -581,7 +559,7 @@ def __init__( obtained. :param current_experience: The current experience ID, as an integer. """ - + self._benchmark: NCScenario = origin_stream.benchmark super().__init__(origin_stream, current_experience) @@ -589,9 +567,7 @@ def __init__( @property # type: ignore[override] def benchmark(self) -> NCScenario: bench = self._benchmark - NCExperience._check_unset_attribute( - 'benchmark', bench - ) + NCExperience._check_unset_attribute("benchmark", bench) return bench @benchmark.setter @@ -599,8 +575,4 @@ def benchmark(self, bench: NCScenario): self._benchmark = bench -__all__ = [ - "NCScenario", - "NCStream", - "NCExperience" -] +__all__ = ["NCScenario", "NCStream", "NCExperience"] diff --git a/avalanche/benchmarks/scenarios/new_classes/nc_utils.py b/avalanche/benchmarks/scenarios/new_classes/nc_utils.py index 81b24a852..de5b5fb6b 100644 --- a/avalanche/benchmarks/scenarios/new_classes/nc_utils.py +++ b/avalanche/benchmarks/scenarios/new_classes/nc_utils.py @@ -39,16 +39,14 @@ def _indexes_grouped_by_classes( # Without the tensor_as_list conversion: # result_per_class[element].append(idx) -> error # because result_per_class[0] won't exist (result_per_class[tensor(0)] will) - + sequence_list: List[int] = tensor_as_list(sequence) if search_elements is not None: search_elements_list = tensor_as_list(search_elements) else: - search_elements_list = torch.unique( - torch.as_tensor(sequence_list)).tolist() + search_elements_list = torch.unique(torch.as_tensor(sequence_list)).tolist() if sort_classes: - # Consider that result_per_class is an OrderedDict # This means that, if sort_classes is True, the next for statement # will initialize the "result_per_class" in sorted order -> @@ -194,7 +192,7 @@ class buckets. Defaults to False. return classification_subset( dataset, indices=_indexes_from_set( - getattr(dataset, 'targets'), + getattr(dataset, "targets"), classes, bucket_classes=bucket_classes, sort_classes=sort_classes, @@ -205,6 +203,4 @@ class buckets. Defaults to False. 
) -__all__ = [ - "make_nc_transformation_subset" -] +__all__ = ["make_nc_transformation_subset"] diff --git a/avalanche/benchmarks/scenarios/new_instances/ni_scenario.py b/avalanche/benchmarks/scenarios/new_instances/ni_scenario.py index 63abf42c6..7e5263698 100644 --- a/avalanche/benchmarks/scenarios/new_instances/ni_scenario.py +++ b/avalanche/benchmarks/scenarios/new_instances/ni_scenario.py @@ -22,16 +22,16 @@ _exp_structure_from_assignment, ) from avalanche.benchmarks.utils import classification_subset -from avalanche.benchmarks.utils.classification_dataset import \ - ClassificationDataset, SupervisedClassificationDataset +from avalanche.benchmarks.utils.classification_dataset import ( + ClassificationDataset, + SupervisedClassificationDataset, +) from avalanche.benchmarks.utils.flat_data import ConstantSequence class NIScenario( - ClassificationScenario[ - 'NIStream', - 'NIExperience', - SupervisedClassificationDataset]): + ClassificationScenario["NIStream", "NIExperience", SupervisedClassificationDataset] +): """ This class defines a "New Instance" scenario. Once created, an instance of this class can be iterated in order to obtain @@ -153,9 +153,7 @@ def __init__( The amount of classes in the original training set. """ - self.n_patterns_per_class: List[int] = [ - 0 for _ in range(self.n_classes) - ] + self.n_patterns_per_class: List[int] = [0 for _ in range(self.n_classes)] """ The amount of patterns for each class in the original training set. """ @@ -170,9 +168,7 @@ def __init__( included_patterns: List[int] = list() for exp_def in lst_fixed_exp_assignment: included_patterns.extend(exp_def) - subset = classification_subset( - train_dataset, indices=included_patterns - ) + subset = classification_subset(train_dataset, indices=included_patterns) unique_targets, unique_count = torch.unique( torch.as_tensor(subset.targets), return_counts=True ) @@ -226,9 +222,7 @@ def __init__( # First, get the patterns indexes for each class targets_as_tensor = torch.as_tensor(train_dataset.targets) classes_to_patterns_idx = [ - torch.nonzero(torch.eq(targets_as_tensor, class_id)) - .view(-1) - .tolist() + torch.nonzero(torch.eq(targets_as_tensor, class_id)).view(-1).tolist() for class_id in range(self.n_classes) ] @@ -336,8 +330,7 @@ def __init__( # First, initialize exp_patterns and exp_structure exp_patterns = [[] for _ in range(n_experiences)] self.exp_structure = [ - [0 for _ in range(self.n_classes)] - for _ in range(n_experiences) + [0 for _ in range(self.n_classes)] for _ in range(n_experiences) ] # For each experience we assign exactly @@ -353,9 +346,7 @@ def __init__( for class_id in range(self.n_classes): next_idx = next_idx_per_class[class_id] end_idx = next_idx + min_class_patterns_in_exp - selected_patterns = classes_to_patterns_idx[ - next_idx:end_idx - ] + selected_patterns = classes_to_patterns_idx[next_idx:end_idx] exp_patterns[exp_id].extend(selected_patterns) self.exp_structure[exp_id][ class_id @@ -369,7 +360,7 @@ def __init__( # now we assign the remaining patterns # # We'll work on lst_remaining_patterns, which contains - # indexes of patterns not assigned in the previous + # indexes of patterns not assigned in the previous # experience. 
if shuffle: patterns_order = torch.as_tensor(lst_remaining_patterns)[ @@ -379,8 +370,7 @@ def __init__( lst_remaining_patterns.sort() patterns_order = lst_remaining_patterns targets_order = [ - train_dataset.targets[pattern_idx] - for pattern_idx in patterns_order + train_dataset.targets[pattern_idx] for pattern_idx in patterns_order ] avg_exp_size = len(patterns_order) // n_experiences @@ -388,9 +378,7 @@ def __init__( prev_idx = 0 for exp_id in range(n_experiences): next_idx = prev_idx + avg_exp_size - exp_patterns[exp_id].extend( - patterns_order[prev_idx:next_idx] - ) + exp_patterns[exp_id].extend(patterns_order[prev_idx:next_idx]) cls_ids, cls_counts = torch.unique( torch.as_tensor(targets_order[prev_idx:next_idx]), return_counts=True, @@ -400,9 +388,9 @@ def __init__( cls_counts = cls_counts.tolist() for unique_idx in range(len(cls_ids)): - self.exp_structure[exp_id][ - cls_ids[unique_idx] - ] += cls_counts[unique_idx] + self.exp_structure[exp_id][cls_ids[unique_idx]] += cls_counts[ + unique_idx + ] prev_idx = next_idx # Distribute remaining patterns @@ -434,7 +422,7 @@ def __init__( train_task_labels.append(t_id) else: train_task_labels.append(0) - + exp_task_labels = ConstantSequence( train_task_labels[-1], len(train_dataset) ) @@ -456,7 +444,7 @@ def __init__( }, complete_test_set_only=True, stream_factory=NIStream, - experience_factory=NIExperience + experience_factory=NIExperience, ) def get_reproducibility_data(self) -> Dict[str, Any]: @@ -467,7 +455,7 @@ def get_reproducibility_data(self) -> Dict[str, Any]: return reproducibility_data -class NIStream(ClassificationStream['NIExperience']): +class NIStream(ClassificationStream["NIExperience"]): def __init__( self, name: str, @@ -481,7 +469,8 @@ def __init__( name=name, benchmark=benchmark, slice_ids=slice_ids, - set_stream_info=set_stream_info) + set_stream_info=set_stream_info, + ) class NIExperience(ClassificationExperience[SupervisedClassificationDataset]): @@ -512,9 +501,7 @@ def __init__( @property # type: ignore[override] def benchmark(self) -> NIScenario: bench = self._benchmark - NIExperience._check_unset_attribute( - 'benchmark', bench - ) + NIExperience._check_unset_attribute("benchmark", bench) return bench @benchmark.setter @@ -522,8 +509,4 @@ def benchmark(self, bench: NIScenario): self._benchmark = bench -__all__ = [ - "NIScenario", - "NIStream", - "NIExperience" -] +__all__ = ["NIScenario", "NIStream", "NIExperience"] diff --git a/avalanche/benchmarks/scenarios/new_instances/ni_utils.py b/avalanche/benchmarks/scenarios/new_instances/ni_utils.py index f6d99a503..1c20d57c0 100644 --- a/avalanche/benchmarks/scenarios/new_instances/ni_utils.py +++ b/avalanche/benchmarks/scenarios/new_instances/ni_utils.py @@ -22,14 +22,11 @@ def _exp_structure_from_assignment( n_classes: int, ): n_experiences = len(assignment) - exp_structure = [ - [0 for _ in range(n_classes)] for _ in range(n_experiences) - ] + exp_structure = [[0 for _ in range(n_classes)] for _ in range(n_experiences)] for exp_id in range(n_experiences): exp_targets = [ - int(dataset.targets[pattern_idx]) - for pattern_idx in assignment[exp_id] + int(dataset.targets[pattern_idx]) for pattern_idx in assignment[exp_id] ] cls_ids, cls_counts = torch.unique( torch.as_tensor(exp_targets), return_counts=True @@ -43,6 +40,4 @@ def _exp_structure_from_assignment( return exp_structure -__all__ = [ - "_exp_structure_from_assignment" -] +__all__ = ["_exp_structure_from_assignment"] diff --git a/avalanche/benchmarks/scenarios/online_scenario.py 
b/avalanche/benchmarks/scenarios/online_scenario.py index 7ad52d61f..db79e53c9 100644 --- a/avalanche/benchmarks/scenarios/online_scenario.py +++ b/avalanche/benchmarks/scenarios/online_scenario.py @@ -21,8 +21,7 @@ from typing_extensions import Literal import torch -from avalanche.benchmarks.scenarios.benchmark_wrapper_utils import \ - wrap_stream +from avalanche.benchmarks.scenarios.benchmark_wrapper_utils import wrap_stream from avalanche.benchmarks.utils import AvalancheDataset from avalanche.benchmarks.scenarios.generic_scenario import ( @@ -33,40 +32,23 @@ CLScenario, ) -from avalanche.benchmarks.scenarios.dataset_scenario import ( - DatasetScenario -) -from avalanche.benchmarks.utils.classification_dataset import \ - ClassificationDataset +from avalanche.benchmarks.scenarios.dataset_scenario import DatasetScenario +from avalanche.benchmarks.utils.classification_dataset import ClassificationDataset -TCLDataset = TypeVar( - 'TCLDataset', - bound='AvalancheDataset') +TCLDataset = TypeVar("TCLDataset", bound="AvalancheDataset") TClassificationDataset = TypeVar( - 'TClassificationDataset', - bound='ClassificationDataset') -TCLScenario = TypeVar( - "TCLScenario", - bound="CLScenario") -TDatasetScenario = TypeVar( - "TDatasetScenario", - bound="DatasetScenario") -TOnlineCLScenario = TypeVar( - 'TOnlineCLScenario', - bound='OnlineCLScenario') -TCLStream = TypeVar( - 'TCLStream', - bound='CLStream') -TCLExperience = TypeVar( - 'TCLExperience', - bound='CLExperience') -TOnlineCLExperience = TypeVar( - 'TOnlineCLExperience', - bound='OnlineCLExperience') + "TClassificationDataset", bound="ClassificationDataset" +) +TCLScenario = TypeVar("TCLScenario", bound="CLScenario") +TDatasetScenario = TypeVar("TDatasetScenario", bound="DatasetScenario") +TOnlineCLScenario = TypeVar("TOnlineCLScenario", bound="OnlineCLScenario") +TCLStream = TypeVar("TCLStream", bound="CLStream") +TCLExperience = TypeVar("TCLExperience", bound="CLExperience") +TOnlineCLExperience = TypeVar("TOnlineCLExperience", bound="OnlineCLExperience") TOnlineClassificationExperience = TypeVar( - 'TOnlineClassificationExperience', - bound='OnlineClassificationExperience') + "TOnlineClassificationExperience", bound="OnlineClassificationExperience" +) class OnlineCLExperience(DatasetExperience[TCLDataset]): @@ -102,7 +84,8 @@ def __init__( current_experience=current_experience, origin_stream=origin_stream, benchmark=benchmark, - dataset=dataset) + dataset=dataset, + ) self.access_task_boundaries = access_task_boundaries self.origin_experience: DatasetExperience = origin_experience @@ -112,21 +95,20 @@ def __init__( self.sub_stream_length: Optional[int] = sub_stream_length self._as_attributes( - 'origin_experience', - 'subexp_size', - 'is_first_subexp', - 'is_last_subexp', - 'sub_stream_length', - use_in_train=access_task_boundaries) + "origin_experience", + "subexp_size", + "is_first_subexp", + "is_last_subexp", + "sub_stream_length", + use_in_train=access_task_boundaries, + ) @property def task_labels(self) -> List[int]: return self.origin_experience.task_labels -class OnlineClassificationExperience( - OnlineCLExperience[ - TClassificationDataset]): +class OnlineClassificationExperience(OnlineCLExperience[TClassificationDataset]): """ A specialization of :class:`OnlineCLExperience` with the `classes_in_this_experience` field. 
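The `_as_attributes(..., use_in_train=access_task_boundaries)` call above is what decides whether the boundary fields (`is_first_subexp`, `is_last_subexp`, and so on) are visible outside logging mode. A minimal sketch of that masking machinery, using only pieces that appear earlier in this diff (`CLExperience`, `MaskedAttributeError` from generic_scenario.py); the toy `BoundaryExp` subclass is invented purely for illustration.

from avalanche.benchmarks.scenarios.generic_scenario import (
    CLExperience,
    MaskedAttributeError,
)

class BoundaryExp(CLExperience):
    def __init__(self):
        super().__init__(current_experience=0, origin_stream=None)  # type: ignore
        self.is_last_subexp = True
        # Mirrors OnlineCLExperience with access_task_boundaries=False:
        # the field is masked in train (and eval) mode.
        self._as_attributes("is_last_subexp", use_in_train=False)

exp = BoundaryExp()
print(exp.logging().is_last_subexp)   # True: logging mode exposes the raw value
try:
    exp.train().is_last_subexp        # masked while in train mode
except MaskedAttributeError as err:
    print(err)
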
@@ -165,8 +147,9 @@ def __init__( is_first_subexp=is_first_subexp, is_last_subexp=is_last_subexp, sub_stream_length=sub_stream_length, - access_task_boundaries=access_task_boundaries) - + access_task_boundaries=access_task_boundaries, + ) + self.classes_in_this_experience: List[int] = classes_in_this_experience @@ -176,12 +159,8 @@ def fixed_size_experience_split( online_benchmark: TOnlineCLScenario, shuffle: bool = True, drop_last: bool = False, - access_task_boundaries: bool = False -) -> Generator[ - OnlineClassificationExperience[ - TClassificationDataset], - None, - None]: + access_task_boundaries: bool = False, +) -> Generator[OnlineClassificationExperience[TClassificationDataset], None, None]: """ Returns a lazy stream generated by splitting an experience into smaller ones. @@ -224,12 +203,12 @@ def fixed_size_experience_split( final_idx = len(exp_indices) is_last = True - sub_exp_subset = \ - exp_dataset.subset(exp_indices[init_idx:final_idx]) - sub_exp_targets: torch.Tensor = \ - exp_targets[exp_indices[init_idx:final_idx]].unique() + sub_exp_subset = exp_dataset.subset(exp_indices[init_idx:final_idx]) + sub_exp_targets: torch.Tensor = exp_targets[ + exp_indices[init_idx:final_idx] + ].unique() - # origin_stream will be lazily set later + # origin_stream will be lazily set later exp = OnlineClassificationExperience( current_experience=exp_idx, origin_stream=None, # type: ignore @@ -251,12 +230,13 @@ def fixed_size_experience_split( def _default_online_split( - online_benchmark, - shuffle: bool, - drop_last: bool, - access_task_boundaries: bool, - exp: DatasetExperience[TClassificationDataset], - size: int): + online_benchmark, + shuffle: bool, + drop_last: bool, + access_task_boundaries: bool, + exp: DatasetExperience[TClassificationDataset], + size: int, +): return fixed_size_experience_split( experience=exp, experience_size=size, @@ -270,19 +250,17 @@ def _default_online_split( def split_online_stream( original_stream: Iterable[DatasetExperience[TClassificationDataset]], experience_size: int, - online_benchmark: 'OnlineCLScenario[TClassificationDataset]', + online_benchmark: "OnlineCLScenario[TClassificationDataset]", shuffle: bool = True, drop_last: bool = False, - experience_split_strategy: Optional[Callable[ - [DatasetExperience[TClassificationDataset], int], - Iterable[ - OnlineClassificationExperience[ - TClassificationDataset]] - ]] = None, - access_task_boundaries: bool = False -) -> CLStream[ - DatasetExperience[ - TClassificationDataset]]: + experience_split_strategy: Optional[ + Callable[ + [DatasetExperience[TClassificationDataset], int], + Iterable[OnlineClassificationExperience[TClassificationDataset]], + ] + ] = None, + access_task_boundaries: bool = False, +) -> CLStream[DatasetExperience[TClassificationDataset]]: """Split a stream of large batches to create an online stream of small mini-batches. @@ -312,18 +290,13 @@ def split_online_stream( :func:`fixed_size_experience_split`. :return: A lazy online stream with experiences of size `experience_size`. 
""" - + if experience_split_strategy is None: # functools.partial is a more compact option # However, MyPy does not understand what a partial is -_- def default_online_split_wrapper(e, e_sz): return _default_online_split( - online_benchmark, - shuffle, - drop_last, - access_task_boundaries, - e, - e_sz + online_benchmark, shuffle, drop_last, access_task_boundaries, e, e_sz ) split_strategy = default_online_split_wrapper @@ -340,20 +313,17 @@ def exps_iter(): name=stream_name, exps_iter=exps_iter(), set_stream_info=True, - benchmark=online_benchmark + benchmark=online_benchmark, ) def _fixed_size_split( - online_benchmark: 'OnlineCLScenario', + online_benchmark: "OnlineCLScenario", experience_size: int, access_task_boundaries: bool, shuffle: bool, - s: Iterable[ - DatasetExperience[TClassificationDataset]]) -> \ - CLStream[ - DatasetExperience[ - TClassificationDataset]]: + s: Iterable[DatasetExperience[TClassificationDataset]], +) -> CLStream[DatasetExperience[TClassificationDataset]]: return split_online_stream( original_stream=s, experience_size=experience_size, @@ -367,11 +337,13 @@ class OnlineCLScenario(CLScenario[CLStream[DatasetExperience[TCLDataset]]]): def __init__( self, original_streams: Iterable[CLStream[DatasetExperience[TCLDataset]]], - experiences: Optional[Union[ - DatasetExperience[TCLDataset], - Iterable[DatasetExperience[TCLDataset]]]] = None, + experiences: Optional[ + Union[ + DatasetExperience[TCLDataset], Iterable[DatasetExperience[TCLDataset]] + ] + ] = None, experience_size: int = 10, - stream_split_strategy: Literal['fixed_size_split'] = "fixed_size_split", + stream_split_strategy: Literal["fixed_size_split"] = "fixed_size_split", access_task_boundaries: bool = False, shuffle: bool = True, ): @@ -397,15 +369,13 @@ def __init__( :param shuffle: If True, experiences will be split by first shuffling instances in each experience. Defaults to True. 
""" - + if stream_split_strategy != "fixed_size_split": raise ValueError("Unknown experience split strategy") - split_strat = partial(_fixed_size_split, - self, - experience_size, - access_task_boundaries, - shuffle) + split_strat = partial( + _fixed_size_split, self, experience_size, access_task_boundaries, shuffle + ) streams_dict = {s.name: s for s in original_streams} if "train" not in streams_dict: @@ -420,22 +390,18 @@ def __init__( streams: List[CLStream] = [online_train_stream] for s in original_streams: s_wrapped = wrap_stream( - new_name="original_" + s.name, - new_benchmark=self, - wrapped_stream=s + new_name="original_" + s.name, new_benchmark=self, wrapped_stream=s ) streams.append(s_wrapped) - super().__init__( - streams=streams - ) + super().__init__(streams=streams) __all__ = [ - 'OnlineCLExperience', - 'OnlineClassificationExperience', - 'fixed_size_experience_split', - 'split_online_stream', - 'OnlineCLScenario' + "OnlineCLExperience", + "OnlineClassificationExperience", + "fixed_size_experience_split", + "split_online_stream", + "OnlineCLScenario", ] diff --git a/avalanche/benchmarks/scenarios/rl_scenario.py b/avalanche/benchmarks/scenarios/rl_scenario.py index 1368bc81f..44f9e9172 100644 --- a/avalanche/benchmarks/scenarios/rl_scenario.py +++ b/avalanche/benchmarks/scenarios/rl_scenario.py @@ -30,9 +30,9 @@ ) -TCLStream = TypeVar('TCLStream', bound='CLStream') -TRLScenario = TypeVar('TRLScenario', bound='RLScenario') -TRLExperience = TypeVar('TRLExperience', bound='RLExperience') +TCLStream = TypeVar("TCLStream", bound="CLStream") +TRLScenario = TypeVar("TRLScenario", bound="RLScenario") +TRLExperience = TypeVar("TRLExperience", bound="RLExperience") class RLExperience(CLExperience): @@ -57,11 +57,7 @@ def __init__( # task label to be (optionally) used for training purposes self.task_label = task_label - self._as_attributes( - 'task_label', - use_in_train=True, - use_in_eval=True - ) + self._as_attributes("task_label", use_in_train=True, use_in_eval=True) @property def environment(self) -> Env: @@ -160,23 +156,21 @@ def get_unique_task_labels(env_list): tr_task_labels = [tr_task_labels[i] for i in perm] # decide whether to provide task labels to experiences - tr_task_labels = ( - tr_task_labels if task_labels else [None] * len(tr_envs) - ) + tr_task_labels = tr_task_labels if task_labels else [None] * len(tr_envs) tr_exps: List[TRLExperience] = [ RLExperience( - current_experience=i, + current_experience=i, origin_stream=None, # type: ignore env=tr_envs[i], n_envs=n_parallel_envs[i], - task_label=tr_task_labels[i]) + task_label=tr_task_labels[i], + ) for i in range(len(tr_envs)) ] tstream: EagerCLStream[TRLExperience] = EagerCLStream( - name="train", - exps=tr_exps, - benchmark=self) + name="train", exps=tr_exps, benchmark=self + ) # we're only supporting single process envs in evaluation atm print("EVAL ", eval_task_labels) eval_exps: List[TRLExperience] = [ @@ -185,18 +179,15 @@ def get_unique_task_labels(env_list): origin_stream=None, # type: ignore env=e, n_envs=1, - task_label=l) + task_label=l, + ) for i, (e, l) in enumerate(zip(eval_envs, eval_task_labels)) ] estream: EagerCLStream[TRLExperience] = EagerCLStream( - name="eval", - exps=eval_exps, - benchmark=self) + name="eval", exps=eval_exps, benchmark=self + ) super().__init__([tstream, estream]) -__all__ = [ - "RLExperience", - "RLScenario" -] +__all__ = ["RLExperience", "RLScenario"] diff --git a/avalanche/benchmarks/utils/classification_dataset.py b/avalanche/benchmarks/utils/classification_dataset.py index 
80697bfbd..0aa6460d7 100644 --- a/avalanche/benchmarks/utils/classification_dataset.py +++ b/avalanche/benchmarks/utils/classification_dataset.py @@ -70,8 +70,7 @@ TTargetType = int TClassificationDataset = TypeVar( - "TClassificationDataset", - bound="ClassificationDataset" + "TClassificationDataset", bound="ClassificationDataset" ) @@ -85,58 +84,52 @@ def lookup(indexable, idx): class ClassificationDataset(AvalancheDataset[T_co]): - @property def task_pattern_indices(self) -> Dict[int, Sequence[int]]: """A dictionary mapping task ids to their sample indices.""" return self.targets_task_labels.val_to_idx # type: ignore @property - def task_set(self: TClassificationDataset) -> \ - TaskSet[TClassificationDataset]: + def task_set(self: TClassificationDataset) -> TaskSet[TClassificationDataset]: """Returns the datasets's ``TaskSet``, which is a mapping .""" return TaskSet(self) - + def subset(self, indices): data = super().subset(indices) - return data.with_transforms( - self._flat_data._transform_groups.current_group) + return data.with_transforms(self._flat_data._transform_groups.current_group) def concat(self, other): data = super().concat(other) - return data.with_transforms( - self._flat_data._transform_groups.current_group) + return data.with_transforms(self._flat_data._transform_groups.current_group) def __hash__(self): return id(self) - + class SupervisedClassificationDataset(ClassificationDataset[T_co]): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) - assert 'targets' in self._data_attributes, \ - 'The supervised version of the ClassificationDataset requires ' + \ - 'the targets field' - assert 'targets_task_labels' in self._data_attributes, \ - 'The supervised version of the ClassificationDataset requires ' + \ - 'the targets_task_labels field' + assert "targets" in self._data_attributes, ( + "The supervised version of the ClassificationDataset requires " + + "the targets field" + ) + assert "targets_task_labels" in self._data_attributes, ( + "The supervised version of the ClassificationDataset requires " + + "the targets_task_labels field" + ) @property def targets(self) -> DataAttribute[TTargetType]: - return self._data_attributes['targets'] + return self._data_attributes["targets"] @property def targets_task_labels(self) -> DataAttribute[int]: - return self._data_attributes['targets_task_labels'] + return self._data_attributes["targets_task_labels"] SupportedDataset = Union[ - IDatasetWithTargets, - ITensorDataset, - Subset, - ConcatDataset, - ClassificationDataset + IDatasetWithTargets, ITensorDataset, Subset, ConcatDataset, ClassificationDataset ] @@ -282,10 +275,10 @@ def make_classification_dataset( initial_transform_group, dataset, ) - targets_data: Optional[DataAttribute[TTargetType]] = \ - _init_targets(dataset, targets) - task_labels_data: Optional[DataAttribute[int]] = \ - _init_task_labels(dataset, task_labels) + targets_data: Optional[DataAttribute[TTargetType]] = _init_targets(dataset, targets) + task_labels_data: Optional[DataAttribute[int]] = _init_task_labels( + dataset, task_labels + ) das: List[DataAttribute] = [] if targets_data is not None: @@ -295,8 +288,8 @@ def make_classification_dataset( # Check if supervision data has been added is_supervised = is_supervised or ( - targets_data is not None and - task_labels_data is not None) + targets_data is not None and task_labels_data is not None + ) data: Union[ClassificationDataset, SupervisedClassificationDataset] if is_supervised: @@ -313,15 +306,16 @@ def make_classification_dataset( 
transform_groups=transform_gs, collate_fn=collate_fn, ) - + if initial_transform_group is not None: return data.with_transforms(initial_transform_group) else: return data -def _init_targets(dataset, targets, check_shape=True) -> \ - Optional[DataAttribute[TTargetType]]: +def _init_targets( + dataset, targets, check_shape=True +) -> Optional[DataAttribute[TTargetType]]: if targets is not None: # User defined targets always take precedence if isinstance(targets, int): @@ -334,16 +328,15 @@ def _init_targets(dataset, targets, check_shape=True) -> \ ) return DataAttribute(targets, "targets") - targets = _traverse_supported_dataset( - dataset, _select_targets) - + targets = _traverse_supported_dataset(dataset, _select_targets) + if targets is not None: if isinstance(targets, torch.Tensor): targets = targets.tolist() if targets is None: return None - + return DataAttribute(targets, "targets") @@ -355,8 +348,7 @@ def classification_subset( class_mapping: Optional[Sequence[int]] = None, transform: Optional[XTransform] = None, target_transform: Optional[YTransform] = None, - transform_groups: Optional[Mapping[str, - Tuple[XTransform, YTransform]]] = None, + transform_groups: Optional[Mapping[str, Tuple[XTransform, YTransform]]] = None, initial_transform_group: Optional[str] = None, task_labels: Optional[Union[int, Sequence[int]]] = None, targets: Optional[Sequence[TTargetType]] = None, @@ -373,8 +365,7 @@ def classification_subset( class_mapping: Optional[Sequence[int]] = None, transform: Optional[XTransform] = None, target_transform: Optional[YTransform] = None, - transform_groups: Optional[Mapping[str, - Tuple[XTransform, YTransform]]] = None, + transform_groups: Optional[Mapping[str, Tuple[XTransform, YTransform]]] = None, initial_transform_group: Optional[str] = None, task_labels: Union[int, Sequence[int]], targets: Sequence[TTargetType], @@ -391,8 +382,7 @@ def classification_subset( class_mapping: Optional[Sequence[int]] = None, transform: Optional[XTransform] = None, target_transform: Optional[YTransform] = None, - transform_groups: Optional[Mapping[str, - Tuple[XTransform, YTransform]]] = None, + transform_groups: Optional[Mapping[str, Tuple[XTransform, YTransform]]] = None, initial_transform_group: Optional[str] = None, task_labels: Optional[Union[int, Sequence[int]]] = None, targets: Optional[Sequence[TTargetType]] = None, @@ -408,8 +398,7 @@ def classification_subset( class_mapping: Optional[Sequence[int]] = None, transform: Optional[XTransform] = None, target_transform: Optional[YTransform] = None, - transform_groups: Optional[Mapping[str, - Tuple[XTransform, YTransform]]] = None, + transform_groups: Optional[Mapping[str, Tuple[XTransform, YTransform]]] = None, initial_transform_group: Optional[str] = None, task_labels: Optional[Union[int, Sequence[int]]] = None, targets: Optional[Sequence[TTargetType]] = None, @@ -484,7 +473,7 @@ def classification_subset( """ is_supervised = isinstance(dataset, SupervisedClassificationDataset) - + if isinstance(dataset, ClassificationDataset): if ( class_mapping is None @@ -498,10 +487,12 @@ def classification_subset( ): return dataset.subset(indices) - targets_data: Optional[DataAttribute[TTargetType]] = \ - _init_targets(dataset, targets, check_shape=False) - task_labels_data: Optional[DataAttribute[int]] = \ - _init_task_labels(dataset, task_labels, check_shape=False) + targets_data: Optional[DataAttribute[TTargetType]] = _init_targets( + dataset, targets, check_shape=False + ) + task_labels_data: Optional[DataAttribute[int]] = _init_task_labels( + 
dataset, task_labels, check_shape=False + ) transform_gs = _init_transform_groups( transform_groups, @@ -511,9 +502,7 @@ def classification_subset( dataset, ) - if initial_transform_group is not None and isinstance( - dataset, AvalancheDataset - ): + if initial_transform_group is not None and isinstance(dataset, AvalancheDataset): dataset = dataset.with_transforms(initial_transform_group) if class_mapping is not None: # update targets @@ -537,9 +526,9 @@ def classification_subset( # Check if supervision data has been added is_supervised = is_supervised or ( - targets_data is not None and - task_labels_data is not None) - + targets_data is not None and task_labels_data is not None + ) + if task_labels_data is not None: # special treatment for task labels depending on length for # backward compatibility @@ -557,13 +546,13 @@ def classification_subset( if is_supervised: return SupervisedClassificationDataset( [dataset], - data_attributes=[dataset.targets, # type: ignore - task_labels_data]) + data_attributes=[dataset.targets, task_labels_data], # type: ignore + ) else: return ClassificationDataset( [dataset], - data_attributes=[dataset.targets, # type: ignore - task_labels_data]) + data_attributes=[dataset.targets, task_labels_data], # type: ignore + ) else: das.append(task_labels_data) @@ -674,7 +663,7 @@ def make_tensor_classification_dataset( targets = dataset_tensors[targets] tts = [] for tt in dataset_tensors: # TorchTensor requires a pytorch tensor - if not hasattr(tt, 'size'): + if not hasattr(tt, "size"): tt = torch.tensor(tt) tts.append(tt) dataset = _TensorClassificationDataset(*tts) @@ -688,20 +677,16 @@ def make_tensor_classification_dataset( ) targets_data = _init_targets(dataset, targets) task_labels_data = _init_task_labels(dataset, task_labels) - if initial_transform_group is not None and isinstance( - dataset, AvalancheDataset - ): + if initial_transform_group is not None and isinstance(dataset, AvalancheDataset): dataset = dataset.with_transforms(initial_transform_group) das = [] for d in [targets_data, task_labels_data]: if d is not None: das.append(d) - + # Check if supervision data has been added - is_supervised = ( - targets_data is not None and - task_labels_data is not None) + is_supervised = targets_data is not None and task_labels_data is not None if is_supervised: return SupervisedClassificationDataset( @@ -736,12 +721,10 @@ def concat_classification_datasets( target_transform: Optional[YTransform] = None, transform_groups: Optional[Mapping[str, TransformGroupDef]] = None, initial_transform_group: Optional[str] = None, - task_labels: Optional[Union[int, - Sequence[int], - Sequence[Sequence[int]]]] = None, - targets: Optional[Union[ - Sequence[TTargetType], Sequence[Sequence[TTargetType]] - ]] = None, + task_labels: Optional[Union[int, Sequence[int], Sequence[Sequence[int]]]] = None, + targets: Optional[ + Union[Sequence[TTargetType], Sequence[Sequence[TTargetType]]] + ] = None, collate_fn: Optional[Callable[[List], Any]] = None ) -> SupervisedClassificationDataset: ... @@ -756,9 +739,7 @@ def concat_classification_datasets( transform_groups: Optional[Mapping[str, TransformGroupDef]] = None, initial_transform_group: Optional[str] = None, task_labels: Union[int, Sequence[int], Sequence[Sequence[int]]], - targets: Union[ - Sequence[TTargetType], Sequence[Sequence[TTargetType]] - ], + targets: Union[Sequence[TTargetType], Sequence[Sequence[TTargetType]]], collate_fn: Optional[Callable[[List], Any]] = None ) -> SupervisedClassificationDataset: ... 
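# Usage sketch (illustrative only, not part of this patch): `classification_subset`,
# whose overloads are reformatted above, builds a remapped, task-labeled view of an
# existing dataset. The module path and the use of torchvision MNIST are assumptions
# made for this example; the parameter names follow the signatures shown in the diff.
from torchvision.datasets import MNIST

from avalanche.benchmarks.utils.classification_dataset import classification_subset

mnist = MNIST(root="./data", train=True, download=True)

# Keep the first 1000 samples, remap classes through an (identity) mapping and
# attach a single task label. Because both targets and task labels end up being
# available, the result is a SupervisedClassificationDataset.
subset = classification_subset(
    mnist,
    indices=list(range(1000)),
    class_mapping=list(range(10)),
    task_labels=0,
)
print(len(subset), int(subset.targets[0]), int(subset.targets_task_labels[0]))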
@@ -772,12 +753,10 @@ def concat_classification_datasets( target_transform: Optional[YTransform] = None, transform_groups: Optional[Mapping[str, TransformGroupDef]] = None, initial_transform_group: Optional[str] = None, - task_labels: Optional[Union[int, - Sequence[int], - Sequence[Sequence[int]]]] = None, - targets: Optional[Union[ - Sequence[TTargetType], Sequence[Sequence[TTargetType]] - ]] = None, + task_labels: Optional[Union[int, Sequence[int], Sequence[Sequence[int]]]] = None, + targets: Optional[ + Union[Sequence[TTargetType], Sequence[Sequence[TTargetType]]] + ] = None, collate_fn: Optional[Callable[[List], Any]] = None ) -> ClassificationDataset: ... @@ -790,12 +769,10 @@ def concat_classification_datasets( target_transform: Optional[YTransform] = None, transform_groups: Optional[Mapping[str, TransformGroupDef]] = None, initial_transform_group: Optional[str] = None, - task_labels: Optional[Union[int, - Sequence[int], - Sequence[Sequence[int]]]] = None, - targets: Optional[Union[ - Sequence[TTargetType], Sequence[Sequence[TTargetType]] - ]] = None, + task_labels: Optional[Union[int, Sequence[int], Sequence[Sequence[int]]]] = None, + targets: Optional[ + Union[Sequence[TTargetType], Sequence[Sequence[TTargetType]]] + ] = None, collate_fn: Optional[Callable[[List], Any]] = None ) -> Union[ClassificationDataset, SupervisedClassificationDataset]: """Creates a ``AvalancheConcatDataset`` instance. @@ -862,25 +839,22 @@ def concat_classification_datasets( in different datasets. """ dds = [] - per_dataset_task_labels = _split_user_def_task_label( - datasets, - task_labels - ) + per_dataset_task_labels = _split_user_def_task_label(datasets, task_labels) per_dataset_targets = _split_user_def_targets( - datasets, - targets, - lambda x: isinstance(x, int) + datasets, targets, lambda x: isinstance(x, int) ) # Find common "current_group" or use "train" if initial_transform_group is None: - initial_transform_group = \ - find_common_transforms_group(datasets, default_group="train") + initial_transform_group = find_common_transforms_group( + datasets, default_group="train" + ) supervised = True - for dd, dataset_task_labels, dataset_targets in \ - zip(datasets, per_dataset_task_labels, per_dataset_targets): + for dd, dataset_task_labels, dataset_targets in zip( + datasets, per_dataset_task_labels, per_dataset_targets + ): dd = make_classification_dataset( dd, transform=transform, @@ -894,7 +868,7 @@ def concat_classification_datasets( if not isinstance(dd, SupervisedClassificationDataset): supervised = False - + dds.append(dd) if len(dds) > 0: @@ -909,28 +883,22 @@ def concat_classification_datasets( transform_groups_obj = None supervised = supervised and ( - (len(dds) > 0) or ( - targets is not None and task_labels is not None - ) + (len(dds) > 0) or (targets is not None and task_labels is not None) ) - + data: Union[SupervisedClassificationDataset, ClassificationDataset] if supervised: data = SupervisedClassificationDataset( - dds, - transform_groups=transform_groups_obj + dds, transform_groups=transform_groups_obj ) else: - data = ClassificationDataset( - dds, - transform_groups=transform_groups_obj - ) + data = ClassificationDataset(dds, transform_groups=transform_groups_obj) return data.with_transforms(initial_transform_group) def _select_targets( - dataset: SupportedDataset, - indices: Optional[List[int]]) -> Sequence[TTargetType]: + dataset: SupportedDataset, indices: Optional[List[int]] +) -> Sequence[TTargetType]: if hasattr(dataset, "targets"): # Standard supported dataset found_targets 
= dataset.targets # type: ignore @@ -938,8 +906,7 @@ def _select_targets( # Support for PyTorch TensorDataset if len(dataset.tensors) < 2: # type: ignore raise ValueError( - "Tensor dataset has not enough tensors: " - "at least 2 are required." + "Tensor dataset has not enough tensors: " "at least 2 are required." ) found_targets = dataset.tensors[1] # type: ignore else: @@ -958,9 +925,9 @@ def _select_targets( def concat_classification_datasets_sequentially( train_dataset_list: Sequence[ISupportedClassificationDataset], test_dataset_list: Sequence[ISupportedClassificationDataset], -) -> Tuple[SupervisedClassificationDataset, - SupervisedClassificationDataset, - List[list]]: +) -> Tuple[ + SupervisedClassificationDataset, SupervisedClassificationDataset, List[list] +]: """ Concatenates a list of datasets. This is completely different from :class:`ConcatDataset`, in which datasets are merged together without @@ -1028,7 +995,6 @@ def concat_classification_datasets_sequentially( new_class_ids_per_dataset = [] for dataset_idx in range(len(train_dataset_list_sup)): - # Get the train and test sets of the dataset train_set = train_dataset_list_sup[dataset_idx] test_set = test_dataset_list_sup[dataset_idx] @@ -1085,18 +1051,17 @@ def as_supervised_classification_dataset( initial_transform_group: Optional[str] = None, task_labels: Optional[Union[int, Sequence[int]]] = None, targets: Optional[Sequence[TTargetType]] = None, - collate_fn: Optional[Callable[[List], Any]] = None) -> \ - SupervisedClassificationDataset: - + collate_fn: Optional[Callable[[List], Any]] = None +) -> SupervisedClassificationDataset: if ( - transform is not None or - target_transform is not None or - transform_groups is not None or - initial_transform_group is not None or - task_labels is not None or - targets is not None or - collate_fn is not None or - not isinstance(dataset, SupervisedClassificationDataset) + transform is not None + or target_transform is not None + or transform_groups is not None + or initial_transform_group is not None + or task_labels is not None + or targets is not None + or collate_fn is not None + or not isinstance(dataset, SupervisedClassificationDataset) ): result_dataset = make_classification_dataset( dataset=dataset, @@ -1106,17 +1071,17 @@ def as_supervised_classification_dataset( initial_transform_group=initial_transform_group, task_labels=task_labels, targets=targets, - collate_fn=collate_fn + collate_fn=collate_fn, ) if not isinstance(result_dataset, SupervisedClassificationDataset): raise ValueError( - 'The given dataset does not have supervision fields ' - '(targets, task_labels).' + "The given dataset does not have supervision fields " + "(targets, task_labels)." 
) return result_dataset - + return dataset @@ -1129,5 +1094,5 @@ def as_supervised_classification_dataset( "make_tensor_classification_dataset", "concat_classification_datasets", "concat_classification_datasets_sequentially", - "as_supervised_classification_dataset" + "as_supervised_classification_dataset", ] diff --git a/avalanche/benchmarks/utils/collate_functions.py b/avalanche/benchmarks/utils/collate_functions.py index be4e1d18a..874014d35 100644 --- a/avalanche/benchmarks/utils/collate_functions.py +++ b/avalanche/benchmarks/utils/collate_functions.py @@ -42,9 +42,7 @@ DetectionExampleT = Tuple[Tensor, Dict, int] DetectionBatchT = Tuple[Tuple[Tensor, ...], Tuple[Dict, ...], Tuple[int, ...]] -DetectionBatchedFeatureT = Union[Tuple[Tensor, ...], - Tuple[Dict, ...], - Tuple[int, ...]] +DetectionBatchedFeatureT = Union[Tuple[Tensor, ...], Tuple[Dict, ...], Tuple[int, ...]] DetectionFeatureT = Union[Tensor, Dict, int] @@ -124,8 +122,9 @@ def detection_collate_fn(batch: Sequence[DetectionExampleT]) -> DetectionBatchT: return tuple(zip(*batch)) # type: ignore -def detection_collate_mbatches_fn(mbatches: Sequence[DetectionBatchT]) -> \ - DetectionBatchT: +def detection_collate_mbatches_fn( + mbatches: Sequence[DetectionBatchT], +) -> DetectionBatchT: """ Collate function used when loading detection datasets using a DataLoader. @@ -170,5 +169,5 @@ def detection_collate_mbatches_fn(mbatches: Sequence[DetectionBatchT]) -> \ "classification_collate_mbatches_fn", "classification_single_values_collate_mbatches_fn", "detection_collate_fn", - "detection_collate_mbatches_fn" + "detection_collate_mbatches_fn", ] diff --git a/avalanche/benchmarks/utils/data.py b/avalanche/benchmarks/utils/data.py index 7e187e4fc..824aac473 100644 --- a/avalanche/benchmarks/utils/data.py +++ b/avalanche/benchmarks/utils/data.py @@ -103,20 +103,19 @@ def __init__( applied by this dataset. :param transform_groups: Avalanche transform groups. """ - if isinstance(datasets, TorchDataset) or isinstance( - datasets, AvalancheDataset - ): + if isinstance(datasets, TorchDataset) or isinstance(datasets, AvalancheDataset): warnings.warn( "AvalancheDataset constructor has been changed. " "Please check the documentation for the correct usage. You can" " use `avalanche.benchmarks.utils.make_classification_dataset " "if you need the old behavior.", DeprecationWarning, - stacklevel=2 + stacklevel=2, ) - if issubclass(type(datasets), TorchDataset) or \ - issubclass(type(datasets), AvalancheDataset): + if issubclass(type(datasets), TorchDataset) or issubclass( + type(datasets), AvalancheDataset + ): datasets = [datasets] # type: ignore # NOTES on implementation: @@ -136,13 +135,21 @@ def __init__( flat_datas.append(_FlatDataWithTransform([d])) else: flat_datas.append(d) - if transform_groups is None and frozen_transform_groups is None and \ - indices is not None and len(flat_datas) == 1: + if ( + transform_groups is None + and frozen_transform_groups is None + and indices is not None + and len(flat_datas) == 1 + ): # TODO: remove. shouldn't be needed but helps with flattening assert len(flat_datas) == 1 self._flat_data = flat_datas[0].subset(indices) - elif transform_groups is None and frozen_transform_groups is None and \ - indices is None and len(flat_datas) >= 1: + elif ( + transform_groups is None + and frozen_transform_groups is None + and indices is None + and len(flat_datas) >= 1 + ): # TODO: remove. 
shouldn't be needed but helps with flattening if len(flat_datas) == 0: self._flat_data = _FlatDataWithTransform([]) @@ -155,12 +162,12 @@ def __init__( d = _FlatDataWithTransform([d]) self._flat_data = self._flat_data.concat(d) else: - self._flat_data: _FlatDataWithTransform[T_co] = \ - _FlatDataWithTransform( - flat_datas, - indices=indices, - transform_groups=transform_groups, - frozen_transform_groups=frozen_transform_groups) + self._flat_data: _FlatDataWithTransform[T_co] = _FlatDataWithTransform( + flat_datas, + indices=indices, + transform_groups=transform_groups, + frozen_transform_groups=frozen_transform_groups, + ) self.collate_fn = collate_fn #################################### @@ -192,14 +199,13 @@ def __init__( self._data_attributes: Dict[str, DataAttribute] = OrderedDict() first_dataset = datasets[0] if len(datasets) > 0 else None - if isinstance( - first_dataset, AvalancheDataset - ): + if isinstance(first_dataset, AvalancheDataset): for attr in first_dataset._data_attributes.values(): if attr.name in new_data_attributes: # Keep overridden attributes in their previous position - self._data_attributes[attr.name] = \ - new_data_attributes.pop(attr.name) + self._data_attributes[attr.name] = new_data_attributes.pop( + attr.name + ) continue acat = attr @@ -235,9 +241,7 @@ def __init__( # set attributes dynamically for el in self._data_attributes.values(): - assert len(el) == len( - self - ), f"BUG: Wrong size for attribute {el.name}" + assert len(el) == len(self), f"BUG: Wrong size for attribute {el.name}" is_property = False if hasattr(self, el.name): @@ -245,8 +249,7 @@ def __init__( # Do not raise an error if a property. # Any check related to the property will be done # in the property setter method. - if not isinstance(getattr(type(self), el.name, None), - property): + if not isinstance(getattr(type(self), el.name, None), property): raise ValueError( f"Trying to add DataAttribute `{el.name}` to " f"AvalancheDataset but the attribute name is " @@ -258,12 +261,12 @@ def __init__( def __len__(self) -> int: return len(self._flat_data) - def __add__(self: TAvalancheDataset, other: TAvalancheDataset) -> \ - TAvalancheDataset: + def __add__(self: TAvalancheDataset, other: TAvalancheDataset) -> TAvalancheDataset: return self.concat(other) - def __radd__(self: TAvalancheDataset, other: TAvalancheDataset) -> \ - TAvalancheDataset: + def __radd__( + self: TAvalancheDataset, other: TAvalancheDataset + ) -> TAvalancheDataset: return other.concat(self) @property @@ -271,8 +274,7 @@ def _datasets(self): """Only for backward compatibility of old unit tests. Do not use.""" return self._flat_data._datasets - def concat(self: TAvalancheDataset, other: TAvalancheDataset) \ - -> TAvalancheDataset: + def concat(self: TAvalancheDataset, other: TAvalancheDataset) -> TAvalancheDataset: """Concatenate this dataset with other. :param other: Other dataset to concatenate. @@ -280,8 +282,7 @@ def concat(self: TAvalancheDataset, other: TAvalancheDataset) \ """ return self.__class__([self, other]) - def subset(self: TAvalancheDataset, indices: Sequence[int]) \ - -> TAvalancheDataset: + def subset(self: TAvalancheDataset, indices: Sequence[int]) -> TAvalancheDataset: """Subset this dataset. :param indices: The indices to keep. @@ -296,11 +297,10 @@ def transform(self): "methods such as `replace_current_transform_group`. " "See the documentation for more info." 
) - + def update_data_attribute( - self: TAvalancheDataset, - name: str, - new_value) -> TAvalancheDataset: + self: TAvalancheDataset, name: str, new_value + ) -> TAvalancheDataset: """ Return a new dataset with the added or replaced data attribute. @@ -316,12 +316,12 @@ def update_data_attribute( containing as many elements as the datasets. :returns: A copy of this dataset with the given data attribute set. """ - assert len(new_value) == len(self), \ - f'Size mismatch when updating data attribute {name}' + assert len(new_value) == len( + self + ), f"Size mismatch when updating data attribute {name}" datacopy = self._shallow_clone_dataset() - datacopy._data_attributes = copy.copy( - datacopy._data_attributes) + datacopy._data_attributes = copy.copy(datacopy._data_attributes) if isinstance(new_value, DataAttribute): assert name == new_value.name @@ -331,44 +331,39 @@ def update_data_attribute( prev_attr = datacopy._data_attributes.get(name, None) if prev_attr is not None: use_in_getitem = prev_attr.use_in_getitem - + datacopy._data_attributes[name] = DataAttribute( - new_value, - name=name, - use_in_getitem=use_in_getitem) - + new_value, name=name, use_in_getitem=use_in_getitem + ) + if not hasattr(datacopy, name): # Creates the field if it does not exist setattr(datacopy, name, datacopy._data_attributes[name]) - + return datacopy def __eq__(self, other: object): - for required_attr in ['_flat_data', - '_data_attributes', - 'collate_fn']: + for required_attr in ["_flat_data", "_data_attributes", "collate_fn"]: if not hasattr(other, required_attr): return False return ( other._flat_data == self._flat_data - and - self._data_attributes == other._data_attributes # type: ignore - and - self.collate_fn == other.collate_fn # type: ignore + and self._data_attributes == other._data_attributes # type: ignore + and self.collate_fn == other.collate_fn # type: ignore ) - + @overload def __getitem__(self, exp_id: int) -> T_co: ... - + @overload - def __getitem__(self: TAvalancheDataset, exp_id: slice) -> \ - TAvalancheDataset: + def __getitem__(self: TAvalancheDataset, exp_id: slice) -> TAvalancheDataset: ... - def __getitem__(self: TAvalancheDataset, idx: Union[int, slice]) -> \ - Union[T_co, TAvalancheDataset]: + def __getitem__( + self: TAvalancheDataset, idx: Union[int, slice] + ) -> Union[T_co, TAvalancheDataset]: elem = self._flat_data[idx] for da in self._data_attributes.values(): if da.use_in_getitem: @@ -406,9 +401,7 @@ def eval(self): """ return self.with_transforms("eval") - def with_transforms( - self: TAvalancheDataset, group_name: str - ) -> TAvalancheDataset: + def with_transforms(self: TAvalancheDataset, group_name: str) -> TAvalancheDataset: """ Returns a new dataset with the transformations of a different group loaded. @@ -475,6 +468,7 @@ class _FlatDataWithTransform(FlatData[T_co]): Do not use outside of this file. 
""" + def __init__( self, datasets: Sequence[IDataset[T_co]], @@ -483,10 +477,7 @@ def __init__( transform_groups: Optional[TransformGroups] = None, frozen_transform_groups: Optional[TransformGroups] = None, ): - can_flatten = ( - (transform_groups is None) - and (frozen_transform_groups is None) - ) + can_flatten = (transform_groups is None) and (frozen_transform_groups is None) super().__init__(datasets, indices=indices, can_flatten=can_flatten) if isinstance(transform_groups, dict): transform_groups = TransformGroups(transform_groups) @@ -527,25 +518,23 @@ def __init__( self._transform_groups.current_group = cgroup def __eq__(self, other): - for required_attr in ['_datasets', - '_transform_groups', - '_frozen_transform_groups']: + for required_attr in [ + "_datasets", + "_transform_groups", + "_frozen_transform_groups", + ]: if not hasattr(other, required_attr): return False - eq_datasets = \ - len(self._datasets) == len(other._datasets) # type: ignore + eq_datasets = len(self._datasets) == len(other._datasets) # type: ignore eq_datasets = eq_datasets and all( - d1 == d2 for d1, d2 in - zip(self._datasets, other._datasets) # type: ignore + d1 == d2 for d1, d2 in zip(self._datasets, other._datasets) # type: ignore ) ftg = other._frozen_transform_groups # type: ignore return ( eq_datasets - and - self._transform_groups == other._transform_groups # type: ignore - and - self._frozen_transform_groups == ftg # type: ignore + and self._transform_groups == other._transform_groups # type: ignore + and self._frozen_transform_groups == ftg # type: ignore ) def _getitem_recursive_call(self, idx, group_name) -> T_co: @@ -563,15 +552,14 @@ def _getitem_recursive_call(self, idx, group_name) -> T_co: element = dd[idx] if self._frozen_transform_groups is not None: - element = self._frozen_transform_groups( - element, group_name=group_name - ) + element = self._frozen_transform_groups(element, group_name=group_name) if self._transform_groups is not None: element = self._transform_groups(element, group_name=group_name) return element - def __getitem__(self: TDataWTransform, idx: Union[int, slice]) -> \ - Union[T_co, TDataWTransform]: + def __getitem__( + self: TDataWTransform, idx: Union[int, slice] + ) -> Union[T_co, TDataWTransform]: if isinstance(idx, (int, np.integer)): elem = self._getitem_recursive_call( idx, self._transform_groups.current_group @@ -580,9 +568,7 @@ def __getitem__(self: TDataWTransform, idx: Union[int, slice]) -> \ else: return super().__getitem__(idx) - def with_transforms( - self: TDataWTransform, group_name: str - ) -> TDataWTransform: + def with_transforms(self: TDataWTransform, group_name: str) -> TDataWTransform: """ Returns a new dataset with the transformations of a different group loaded. @@ -647,9 +633,7 @@ def _shallow_clone_dataset(self: TDataWTransform) -> TDataWTransform: This is a shallow copy, i.e. the data attributes are not copied. 
""" dataset_copy = copy.copy(self) - dataset_copy._transform_groups = copy.copy( - dataset_copy._transform_groups - ) + dataset_copy._transform_groups = copy.copy(dataset_copy._transform_groups) dataset_copy._frozen_transform_groups = copy.copy( dataset_copy._frozen_transform_groups ) diff --git a/avalanche/benchmarks/utils/data_attribute.py b/avalanche/benchmarks/utils/data_attribute.py index 66f697e8b..d00908c16 100644 --- a/avalanche/benchmarks/utils/data_attribute.py +++ b/avalanche/benchmarks/utils/data_attribute.py @@ -22,7 +22,7 @@ from .dataset_definitions import IDataset from .flat_data import ConstantSequence, FlatData -T_co = TypeVar('T_co', covariant=True) +T_co = TypeVar("T_co", covariant=True) class DataAttribute(IDataset[T_co], Sequence[T_co]): @@ -35,11 +35,7 @@ class labels. Data attributes can be efficiently concatenated and subsampled. """ - def __init__( - self, - data: IDataset[T_co], - name: str, - use_in_getitem: bool = False): + def __init__(self, data: IDataset[T_co], name: str, use_in_getitem: bool = False): """Data Attribute. :param data: a sequence of values, one for each sample. @@ -51,7 +47,7 @@ def __init__( """ assert name is not None assert data is not None - + self.name: str = name self.use_in_getitem: bool = use_in_getitem @@ -73,8 +69,7 @@ def __getitem__(self, item: int) -> T_co: def __getitem__(self, item: slice) -> Sequence[T_co]: ... - def __getitem__(self, item: Union[int, slice]) -> \ - Union[T_co, Sequence[T_co]]: + def __getitem__(self, item: Union[int, slice]) -> Union[T_co, Sequence[T_co]]: return self.data[item] def __len__(self): @@ -124,7 +119,7 @@ def val_to_idx(self): self._val_to_idx = dict() if isinstance(self.data, ConstantSequence): self._val_to_idx = {self.data[0]: range(len(self.data))} - else: + else: for i, x in enumerate(self.data): if x not in self.val_to_idx: self._val_to_idx[x] = [] @@ -151,9 +146,9 @@ def concat(self, other: "DataAttribute"): :param other: the other `DataAttribute` :return: the new concatenated `DataAttribute` """ - assert self.name == other.name, ( - 'Cannot concatenate DataAttributes with different names.' - ) + assert ( + self.name == other.name + ), "Cannot concatenate DataAttributes with different names." return DataAttribute( self.data.concat(other.data), self.name, @@ -164,8 +159,7 @@ def _normalize_sequence(self, seq: IDataset[T_co]) -> FlatData[T_co]: if isinstance(seq, torch.Tensor): # equality doesn't work for tensors seq = seq.tolist() - if not isinstance(seq, FlatData) \ - and not isinstance(seq, ConstantSequence): + if not isinstance(seq, FlatData) and not isinstance(seq, ConstantSequence): return FlatData([seq]) return seq @@ -181,8 +175,4 @@ def __init__(self, task_labels): super().__init__(task_labels, "task_labels", use_in_getitem=True) -__all__ = [ - "DataAttribute", - "TensorDataAttribute", - "TaskLabels" -] +__all__ = ["DataAttribute", "TensorDataAttribute", "TaskLabels"] diff --git a/avalanche/benchmarks/utils/data_loader.py b/avalanche/benchmarks/utils/data_loader.py index 3da2515cf..492fa7cdc 100644 --- a/avalanche/benchmarks/utils/data_loader.py +++ b/avalanche/benchmarks/utils/data_loader.py @@ -69,11 +69,11 @@ def __init__( """Custom data loader for loading batches from multiple datasets. This dataloader iterates in parallel multiple datasets which are used - to create mini-batches by concatenating their data together. - The number of examples from each dataset in each mini-batch + to create mini-batches by concatenating their data together. 
+ The number of examples from each dataset in each mini-batch is defined by the `batch_sizes` parameter. - The length of the loader (that is, the number of iterations in an + The length of the loader (that is, the number of iterations in an epoch) is determined by the `termination_dataset`. The `oversample_small_datasets` parameter can be used to control what @@ -92,7 +92,7 @@ def __init__( datasets will not be cycled again, which means that some datasets will not contribute to the minibatch composition near the end of the epoch. - :param distributed_sampling: If True, apply the PyTorch + :param distributed_sampling: If True, apply the PyTorch :class:`DistributedSampler`. Defaults to False. :param never_ending: If True, this data loader will cycle indefinitely by iterating over all datasets again and again and the epoch will @@ -111,12 +111,11 @@ def __init__( if termination_dataset >= len(datasets): raise ValueError( - 'termination_dataset must be the index of the ' - 'dataset used to determine the termination of an epoch' + "termination_dataset must be the index of the " + "dataset used to determine the termination of an epoch" ) - - distributed_sampling = distributed_sampling and \ - DistributedHelper.is_distributed + + distributed_sampling = distributed_sampling and DistributedHelper.is_distributed self.datasets = list(datasets) self.oversample_small_datasets: bool = oversample_small_datasets @@ -135,15 +134,23 @@ def __init__( if self.never_ending: # Infinite data loader self.termination_dataset = -1 - self.n_iterations = 10 ** 10 + self.n_iterations = 10**10 self.oversample_small_datasets = True else: # Estimate number of iterations per epoch - loaders_len = np.full((len(self.datasets,)), -1) + loaders_len = np.full( + ( + len( + self.datasets, + ) + ), + -1, + ) if self.termination_dataset < 0: - for i, (data_subset, subset_mb_size) in \ - enumerate(zip(self.datasets, self.batch_sizes)): + for i, (data_subset, subset_mb_size) in enumerate( + zip(self.datasets, self.batch_sizes) + ): loaders_len[i] = len( _make_data_loader( data_subset, @@ -160,7 +167,7 @@ def __init__( distributed_sampling, kwargs, self.batch_sizes[self.termination_dataset], - force_no_workers=True + force_no_workers=True, )[0] ) @@ -172,10 +179,8 @@ def __iter__(self): # https://pytorch.org/docs/stable/_modules/torch/utils/data/dataloader.html#DataLoader # Needed to support 'persistent_workers' - use_persistent_workers = self.loader_kwargs.get( - 'persistent_workers', False) - num_workers = self.loader_kwargs.get( - 'num_workers', 0) + use_persistent_workers = self.loader_kwargs.get("persistent_workers", False) + num_workers = self.loader_kwargs.get("num_workers", 0) if use_persistent_workers and num_workers > 0: if self._persistent_loader is None: @@ -184,13 +189,13 @@ def __iter__(self): yield from self._persistent_loader else: yield from self._get_loader() - + def _get_loader(self): samplers = self._create_samplers( self.datasets, self.batch_sizes, self.distributed_sampling, - self.loader_kwargs + self.loader_kwargs, ) overall_dataset = ConcatDataset(self.datasets) @@ -200,47 +205,43 @@ def _get_loader(self): samplers, termination_dataset_idx=self.termination_dataset, oversample_small_datasets=self.oversample_small_datasets, - never_ending=self.never_ending + never_ending=self.never_ending, ) loader = _make_data_loader_with_batched_sampler( overall_dataset, batch_sampler=multi_dataset_batch_sampler, - data_loader_args=self.loader_kwargs + data_loader_args=self.loader_kwargs, ) return loader def __len__(self): 
return self.n_iterations - + @staticmethod def _create_samplers( - datasets: List[AvalancheDataset], + datasets: List[AvalancheDataset], batch_sizes: List[int], distributed_sampling: bool, - loader_kwargs: Dict[str, Any] + loader_kwargs: Dict[str, Any], ): samplers = [] for dataset, dataset_mb_size in zip(datasets, batch_sizes): sampler = _make_sampler( - dataset, - distributed_sampling, - loader_kwargs, - dataset_mb_size + dataset, distributed_sampling, loader_kwargs, dataset_mb_size ) samplers.append(sampler) - + return samplers - + class GroupBalancedDataLoader(MultiDatasetDataLoader): """Data loader that balances data from multiple datasets.""" def __init__( - self, datasets: Sequence[AvalancheDataset], oversample_small_groups: bool = False, @@ -263,7 +264,7 @@ def __init__( oversampled to match the largest one. :param batch_size: the size of the batch. It must be greater than or equal to the number of groups. - :param distributed_sampling: If True, apply the PyTorch + :param distributed_sampling: If True, apply the PyTorch :class:`DistributedSampler`. Defaults to False. :param kwargs: data loader arguments used to instantiate the loader for each group separately. See pytorch :class:`DataLoader`. @@ -318,21 +319,21 @@ def __init__( :param data: an instance of `AvalancheDataset`. :param oversample_small_groups: whether smaller tasks should be oversampled to match the largest one. - :param distributed_sampling: If True, apply the PyTorch + :param distributed_sampling: If True, apply the PyTorch :class:`DistributedSampler`. Defaults to False. :param kwargs: data loader arguments used to instantiate the loader for each task separately. See pytorch :class:`DataLoader`. """ - if 'oversample_small_tasks' in kwargs: + if "oversample_small_tasks" in kwargs: raise ValueError( "oversample_small_tasks is deprecated in favor of " "oversample_small_groups" ) - + # Split data by task task_datasets = [] - task_labels_field = getattr(data, 'targets_task_labels') + task_labels_field = getattr(data, "targets_task_labels") assert isinstance(task_labels_field, DataAttribute) for task_label in task_labels_field.uniques: tidxs = task_labels_field.val_to_idx[task_label] @@ -368,11 +369,11 @@ def __init__( :param datasets: an instance of `AvalancheDataset`. :param batch_size: the size of the batch to take from each dataset. - Please note that, differently from other Avalanche multi dataset + Please note that, differently from other Avalanche multi dataset loaders, this value is the per-dataset contribution to the final mini-batch, NOT the final mini-batch size. The final mini-batches will be of size `len(datasets) * batch_size`. - :param distributed_sampling: If True, apply the PyTorch + :param distributed_sampling: If True, apply the PyTorch :class:`DistributedSampler`. Defaults to False. :param kwargs: data loader arguments used to instantiate the loader for each group separately. See pytorch :class:`DataLoader`. @@ -413,8 +414,8 @@ def __init__( balanced using the task label (i.e. each mini-batch contains a balanced number of examples from all the tasks in the `data` and `memory`). - The length of the loader is determined only by the current - task data and is the same than what it would be when creating a + The length of the loader is determined only by the current + task data and is the same than what it would be when creating a data loader for this dataset. 
If `oversample_small_tasks == True` smaller tasks are oversampled to @@ -432,7 +433,7 @@ def __init__( :param task_balanced_dataloader: if true, buffer data loaders will be task-balanced, otherwise it creates a single data loader for the buffer samples. - :param distributed_sampling: If True, apply the PyTorch + :param distributed_sampling: If True, apply the PyTorch :class:`DistributedSampler`. Defaults to False. :param kwargs: data loader arguments used to instantiate the loader for each task separately. See pytorch :class:`DataLoader`. @@ -443,7 +444,7 @@ def __init__( # Create dataloader for memory items if task_balanced_dataloader: - memory_task_labels = getattr(memory, 'targets_task_labels') + memory_task_labels = getattr(memory, "targets_task_labels") assert isinstance(memory_task_labels, DataAttribute) num_keys = len(memory_task_labels.uniques) @@ -468,10 +469,7 @@ def __init__( # or a dictionary task_id -> mb_size # In both cases, remaining_examples=0 data_batch_sizes, data_subsets = self._get_datasets_and_batch_sizes( - data, - batch_size, - 0, - False + data, batch_size, 0, False ) memory_batch_sizes, memory_subsets = self._get_datasets_and_batch_sizes( @@ -497,10 +495,10 @@ def __init__( force_no_workers=True, )[0] ) - - longest_data_subset_idx = np.array( - len(d) for d in loaders_for_len_estimation - ).argmax().item() + + longest_data_subset_idx = ( + np.array(len(d) for d in loaders_for_len_estimation).argmax().item() + ) super().__init__( data_subsets + memory_subsets, @@ -516,7 +514,7 @@ def _get_datasets_and_batch_sizes( data: AvalancheDataset, batch_sizes_def: Union[int, Dict[int, int]], remaining_examples: int, - task_balanced_dataloader: bool + task_balanced_dataloader: bool, ): datasets: List[AvalancheDataset] = [] batch_sizes: List[int] = [] @@ -548,17 +546,18 @@ class MultiDatasetSampler(Sampler[List[int]]): """ Iterate over datasets and provide a batch per dataset in each mini-batch. 
""" + def __init__( - self, - datasets: Sequence[Sized], - samplers: Sequence[BatchSampler], - termination_dataset_idx: int = 0, - oversample_small_datasets: bool = False, - never_ending: bool = False): + self, + datasets: Sequence[Sized], + samplers: Sequence[BatchSampler], + termination_dataset_idx: int = 0, + oversample_small_datasets: bool = False, + never_ending: bool = False, + ): assert len(datasets) == len(samplers) assert never_ending or ( - termination_dataset_idx >= 0 and - termination_dataset_idx < len(datasets) + termination_dataset_idx >= 0 and termination_dataset_idx < len(datasets) ) self.datasets = list(datasets) @@ -568,21 +567,22 @@ def __init__( if self.never_ending: self.termination_dataset_idx = -1 - self.termination_dataset_iterations = 10 ** 10 + self.termination_dataset_iterations = 10**10 self.oversample_small_datasets = True if sum(len(x) for x in self.samplers) == 0: raise RuntimeError( - 'The never ending sampler must able to create a mini-batch' + "The never ending sampler must able to create a mini-batch" ) else: # termination_dataset_idx => dataset used to determine the epoch end self.termination_dataset_idx = termination_dataset_idx - self.termination_dataset_iterations = \ - len(self.samplers[self.termination_dataset_idx]) + self.termination_dataset_iterations = len( + self.samplers[self.termination_dataset_idx] + ) self.oversample_small_datasets = oversample_small_datasets - + def __len__(self): return self.termination_dataset_iterations @@ -600,8 +600,9 @@ def __iter__(self): index_offsets = np.array([0] + self.cumulative_sizes[:-1]) while True: - per_dataset_indices: List[Optional[np.ndarray]] = \ - [None] * number_of_datasets + per_dataset_indices: List[Optional[np.ndarray]] = [ + None + ] * number_of_datasets if self.never_ending: sampling_dataset_order = list(range(number_of_datasets)) @@ -609,16 +610,15 @@ def __iter__(self): else: # Obtain the indices for the "main" dataset first sampling_dataset_order = [self.termination_dataset_idx] + list( - x for x in range(number_of_datasets) + x + for x in range(number_of_datasets) if x != self.termination_dataset_idx ) - is_termination_dataset = \ - [True] + ([False] * (number_of_datasets - 1)) + is_termination_dataset = [True] + ([False] * (number_of_datasets - 1)) for dataset_idx, is_term_dataset in zip( - sampling_dataset_order, - is_termination_dataset): - + sampling_dataset_order, is_termination_dataset + ): sampler = samplers_list[dataset_idx] sampler_iterator = sampler_iterators[dataset_idx] @@ -628,22 +628,18 @@ def __iter__(self): if len(sampler) == 0: if is_term_dataset and (not self.never_ending): return - + samplers_list[dataset_idx] = None sampler_iterators[dataset_idx] = None continue should_stop_if_ended = ( - is_term_dataset or - not self.oversample_small_datasets + is_term_dataset or not self.oversample_small_datasets ) and (not self.never_ending) - continue_epoch, updated_iterator, next_batch_indices = \ - self._next_batch( - sampler, - sampler_iterator, - stop_on_last_batch=should_stop_if_ended - ) + continue_epoch, updated_iterator, next_batch_indices = self._next_batch( + sampler, sampler_iterator, stop_on_last_batch=should_stop_if_ended + ) if not continue_epoch: if is_term_dataset: @@ -656,45 +652,41 @@ def __iter__(self): samplers_list[dataset_idx] = None sampler_iterators[dataset_idx] = None continue - + assert next_batch_indices is not None next_batch_indices = np.array(next_batch_indices) - # Shift indices according to the position of the + # Shift indices according to the position 
of the # dataset in the list next_batch_indices += index_offsets[dataset_idx] - + sampler_iterators[dataset_idx] = updated_iterator per_dataset_indices[dataset_idx] = next_batch_indices - per_dataset_indices = [x for x in per_dataset_indices - if x is not None] + per_dataset_indices = [x for x in per_dataset_indices if x is not None] yield np.concatenate(per_dataset_indices).tolist() - + @staticmethod def _next_batch( - sampler: Sampler, - sampler_iterator: Iterator[Sequence[int]], - stop_on_last_batch: bool): + sampler: Sampler, + sampler_iterator: Iterator[Sequence[int]], + stop_on_last_batch: bool, + ): try: next_batch_indices = next(sampler_iterator) return True, sampler_iterator, next_batch_indices except StopIteration: if stop_on_last_batch: return False, None, None - + # Re-create the iterator # This time, do not catch StopIteration if isinstance(sampler, BatchSampler): if isinstance(sampler.sampler, DistributedSampler): - sampler.sampler.set_epoch( - sampler.sampler.epoch + 1 - ) + sampler.sampler.set_epoch(sampler.sampler.epoch + 1) elif isinstance(sampler, DistributedSampler): # Manage shuffling in DistributedSampler - sampler.set_epoch( - sampler.epoch + 1 - ) - + sampler.set_epoch(sampler.epoch + 1) + sampler_iterator = iter(sampler) next_batch_indices = next(sampler_iterator) return True, sampler_iterator, next_batch_indices @@ -712,11 +704,11 @@ def _make_data_loader( collate_from_data_or_kwargs(dataset, data_loader_args) if force_no_workers: - data_loader_args['num_workers'] = 0 - if 'persistent_workers' in data_loader_args: - data_loader_args['persistent_workers'] = False - if 'prefetch_factor' in data_loader_args: - data_loader_args['prefetch_factor'] = 2 + data_loader_args["num_workers"] = 0 + if "persistent_workers" in data_loader_args: + data_loader_args["persistent_workers"] = False + if "prefetch_factor" in data_loader_args: + data_loader_args["prefetch_factor"] = 2 if DistributedHelper.is_distributed and distributed_sampling: # Note: shuffle only goes in the sampler, while @@ -737,17 +729,13 @@ def _make_data_loader( ) else: sampler = None - data_loader = DataLoader( - dataset, batch_size=batch_size, **data_loader_args - ) + data_loader = DataLoader(dataset, batch_size=batch_size, **data_loader_args) return data_loader, sampler def _make_data_loader_with_batched_sampler( - dataset: Dataset, - batch_sampler: Any, - data_loader_args: Dict[str, Any] + dataset: Dataset, batch_sampler: Any, data_loader_args: Dict[str, Any] ): data_loader_args = data_loader_args.copy() @@ -759,26 +747,23 @@ def _make_data_loader_with_batched_sampler( data_loader_args.pop("sampler", False) data_loader_args.pop("drop_last", False) - return DataLoader( - dataset, - batch_sampler=batch_sampler, - **data_loader_args - ) + return DataLoader(dataset, batch_sampler=batch_sampler, **data_loader_args) def _make_sampler( dataset: Any, distributed_sampling: bool, data_loader_args: Dict[str, Any], - batch_size: int + batch_size: int, ): loader, _ = _make_data_loader( dataset, distributed_sampling, data_loader_args, batch_size, - force_no_workers=True) - + force_no_workers=True, + ) + sampler = loader.batch_sampler return sampler diff --git a/avalanche/benchmarks/utils/dataset_definitions.py b/avalanche/benchmarks/utils/dataset_definitions.py index b5107f926..7931175b0 100644 --- a/avalanche/benchmarks/utils/dataset_definitions.py +++ b/avalanche/benchmarks/utils/dataset_definitions.py @@ -59,9 +59,7 @@ def targets(self) -> Sequence[TTargetType_co]: ... 
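# Simplified sketch (illustrative only, not part of this patch) of the index
# bookkeeping performed by MultiDatasetSampler.__iter__ above: the indices produced
# by each per-dataset sampler are shifted by the cumulative size of the preceding
# datasets, so they address the right rows of the underlying ConcatDataset, and are
# then concatenated into a single mini-batch. The toy sizes below are assumptions.
import numpy as np

dataset_sizes = [100, 40, 60]                          # three datasets
cumulative_sizes = np.cumsum(dataset_sizes)            # [100, 140, 200]
index_offsets = np.array([0, *cumulative_sizes[:-1]])  # [0, 100, 140]

# One batch of local indices per dataset (as each BatchSampler would yield them).
per_dataset_batches = [np.array([0, 1]), np.array([5, 6]), np.array([2, 3])]

shifted = [batch + index_offsets[i] for i, batch in enumerate(per_dataset_batches)]
mini_batch = np.concatenate(shifted).tolist()
print(mini_batch)  # [0, 1, 105, 106, 142, 143]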
-class ISupportedClassificationDataset( - IDatasetWithTargets[T_co, SupportsInt], Protocol -): +class ISupportedClassificationDataset(IDatasetWithTargets[T_co, SupportsInt], Protocol): """ Protocol definition of a Dataset that has a valid targets field (like the Datasets in the torchvision package) for classification. @@ -78,6 +76,7 @@ class ISupportedClassificationDataset( On the contrary, class :class:`IClassificationDataset` strictly defines a `targets` field as sequence of native `int`s. """ + @property def targets(self) -> Sequence[SupportsInt]: """ diff --git a/avalanche/benchmarks/utils/dataset_utils.py b/avalanche/benchmarks/utils/dataset_utils.py index c3d0ec180..8a8548c73 100644 --- a/avalanche/benchmarks/utils/dataset_utils.py +++ b/avalanche/benchmarks/utils/dataset_utils.py @@ -41,18 +41,16 @@ TMappableTargetType = TypeVar("TMappableTargetType") -TData = TypeVar('TData') -TIntermediateData = TypeVar('TIntermediateData') -TSliceSequence = TypeVar('TSliceSequence', bound='SliceSequence') +TData = TypeVar("TData") +TIntermediateData = TypeVar("TIntermediateData") +TSliceSequence = TypeVar("TSliceSequence", bound="SliceSequence") class SliceSequence(Sequence[TData], Generic[TData, TIntermediateData], ABC): - def __init__( - self, - slice_ids: Optional[List[int]] - ): - self.slice_ids: Optional[List[int]] = \ + def __init__(self, slice_ids: Optional[List[int]]): + self.slice_ids: Optional[List[int]] = ( list(slice_ids) if slice_ids is not None else None + ) """ Describes thew indices in the current slice (w.r.t. the original sequence). @@ -73,16 +71,15 @@ def __getitem__(self, item: int) -> TData: @overload def __getitem__(self: TSliceSequence, item: slice) -> TSliceSequence: ... - + @final - def __getitem__(self: TSliceSequence, item: Union[int, slice]) -> \ - Union[TSliceSequence, TData]: + def __getitem__( + self: TSliceSequence, item: Union[int, slice] + ) -> Union[TSliceSequence, TData]: if isinstance(item, (int, np.integer)): item = int(item) if item >= len(self): - raise IndexError( - "Sequence index out of bounds" + str(int(item)) - ) + raise IndexError("Sequence index out of bounds" + str(int(item))) curr_elem = item if self.slice_ids is None else self.slice_ids[item] @@ -105,9 +102,8 @@ def __len__(self) -> int: return self._full_length() def _forward_slice( - self, - *slices: Union[None, slice, Iterable[int]]) -> \ - Optional[Iterable[int]]: + self, *slices: Union[None, slice, Iterable[int]] + ) -> Optional[Iterable[int]]: any_slice = False indices = list(range(self._full_length())) for sl in slices: @@ -116,8 +112,7 @@ def _forward_slice( any_slice = True slice_indices = slice_alike_object_to_indices( - slice_alike_object=sl, - max_length=len(indices) + slice_alike_object=sl, max_length=len(indices) ) new_indices = [indices[x] for x in slice_indices] @@ -158,28 +153,29 @@ def _post_process_element(self, element: TIntermediateData) -> TData: return element # type: ignore def _make_slice( - self: TSliceSequence, - sequence_slice: Optional[Iterable[int]]) -> TSliceSequence: + self: TSliceSequence, sequence_slice: Optional[Iterable[int]] + ) -> TSliceSequence: """ Obtain a sub-squence given a list of indices of the elements to include. - + Element ids are the ones of the originating sequence (that is, the non-sliced sequence). 
""" stream_copy = copy.copy(self) - stream_copy.slice_ids = list(sequence_slice) if \ - sequence_slice is not None else None + stream_copy.slice_ids = ( + list(sequence_slice) if sequence_slice is not None else None + ) return stream_copy - + def __str__(self): - return ( - "[" + ", ".join([str(self[idx]) for idx in range(len(self))]) + "]" - ) + return "[" + ", ".join([str(self[idx]) for idx in range(len(self))]) + "]" -class SubSequence(SliceSequence[TTargetType, TMappableTargetType], - Generic[TTargetType, TMappableTargetType]): +class SubSequence( + SliceSequence[TTargetType, TMappableTargetType], + Generic[TTargetType, TMappableTargetType], +): """ A utility class used to define a lazily evaluated sub-sequence. """ @@ -189,23 +185,19 @@ def __init__( targets: Sequence[TMappableTargetType], *, indices: Optional[Sequence[int]] = None, - converter: Optional[Callable[[TMappableTargetType], TTargetType]] = None + converter: Optional[Callable[[TMappableTargetType], TTargetType]] = None, ): self._targets = targets self.converter = converter - super().__init__( - slice_ids=list(indices) if indices is not None else None - ) + super().__init__(slice_ids=list(indices) if indices is not None else None) def _full_length(self) -> int: return len(self._targets) def _make_element(self, element_idx: int) -> TMappableTargetType: return self._targets[element_idx] - - def _post_process_element( - self, - element: TMappableTargetType) -> TTargetType: + + def _post_process_element(self, element: TMappableTargetType) -> TTargetType: if self.converter is None: return element # type: ignore return self.converter(element) @@ -243,9 +235,7 @@ class SequenceDataset(IDatasetWithTargets[T_co, TTargetType]): """ def __init__( - self, - *sequences: Sequence, - targets: Union[int, Sequence[TTargetType]] = 1 + self, *sequences: Sequence, targets: Union[int, Sequence[TTargetType]] = 1 ): """ Creates a ``SequenceDataset`` instance. @@ -310,15 +300,15 @@ def find_list_from_index( return list_idx, pattern_idx -T = TypeVar('T') -X = TypeVar('X') +T = TypeVar("T") +X = TypeVar("X") def manage_advanced_indexing( idx: Union[slice, int, Iterable[int]], single_element_getter: Callable[[int], X], max_length: int, - collate_fn: Callable[[Iterable[X]], T] + collate_fn: Callable[[Iterable[X]], T], ) -> Union[X, T]: """ Utility function used to manage the advanced indexing and slicing. @@ -337,8 +327,8 @@ def manage_advanced_indexing( of the patterns addressed by the idx parameter. """ indexes_iterator: Iterable[int] = slice_alike_object_to_indices( - slice_alike_object=idx, - max_length=max_length) + slice_alike_object=idx, max_length=max_length + ) elements: List[X] = [] for single_idx in indexes_iterator: @@ -352,16 +342,14 @@ def manage_advanced_indexing( def slice_alike_object_to_indices( - slice_alike_object: Union[slice, - int, - Iterable[int], - Tensor, - ndarray], max_length: int) -> Iterable[int]: + slice_alike_object: Union[slice, int, Iterable[int], Tensor, ndarray], + max_length: int, +) -> Iterable[int]: """ - Utility function used to obtain the sequence of indices given a slice + Utility function used to obtain the sequence of indices given a slice object. - This fuction offers some additional flexibility by also accepting generic + This fuction offers some additional flexibility by also accepting generic Iterable[int], PyTorch Tensor and NumPy ndarray. Beware that this function only supports 1-D slicing. 
@@ -370,7 +358,7 @@ def slice_alike_object_to_indices( If the input object is a native slice or int, then negative indices will be managed as usual (like when used on a native Python list). If a tensor or - generic iterable is passed, then indices will be transformed as they where + generic iterable is passed, then indices will be transformed as they where int(s). """ @@ -397,12 +385,10 @@ def slice_alike_object_to_indices( indexes_iterator = [] else: # Flat Tensor (NumPy or PyTorch) - indexes_iterator = \ - slice_alike_object.tolist() # type: ignore + indexes_iterator = slice_alike_object.tolist() # type: ignore else: # Last attempt - indexes_iterator = \ - [slice_alike_object.item()] # type: ignore + indexes_iterator = [slice_alike_object.item()] # type: ignore if len(indexes_iterator) > 0: # type: ignore assert isinstance(indexes_iterator, int) else: @@ -424,21 +410,21 @@ def slice_alike_object_to_indices( if idx >= 0: if idx >= max_length: raise IndexError( - f'Index {idx} out of range for sequence ' - f'of length {max_length}' + f"Index {idx} out of range for sequence " + f"of length {max_length}" ) else: pos_idx = max_length - idx # Negative to positive if pos_idx < 0: raise IndexError( - f'Index {idx} out of range for sequence ' - f'of length {max_length}' + f"Index {idx} out of range for sequence " + f"of length {max_length}" ) idx = pos_idx - + iterator_as_list.append(idx) indexes_iterator = iterator_as_list - + return indexes_iterator # type: ignore diff --git a/avalanche/benchmarks/utils/datasets_from_filelists.py b/avalanche/benchmarks/utils/datasets_from_filelists.py index dabf81ad2..c22de9487 100644 --- a/avalanche/benchmarks/utils/datasets_from_filelists.py +++ b/avalanche/benchmarks/utils/datasets_from_filelists.py @@ -67,20 +67,18 @@ def default_flist_reader(flist: Union[str, Path]) -> List[Tuple[str, int]]: return imlist -T = TypeVar('T', covariant=True) -TTargetsType = TypeVar('TTargetsType') +T = TypeVar("T", covariant=True) +TTargetsType = TypeVar("TTargetsType") PathALikeT = Union[Path, str] CoordsT = Union[int, float] CropBoxT = Tuple[CoordsT, CoordsT, CoordsT, CoordsT] FilesDefT = Union[ - Tuple[PathALikeT, TTargetsType], - Tuple[PathALikeT, TTargetsType, Sequence[int]] + Tuple[PathALikeT, TTargetsType], Tuple[PathALikeT, TTargetsType, Sequence[int]] ] -class PathsDataset(data.Dataset[Tuple[T, TTargetsType]], - Generic[T, TTargetsType]): +class PathsDataset(data.Dataset[Tuple[T, TTargetsType]], Generic[T, TTargetsType]): """ This class extends the basic Pytorch Dataset class to handle list of paths as the main data source. 
@@ -249,9 +247,7 @@ def datasets_from_filelists( """ if complete_test_set_only: - if not ( - isinstance(test_filelists, str) or isinstance(test_filelists, Path) - ): + if not (isinstance(test_filelists, str) or isinstance(test_filelists, Path)): if len(test_filelists) > 1: raise ValueError( "When complete_test_set_only is True, test_filelists must " @@ -372,14 +368,8 @@ def datasets_from_paths( # Detect common root try: all_paths = [ - pattern_tuple[0] - for exp_list in train_list - for pattern_tuple in exp_list - ] + [ - pattern_tuple[0] - for exp_list in test_list - for pattern_tuple in exp_list - ] + pattern_tuple[0] for exp_list in train_list for pattern_tuple in exp_list + ] + [pattern_tuple[0] for exp_list in test_list for pattern_tuple in exp_list] common_root = os.path.commonpath(all_paths) except ValueError: @@ -454,8 +444,9 @@ def datasets_from_paths( return train_inc_datasets, test_inc_datasets -def common_paths_root(exp_list: Sequence[FilesDefT]) -> \ - Tuple[Union[str, None], Sequence[FilesDefT]]: +def common_paths_root( + exp_list: Sequence[FilesDefT], +) -> Tuple[Union[str, None], Sequence[FilesDefT]]: common_root = None # Detect common root diff --git a/avalanche/benchmarks/utils/deprecated.py b/avalanche/benchmarks/utils/deprecated.py index 056dbf27e..f970c8ac3 100644 --- a/avalanche/benchmarks/utils/deprecated.py +++ b/avalanche/benchmarks/utils/deprecated.py @@ -94,15 +94,12 @@ def AvalancheConcatDataset( *, transform: Optional[XTransform] = None, target_transform: Optional[YTransform] = None, - transform_groups: Optional[Dict[str, - Tuple[XTransform, YTransform]]] = None, + transform_groups: Optional[Dict[str, Tuple[XTransform, YTransform]]] = None, initial_transform_group: Optional[str] = None, - task_labels: Optional[Union[int, - Sequence[int], - Sequence[Sequence[int]]]] = None, - targets: Optional[Union[ - Sequence[TTargetType], Sequence[Sequence[TTargetType]] - ]] = None, + task_labels: Optional[Union[int, Sequence[int], Sequence[Sequence[int]]]] = None, + targets: Optional[ + Union[Sequence[TTargetType], Sequence[Sequence[TTargetType]]] + ] = None, collate_fn: Optional[Callable[[List], Any]] = None, ): warnings.warn( diff --git a/avalanche/benchmarks/utils/detection_dataset.py b/avalanche/benchmarks/utils/detection_dataset.py index 2d040e96b..7f3b3b632 100644 --- a/avalanche/benchmarks/utils/detection_dataset.py +++ b/avalanche/benchmarks/utils/detection_dataset.py @@ -49,7 +49,8 @@ from .data_attribute import DataAttribute from .dataset_definitions import ( IDataset, - IDatasetWithTargets, ) + IDatasetWithTargets, +) from .dataset_utils import ( SubSequence, ) @@ -79,21 +80,18 @@ def task_pattern_indices(self) -> Dict[int, Sequence[int]]: return self.targets_task_labels.val_to_idx # type: ignore @property - def task_set(self: TDetectionDataset) -> \ - TaskSet[TDetectionDataset]: + def task_set(self: TDetectionDataset) -> TaskSet[TDetectionDataset]: """Returns the dataset's ``TaskSet``, which is a mapping .""" return TaskSet(self) def subset(self, indices): data = super().subset(indices) - return data.with_transforms( - self._flat_data._transform_groups.current_group) + return data.with_transforms(self._flat_data._transform_groups.current_group) def concat(self, other): data = super().concat(other) - return data.with_transforms( - self._flat_data._transform_groups.current_group) + return data.with_transforms(self._flat_data._transform_groups.current_group) def __hash__(self): return id(self) @@ -101,44 +99,44 @@ def __hash__(self): class 
SupervisedDetectionDataset(DetectionDataset[T_co]): def __init__( - self, - datasets: List[IDataset[T_co]], - *, - indices: Optional[List[int]] = None, - data_attributes: Optional[List[DataAttribute]] = None, - transform_groups: Optional[TransformGroups] = None, - frozen_transform_groups: Optional[TransformGroups] = None, - collate_fn: Optional[Callable[[List], Any]] = None): + self, + datasets: List[IDataset[T_co]], + *, + indices: Optional[List[int]] = None, + data_attributes: Optional[List[DataAttribute]] = None, + transform_groups: Optional[TransformGroups] = None, + frozen_transform_groups: Optional[TransformGroups] = None, + collate_fn: Optional[Callable[[List], Any]] = None + ): super().__init__( datasets=datasets, indices=indices, data_attributes=data_attributes, transform_groups=transform_groups, frozen_transform_groups=frozen_transform_groups, - collate_fn=collate_fn + collate_fn=collate_fn, + ) + + assert hasattr(self, "targets"), ( + "The supervised version of the ClassificationDataset requires " + + "the targets field" + ) + assert hasattr(self, "targets_task_labels"), ( + "The supervised version of the ClassificationDataset requires " + + "the targets_task_labels field" ) - - assert hasattr(self, 'targets'), \ - 'The supervised version of the ClassificationDataset requires ' + \ - 'the targets field' - assert hasattr(self, 'targets_task_labels'), \ - 'The supervised version of the ClassificationDataset requires ' + \ - 'the targets_task_labels field' @property def targets(self) -> DataAttribute[TTargetType]: - return self._data_attributes['targets'] + return self._data_attributes["targets"] @property def targets_task_labels(self) -> DataAttribute[int]: - return self._data_attributes['targets_task_labels'] + return self._data_attributes["targets_task_labels"] SupportedDetectionDataset = Union[ - IDatasetWithTargets, - Subset, - ConcatDataset, - DetectionDataset + IDatasetWithTargets, Subset, ConcatDataset, DetectionDataset ] @@ -284,10 +282,10 @@ def make_detection_dataset( initial_transform_group, dataset, ) - targets_data: Optional[DataAttribute[TTargetType]] = \ - _init_targets(dataset, targets) - task_labels_data: Optional[DataAttribute[int]] = \ - _init_task_labels(dataset, task_labels) + targets_data: Optional[DataAttribute[TTargetType]] = _init_targets(dataset, targets) + task_labels_data: Optional[DataAttribute[int]] = _init_task_labels( + dataset, task_labels + ) das: List[DataAttribute] = [] if targets_data is not None: @@ -297,11 +295,11 @@ def make_detection_dataset( # Check if supervision data has been added is_supervised = is_supervised or ( - targets_data is not None and - task_labels_data is not None) - + targets_data is not None and task_labels_data is not None + ) + if collate_fn is None: - collate_fn = getattr(dataset, 'collate_fn', detection_collate_fn) + collate_fn = getattr(dataset, "collate_fn", detection_collate_fn) data: Union[DetectionDataset, SupervisedDetectionDataset] if is_supervised: @@ -318,15 +316,16 @@ def make_detection_dataset( transform_groups=transform_gs, collate_fn=collate_fn, ) - + if initial_transform_group is not None: return data.with_transforms(initial_transform_group) else: return data -def _init_targets(dataset, targets, check_shape=True) -> \ - Optional[DataAttribute[TTargetType]]: +def _init_targets( + dataset, targets, check_shape=True +) -> Optional[DataAttribute[TTargetType]]: if targets is not None: # User defined targets always take precedence if len(targets) != len(dataset) and check_shape: @@ -337,12 +336,11 @@ def 
_init_targets(dataset, targets, check_shape=True) -> \ ) return DataAttribute(targets, "targets") - targets = _traverse_supported_dataset( - dataset, _select_targets) + targets = _traverse_supported_dataset(dataset, _select_targets) if targets is None: return None - + return DataAttribute(targets, "targets") @@ -352,8 +350,7 @@ def _detection_class_mapping_transform(class_mapping, example_target_dict): # example_target_dict["labels"] is a tensor containing one label # for each bounding box in the image. We need to remap each of them example_target_labels = example_target_dict["labels"] - example_mapped_labels = [class_mapping[int(el)] for el - in example_target_labels] + example_mapped_labels = [class_mapping[int(el)] for el in example_target_labels] if isinstance(example_target_labels, Tensor): example_mapped_labels = torch.as_tensor(example_mapped_labels) @@ -371,8 +368,7 @@ def detection_subset( class_mapping: Optional[Sequence[int]] = None, transform: Optional[XTransform] = None, target_transform: Optional[YTransform] = None, - transform_groups: Optional[Mapping[str, - Tuple[XTransform, YTransform]]] = None, + transform_groups: Optional[Mapping[str, Tuple[XTransform, YTransform]]] = None, initial_transform_group: Optional[str] = None, task_labels: Optional[Union[int, Sequence[int]]] = None, targets: Optional[Sequence[TTargetType]] = None, @@ -389,8 +385,7 @@ def detection_subset( class_mapping: Optional[Sequence[int]] = None, transform: Optional[XTransform] = None, target_transform: Optional[YTransform] = None, - transform_groups: Optional[Mapping[str, - Tuple[XTransform, YTransform]]] = None, + transform_groups: Optional[Mapping[str, Tuple[XTransform, YTransform]]] = None, initial_transform_group: Optional[str] = None, task_labels: Union[int, Sequence[int]], targets: Sequence[TTargetType], @@ -407,8 +402,7 @@ def detection_subset( class_mapping: Optional[Sequence[int]] = None, transform: Optional[XTransform] = None, target_transform: Optional[YTransform] = None, - transform_groups: Optional[Mapping[str, - Tuple[XTransform, YTransform]]] = None, + transform_groups: Optional[Mapping[str, Tuple[XTransform, YTransform]]] = None, initial_transform_group: Optional[str] = None, task_labels: Optional[Union[int, Sequence[int]]] = None, targets: Optional[Sequence[TTargetType]] = None, @@ -424,8 +418,7 @@ def detection_subset( class_mapping: Optional[Sequence[int]] = None, transform: Optional[XTransform] = None, target_transform: Optional[YTransform] = None, - transform_groups: Optional[Mapping[str, - Tuple[XTransform, YTransform]]] = None, + transform_groups: Optional[Mapping[str, Tuple[XTransform, YTransform]]] = None, initial_transform_group: Optional[str] = None, task_labels: Optional[Union[int, Sequence[int]]] = None, targets: Optional[Sequence[TTargetType]] = None, @@ -511,11 +504,13 @@ def detection_subset( ): return dataset.subset(indices) - targets_data: Optional[DataAttribute[TTargetType]] = \ - _init_targets(dataset, targets, check_shape=False) - task_labels_data: Optional[DataAttribute[int]] = \ - _init_task_labels(dataset, task_labels, check_shape=False) - + targets_data: Optional[DataAttribute[TTargetType]] = _init_targets( + dataset, targets, check_shape=False + ) + task_labels_data: Optional[DataAttribute[int]] = _init_task_labels( + dataset, task_labels, check_shape=False + ) + del task_labels del targets @@ -527,33 +522,29 @@ def detection_subset( dataset, ) - if initial_transform_group is not None and isinstance( - dataset, AvalancheDataset - ): + if initial_transform_group is 
not None and isinstance(dataset, AvalancheDataset): dataset = dataset.with_transforms(initial_transform_group) if class_mapping is not None: # update targets - if targets_data is None: # Should not happen # The following line usually fails targets_data = dataset.targets # type: ignore - - assert targets_data is not None, \ - 'To execute the class mapping, a list of targets is required.' - + + assert ( + targets_data is not None + ), "To execute the class mapping, a list of targets is required." + tgs = [ - _detection_class_mapping_transform( - class_mapping, example_target_dict) - for example_target_dict in targets_data] + _detection_class_mapping_transform(class_mapping, example_target_dict) + for example_target_dict in targets_data + ] targets_data = DataAttribute(tgs, "targets") if class_mapping is not None: mapping_fn = partial(_detection_class_mapping_transform, class_mapping) - frozen_transform_groups = DefaultTransformGroups( - (None, mapping_fn) - ) + frozen_transform_groups = DefaultTransformGroups((None, mapping_fn)) else: frozen_transform_groups = None @@ -565,8 +556,8 @@ def detection_subset( # Check if supervision data has been added is_supervised = is_supervised or ( - targets_data is not None and - task_labels_data is not None) + targets_data is not None and task_labels_data is not None + ) if collate_fn is None: collate_fn = detection_collate_fn @@ -597,15 +588,12 @@ def concat_detection_datasets( *, transform: Optional[XTransform] = None, target_transform: Optional[YTransform] = None, - transform_groups: Optional[Mapping[str, - Tuple[XTransform, YTransform]]] = None, + transform_groups: Optional[Mapping[str, Tuple[XTransform, YTransform]]] = None, initial_transform_group: Optional[str] = None, - task_labels: Optional[Union[int, - Sequence[int], - Sequence[Sequence[int]]]] = None, - targets: Optional[Union[ - Sequence[TTargetType], Sequence[Sequence[TTargetType]] - ]] = None, + task_labels: Optional[Union[int, Sequence[int], Sequence[Sequence[int]]]] = None, + targets: Optional[ + Union[Sequence[TTargetType], Sequence[Sequence[TTargetType]]] + ] = None, collate_fn: Optional[Callable[[List], Any]] = None ) -> SupervisedDetectionDataset: ... @@ -617,13 +605,10 @@ def concat_detection_datasets( *, transform: Optional[XTransform] = None, target_transform: Optional[YTransform] = None, - transform_groups: Optional[Mapping[str, - Tuple[XTransform, YTransform]]] = None, + transform_groups: Optional[Mapping[str, Tuple[XTransform, YTransform]]] = None, initial_transform_group: Optional[str] = None, task_labels: Union[int, Sequence[int], Sequence[Sequence[int]]], - targets: Union[ - Sequence[TTargetType], Sequence[Sequence[TTargetType]] - ], + targets: Union[Sequence[TTargetType], Sequence[Sequence[TTargetType]]], collate_fn: Optional[Callable[[List], Any]] = None ) -> SupervisedDetectionDataset: ... 
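# --- Illustrative usage sketch (not part of the patch above) ---
# A minimal example of how the concat_detection_datasets API whose overloads
# appear in the surrounding hunks could be invoked. `part_a` and `part_b` are
# placeholder detection-style datasets (items are (image, target_dict) pairs
# exposing a `targets` field); they are assumptions for illustration only.
from avalanche.benchmarks.utils.detection_dataset import concat_detection_datasets

# Per the signatures above, `task_labels` may be a single int for all samples,
# one int per dataset, or one sequence of ints per dataset; here one task
# label per dataset is used.
combined = concat_detection_datasets([part_a, part_b], task_labels=[0, 1])

# The result is a DetectionDataset (SupervisedDetectionDataset when both
# targets and task labels are available) whose "targets_task_labels"
# attribute concatenates the per-dataset labels in order.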
@@ -635,15 +620,12 @@ def concat_detection_datasets( *, transform: Optional[XTransform] = None, target_transform: Optional[YTransform] = None, - transform_groups: Optional[Mapping[str, - Tuple[XTransform, YTransform]]] = None, + transform_groups: Optional[Mapping[str, Tuple[XTransform, YTransform]]] = None, initial_transform_group: Optional[str] = None, - task_labels: Optional[Union[int, - Sequence[int], - Sequence[Sequence[int]]]] = None, - targets: Optional[Union[ - Sequence[TTargetType], Sequence[Sequence[TTargetType]] - ]] = None, + task_labels: Optional[Union[int, Sequence[int], Sequence[Sequence[int]]]] = None, + targets: Optional[ + Union[Sequence[TTargetType], Sequence[Sequence[TTargetType]]] + ] = None, collate_fn: Optional[Callable[[List], Any]] = None ) -> DetectionDataset: ... @@ -654,15 +636,12 @@ def concat_detection_datasets( *, transform: Optional[XTransform] = None, target_transform: Optional[YTransform] = None, - transform_groups: Optional[Mapping[str, - Tuple[XTransform, YTransform]]] = None, + transform_groups: Optional[Mapping[str, Tuple[XTransform, YTransform]]] = None, initial_transform_group: Optional[str] = None, - task_labels: Optional[Union[int, - Sequence[int], - Sequence[Sequence[int]]]] = None, - targets: Optional[Union[ - Sequence[TTargetType], Sequence[Sequence[TTargetType]] - ]] = None, + task_labels: Optional[Union[int, Sequence[int], Sequence[Sequence[int]]]] = None, + targets: Optional[ + Union[Sequence[TTargetType], Sequence[Sequence[TTargetType]]] + ] = None, collate_fn: Optional[Callable[[List], Any]] = None ) -> Union[DetectionDataset, SupervisedDetectionDataset]: """Creates a ``AvalancheConcatDataset`` instance. @@ -729,18 +708,15 @@ def concat_detection_datasets( in different datasets. """ dds = [] - per_dataset_task_labels = _split_user_def_task_label( - datasets, task_labels - ) + per_dataset_task_labels = _split_user_def_task_label(datasets, task_labels) per_dataset_targets = _split_user_def_targets( - datasets, - targets, - lambda x: isinstance(x, dict) + datasets, targets, lambda x: isinstance(x, dict) ) - - for dd, dataset_task_labels, dataset_targets in \ - zip(datasets, per_dataset_task_labels, per_dataset_targets): + + for dd, dataset_task_labels, dataset_targets in zip( + datasets, per_dataset_task_labels, per_dataset_targets + ): dd = make_detection_dataset( dd, transform=transform, @@ -752,7 +728,7 @@ def concat_detection_datasets( collate_fn=collate_fn, ) dds.append(dd) - + if ( transform is None and target_transform is None @@ -790,10 +766,7 @@ def concat_detection_datasets( if uniform_group is None: uniform_group = d_set._transform_groups.current_group else: - if ( - uniform_group - != d_set._transform_groups.current_group - ): + if uniform_group != d_set._transform_groups.current_group: uniform_group = None break @@ -807,17 +780,13 @@ def concat_detection_datasets( ####################################### totlen = sum([len(d) for d in datasets]) - if ( - task_labels is not None - ): # User defined targets always take precedence - + if task_labels is not None: # User defined targets always take precedence all_labels: IDataset[int] if isinstance(task_labels, int): all_labels = ConstantSequence(task_labels, totlen) else: all_labels_lst = [] - for dd, dataset_task_labels in \ - zip(dds, per_dataset_task_labels): + for dd, dataset_task_labels in zip(dds, per_dataset_task_labels): assert dataset_task_labels is not None # We already checked that len(t_labels) == len(dataset) @@ -828,9 +797,7 @@ def concat_detection_datasets( 
all_labels_lst.extend(dataset_task_labels) all_labels = all_labels_lst das.append( - DataAttribute( - all_labels, "targets_task_labels", use_in_getitem=True - ) + DataAttribute(all_labels, "targets_task_labels", use_in_getitem=True) ) if targets is not None: # User defined targets always take precedence @@ -844,26 +811,24 @@ def concat_detection_datasets( das.append(DataAttribute(all_targets_lst, "targets")) else: transform_groups_obj = None - initial_transform_group = 'train' + initial_transform_group = "train" data = DetectionDataset( dds, transform_groups=transform_groups_obj, - data_attributes=das if len(das) > 0 else None + data_attributes=das if len(das) > 0 else None, ) return data.with_transforms(initial_transform_group) def _select_targets( - dataset: SupportedDetectionDataset, - indices: Optional[List[int]]) -> Sequence[TTargetType]: + dataset: SupportedDetectionDataset, indices: Optional[List[int]] +) -> Sequence[TTargetType]: if hasattr(dataset, "targets"): # Standard supported dataset found_targets = dataset.targets else: - raise ValueError( - "Unsupported dataset: must have a valid targets field" - ) + raise ValueError("Unsupported dataset: must have a valid targets field") if indices is not None: found_targets = SubSequence(found_targets, indices=indices) @@ -876,5 +841,5 @@ def _select_targets( "DetectionDataset", "make_detection_dataset", "detection_subset", - "concat_detection_datasets" + "concat_detection_datasets", ] diff --git a/avalanche/benchmarks/utils/flat_data.py b/avalanche/benchmarks/utils/flat_data.py index 373dde2b4..63dae165d 100644 --- a/avalanche/benchmarks/utils/flat_data.py +++ b/avalanche/benchmarks/utils/flat_data.py @@ -12,12 +12,13 @@ Datasets with optimized concat/subset operations. """ import bisect - +import sys import numpy as np from avalanche.benchmarks.utils.dataset_utils import ( slice_alike_object_to_indices, ) + try: from collections import Hashable except ImportError: @@ -38,8 +39,7 @@ import itertools from avalanche.benchmarks.utils.dataset_definitions import IDataset - -TFlatData = TypeVar('TFlatData', bound='FlatData') +TFlatData = TypeVar("TFlatData", bound="FlatData") DataT = TypeVar("DataT") T_co = TypeVar("T_co", covariant=True) @@ -61,12 +61,12 @@ def __init__(self, *lists, known_length=None, offset=0): new_lists.append(ll._eager_list) else: new_lists.append(ll) - self._lists = new_lists + self._lists = new_lists # freed after eagerification - if len(self._lists) == 1 and offset == 0: - self._eager_list = self._lists[0] + if len(new_lists) == 1 and offset == 0: + self._eager_list = new_lists[0] else: - self._lazy_sequence = itertools.chain(*self._lists) + self._lazy_sequence = itertools.chain(*new_lists) """chain of generators this will be consumed over time whenever we need elems. """ @@ -76,12 +76,35 @@ def __init__(self, *lists, known_length=None, offset=0): """ self._offset = offset - self._known_length = known_length + # check depth to avoid RecursionError + if self._depth() > sys.getrecursionlimit() // 4: + self._to_eager() + + if known_length is not None: + self._known_length = known_length + elif self._eager_list is not None: + self._known_length = len(self._eager_list) + else: + self._known_length = sum(len(ll) for ll in new_lists) + + def _depth(self): + """Return the depth of the LazyIndices tree. + Use it only to eagerify early to avoid RecursionErrors. 
+ """ + if self._eager_list is not None: + return 0 + + lens = [0] + for ll in self._lists: + if isinstance(ll, LazyIndices): + lens.append(ll._depth()) + return max(lens) + 1 def _to_eager(self): if self._eager_list is not None: return self._eager_list = [el + self._offset for el in self._lazy_sequence] + self._lists = None # free memory def __getitem__(self, item): if self._eager_list is None: @@ -95,13 +118,7 @@ def __radd__(self, other): return LazyIndices(other, self) def __len__(self): - if self._eager_list is not None: - return len(self._eager_list) - elif self._known_length is not None: - return self._known_length - else: - # raise ValueError("Unknown lazy list length") - return sum(len(ll) for ll in self._lists) + return self._known_length class LazyRange(LazyIndices): @@ -123,8 +140,7 @@ def __iter__(self): yield self._offset + i def __getitem__(self, item): - assert item >= self._start and item < self._end, \ - "LazyRange: index out of range" + assert item >= self._start and item < self._end, "LazyRange: index out of range" return self._start + self._offset + item def __add__(self, other): @@ -175,7 +191,8 @@ def __init__( if can_flatten: self._datasets = _flatten_dataset_list(self._datasets) self._datasets, self._indices = _flatten_datasets_and_reindex( - self._datasets, self._indices) + self._datasets, self._indices + ) self._cumulative_sizes = ConcatDataset.cumsum(self._datasets) # NOTE: check disabled to avoid slowing down OCL scenarios @@ -224,12 +241,18 @@ def concat(self: TFlatData, other: TFlatData) -> TFlatData: # Case 1: one is a subset of the other if len(self._datasets) == 1 and len(other._datasets) == 1: - if self._can_flatten and self._datasets[0] is other \ - and other._indices is None: + if ( + self._can_flatten + and self._datasets[0] is other + and other._indices is None + ): idxs = self._get_lazy_indices() + other._get_lazy_indices() return other.subset(idxs) - elif other._can_flatten and other._datasets[0] is self \ - and self._indices is None: + elif ( + other._can_flatten + and other._datasets[0] is self + and self._indices is None + ): idxs = self._get_lazy_indices() + other._get_lazy_indices() return self.subset(idxs) elif ( @@ -237,8 +260,7 @@ def concat(self: TFlatData, other: TFlatData) -> TFlatData: and other._can_flatten and self._datasets[0] is other._datasets[0] ): - idxs = LazyIndices(self._get_lazy_indices(), - other._get_lazy_indices()) + idxs = LazyIndices(self._get_lazy_indices(), other._get_lazy_indices()) return self.__class__(datasets=self._datasets, indices=idxs) # Case 2: at least one of them can be flattened @@ -250,8 +272,7 @@ def concat(self: TFlatData, other: TFlatData) -> TFlatData: base_other = 0 else: base_other = self._cumulative_sizes[-1] - other_idxs = LazyIndices(other._get_lazy_indices(), - offset=base_other) + other_idxs = LazyIndices(other._get_lazy_indices(), offset=base_other) new_indices = self._get_lazy_indices() + other_idxs return self.__class__( datasets=self._datasets + other._datasets, indices=new_indices @@ -275,8 +296,7 @@ def concat(self: TFlatData, other: TFlatData) -> TFlatData: else: base_other = len(self) self_idxs = LazyRange(0, len(self)) - other_idxs = LazyIndices(other._get_lazy_indices(), - offset=base_other) + other_idxs = LazyIndices(other._get_lazy_indices(), offset=base_other) new_indices = self_idxs + other_idxs return self.__class__( datasets=[self] + other._datasets, indices=new_indices @@ -304,7 +324,7 @@ def _get_idx(self, idx) -> Tuple[int, int]: else: idx = idx - 
self._cumulative_sizes[dataset_idx - 1] return dataset_idx, int(idx) - + @overload def __getitem__(self, item: int) -> T_co: ... @@ -313,20 +333,16 @@ def __getitem__(self, item: int) -> T_co: def __getitem__(self: TFlatData, item: slice) -> TFlatData: ... - def __getitem__(self: TFlatData, item: Union[int, slice]) -> \ - Union[T_co, TFlatData]: + def __getitem__(self: TFlatData, item: Union[int, slice]) -> Union[T_co, TFlatData]: if isinstance(item, (int, np.integer)): dataset_idx, idx = self._get_idx(int(item)) return self._datasets[dataset_idx][idx] else: slice_indices = slice_alike_object_to_indices( - slice_alike_object=item, - max_length=len(self) + slice_alike_object=item, max_length=len(self) ) - return self.subset( - indices=slice_indices - ) + return self.subset(indices=slice_indices) def __len__(self) -> int: if len(self._cumulative_sizes) == 0: @@ -370,31 +386,30 @@ def __init__(self, constant_value: DataT, size: int): def __len__(self): return self._size - + @overload def __getitem__(self, index: int) -> DataT: ... - + @overload - def __getitem__(self, index: slice) -> 'ConstantSequence[DataT]': + def __getitem__(self, index: slice) -> "ConstantSequence[DataT]": ... - def __getitem__(self, index: Union[int, slice]) -> \ - 'Union[DataT, ConstantSequence[DataT]]': + def __getitem__( + self, index: Union[int, slice] + ) -> "Union[DataT, ConstantSequence[DataT]]": if isinstance(index, (int, np.integer)): index = int(index) - + if index >= len(self): raise IndexError() return self._constant_value else: slice_indices = slice_alike_object_to_indices( - slice_alike_object=index, - max_length=len(self) + slice_alike_object=index, max_length=len(self) ) return ConstantSequence( - constant_value=self._constant_value, - size=sum(1 for _ in slice_indices) + constant_value=self._constant_value, size=sum(1 for _ in slice_indices) ) def subset(self, indices: List[int]) -> "ConstantSequence[DataT]": @@ -415,24 +430,20 @@ def concat(self, other: FlatData[DataT]) -> IDataset[DataT]: isinstance(other, ConstantSequence) and self._constant_value == other._constant_value ): - return ConstantSequence( - self._constant_value, len(self) + len(other) - ) + return ConstantSequence(self._constant_value, len(self) + len(other)) else: return FlatData([self, other]) def __str__(self): - return ( - f"ConstantSequence(value={self._constant_value}, len={self._size})" - ) + return f"ConstantSequence(value={self._constant_value}, len={self._size})" def __hash__(self): return id(self) def _flatten_dataset_list( - datasets: List[Union[FlatData[T_co], IDataset[T_co]]]) -> \ - List[IDataset[T_co]]: + datasets: List[Union[FlatData[T_co], IDataset[T_co]]] +) -> List[IDataset[T_co]]: """Flatten the dataset tree if possible.""" # Concat -> Concat branch # Flattens by borrowing the list of concatenated datasets @@ -470,8 +481,9 @@ def _flatten_dataset_list( ): new_data_list.pop() # the same dataset is repeated, using indices to avoid repeating it - idxs = LazyIndices(LazyRange(0, len(last_dataset)), - LazyRange(0, len(last_dataset))) + idxs = LazyIndices( + LazyRange(0, len(last_dataset)), LazyRange(0, len(last_dataset)) + ) merged_ds = [FlatData([last_dataset], indices=idxs)] new_data_list.extend(merged_ds) else: @@ -480,9 +492,8 @@ def _flatten_dataset_list( def _flatten_datasets_and_reindex( - datasets: List[IDataset], - indices: Optional[List[int]]) -> \ - Tuple[List[IDataset], Optional[List[int]]]: + datasets: List[IDataset], indices: Optional[List[int]] +) -> Tuple[List[IDataset], Optional[List[int]]]: """The same 
dataset may occurr multiple times in the list of datasets. Here, we flatten the list of datasets and fix the indices to account for @@ -577,8 +588,9 @@ def _flatdata_repr(dataset, indent=0): s += _flatdata_repr(dd, indent + 1) return s else: - return "\t" * indent + f"{dataset.__class__.__name__} " \ - f"(len={len(dataset)})\n" + return ( + "\t" * indent + f"{dataset.__class__.__name__} " f"(len={len(dataset)})\n" + ) __all__ = ["FlatData", "ConstantSequence"] diff --git a/avalanche/benchmarks/utils/torchvision_wrapper.py b/avalanche/benchmarks/utils/torchvision_wrapper.py index 6317b1de4..3744a73be 100644 --- a/avalanche/benchmarks/utils/torchvision_wrapper.py +++ b/avalanche/benchmarks/utils/torchvision_wrapper.py @@ -25,7 +25,6 @@ def DatasetFolder(*args, **kwargs): if __name__ == "__main__": - mnist = DatasetFolder(".", download=True) diff --git a/avalanche/benchmarks/utils/transform_groups.py b/avalanche/benchmarks/utils/transform_groups.py index d99206a9c..005e32aee 100644 --- a/avalanche/benchmarks/utils/transform_groups.py +++ b/avalanche/benchmarks/utils/transform_groups.py @@ -79,9 +79,9 @@ class TransformGroups: def __init__( self, transform_groups: Mapping[ - str, Union[None, - Callable, - Sequence[Union[Callable, XTransform, YTransform]]]], + str, + Union[None, Callable, Sequence[Union[Callable, XTransform, YTransform]]], + ], current_group="train", ): """Constructor. @@ -90,9 +90,9 @@ def __init__( and transformations (pytorch transformations) as values. :param current_group: the currently active group. """ - self.transform_groups: Dict[str, Union[TupleTransform, - MultiParamTransform, - None]] = dict() + self.transform_groups: Dict[ + str, Union[TupleTransform, MultiParamTransform, None] + ] = dict() for group, transform in transform_groups.items(): norm_transform = _normalize_transform(transform) self.transform_groups[group] = norm_transform @@ -141,9 +141,7 @@ def __add__(self, other: "TransformGroups"): self_group = tgroups[gname] other_group = gtrans - to_expand_group: Union[TupleTransform, - MultiParamTransform, - None] + to_expand_group: Union[TupleTransform, MultiParamTransform, None] for to_expand_group in [self_group, other_group]: if to_expand_group is None: pass @@ -223,13 +221,13 @@ def _normalize_transform(transforms): __all__ = [ - 'XComposedTransformDef', - 'XTransformDef', - 'YTransformDef', - 'XTransform', - 'YTransform', - 'TransformGroupDef', - 'TransformGroups', - 'DefaultTransformGroups', - 'EmptyTransformGroups' + "XComposedTransformDef", + "XTransformDef", + "YTransformDef", + "XTransform", + "YTransform", + "TransformGroupDef", + "TransformGroups", + "DefaultTransformGroups", + "EmptyTransformGroups", ] diff --git a/avalanche/benchmarks/utils/transforms.py b/avalanche/benchmarks/utils/transforms.py index 25ff2df8f..99b94fba6 100644 --- a/avalanche/benchmarks/utils/transforms.py +++ b/avalanche/benchmarks/utils/transforms.py @@ -77,9 +77,7 @@ def __init__(self, transforms: Sequence[Callable]): def __call__(self, *args, force_tuple_output=False): if len(self.transforms) > 0: - for transform, (min_par, max_par) in zip( - self.transforms, self.param_def - ): + for transform, (min_par, max_par) in zip(self.transforms, self.param_def): args = MultiParamTransformCallable._call_transform( transform, min_par, max_par, *args ) @@ -156,9 +154,7 @@ def _detect_parameters(transform_callable): ): min_params = transform_callable.min_params max_params = transform_callable.max_params - elif MultiParamTransformCallable._is_torchvision_transform( - transform_callable - 
): + elif MultiParamTransformCallable._is_torchvision_transform(transform_callable): min_params = 1 max_params = 1 else: diff --git a/avalanche/benchmarks/utils/utils.py b/avalanche/benchmarks/utils/utils.py index 0c2cf6674..0da4c5e16 100644 --- a/avalanche/benchmarks/utils/utils.py +++ b/avalanche/benchmarks/utils/utils.py @@ -48,13 +48,11 @@ TransformGroupDef, TransformGroups, XTransform, - YTransform + YTransform, ) if TYPE_CHECKING: - from avalanche.benchmarks.utils.classification_dataset import ( - ClassificationDataset - ) + from avalanche.benchmarks.utils.classification_dataset import ClassificationDataset T_co = TypeVar("T_co", covariant=True) TAvalancheDataset = TypeVar("TAvalancheDataset", bound="AvalancheDataset") @@ -97,9 +95,7 @@ def _indexes_grouped_by_classes( # This means that, if sort_classes is True, the next for statement # will initialize "result_per_class" in sorted order which in turn means # that patterns will be ordered by ascending class ID. - classes = torch.unique( - torch.as_tensor(targets), sorted=sort_classes - ).tolist() + classes = torch.unique(torch.as_tensor(targets), sorted=sort_classes).tolist() for class_id in classes: result_per_class[class_id] = [] @@ -188,10 +184,8 @@ def as_avalanche_dataset( def as_classification_dataset( dataset: ISupportedClassificationDataset[T_co], -) -> 'ClassificationDataset': - from avalanche.benchmarks.utils.classification_dataset import ( - ClassificationDataset - ) +) -> "ClassificationDataset": + from avalanche.benchmarks.utils.classification_dataset import ClassificationDataset if isinstance(dataset, ClassificationDataset): return dataset @@ -224,8 +218,8 @@ def concat_datasets(datasets): def find_common_transforms_group( - datasets: Iterable[Any], - default_group: str = "train") -> str: + datasets: Iterable[Any], default_group: str = "train" +) -> str: """ Utility used to find the common transformations group across multiple datasets. @@ -245,10 +239,7 @@ def find_common_transforms_group( if uniform_group is None: uniform_group = d_set._flat_data._transform_groups.current_group else: - if ( - uniform_group - != d_set._flat_data._transform_groups.current_group - ): + if uniform_group != d_set._flat_data._transform_groups.current_group: uniform_group = None break @@ -260,14 +251,14 @@ def find_common_transforms_group( return initial_transform_group -Y = TypeVar('Y') -T = TypeVar('T') +Y = TypeVar("Y") +T = TypeVar("T") def _traverse_supported_dataset( dataset: Y, values_selector: Callable[[Y, Optional[List[int]]], Optional[Sequence[T]]], - indices: Optional[List[int]] = None + indices: Optional[List[int]] = None, ) -> Sequence[T]: """ Traverse the given dataset by gathering required info. @@ -275,7 +266,7 @@ def _traverse_supported_dataset( The given dataset is traversed by covering all sub-datasets contained PyTorch :class:`Subset` and :class`ConcatDataset`. Beware that instances of :class:`AvalancheDataset` will not - be traversed as those objects already have the proper data + be traversed as those objects already have the proper data attribute fields populated with data from leaf datasets. For each dataset, the `values_selector` will be called to gather @@ -284,7 +275,7 @@ def _traverse_supported_dataset( :param dataset: The dataset to traverse. :param values_selector: A function that, given the dataset - and the indices to consider (which may be None if the entire + and the indices to consider (which may be None if the entire dataset must be considered), returns a list of selected values. 
:returns: The list of selected values. """ @@ -301,11 +292,9 @@ def _traverse_supported_dataset( indices = [dataset.indices[x] for x in range(len(dataset))] else: indices = [dataset.indices[x] for x in indices] - + return list( - _traverse_supported_dataset( - dataset.dataset, values_selector, indices - ) + _traverse_supported_dataset(dataset.dataset, values_selector, indices) ) if isinstance(dataset, ConcatDataset): @@ -313,9 +302,7 @@ def _traverse_supported_dataset( if indices is None: for c_dataset in dataset.datasets: result += list( - _traverse_supported_dataset( - c_dataset, values_selector, indices - ) + _traverse_supported_dataset(c_dataset, values_selector, indices) ) return result @@ -361,8 +348,9 @@ def _traverse_supported_dataset( raise ValueError("Error: can't find the needed data in the given dataset") -def _init_task_labels(dataset, task_labels, check_shape=True) -> \ - Optional[DataAttribute[int]]: +def _init_task_labels( + dataset, task_labels, check_shape=True +) -> Optional[DataAttribute[int]]: """ Initializes the task label list (one for each pattern in the dataset). @@ -370,13 +358,13 @@ def _init_task_labels(dataset, task_labels, check_shape=True) -> \ Otherwisem the elements will be retrieved from the dataset itself by traversing it and looking at the `targets_task_labels` field. - :param dataset: The dataset for which the task labels list must be + :param dataset: The dataset for which the task labels list must be initialized. Ignored if `task_labels` is passed, but it may still be used if `check_shape` is true. :param task_labels: The task labels to use. May be None, in which case the labels will be retrieved from the dataset. :param check_shape: If True, will check if the length of the task labels - list matches the dataset size. Ignored if the labels are retrieved + list matches the dataset size. Ignored if the labels are retrieved from the dataset. :returns: A data attribute containing the task labels. May be None to signal that the dataset's `targets_task_labels` field should be used @@ -400,9 +388,7 @@ def _init_task_labels(dataset, task_labels, check_shape=True) -> \ else: tls = SubSequence(task_labels, converter=int) else: - task_labels = _traverse_supported_dataset( - dataset, _select_task_labels - ) + task_labels = _traverse_supported_dataset(dataset, _select_task_labels) if task_labels is None: tls = None @@ -410,8 +396,8 @@ def _init_task_labels(dataset, task_labels, check_shape=True) -> \ tls = task_labels elif isinstance(task_labels, DataAttribute): return DataAttribute( - task_labels.data, "targets_task_labels", - use_in_getitem=True) + task_labels.data, "targets_task_labels", use_in_getitem=True + ) else: tls = SubSequence(task_labels, converter=int) @@ -420,8 +406,9 @@ def _init_task_labels(dataset, task_labels, check_shape=True) -> \ return DataAttribute(tls, "targets_task_labels", use_in_getitem=True) -def _select_task_labels(dataset: Any, indices: Optional[List[int]]) -> \ - Optional[Sequence[SupportsInt]]: +def _select_task_labels( + dataset: Any, indices: Optional[List[int]] +) -> Optional[Sequence[SupportsInt]]: """ Selector function to be passed to :func:`_traverse_supported_dataset` to obtain the `targets_task_labels` for the given dataset. @@ -466,14 +453,14 @@ def _init_transform_groups( passing a dictionary of groups (`transform_groups`). :param transform_groups: The transform groups to use as a dictionary - (group_name -> group). Can be None. Mutually exclusive with + (group_name -> group). Can be None. 
Mutually exclusive with `targets` and `target_transform` :param transform: The transformation for the X value. Can be None. :param target_transform: The transformation for the Y value. Can be None. :param initial_transform_group: The name of the initial group. If None, 'train' will be used. :param dataset: The avalanche dataset, used only to obtain the name of - the initial transformations groups if `initial_transform_group` is + the initial transformations groups if `initial_transform_group` is None. :returns: a :class:`TransformGroups` instance if any transformation was passed, else None. @@ -513,9 +500,7 @@ def _init_transform_groups( current_group=initial_transform_group, ) else: - tgs = TransformGroups( - transform_groups, current_group=initial_transform_group - ) + tgs = TransformGroups(transform_groups, current_group=initial_transform_group) return tgs @@ -540,11 +525,8 @@ def _check_groups_dict_format(groups_dict): def _split_user_def_task_label( - datasets, - task_labels: Optional[Union[int, - Sequence[int], - Sequence[Sequence[int]]]]) -> \ - List[Optional[Union[int, Sequence[int]]]]: + datasets, task_labels: Optional[Union[int, Sequence[int], Sequence[Sequence[int]]]] +) -> List[Optional[Union[int, Sequence[int]]]]: """ Given a datasets list and the user-defined list of task labels, returns the task labels list of each dataset. @@ -553,15 +535,15 @@ def _split_user_def_task_label( in which the user can define the task labels: - As a single task label for all exemplars of all datasets - A single list of length equal to the sum of the lengths of all datasets - - A list containing, for each dataset, one element between: + - A list containing, for each dataset, one element between: - a list, defining the task labels of each exemplar of a that dataset - an int, defining the task label of all exemplars of a that dataset - + :param datasets: The list of datasets. :param task_labels: The user-defined task labels. Can be None, in which case a list of None will be returned. - :returns: A list containing as many elements as the input `datasets`. - Each element is either a list of task labels or None. If None + :returns: A list containing as many elements as the input `datasets`. + Each element is either a list of task labels or None. If None (because `task_labels` is None), this means that the task labels should be retrieved by traversing each dataset. """ @@ -584,9 +566,7 @@ def _split_user_def_task_label( # One sequence per dataset dataset_t_label = task_labels[dd_idx] else: - raise ValueError( - 'The task_labels parameter has an invalid format.' - ) + raise ValueError("The task_labels parameter has an invalid format.") t_labels.append(dataset_t_label) idx_start = end_idx @@ -594,10 +574,10 @@ def _split_user_def_task_label( def _split_user_def_targets( - datasets, - targets: Optional[Union[Sequence[T], Sequence[Sequence[T]]]], - single_element_checker: Callable[[Any], bool]) -> \ - List[Optional[Sequence[T]]]: + datasets, + targets: Optional[Union[Sequence[T], Sequence[Sequence[T]]]], + single_element_checker: Callable[[Any], bool], +) -> List[Optional[Sequence[T]]]: """ Given a datasets list and the user-defined list of targets, returns the targets list of each dataset. 
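# --- Illustrative sketch (not part of the patch above) ---
# The three user-facing formats that the _split_user_def_task_label docstring
# above describes. The dataset stand-ins are placeholder assumptions; the
# helper appears to use only their lengths when splitting the labels.
datasets = [["a", "b"], ["c", "d", "e"]]   # stand-in datasets of length 2 and 3

labels_single = 0                    # one task label for every sample
labels_flat = [0, 0, 1, 1, 1]        # one flat list, len == 2 + 3
labels_per_dataset = [0, [1, 1, 1]]  # per dataset: an int or a per-sample list

# _split_user_def_task_label(datasets, <any of the above>) returns one entry
# per dataset (an int or a sequence); passing None returns a list of None,
# meaning the labels must be recovered by traversing each dataset.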
@@ -605,14 +585,14 @@ def _split_user_def_targets( This internal utility is mainly used to manage the different ways in which the user can define the targets: - A single list of length equal to the sum of the lengths of all datasets - - A list containing, for each dataset, a list, defining the targets + - A list containing, for each dataset, a list, defining the targets of each exemplar of a that dataset - + :param datasets: The list of datasets. :param targets: The user-defined targets. Can be None, in which case a list of None will be returned. - :returns: A list containing as many elements as the input `datasets`. - Each element is either a list of targets or None. If None + :returns: A list containing as many elements as the input `datasets`. + Each element is either a list of targets or None. If None (because `targets` is None), this means that the targets should be retrieved by traversing each dataset. """ @@ -632,9 +612,7 @@ def _split_user_def_targets( # One sequence per dataset dataset_t_label = targets[dd_idx] # type: ignore else: - raise ValueError( - 'The targets parameter has an invalid format.' - ) + raise ValueError("The targets parameter has an invalid format.") t_labels.append(dataset_t_label) idx_start = end_idx @@ -673,9 +651,7 @@ def __iter__(self) -> Iterator[int]: def __getitem__(self, task_label: int): t_labels = self._get_task_labels_field() tl_idx = t_labels.val_to_idx[task_label] - return self.data.subset( - tl_idx - ) + return self.data.subset(tl_idx) def __len__(self) -> int: t_labels = self._get_task_labels_field() @@ -692,5 +668,5 @@ def _get_task_labels_field(self) -> DataAttribute[int]: "as_classification_dataset", "concat_datasets", "find_common_transforms_group", - "TaskSet" + "TaskSet", ] diff --git a/avalanche/core.py b/avalanche/core.py index f266b7f0d..539318c55 100644 --- a/avalanche/core.py +++ b/avalanche/core.py @@ -54,21 +54,15 @@ def after_training(self, strategy: Template, *args, **kwargs): """Called after `train` by the `BaseTemplate`.""" pass - def before_eval( - self, strategy: Template, *args, **kwargs - ) -> CallbackResult: + def before_eval(self, strategy: Template, *args, **kwargs) -> CallbackResult: """Called before `eval` by the `BaseTemplate`.""" pass - def before_eval_exp( - self, strategy: Template, *args, **kwargs - ) -> CallbackResult: + def before_eval_exp(self, strategy: Template, *args, **kwargs) -> CallbackResult: """Called before `eval_exp` by the `BaseTemplate`.""" pass - def after_eval_exp( - self, strategy: Template, *args, **kwargs - ) -> CallbackResult: + def after_eval_exp(self, strategy: Template, *args, **kwargs) -> CallbackResult: """Called after `eval_exp` by the `BaseTemplate`.""" pass @@ -76,10 +70,7 @@ def after_eval(self, strategy: Template, *args, **kwargs) -> CallbackResult: """Called after `eval` by the `BaseTemplate`.""" pass - def __init_subclass__( - cls, - supports_distributed: bool = False, - **kwargs) -> None: + def __init_subclass__(cls, supports_distributed: bool = False, **kwargs) -> None: cls.supports_distributed = supports_distributed return super().__init_subclass__(**kwargs) @@ -109,27 +100,19 @@ def before_training_iteration( `BaseTemplate`.""" pass - def before_forward( - self, strategy: Template, *args, **kwargs - ) -> CallbackResult: + def before_forward(self, strategy: Template, *args, **kwargs) -> CallbackResult: """Called before `model.forward()` by the `BaseTemplate`.""" pass - def after_forward( - self, strategy: Template, *args, **kwargs - ) -> CallbackResult: + def after_forward(self, 
strategy: Template, *args, **kwargs) -> CallbackResult: """Called after `model.forward()` by the `BaseTemplate`.""" pass - def before_backward( - self, strategy: Template, *args, **kwargs - ) -> CallbackResult: + def before_backward(self, strategy: Template, *args, **kwargs) -> CallbackResult: """Called before `criterion.backward()` by the `BaseTemplate`.""" pass - def after_backward( - self, strategy: Template, *args, **kwargs - ) -> CallbackResult: + def after_backward(self, strategy: Template, *args, **kwargs) -> CallbackResult: """Called after `criterion.backward()` by the `BaseTemplate`.""" pass @@ -140,15 +123,11 @@ def after_training_iteration( `BaseTemplate`.""" pass - def before_update( - self, strategy: Template, *args, **kwargs - ) -> CallbackResult: + def before_update(self, strategy: Template, *args, **kwargs) -> CallbackResult: """Called before `optimizer.update()` by the `BaseTemplate`.""" pass - def after_update( - self, strategy: Template, *args, **kwargs - ) -> CallbackResult: + def after_update(self, strategy: Template, *args, **kwargs) -> CallbackResult: """Called after `optimizer.update()` by the `BaseTemplate`.""" pass @@ -171,9 +150,7 @@ def before_eval_forward( """Called before `model.forward()` by the `BaseTemplate`.""" pass - def after_eval_forward( - self, strategy: Template, *args, **kwargs - ) -> CallbackResult: + def after_eval_forward(self, strategy: Template, *args, **kwargs) -> CallbackResult: """Called after `model.forward()` by the `BaseTemplate`.""" pass @@ -214,6 +191,7 @@ class SupervisedPlugin(BaseSGDPlugin[Template], ABC): See `BaseTemplate` for complete description of the train/eval loop. """ + def __init__(self): """ Inizializes an instance of a supervised plugin. @@ -224,8 +202,9 @@ def __init__(self): class SupervisedMetaLearningPlugin(SupervisedPlugin[Template], ABC): """ABC for SupervisedMetaLearningTemplate plugins. - See `BaseTemplate` for complete description of the train/eval loop. + See `BaseTemplate` for complete description of the train/eval loop. 
""" + def before_inner_updates( self, strategy: Template, *args, **kwargs ) -> CallbackResult: @@ -244,8 +223,6 @@ def before_outer_update( """Called before `_outer_updates` by the `BaseTemplate`.""" pass - def after_outer_update( - self, strategy: Template, *args, **kwargs - ) -> CallbackResult: + def after_outer_update(self, strategy: Template, *args, **kwargs) -> CallbackResult: """Called before `_outer_updates` by the `BaseTemplate`.""" pass diff --git a/avalanche/distributed/distributed_consistency_verification.py b/avalanche/distributed/distributed_consistency_verification.py index 689e85c28..ef12c55dc 100644 --- a/avalanche/distributed/distributed_consistency_verification.py +++ b/avalanche/distributed/distributed_consistency_verification.py @@ -14,30 +14,27 @@ from torch.utils.data import Dataset -def hash_benchmark(benchmark: 'DatasetScenario', *, - hash_engine=None, num_workers=0) -> str: +def hash_benchmark( + benchmark: "DatasetScenario", *, hash_engine=None, num_workers=0 +) -> str: if hash_engine is None: hash_engine = hashlib.sha256() - + for stream_name in sorted(benchmark.streams.keys()): stream = benchmark.streams[stream_name] hash_engine.update(stream_name.encode()) for experience in stream: exp_dataset = experience.dataset - hash_dataset(exp_dataset, - hash_engine=hash_engine, - num_workers=num_workers) + hash_dataset(exp_dataset, hash_engine=hash_engine, num_workers=num_workers) return hash_engine.hexdigest() -def hash_dataset(dataset: 'Dataset', *, hash_engine=None, num_workers=0) -> str: +def hash_dataset(dataset: "Dataset", *, hash_engine=None, num_workers=0) -> str: if hash_engine is None: hash_engine = hashlib.sha256() data_loader = DataLoader( - dataset, - collate_fn=lambda batch: tuple(zip(*batch)), - num_workers=num_workers + dataset, collate_fn=lambda batch: tuple(zip(*batch)), num_workers=num_workers ) for loaded_elem in data_loader: example = tuple(tuple_element[0] for tuple_element in loaded_elem) @@ -50,10 +47,10 @@ def hash_dataset(dataset: 'Dataset', *, hash_engine=None, num_workers=0) -> str: return hash_engine.hexdigest() -def hash_minibatch(minibatch: 'Tuple[Tensor]', *, hash_engine=None) -> str: +def hash_minibatch(minibatch: "Tuple[Tensor]", *, hash_engine=None) -> str: if hash_engine is None: hash_engine = hashlib.sha256() - + for tuple_elem in minibatch: buff = io.BytesIO() torch.save(tuple_elem, buff) @@ -62,10 +59,10 @@ def hash_minibatch(minibatch: 'Tuple[Tensor]', *, hash_engine=None) -> str: return hash_engine.hexdigest() -def hash_tensor(tensor: 'Tensor', *, hash_engine=None) -> str: +def hash_tensor(tensor: "Tensor", *, hash_engine=None) -> str: if hash_engine is None: hash_engine = hashlib.sha256() - + buff = io.BytesIO() torch.save(tensor, buff) buff.seek(0) @@ -73,14 +70,10 @@ def hash_tensor(tensor: 'Tensor', *, hash_engine=None) -> str: return hash_engine.hexdigest() -def hash_model( - model: 'Module', - include_buffers=True, - *, - hash_engine=None) -> str: +def hash_model(model: "Module", include_buffers=True, *, hash_engine=None) -> str: if hash_engine is None: hash_engine = hashlib.sha256() - + for name, param in model.named_parameters(): hash_engine.update(name.encode()) buff = io.BytesIO() @@ -95,14 +88,14 @@ def hash_model( torch.save(model_buffer.detach().cpu(), buff) buff.seek(0) hash_engine.update(buff.read()) - + return hash_engine.hexdigest() __all__ = [ - 'hash_benchmark', - 'hash_dataset', - 'hash_minibatch', - 'hash_tensor', - 'hash_model' + "hash_benchmark", + "hash_dataset", + "hash_minibatch", + "hash_tensor", + 
"hash_model", ] diff --git a/avalanche/distributed/distributed_helper.py b/avalanche/distributed/distributed_helper.py index 5d7054da6..fdcb40455 100644 --- a/avalanche/distributed/distributed_helper.py +++ b/avalanche/distributed/distributed_helper.py @@ -9,17 +9,13 @@ from torch.nn.modules import Module from torch.nn.parallel import DistributedDataParallel from typing_extensions import Literal -from torch.distributed import ( - init_process_group, - broadcast_object_list -) +from torch.distributed import init_process_group, broadcast_object_list -BroadcastT = TypeVar('BroadcastT') +BroadcastT = TypeVar("BroadcastT") -from avalanche.distributed.distributed_consistency_verification import \ - hash_tensor +from avalanche.distributed.distributed_consistency_verification import hash_tensor class _Singleton(type): @@ -27,8 +23,7 @@ class _Singleton(type): def __call__(cls, *args, **kwargs): if cls not in cls._instances: - cls._instances[cls] = super(_Singleton, cls).__call__( - *args, **kwargs) + cls._instances[cls] = super(_Singleton, cls).__call__(*args, **kwargs) return cls._instances[cls] @@ -41,19 +36,23 @@ class RollingSeedContext(object): - behave differently depending on the rank - change the global state of random number generators """ + def __init__(self): self.rng_manager_state = None def save_generators_state(self): from avalanche.training.determinism.rng_manager import RNGManager + self.rng_manager_state = RNGManager.__getstate__() def load_generators_state(self): from avalanche.training.determinism.rng_manager import RNGManager + self.rng_manager_state = RNGManager.__setstate__(self.rng_manager_state) def step_random_generators(self): from avalanche.training.determinism.rng_manager import RNGManager + RNGManager.step_generators() def __enter__(self): @@ -70,6 +69,7 @@ class BroadcastSeedContext(object): This is usually slower than using :class:`RollingSeedContext`. """ + def __init__(self): pass @@ -89,12 +89,12 @@ class _MainProcessFirstContext(object): """ def __init__( - self, - seed_alignment: Literal["rolling", "broadcast"] = 'rolling', - final_barrier: bool = False): - + self, + seed_alignment: Literal["rolling", "broadcast"] = "rolling", + final_barrier: bool = False, + ): self._seed_aligner: ContextManager - if seed_alignment == 'rolling': + if seed_alignment == "rolling": self._seed_aligner = RollingSeedContext() else: self._seed_aligner = BroadcastSeedContext() @@ -129,41 +129,42 @@ class _DistributedHelperCls(object): Only a single object of this class is instantiated as the "DistributedHelper" singleton. - + Note: differently from the original Pytorch API, which requires - that input tensor(s) to be moved to the default device (forced to + that input tensor(s) to be moved to the default device (forced to CUDA if using NCCL), these functions usually also manage input tensors residing on a different devices. The returned elements will be moved to the same device of the input tensor. Consider looking at the documentation of each method for more details. 
""" + __metaclass__ = _Singleton def __init__(self): self.use_cuda = False - self._dev_map = _DistributedHelperCls._make_map('cpu') + self._dev_map = _DistributedHelperCls._make_map("cpu") def init_distributed(self, random_seed, backend=None, use_cuda=True): if self.is_distributed: - raise RuntimeError('Distributed API already initialized') + raise RuntimeError("Distributed API already initialized") use_cuda = use_cuda and torch.cuda.is_available() if backend is None: if use_cuda: - backend = 'nccl' + backend = "nccl" else: - backend = 'gloo' + backend = "gloo" - if backend == 'nccl' and not use_cuda: - warnings.warn( - 'Bad configuration: using NCCL, but you set use_cuda=False!') + if backend == "nccl" and not use_cuda: + warnings.warn("Bad configuration: using NCCL, but you set use_cuda=False!") could_initialize_distributed = False - if os.environ.get('LOCAL_RANK', None) is None: + if os.environ.get("LOCAL_RANK", None) is None: warnings.warn( - 'Torch distributed could not be initialized ' - '(missing environment configuration)') + "Torch distributed could not be initialized " + "(missing environment configuration)" + ) else: init_process_group(backend=backend) could_initialize_distributed = True @@ -218,11 +219,11 @@ def make_device(self, set_cuda_device: bool = False) -> torch.device: device_id = 0 if self.use_cuda and device_id >= 0: - ref_device = torch.device(f'cuda:{device_id}') + ref_device = torch.device(f"cuda:{device_id}") if set_cuda_device: torch.cuda.set_device(ref_device) else: - ref_device = torch.device('cpu') + ref_device = torch.device("cpu") return ref_device def wrap_model(self, model: Module) -> Module: @@ -230,7 +231,7 @@ def wrap_model(self, model: Module) -> Module: Wraps a given model to enable distributed training. The given model will be wrapped using :class:`DistributedDataParallel`. - + :return: The model wrapped in :class:`DistributedDataParallel` if running a distributed training, or the model itself if running a single-process training. @@ -245,13 +246,10 @@ def wrap_model(self, model: Module) -> Module: # (an int, a device object or a str) # If not set, output_device defaults to device_ids[0] return DistributedDataParallel( - model, - device_ids=[self.make_device()], - find_unused_parameters=True) + model, device_ids=[self.make_device()], find_unused_parameters=True + ) else: - return DistributedDataParallel( - model, - find_unused_parameters=True) + return DistributedDataParallel(model, find_unused_parameters=True) else: return model @@ -274,6 +272,7 @@ def set_random_seeds(self, random_seed): :param random_seed: The random seed to set. """ from avalanche.training.determinism.rng_manager import RNGManager + RNGManager.set_random_seeds(random_seed) def align_seeds(self): @@ -285,7 +284,7 @@ def align_seeds(self): return if self.is_main_process: - reference_seed = torch.randint(0, 2**32-1, (1,), dtype=torch.int64) + reference_seed = torch.randint(0, 2**32 - 1, (1,), dtype=torch.int64) else: reference_seed = torch.empty((1,), dtype=torch.int64) @@ -295,7 +294,7 @@ def align_seeds(self): def main_process_first(self): """ - Returns an execution context allowing the main process + Returns an execution context allowing the main process to complete the section before allowing other processes to enter it. @@ -319,18 +318,18 @@ def broadcast(self, tensor: Tensor, src: int = 0): """ Broadcasts the given tensor from a source process to all processes. 
- Differences with torch.distributed: + Differences with torch.distributed: - The input tensor can reside in any device. - The input tensor will be transmitted using the current backend. However, the resulting tensor will be moved to the save device as the `tensor` parameter before retutrning it, - no matter the backend in use. + no matter the backend in use. - No-op if not running a distributed training. :param tensor: The tensor to be broadcasted. :param src: The rank of the source process. Defaults to 0, which is the main process. - :return: The tensor obtained from the source process, in the same + :return: The tensor obtained from the source process, in the same device as the tensor parameter. """ @@ -342,7 +341,7 @@ def broadcast(self, tensor: Tensor, src: int = 0): tensor = self._revert_to_original_device(tensor_distrib, orig_data) return tensor - + def broadcast_object(self, obj: BroadcastT, src=0) -> BroadcastT: """ Broadcasts the given object from a source process to all processes. @@ -350,7 +349,7 @@ def broadcast_object(self, obj: BroadcastT, src=0) -> BroadcastT: Note: if broadcasting a Tensor, consider using :meth:`broadcast` instead. - Differences with torch.distributed: + Differences with torch.distributed: - No-op if not running a distributed training. :param obj: The object to be broadcasted. @@ -374,12 +373,12 @@ def cat_all(self, tensor: Tensor): The resulting tensor will be concatenated in the order given by the rank of each source process. - Differences with torch.distributed: + Differences with torch.distributed: - The input tensor can reside in any device. - The input tensor will be transmitted using the current backend. However, the resulting tensor will be moved to the save device as the `tensor` parameter before returning it, - no matter the backend in use. + no matter the backend in use. - No-op if not running a distributed training. :param tensor: The tensor from the current process. Tensors across @@ -401,8 +400,7 @@ def cat_all(self, tensor: Tensor): return torch.cat(gathered_tensors) - def gather_tensor_shapes(self, tensor: Tensor, max_shape_len=10) \ - -> List[List[int]]: + def gather_tensor_shapes(self, tensor: Tensor, max_shape_len=10) -> List[List[int]]: """ Gathers the shapes of the tensors from all processes. @@ -419,38 +417,39 @@ def gather_tensor_shapes(self, tensor: Tensor, max_shape_len=10) \ for i in range(len(tensor.shape)): tensor_size[i] = tensor.shape[i] all_tensors_shape = [ - self._prepare_for_distributed_comm( - torch.zeros_like(tensor_size))[0] - for _ in range(self.world_size)] + self._prepare_for_distributed_comm(torch.zeros_like(tensor_size))[0] + for _ in range(self.world_size) + ] tensor_size, _ = self._prepare_for_distributed_comm(tensor_size) torch.distributed.all_gather(all_tensors_shape, tensor_size) all_tensors_shape = [t.cpu() for t in all_tensors_shape] - + # Trim shape for i, t in enumerate(all_tensors_shape): for x in range(len(t)): if t[x] == 0: if x == 0: # Tensor with 0-length shape - all_tensors_shape[i] = t[:x+1] + all_tensors_shape[i] = t[: x + 1] else: all_tensors_shape[i] = t[:x] break - + return [t_shape.tolist() for t_shape in all_tensors_shape] def gather_all( - self, - tensor: Tensor, - same_shape: bool = False, - shapes: Optional[List[List[int]]] = None) -> List[Tensor]: + self, + tensor: Tensor, + same_shape: bool = False, + shapes: Optional[List[List[int]]] = None, + ) -> List[Tensor]: """ Gather all for tensors only. 
- Differences with torch.distributed: + Differences with torch.distributed: - The input tensor can reside in any device. - The input tensor will be transmitted using the current backend. However, the resulting tensors will be moved to the save device @@ -480,8 +479,7 @@ def gather_all( tensor_size = list(tensor.shape) else: tensor_size = [0] - all_tensors_shape = \ - [tensor_size for _ in range(self.world_size)] + all_tensors_shape = [tensor_size for _ in range(self.world_size)] elif shapes is not None: # Shapes given by the user # make sure it is a list of lists @@ -489,7 +487,7 @@ def gather_all( else: # Tensor differ by whole shape all_tensors_shape = self.gather_tensor_shapes(tensor) - + same_shape = all(all_tensors_shape[0] == x for x in all_tensors_shape) orig_device = tensor.device @@ -511,36 +509,33 @@ def gather_all( all_tensors_numel.append(curr_size) max_numel = max(all_tensors_numel) - out_tensors = [torch.empty((max_numel,), dtype=dtype) - for _ in all_tensors_shape] - + out_tensors = [ + torch.empty((max_numel,), dtype=dtype) for _ in all_tensors_shape + ] + tensor = tensor.flatten() n_padding = max_numel - tensor.numel() if n_padding > 0: - padding = torch.zeros((n_padding,), - dtype=tensor.dtype, - device=orig_device) + padding = torch.zeros( + (n_padding,), dtype=tensor.dtype, device=orig_device + ) tensor = torch.cat((tensor, padding), dim=0) tensor, _ = self._prepare_for_distributed_comm(tensor) - out_tensors = [self._prepare_for_distributed_comm(t)[0] - for t in out_tensors] - + out_tensors = [self._prepare_for_distributed_comm(t)[0] for t in out_tensors] + torch.distributed.all_gather(out_tensors, tensor) if not same_shape: # The tensors are flat and of the wrong dimension: re-shape them - for tensor_idx, (tensor_sz, tensor_numel, out_t) in \ - enumerate(zip(all_tensors_shape, - all_tensors_numel, - out_tensors)): + for tensor_idx, (tensor_sz, tensor_numel, out_t) in enumerate( + zip(all_tensors_shape, all_tensors_numel, out_tensors) + ): if tensor_sz[0] == 0: # Tensor with 0-length shape - out_tensors[tensor_idx] = \ - out_t[:tensor_numel].reshape(tuple()) + out_tensors[tensor_idx] = out_t[:tensor_numel].reshape(tuple()) else: - out_tensors[tensor_idx] = \ - out_t[:tensor_numel].reshape(tensor_sz) + out_tensors[tensor_idx] = out_t[:tensor_numel].reshape(tensor_sz) out_tensors = [t.to(orig_device) for t in out_tensors] return out_tensors @@ -551,14 +546,14 @@ def gather_all_objects(self, obj: BroadcastT) -> List[BroadcastT]: (even the ones nested inside objects) to the correct default device. Same as torch.distributed: - - Tensors nested inside the input object must reside in the + - Tensors nested inside the input object must reside in the default device. Future versions of Avalanche may adopt solutions to circumvent the limitations of orch.distributed. Differences with torch.distributed: - The input object will be transmitted using the current backend. - However, the resulting tensors nested inside of it + However, the resulting tensors nested inside of it will be moved to the default device before returning them, no matter the backend in use. - No-op if not running a distributed training. @@ -587,8 +582,7 @@ def check_equal_tensors(self, tensor: Tensor): if len(set(tensors_hashes)) != 1: # Equal tensors - raise ValueError('Different tensors. Got hashes: {}'.format( - tensors_hashes)) + raise ValueError("Different tensors. 
Got hashes: {}".format(tensors_hashes)) def check_equal_objects(self, obj: Any): """ @@ -610,9 +604,9 @@ def check_equal_objects(self, obj: Any): o_bt = _base_typed(o) if obj_bt != o_bt: raise ValueError( - 'Different objects (ranks this={}, remote={}). ' - 'Got this={}, remote={}'.format( - self.rank, i, obj, o)) + "Different objects (ranks this={}, remote={}). " + "Got this={}, remote={}".format(self.rank, i, obj, o) + ) def _prepare_for_distributed_comm(self, tensor: Tensor): """ @@ -642,7 +636,7 @@ def _revert_to_original_device(self, tensor_distributed, orig_data): (if needed). :param: The tensor obtained from a torch.distributed API call. - :param: The descriptor in the format of + :param: The descriptor in the format of :meth:`_prepare_for_distributed_comm`. :return: The tensor moved to the appropriate device. """ @@ -661,7 +655,7 @@ def rank(self) -> int: """ The current tank. - :return: The rank of the current process. + :return: The rank of the current process. Returns 0 if not running a distributed training. """ if torch.distributed.is_initialized(): @@ -673,7 +667,7 @@ def world_size(self) -> int: """ The world size. - :return: The world size of the default group. + :return: The world size of the default group. Returns 1 if not running a distributed training. """ @@ -719,7 +713,7 @@ def forced_cuda_comm(self) -> bool: :return: True if tensors must be moved to the default cuda device, False otherwise. """ - return self.backend == 'nccl' + return self.backend == "nccl" @property def device_map(self) -> Dict[str, str]: @@ -741,9 +735,9 @@ def _make_map(device_or_map) -> Dict[str, str]: device = torch.device(device_or_map) map_location = dict() - map_location['cpu'] = 'cpu' + map_location["cpu"] = "cpu" for cuda_idx in range(100): - map_location[f'cuda:{cuda_idx}'] = str(device) + map_location[f"cuda:{cuda_idx}"] = str(device) return map_location @@ -755,14 +749,17 @@ def _base_typed(obj): Improved version of https://stackoverflow.com/a/62420097 """ T = type(obj) - from_numpy = T.__module__ == 'numpy' - from_pytorch = T.__module__ == 'torch' + from_numpy = T.__module__ == "numpy" + from_pytorch = T.__module__ == "torch" if from_numpy or from_pytorch: return obj.tolist() - if T in BASE_TYPES or callable(obj) or ((from_numpy or from_pytorch) - and not isinstance(T, Iterable)): + if ( + T in BASE_TYPES + or callable(obj) + or ((from_numpy or from_pytorch) and not isinstance(T, Iterable)) + ): return obj if isinstance(obj, Dict): @@ -777,8 +774,7 @@ def _base_typed(obj): def fix(): - return lambda b: torch.load(BytesIO(b), - map_location=DistributedHelper.device_map) + return lambda b: torch.load(BytesIO(b), map_location=DistributedHelper.device_map) class MappedUnpickler(pickle.Unpickler): @@ -790,6 +786,7 @@ class MappedUnpickler(pickle.Unpickler): This unpickler will we used to replace the `torch.distributed.distributed_c10d._unpickler`. 
""" + # Based on: # https://github.com/pytorch/pytorch/issues/16797#issuecomment-777059657 @@ -800,7 +797,7 @@ def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) def find_class(self, module, name): - if module == 'torch.storage' and name == '_load_from_bytes': + if module == "torch.storage" and name == "_load_from_bytes": return fix() else: return super().find_class(module, name) @@ -812,8 +809,8 @@ def find_class(self, module, name): __all__ = [ - 'RollingSeedContext', - 'BroadcastSeedContext', - '_DistributedHelperCls', - 'DistributedHelper' + "RollingSeedContext", + "BroadcastSeedContext", + "_DistributedHelperCls", + "DistributedHelper", ] diff --git a/avalanche/evaluation/metric_definitions.py b/avalanche/evaluation/metric_definitions.py index ec41a3026..61aebfbe5 100644 --- a/avalanche/evaluation/metric_definitions.py +++ b/avalanche/evaluation/metric_definitions.py @@ -32,7 +32,7 @@ from ..training.templates import SupervisedTemplate TResult_co = TypeVar("TResult_co", covariant=True) -TMetric = TypeVar('TMetric', bound='Metric') +TMetric = TypeVar("TMetric", bound="Metric") class Metric(Protocol[TResult_co]): @@ -110,9 +110,7 @@ def reset(self) -> None: def before_training(self, strategy: "SupervisedTemplate") -> "MetricResult": pass - def before_training_exp( - self, strategy: "SupervisedTemplate" - ) -> "MetricResult": + def before_training_exp(self, strategy: "SupervisedTemplate") -> "MetricResult": pass def before_train_dataset_adaptation( @@ -125,9 +123,7 @@ def after_train_dataset_adaptation( ) -> "MetricResult": pass - def before_training_epoch( - self, strategy: "SupervisedTemplate" - ) -> "MetricResult": + def before_training_epoch(self, strategy: "SupervisedTemplate") -> "MetricResult": pass def before_training_iteration( @@ -158,14 +154,10 @@ def before_update(self, strategy: "SupervisedTemplate") -> "MetricResult": def after_update(self, strategy: "SupervisedTemplate") -> "MetricResult": pass - def after_training_epoch( - self, strategy: "SupervisedTemplate" - ) -> "MetricResult": + def after_training_epoch(self, strategy: "SupervisedTemplate") -> "MetricResult": pass - def after_training_exp( - self, strategy: "SupervisedTemplate" - ) -> "MetricResult": + def after_training_exp(self, strategy: "SupervisedTemplate") -> "MetricResult": pass def after_training(self, strategy: "SupervisedTemplate") -> "MetricResult": @@ -193,30 +185,20 @@ def after_eval_exp(self, strategy: "SupervisedTemplate") -> "MetricResult": def after_eval(self, strategy: "SupervisedTemplate") -> "MetricResult": pass - def before_eval_iteration( - self, strategy: "SupervisedTemplate" - ) -> "MetricResult": + def before_eval_iteration(self, strategy: "SupervisedTemplate") -> "MetricResult": pass - def before_eval_forward( - self, strategy: "SupervisedTemplate" - ) -> "MetricResult": + def before_eval_forward(self, strategy: "SupervisedTemplate") -> "MetricResult": pass - def after_eval_forward( - self, strategy: "SupervisedTemplate" - ) -> "MetricResult": + def after_eval_forward(self, strategy: "SupervisedTemplate") -> "MetricResult": pass - def after_eval_iteration( - self, strategy: "SupervisedTemplate" - ) -> "MetricResult": + def after_eval_iteration(self, strategy: "SupervisedTemplate") -> "MetricResult": pass -class GenericPluginMetric( - PluginMetric[TResult_co], - Generic[TResult_co, TMetric]): +class GenericPluginMetric(PluginMetric[TResult_co], Generic[TResult_co, TMetric]): """ This class provides a generic implementation of a Plugin Metric. 
The user can subclass this class to easily implement custom plugin @@ -225,46 +207,28 @@ class GenericPluginMetric( @overload def __init__( - self, - metric: TMetric, + self, + metric: TMetric, reset_at: Literal[ - "iteration", - "epoch", - "experience", - "stream", - "never"] = "experience", - emit_at: Literal[ - "iteration", - "epoch", - "experience", - "stream"] = "experience", - mode: Literal["train"] = "train" + "iteration", "epoch", "experience", "stream", "never" + ] = "experience", + emit_at: Literal["iteration", "epoch", "experience", "stream"] = "experience", + mode: Literal["train"] = "train", ): ... @overload def __init__( - self, - metric: TMetric, - reset_at: Literal[ - "iteration", - "experience", - "stream", - "never"] = "experience", - emit_at: Literal[ - "iteration", - "experience", - "stream"] = "experience", - mode: Literal["eval"] = "eval" + self, + metric: TMetric, + reset_at: Literal["iteration", "experience", "stream", "never"] = "experience", + emit_at: Literal["iteration", "experience", "stream"] = "experience", + mode: Literal["eval"] = "eval", ): ... def __init__( - self, - metric: TMetric, - reset_at="experience", - emit_at="experience", - mode="eval" + self, metric: TMetric, reset_at="experience", emit_at="experience", mode="eval" ): super(GenericPluginMetric, self).__init__() assert mode in {"train", "eval"} @@ -305,17 +269,13 @@ def _package_result(self, strategy: "SupervisedTemplate") -> "MetricResult": metric_name = get_metric_name( self, strategy, add_experience=add_exp, add_task=k ) - metrics.append( - MetricValue(self, metric_name, v, plot_x_position) - ) + metrics.append(MetricValue(self, metric_name, v, plot_x_position)) return metrics else: metric_name = get_metric_name( self, strategy, add_experience=add_exp, add_task=True ) - return [ - MetricValue(self, metric_name, metric_value, plot_x_position) - ] + return [MetricValue(self, metric_name, metric_value, plot_x_position)] def before_training(self, strategy: "SupervisedTemplate"): super().before_training(strategy) @@ -493,24 +453,19 @@ def _package_result(self, strategy: "SupervisedTemplate") -> "MetricResult": for m_value in emitted_values: if not isinstance(m_value, _ExtendedPluginMetricValue): raise RuntimeError( - "Emitted a value that is not of type " - "ExtendedPluginMetricValue" + "Emitted a value that is not of type " "ExtendedPluginMetricValue" ) m_name = self.metric_value_name(m_value) x_pos = m_value.plot_position if x_pos is None: x_pos = default_plot_x_position - metrics.append( - MetricValue(self, m_name, m_value.metric_value, x_pos) - ) + metrics.append(MetricValue(self, m_name, m_value.metric_value, x_pos)) return metrics def metric_value_name(self, m_value: _ExtendedPluginMetricValue) -> str: - return generic_get_metric_name( - default_metric_name_template, vars(m_value) - ) + return generic_get_metric_name(default_metric_name_template, vars(m_value)) __all__ = [ diff --git a/avalanche/evaluation/metric_results.py b/avalanche/evaluation/metric_results.py index 602008bbc..7ee0ac047 100644 --- a/avalanche/evaluation/metric_results.py +++ b/avalanche/evaluation/metric_results.py @@ -61,9 +61,7 @@ class AlternativeValues: def __init__(self, *alternatives: MetricType): self.alternatives: Tuple[MetricType, ...] = alternatives - def best_supported_value( - self, *supported_types: type - ) -> Optional[MetricType]: + def best_supported_value(self, *supported_types: type) -> Optional[MetricType]: """ Retrieves a supported representation for this metric value. 
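As context for the GenericPluginMetric reformatting above: the contract is unchanged, a plugin wraps a standalone metric, feeds it minibatch data in update(), and resets/emits at the configured granularity. A minimal sketch of that pattern, modelled on the accuracy plugins that appear further below in this patch (the class name is hypothetical and not part of the patch):

```python
from avalanche.evaluation import GenericPluginMetric
from avalanche.evaluation.metrics.accuracy import Accuracy


class ExperienceAccuracySketch(GenericPluginMetric[float, Accuracy]):
    """Hypothetical plugin: top-1 accuracy emitted once per eval experience."""

    def __init__(self):
        # reset_at / emit_at / mode follow the overloads declared above.
        super().__init__(
            Accuracy(), reset_at="experience", emit_at="experience", mode="eval"
        )

    def reset(self) -> None:
        self._metric.reset()

    def result(self) -> float:
        return self._metric.result()

    def update(self, strategy):
        # Feed the wrapped standalone metric with the current minibatch.
        self._metric.update(strategy.mb_output, strategy.mb_y)

    def __str__(self):
        return "Top1_Acc_Exp_Sketch"
```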
diff --git a/avalanche/evaluation/metric_utils.py b/avalanche/evaluation/metric_utils.py index f4335f9df..76ebca350 100644 --- a/avalanche/evaluation/metric_utils.py +++ b/avalanche/evaluation/metric_utils.py @@ -321,7 +321,6 @@ def generic_get_metric_name( value_name_template: Union[str, Callable[[Dict[str, Any]], str]], metric_info: Dict[str, Any], ): - if isinstance(value_name_template, str): name_template = value_name_template else: diff --git a/avalanche/evaluation/metrics/acc_matrix.py b/avalanche/evaluation/metrics/acc_matrix.py index cf0936095..20a4d0e8c 100644 --- a/avalanche/evaluation/metrics/acc_matrix.py +++ b/avalanche/evaluation/metrics/acc_matrix.py @@ -44,12 +44,12 @@ def result(self, strategy=None) -> float: def add_new_task(self, new_length): """Adds a new dimension to the accuracy matrix. - :param new_length: The new dimension of the matrix. We assume a square + :param new_length: The new dimension of the matrix. We assume a square matrix """ temp = self.matrix.clone() self.matrix = torch.zeros((new_length, new_length)) - self.matrix[:temp.size(0), :temp.size(1)] = temp + self.matrix[: temp.size(0), : temp.size(1)] = temp def update(self, num_training_steps, eval_exp_id): """Updates the matrix with the accuracy value for a given task pair. @@ -58,7 +58,7 @@ def update(self, num_training_steps, eval_exp_id): :param eval_exp_id: The ID of the evaluation experience. """ if (max(num_training_steps, eval_exp_id) + 1) > self.matrix.size(0): - self.add_new_task(max(num_training_steps, eval_exp_id)+1) + self.add_new_task(max(num_training_steps, eval_exp_id) + 1) acc = self._accuracy.result() self.matrix[num_training_steps, eval_exp_id] = acc @@ -112,11 +112,10 @@ def before_training(self, strategy: "SupervisedTemplate"): if self.num_training_steps is not None: self.num_training_steps += 1 else: - self.num_training_steps = 0 + self.num_training_steps = 0 -def accuracy_matrix_metrics( -) -> List[PluginMetric]: +def accuracy_matrix_metrics() -> List[PluginMetric]: """ Helper method that can be used to obtain the desired set of plugin metrics. diff --git a/avalanche/evaluation/metrics/accuracy.py b/avalanche/evaluation/metrics/accuracy.py index 4b418e29a..8cb92a0be 100644 --- a/avalanche/evaluation/metrics/accuracy.py +++ b/avalanche/evaluation/metrics/accuracy.py @@ -77,9 +77,7 @@ def update( true_positives = float(torch.sum(torch.eq(predicted_y, true_y))) total_patterns = len(true_y) - self._mean_accuracy.update( - true_positives / total_patterns, total_patterns - ) + self._mean_accuracy.update(true_positives / total_patterns, total_patterns) def result(self) -> float: """Retrieves the running accuracy. @@ -153,9 +151,7 @@ def update( for pred, true, t in zip(predicted_y, true_y, task_labels): if isinstance(t, Tensor): t = t.item() - self._mean_accuracy[t].update( - pred.unsqueeze(0), true.unsqueeze(0) - ) + self._mean_accuracy[t].update(pred.unsqueeze(0), true.unsqueeze(0)) else: raise ValueError( f"Task label type: {type(task_labels)}, " @@ -213,9 +209,7 @@ def __init__(self, reset_at, emit_at, mode, split_by_task=False): :param mode: :param split_by_task: whether to compute task-aware accuracy or not. 
""" - super().__init__( - Accuracy(), reset_at=reset_at, emit_at=emit_at, mode=mode - ) + super().__init__(Accuracy(), reset_at=reset_at, emit_at=emit_at, mode=mode) def reset(self) -> None: self._metric.reset() @@ -228,9 +222,8 @@ def update(self, strategy): class AccuracyPerTaskPluginMetric( - GenericPluginMetric[ - Dict[int, float], - TaskAwareAccuracy]): + GenericPluginMetric[Dict[int, float], TaskAwareAccuracy] +): """ Base class for all accuracies plugin metrics """ @@ -254,9 +247,7 @@ def result(self) -> Dict[int, float]: return self._metric.result() def update(self, strategy): - self._metric.update( - strategy.mb_output, strategy.mb_y, strategy.mb_task_id - ) + self._metric.update(strategy.mb_output, strategy.mb_y, strategy.mb_task_id) class MinibatchAccuracy(AccuracyPluginMetric): diff --git a/avalanche/evaluation/metrics/amca.py b/avalanche/evaluation/metrics/amca.py index 4791bf169..3b7d3048f 100644 --- a/avalanche/evaluation/metrics/amca.py +++ b/avalanche/evaluation/metrics/amca.py @@ -263,14 +263,11 @@ def update( """ if self._current_stream is None: raise RuntimeError( - "No current stream set. " - 'Call "set_stream" to set the current stream.' + "No current stream set. " 'Call "set_stream" to set the current stream.' ) if self._is_stream_tracked(self._current_stream): - self._amcas[self._current_stream].update( - predicted_y, true_y, task_labels - ) + self._amcas[self._current_stream].update(predicted_y, true_y, task_labels) def result(self) -> Dict[str, Dict[int, float]]: """ @@ -359,18 +356,14 @@ def __init__(self, classes=None, streams=None, ignore_validation=True): self._ignore_validation = ignore_validation self._is_training = False - super().__init__( - self._ms_amca, reset_at="never", emit_at="stream", mode="eval" - ) + super().__init__(self._ms_amca, reset_at="never", emit_at="stream", mode="eval") def update(self, strategy: "SupervisedTemplate"): if self._is_training and self._ignore_validation: # Running a validation (eval phase inside a train phase), ignore it return - self._ms_amca.update( - strategy.mb_output, strategy.mb_y, strategy.mb_task_id - ) + self._ms_amca.update(strategy.mb_output, strategy.mb_y, strategy.mb_task_id) def before_training(self, strategy: "SupervisedTemplate"): self._is_training = True @@ -419,9 +412,7 @@ def result(self) -> List[_ExtendedPluginMetricValue]: return metric_values def metric_value_name(self, m_value: _ExtendedPluginMetricValue) -> str: - return generic_get_metric_name( - AMCAPluginMetric.VALUE_NAME, vars(m_value) - ) + return generic_get_metric_name(AMCAPluginMetric.VALUE_NAME, vars(m_value)) def __str__(self): return "Top1_AMCA_Stream" diff --git a/avalanche/evaluation/metrics/checkpoint.py b/avalanche/evaluation/metrics/checkpoint.py index 3c155c892..b06926ccb 100644 --- a/avalanche/evaluation/metrics/checkpoint.py +++ b/avalanche/evaluation/metrics/checkpoint.py @@ -79,14 +79,10 @@ def _package_result(self, strategy) -> "MetricResult": self, strategy, add_experience=True, add_task=False ) return [ - MetricValue( - self, metric_name, weights, strategy.clock.train_iterations - ) + MetricValue(self, metric_name, weights, strategy.clock.train_iterations) ] - def after_training_exp( - self, strategy: "SupervisedTemplate" - ) -> "MetricResult": + def after_training_exp(self, strategy: "SupervisedTemplate") -> "MetricResult": model_params = copy.deepcopy(strategy.model.parameters()) self.update(model_params) return None diff --git a/avalanche/evaluation/metrics/class_accuracy.py b/avalanche/evaluation/metrics/class_accuracy.py 
index 3199e76ff..2947d49f5 100644 --- a/avalanche/evaluation/metrics/class_accuracy.py +++ b/avalanche/evaluation/metrics/class_accuracy.py @@ -137,8 +137,7 @@ def update( if not isinstance(task_labels, (int, Tensor)): raise ValueError( - f"Task label type: {type(task_labels)}, " - f"expected int or Tensor" + f"Task label type: {type(task_labels)}, " f"expected int or Tensor" ) if isinstance(task_labels, int): @@ -216,21 +215,19 @@ class ClassAccuracyPluginMetric(_ExtendedGenericPluginMetric[ClassAccuracy]): def __init__(self, reset_at, emit_at, mode, classes=None): super(ClassAccuracyPluginMetric, self).__init__( - ClassAccuracy(classes=classes), + ClassAccuracy(classes=classes), reset_at=reset_at, emit_at=emit_at, - mode=mode + mode=mode, ) - self.phase_name = 'train' - self.stream_name = 'train' + self.phase_name = "train" + self.stream_name = "train" self.experience_id = 0 def update(self, strategy: "SupervisedTemplate"): assert strategy.mb_output is not None assert strategy.experience is not None - self._metric.update( - strategy.mb_output, strategy.mb_y, strategy.mb_task_id - ) + self._metric.update(strategy.mb_output, strategy.mb_y, strategy.mb_task_id) self.phase_name = "train" if strategy.is_training else "eval" self.stream_name = strategy.experience.origin_stream.name diff --git a/avalanche/evaluation/metrics/confusion_matrix.py b/avalanche/evaluation/metrics/confusion_matrix.py index 8369e123d..02e77bdf7 100644 --- a/avalanche/evaluation/metrics/confusion_matrix.py +++ b/avalanche/evaluation/metrics/confusion_matrix.py @@ -122,8 +122,7 @@ def update(self, true_y: Tensor, predicted_y: Tensor) -> None: ) if len(predicted_y.shape) > 2: raise ValueError( - "Confusion matrix supports predictions with at " - "most 2 dimensions" + "Confusion matrix supports predictions with at " "most 2 dimensions" ) max_label = -1 if self._num_classes is None else self._num_classes - 1 @@ -168,14 +167,11 @@ def update(self, true_y: Tensor, predicted_y: Tensor) -> None: if self._num_classes is None: max_label = max(max_label, torch.max(true_y).item()) elif torch.max(true_y).item() >= self._num_classes: - raise ValueError( - "Encountered target label larger than" "num_classes" - ) + raise ValueError("Encountered target label larger than" "num_classes") if max_label < 0: raise ValueError( - "The Confusion Matrix metric can only handle " - "positive label values" + "The Confusion Matrix metric can only handle " "positive label values" ) if self._cm_tensor is None: @@ -205,9 +201,7 @@ def result(self) -> Tensor: matrix_shape = (self._num_classes, self._num_classes) return torch.zeros(matrix_shape, dtype=torch.long) if self.normalize is not None: - return ConfusionMatrix._normalize_cm( - self._cm_tensor, self.normalize - ) + return ConfusionMatrix._normalize_cm(self._cm_tensor, self.normalize) return self._cm_tensor def reset(self) -> None: @@ -222,13 +216,10 @@ def reset(self) -> None: self._cm_tensor = None @staticmethod - def _normalize_cm( - cm: Tensor, normalization: Literal["true", "pred", "all"] - ): + def _normalize_cm(cm: Tensor, normalization: Literal["true", "pred", "all"]): if normalization not in ("true", "pred", "all"): raise ValueError( - "Invalid normalization parameter. Can be 'true'," - " 'pred' or 'all'" + "Invalid normalization parameter. 
Can be 'true'," " 'pred' or 'all'" ) if normalization == "true": @@ -346,9 +337,7 @@ def _package_result(self, strategy: "SupervisedTemplate") -> MetricResult: exp_cm = self.result() phase_name, _ = phase_and_task(strategy) stream = stream_type(strategy.experience) - metric_name = "{}/{}_phase/{}_stream".format( - str(self), phase_name, stream - ) + metric_name = "{}/{}_phase/{}_stream".format(str(self), phase_name, stream) plot_x_position = strategy.clock.train_iterations if self._save_image: @@ -437,9 +426,7 @@ def _package_result(self, strategy: "SupervisedTemplate") -> MetricResult: outputs, targets = self.result() phase_name, _ = phase_and_task(strategy) stream = stream_type(strategy.experience) - metric_name = "{}/{}_phase/{}_stream".format( - str(self), phase_name, stream - ) + metric_name = "{}/{}_phase/{}_stream".format(str(self), phase_name, stream) plot_x_position = strategy.clock.train_iterations # compute predicted classes diff --git a/avalanche/evaluation/metrics/cpu_usage.py b/avalanche/evaluation/metrics/cpu_usage.py index 5b7d9ffda..f79cb903b 100644 --- a/avalanche/evaluation/metrics/cpu_usage.py +++ b/avalanche/evaluation/metrics/cpu_usage.py @@ -78,9 +78,7 @@ def update(self) -> None: last_time = getattr(self._process_handle, "_last_sys_cpu_times", None) utilization = self._process_handle.cpu_percent() - current_time = getattr( - self._process_handle, "_last_sys_cpu_times", None - ) + current_time = getattr(self._process_handle, "_last_sys_cpu_times", None) if self._first_update: self._first_update = False @@ -119,7 +117,6 @@ def reset(self) -> None: class CPUPluginMetric(GenericPluginMetric[float, CPUUsage]): def __init__(self, reset_at, emit_at, mode): - super(CPUPluginMetric, self).__init__( CPUUsage(), reset_at=reset_at, emit_at=emit_at, mode=mode ) @@ -270,12 +267,7 @@ def __str__(self): def cpu_usage_metrics( - *, - minibatch=False, - epoch=False, - epoch_running=False, - experience=False, - stream=False + *, minibatch=False, epoch=False, epoch_running=False, experience=False, stream=False ) -> List[CPUPluginMetric]: """ Helper method that can be used to obtain the desired set of diff --git a/avalanche/evaluation/metrics/cumulative_accuracies.py b/avalanche/evaluation/metrics/cumulative_accuracies.py index b6c3ae3c8..91a901500 100644 --- a/avalanche/evaluation/metrics/cumulative_accuracies.py +++ b/avalanche/evaluation/metrics/cumulative_accuracies.py @@ -19,10 +19,12 @@ from avalanche.benchmarks import OnlineCLExperience from avalanche.evaluation import GenericPluginMetric, Metric, PluginMetric from avalanche.evaluation.metrics.mean import Mean -from avalanche.evaluation.metric_utils import (phase_and_task, - stream_type, - generic_get_metric_name, - default_metric_name_template) +from avalanche.evaluation.metric_utils import ( + phase_and_task, + stream_type, + generic_get_metric_name, + default_metric_name_template, +) from avalanche.evaluation.metric_results import MetricValue if TYPE_CHECKING: @@ -55,23 +57,22 @@ def update( true_y = torch.as_tensor(true_y) predicted_y = torch.as_tensor(predicted_y) if len(true_y) != len(predicted_y): - raise ValueError("Size mismatch for true_y " - "and predicted_y tensors") + raise ValueError("Size mismatch for true_y " "and predicted_y tensors") for t, classes in classes_splits.items(): - # This is to fix a weird bug + # This is to fix a weird bug # that was happening in some workflows if t not in self._mean_accuracy: self._mean_accuracy[t] # Only compute Accuracy for classes that are in classes set if 
len(set(true_y.cpu().numpy()).intersection(classes)) == 0: - # Here this assumes that true_y is only - # coming from the same classes split, + # Here this assumes that true_y is only + # coming from the same classes split, # this is a shortcut # but sometimes this is not true so we # do additional filtering later to make sure continue - + idxs = np.where(np.isin(true_y.cpu(), list(classes)))[0] y = true_y[idxs] logits_exp = predicted_y[idxs, :] @@ -121,27 +122,18 @@ def __init__(self, reset_at="stream", emit_at="stream", mode="eval"): self.classes_seen_so_far = set() self.classes_splits = {} - super().__init__(CumulativeAccuracy(), - reset_at=reset_at, - emit_at=emit_at, - mode=mode) + super().__init__( + CumulativeAccuracy(), reset_at=reset_at, emit_at=emit_at, mode=mode + ) def before_training_exp(self, strategy, **kwargs): super().before_training_exp(strategy, **kwargs) if isinstance(strategy.experience, OnlineCLExperience): new_classes = set( - strategy.experience.logging(). - origin_experience. - classes_in_this_experience + strategy.experience.logging().origin_experience.classes_in_this_experience ) - task_id = ( - strategy. - experience. - logging(). - origin_experience. - current_experience - ) + task_id = strategy.experience.logging().origin_experience.current_experience else: new_classes = set(strategy.experience.classes_in_this_experience) task_id = strategy.experience.current_experience @@ -156,10 +148,7 @@ def result(self) -> Dict[int, float]: return self._metric.result() def update(self, strategy): - self._metric.update( - self.classes_splits, - strategy.mb_output, - strategy.mb_y) + self._metric.update(self.classes_splits, strategy.mb_output, strategy.mb_y) def _package_result(self, strategy: "SupervisedTemplate") -> "MetricResult": assert strategy.experience is not None @@ -181,9 +170,7 @@ def _package_result(self, strategy: "SupervisedTemplate") -> "MetricResult": "stream_name": stream, }, ) - metrics.append( - MetricValue(self, metric_name, v, plot_x_position) - ) + metrics.append(MetricValue(self, metric_name, v, plot_x_position)) return metrics def __repr__(self): @@ -218,23 +205,18 @@ def __init__(self, reset_at="stream", emit_at="stream", mode="eval"): self.train_task_id = None - super().__init__(CumulativeAccuracy(), - reset_at=reset_at, - emit_at=emit_at, - mode=mode) + super().__init__( + CumulativeAccuracy(), reset_at=reset_at, emit_at=emit_at, mode=mode + ) def before_training_exp(self, strategy, **kwargs): super().before_training_exp(strategy, **kwargs) if isinstance(strategy.experience, OnlineCLExperience): if strategy.experience.access_task_boundaries: new_classes = set( - strategy.experience. - origin_experience. - classes_in_this_experience + strategy.experience.origin_experience.classes_in_this_experience ) - task_id = (strategy.experience. - origin_experience. 
- current_experience) + task_id = strategy.experience.origin_experience.current_experience else: raise AttributeError( "Online Scenario has to allow " @@ -283,16 +265,11 @@ def _package_result(self, strategy: "SupervisedTemplate") -> "MetricResult": "stream_name": stream, }, ) - metrics.append( - MetricValue(self, metric_name, v, plot_x_position) - ) + metrics.append(MetricValue(self, metric_name, v, plot_x_position)) return metrics def update(self, strategy): - self._metric.update( - self.classes_splits, - strategy.mb_output, - strategy.mb_y) + self._metric.update(self.classes_splits, strategy.mb_output, strategy.mb_y) def _compute_forgetting(self): for t, item in self._metric.result().items(): diff --git a/avalanche/evaluation/metrics/detection.py b/avalanche/evaluation/metrics/detection.py index c10f3a11e..4f291655c 100644 --- a/avalanche/evaluation/metrics/detection.py +++ b/avalanche/evaluation/metrics/detection.py @@ -48,8 +48,8 @@ ) -TDetPredictions_co = TypeVar('TDetPredictions_co', covariant=True) -TDetModelOutput = TypeVar('TDetModelOutput', contravariant=True) +TDetPredictions_co = TypeVar("TDetPredictions_co", covariant=True) +TDetModelOutput = TypeVar("TDetModelOutput", contravariant=True) TCommonDetectionOutput = Dict[str, Dict[str, Tensor]] @@ -102,9 +102,7 @@ def update(self, model_output: TDetModelOutput): def evaluate( self, - ) -> Optional[ - Union[Dict[str, Any], Tuple[Dict[str, Any], TDetPredictions_co]] - ]: + ) -> Optional[Union[Dict[str, Any], Tuple[Dict[str, Any], TDetPredictions_co]]]: """ Computes the performance metrics on the outputs previously obtained through `update()`. @@ -138,14 +136,16 @@ def summarize(self): def coco_evaluator_factory(coco_gt: COCO, iou_types: List[str]): - from avalanche.evaluation.metrics.detection_evaluators.coco_evaluator \ - import CocoEvaluator + from avalanche.evaluation.metrics.detection_evaluators.coco_evaluator import ( + CocoEvaluator, + ) + return CocoEvaluator(coco_gt=coco_gt, iou_types=iou_types) class DetectionMetrics( - PluginMetric[dict], - Generic[TDetPredictions_co, TDetModelOutput]): + PluginMetric[dict], Generic[TDetPredictions_co, TDetModelOutput] +): """ Metric used to compute the detection and segmentation metrics using the dataset-specific API. 
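Stepping back to the cumulative-accuracy changes above: the standalone CumulativeAccuracy metric can also be driven outside a strategy, passing the task-to-classes splits explicitly. A small sketch under assumed splits (the split values and tensors are illustrative, not taken from this patch):

```python
import torch

from avalanche.evaluation.metrics.cumulative_accuracies import CumulativeAccuracy

# Hypothetical splits: task 0 owns classes {0, 1}, task 1 owns classes {2, 3}.
classes_splits = {0: {0, 1}, 1: {2, 3}}

metric = CumulativeAccuracy()
logits = torch.tensor([[2.0, 0.1, 0.0, 0.0], [0.1, 0.2, 1.5, 0.3]])
targets = torch.tensor([0, 2])

metric.update(classes_splits, logits, targets)
print(metric.result())  # {split_id: cumulative accuracy seen so far}
```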
@@ -163,8 +163,7 @@ def __init__( self, *, evaluator_factory: Callable[ - [Any, List[str]], - DetectionEvaluator[TDetPredictions_co, TDetModelOutput] + [Any, List[str]], DetectionEvaluator[TDetPredictions_co, TDetModelOutput] ] = coco_evaluator_factory, gt_api_def: Sequence[ SupportedDatasetApiDef @@ -299,7 +298,7 @@ def update(self, res: TDetModelOutput): self._check_evaluator() if self.save: self.current_outputs.append(res) - + self.evaluator.update(res) # type: ignore def result(self): @@ -325,20 +324,19 @@ def before_eval_exp(self, strategy) -> None: self.current_filename = self._get_filename(strategy) def after_eval_iteration( # type: ignore[override] - self, strategy: "ObjectDetectionTemplate"): + self, strategy: "ObjectDetectionTemplate" + ): assert strategy.detection_predictions is not None self.update(strategy.detection_predictions) def after_eval_exp( # type: ignore[override] - self, strategy: "ObjectDetectionTemplate"): + self, strategy: "ObjectDetectionTemplate" + ): assert strategy.experience is not None - if ( - self.save - and strategy.experience.origin_stream.name == self.save_stream - ): + if self.save and strategy.experience.origin_stream.name == self.save_stream: assert self.current_filename is not None, ( - 'The current_filename field is None, which may happen if the ' - '`before_eval_exp` was not properly invoked.' + "The current_filename field is None, which may happen if the " + "`before_eval_exp` was not properly invoked." ) with open(self.current_filename, "w") as f: @@ -362,9 +360,7 @@ def _package_result(self, strategy): for metric_key, metric_value in iou_dict.items(): metric_name = base_metric_name + f"/{iou}/{metric_key}" metric_values.append( - MetricValue( - self, metric_name, metric_value, plot_x_position - ) + MetricValue(self, metric_name, metric_value, plot_x_position) ) return metric_values @@ -379,14 +375,14 @@ def _get_filename(self, strategy) -> Union[str, Path]: f"{self.filename_prefix}{middle}" f"{strategy.experience.current_experience}.json", ) - + def _check_evaluator(self): assert self.evaluator is not None, ( - 'The evaluator was not initialized. This may happen if you try ' - 'to update or obtain results for this metric before the ' - '`before_eval_exp` callback is invoked. If you are using this ' - 'metric in a standalone way, you can initialize the evaluator ' - 'by calling `initialize_evaluator` instead.' + "The evaluator was not initialized. This may happen if you try " + "to update or obtain results for this metric before the " + "`before_eval_exp` callback is invoked. If you are using this " + "metric in a standalone way, you can initialize the evaluator " + "by calling `initialize_evaluator` instead." 
) def __str__(self): @@ -394,8 +390,10 @@ def __str__(self): def lvis_evaluator_factory(lvis_gt: LVIS, iou_types: List[str]): - from avalanche.evaluation.metrics.detection_evaluators.lvis_evaluator \ - import LvisEvaluator + from avalanche.evaluation.metrics.detection_evaluators.lvis_evaluator import ( + LvisEvaluator, + ) + return LvisEvaluator(lvis_gt=lvis_gt, iou_types=iou_types) @@ -407,9 +405,7 @@ def make_lvis_metrics( evaluator_factory: Callable[ [Any, List[str]], DetectionEvaluator ] = lvis_evaluator_factory, - gt_api_def: Sequence[ - SupportedDatasetApiDef - ] = DEFAULT_SUPPROTED_DETECTION_DATASETS, + gt_api_def: Sequence[SupportedDatasetApiDef] = DEFAULT_SUPPROTED_DETECTION_DATASETS, ): """ Returns an instance of :class:`DetectionMetrics` initialized for the LVIS @@ -511,11 +507,7 @@ def convert_to_coco_api(ds): coco_ds = COCO() # annotation IDs need to start at 1, not 0, see torchvision issue #1530 ann_id = 1 - dataset: Dict[str, List[Any]] = { - "images": [], - "categories": [], - "annotations": [] - } + dataset: Dict[str, List[Any]] = {"images": [], "categories": [], "annotations": []} categories = set() for img_idx in range(len(ds)): img_dict = {} diff --git a/avalanche/evaluation/metrics/detection_evaluators/coco_evaluator.py b/avalanche/evaluation/metrics/detection_evaluators/coco_evaluator.py index 6a5a9f2d2..b408e2546 100644 --- a/avalanche/evaluation/metrics/detection_evaluators/coco_evaluator.py +++ b/avalanche/evaluation/metrics/detection_evaluators/coco_evaluator.py @@ -95,10 +95,7 @@ ) -class CocoEvaluator( - DetectionEvaluator[ - Dict[str, COCOeval], - TCommonDetectionOutput]): +class CocoEvaluator(DetectionEvaluator[Dict[str, COCOeval], TCommonDetectionOutput]): """ Defines an evaluator for the COCO dataset. @@ -129,9 +126,7 @@ def update(self, predictions: TCommonDetectionOutput): for iou_type in self.iou_types: results = self.prepare(predictions, iou_type) with redirect_stdout(io.StringIO()): - coco_dt = ( - COCO.loadRes(self.coco_gt, results) if results else COCO() - ) + coco_dt = COCO.loadRes(self.coco_gt, results) if results else COCO() coco_eval = self.coco_eval[iou_type] coco_eval.cocoDt = coco_dt @@ -142,9 +137,7 @@ def update(self, predictions: TCommonDetectionOutput): def synchronize_between_processes(self): for iou_type in self.iou_types: - self.eval_imgs[iou_type] = np.concatenate( - self.eval_imgs[iou_type], 2 - ) + self.eval_imgs[iou_type] = np.concatenate(self.eval_imgs[iou_type], 2) create_common_coco_eval( self.coco_eval[iou_type], self.img_ids, self.eval_imgs[iou_type] @@ -154,9 +147,9 @@ def synchronize_between_processes(self): return dist.get_rank() == 0 return True - def evaluate(self) -> Optional[ - Union[Dict[str, Any], Tuple[Dict[str, Any], Dict[str, COCOeval]]] - ]: + def evaluate( + self, + ) -> Optional[Union[Dict[str, Any], Tuple[Dict[str, Any], Dict[str, COCOeval]]]]: main_process = self.synchronize_between_processes() for coco_eval in self.coco_eval.values(): @@ -243,9 +236,7 @@ def prepare_for_coco_segmentation(self, predictions): rles = [ mask_util.encode( - np.array( - mask[0, :, :, np.newaxis], dtype=np.uint8, order="F" - ) + np.array(mask[0, :, :, np.newaxis], dtype=np.uint8, order="F") )[0] for mask in masks ] @@ -327,8 +318,9 @@ def all_gather(data): return data_list -def merge(img_ids: List[int], eval_imgs: List[np.ndarray]) -> \ - Tuple[np.ndarray, np.ndarray]: +def merge( + img_ids: List[int], eval_imgs: List[np.ndarray] +) -> Tuple[np.ndarray, np.ndarray]: all_img_ids = all_gather(img_ids) all_eval_imgs = 
all_gather(eval_imgs) @@ -351,9 +343,8 @@ def merge(img_ids: List[int], eval_imgs: List[np.ndarray]) -> \ def create_common_coco_eval( - coco_eval: COCOeval, - img_ids: List[int], - eval_imgs: List[np.ndarray]): + coco_eval: COCOeval, img_ids: List[int], eval_imgs: List[np.ndarray] +): img_ids_np, eval_imgs_np = merge(img_ids, eval_imgs) img_ids_lst = list(img_ids_np) eval_imgs_list = list(eval_imgs_np.flatten()) diff --git a/avalanche/evaluation/metrics/detection_evaluators/lvis_evaluator.py b/avalanche/evaluation/metrics/detection_evaluators/lvis_evaluator.py index 0428e90a4..e38833a3e 100644 --- a/avalanche/evaluation/metrics/detection_evaluators/lvis_evaluator.py +++ b/avalanche/evaluation/metrics/detection_evaluators/lvis_evaluator.py @@ -28,10 +28,7 @@ ) -class LvisEvaluator( - DetectionEvaluator[ - Dict[str, LVISEval], - TCommonDetectionOutput]): +class LvisEvaluator(DetectionEvaluator[Dict[str, LVISEval], TCommonDetectionOutput]): """ Defines an evaluator for the LVIS dataset. @@ -68,8 +65,9 @@ def synchronize_between_processes(self): my_rank = dist.get_rank() is_main_rank = my_rank == 0 if is_main_rank: - output: List[Dict[str, Any]] = \ - [None] * dist.get_world_size() # type: ignore + output: List[Dict[str, Any]] = [ + None + ] * dist.get_world_size() # type: ignore dist.gather_object( self.predictions, output, @@ -88,9 +86,9 @@ def synchronize_between_processes(self): else: return self.predictions, True - def evaluate(self, max_dets_per_image=None) -> Optional[ - Union[Dict[str, Any], Tuple[Dict[str, Any], Dict[str, LVISEval]]] - ]: + def evaluate( + self, max_dets_per_image=None + ) -> Optional[Union[Dict[str, Any], Tuple[Dict[str, Any], Dict[str, LVISEval]]]]: all_preds, main_process = self.synchronize_between_processes() if main_process: if max_dets_per_image is None: @@ -165,9 +163,7 @@ def prepare_for_lvis_detection(self, predictions: TCommonDetectionOutput): masks = masks > 0.5 rles = [ mask_util.encode( - np.array( - mask[0, :, :, np.newaxis], dtype=np.uint8, order="F" - ) + np.array(mask[0, :, :, np.newaxis], dtype=np.uint8, order="F") )[0] for mask in masks ] diff --git a/avalanche/evaluation/metrics/disk_usage.py b/avalanche/evaluation/metrics/disk_usage.py index 28c52b1da..a2a92149e 100644 --- a/avalanche/evaluation/metrics/disk_usage.py +++ b/avalanche/evaluation/metrics/disk_usage.py @@ -27,10 +27,7 @@ class DiskUsage(Metric[float]): """ def __init__( - self, - paths_to_monitor: Optional[ - Union[PathAlike, Sequence[PathAlike]] - ] = None + self, paths_to_monitor: Optional[Union[PathAlike, Sequence[PathAlike]]] = None ): """ Creates an instance of the standalone disk usage metric. diff --git a/avalanche/evaluation/metrics/forgetting_bwt.py b/avalanche/evaluation/metrics/forgetting_bwt.py index 659d6f785..9e5f79094 100644 --- a/avalanche/evaluation/metrics/forgetting_bwt.py +++ b/avalanche/evaluation/metrics/forgetting_bwt.py @@ -35,7 +35,7 @@ TResult_co = TypeVar("TResult_co", covariant=True) TResultKey_co = TypeVar("TResultKey_co", covariant=True) -TMetric = TypeVar('TMetric', bound=Metric) +TMetric = TypeVar("TMetric", bound=Metric) class Forgetting(Metric[Dict[int, float]]): @@ -121,9 +121,8 @@ def reset(self) -> None: class GenericExperienceForgetting( - PluginMetric[TResult_co], - Generic[TMetric, TResult_co, TResultKey_co], - ABC): + PluginMetric[TResult_co], Generic[TMetric, TResult_co, TResultKey_co], ABC +): """ The GenericExperienceForgetting metric, describing the change in a metric detected for a certain experience. 
The user should @@ -208,7 +207,7 @@ def update(self, k, v, initial=False): @abstractmethod def result_key(self, k: int) -> TResultKey_co: pass - + @abstractmethod def result(self) -> TResult_co: pass @@ -233,9 +232,7 @@ def after_eval_exp(self, strategy: "SupervisedTemplate") -> MetricResult: # update experience on which training just ended self._check_eval_exp_id() if self.train_exp_id == self.eval_exp_id: - self.update( - self.eval_exp_id, self.metric_result(strategy), initial=True - ) + self.update(self.eval_exp_id, self.metric_result(strategy), initial=True) else: # update other experiences # if experience has not been encountered in training @@ -243,7 +240,7 @@ def after_eval_exp(self, strategy: "SupervisedTemplate") -> MetricResult: self.update(self.eval_exp_id, self.metric_result(strategy)) return self._package_result(strategy) - + def after_eval(self, strategy: "SupervisedTemplate") -> MetricResult: self.eval_exp_id = -1 # reset the last experience ID return super().after_eval(strategy) @@ -273,22 +270,21 @@ def metric_update(self, strategy): @abstractmethod def metric_result(self, strategy): pass - + @abstractmethod def __str__(self): pass - + def _check_eval_exp_id(self): assert self.eval_exp_id >= 0, ( - 'The evaluation loop executed 0 iterations. ' - 'This is not suported while using this metric') + "The evaluation loop executed 0 iterations. " + "This is not suported while using this metric" + ) class ExperienceForgetting( - GenericExperienceForgetting[ - TaskAwareAccuracy, - Dict[int, float], - Optional[float]]): + GenericExperienceForgetting[TaskAwareAccuracy, Dict[int, float], Optional[float]] +): """ The ExperienceForgetting metric, describing the accuracy loss detected for a certain experience. @@ -343,10 +339,8 @@ def __str__(self): class GenericStreamForgetting( - GenericExperienceForgetting[ - TMetric, - float, - Optional[float]]): + GenericExperienceForgetting[TMetric, float, Optional[float]] +): """ The GenericStreamForgetting metric, describing the average evaluation change in the desired metric detected over all experiences observed @@ -411,7 +405,7 @@ def exp_result(self, k: int) -> Optional[float]: k: optional key from which compute forgetting. """ return self.result_key(k=k) - + def result_key(self, k: int) -> Optional[float]: """ Result for experience defined by a key. @@ -461,9 +455,7 @@ def _package_result(self, strategy: "SupervisedTemplate") -> MetricResult: phase_name, _ = phase_and_task(strategy) stream = stream_type(strategy.experience) - metric_name = "{}/{}_phase/{}_stream".format( - str(self), phase_name, stream - ) + metric_name = "{}/{}_phase/{}_stream".format(str(self), phase_name, stream) plot_x_position = strategy.clock.train_iterations return [MetricValue(self, metric_name, metric_value, plot_x_position)] @@ -508,10 +500,7 @@ def __str__(self): return "StreamForgetting" -def forgetting_metrics( - *, - experience=False, - stream=False) -> List[PluginMetric]: +def forgetting_metrics(*, experience=False, stream=False) -> List[PluginMetric]: """ Helper method that can be used to obtain the desired set of plugin metrics. @@ -608,7 +597,7 @@ def result(self) -> Dict[int, float]: Backward transfer will be returned for all keys encountered at least twice. - :return: A dictionary containing keys whose value has been + :return: A dictionary containing keys whose value has been updated at least twice. The associated value is the difference between the last and first value recorded for that key. 
""" diff --git a/avalanche/evaluation/metrics/forward_transfer.py b/avalanche/evaluation/metrics/forward_transfer.py index 90ef1a2e8..07a3906a8 100644 --- a/avalanche/evaluation/metrics/forward_transfer.py +++ b/avalanche/evaluation/metrics/forward_transfer.py @@ -26,7 +26,7 @@ TResult_co = TypeVar("TResult_co", covariant=True) TResultKey_co = TypeVar("TResultKey_co", covariant=True) -TMetric = TypeVar('TMetric', bound=Metric) +TMetric = TypeVar("TMetric", bound=Metric) class ForwardTransfer(Metric[Dict[int, float]]): @@ -78,7 +78,7 @@ def result_key(self, k: int) -> Optional[float]: :param k: the key for which returning forward transfer. :return: the difference between the key value after training on the - previous experience, and the key at random initialization. + previous experience, and the key at random initialization. It returns None if k has not been updated at least twice. """ assert k is not None @@ -92,7 +92,7 @@ def result(self) -> Dict[int, float]: """ Compute the forward transfer for all keys. - :return: a dictionary containing, for each key, + :return: a dictionary containing, for each key, the difference between the key value after training on the previous experience, and the key at random initialization. """ @@ -110,9 +110,8 @@ def reset(self) -> None: class GenericExperienceForwardTransfer( - PluginMetric[TResult_co], - Generic[TMetric, TResult_co, TResultKey_co], - ABC): + PluginMetric[TResult_co], Generic[TMetric, TResult_co, TResultKey_co], ABC +): """ The GenericExperienceForwardMetric metric, describing the forward transfer detected after a certain experience. The user should @@ -179,11 +178,11 @@ def update(self, k, v, initial=False): previous value. """ self.forward_transfer.update(k, v, initial=initial) - + @abstractmethod def result_key(self, k: int) -> TResultKey_co: pass - + @abstractmethod def result(self) -> TResult_co: pass @@ -231,9 +230,7 @@ def after_eval_iteration(self, strategy: "SupervisedTemplate") -> None: def after_eval_exp(self, strategy: "SupervisedTemplate") -> MetricResult: self._check_eval_exp_id() if self.at_init: - self.update( - self.eval_exp_id, self.metric_result(strategy), initial=True - ) + self.update(self.eval_exp_id, self.metric_result(strategy), initial=True) else: if self.train_exp_id == self.eval_exp_id - 1: self.update(self.eval_exp_id, self.metric_result(strategy)) @@ -251,9 +248,7 @@ def _package_result(self, strategy: "SupervisedTemplate") -> MetricResult: metric_name = get_metric_name(self, strategy, add_experience=True) plot_x_position = strategy.clock.train_iterations - metric_values = [ - MetricValue(self, metric_name, result, plot_x_position) - ] + metric_values = [MetricValue(self, metric_name, result, plot_x_position)] return metric_values @abstractmethod @@ -270,8 +265,9 @@ def __str__(self): def _check_eval_exp_id(self): assert self.eval_exp_id >= 0, ( - 'The evaluation loop executed 0 iterations. ' - 'This is not suported while using this metric') + "The evaluation loop executed 0 iterations. " + "This is not suported while using this metric" + ) class ExperienceForwardTransfer(GenericExperienceForwardTransfer): @@ -306,7 +302,7 @@ def result(self) -> Dict[int, float]: See :class `ForwardTransfer` documentation for more detailed information. - :return: a dictionary containing, for each key, + :return: a dictionary containing, for each key, the difference between the key value after training on the previous experience, and the key at random initialization. 
""" @@ -323,10 +319,8 @@ def __str__(self): class GenericStreamForwardTransfer( - GenericExperienceForwardTransfer[ - TMetric, - float, - Optional[float]]): + GenericExperienceForwardTransfer[TMetric, float, Optional[float]] +): """ The GenericStreamForwardTransfer metric, describing the average evaluation forward transfer detected over all experiences observed during training. @@ -408,17 +402,13 @@ def before_eval(self, strategy) -> None: def after_eval_exp(self, strategy: "SupervisedTemplate") -> None: self._check_eval_exp_id() if self.at_init: - self.update( - self.eval_exp_id, self.metric_result(strategy), initial=True - ) + self.update(self.eval_exp_id, self.metric_result(strategy), initial=True) else: if self.train_exp_id == self.eval_exp_id - 1: self.update(self.eval_exp_id, self.metric_result(strategy)) exp_forward_transfer = self.exp_result(k=self.eval_exp_id) if exp_forward_transfer is not None: - self.stream_forward_transfer.update( - exp_forward_transfer, weight=1 - ) + self.stream_forward_transfer.update(exp_forward_transfer, weight=1) def after_eval(self, strategy: "SupervisedTemplate") -> "MetricResult": super().after_eval(strategy) @@ -430,9 +420,7 @@ def _package_result(self, strategy: "SupervisedTemplate") -> MetricResult: phase_name, _ = phase_and_task(strategy) stream = stream_type(strategy.experience) - metric_name = "{}/{}_phase/{}_stream".format( - str(self), phase_name, stream - ) + metric_name = "{}/{}_phase/{}_stream".format(str(self), phase_name, stream) plot_x_position = strategy.clock.train_iterations return [MetricValue(self, metric_name, metric_value, plot_x_position)] @@ -474,10 +462,7 @@ def __str__(self): return "StreamForwardTransfer" -def forward_transfer_metrics( - *, - experience=False, - stream=False) -> List[PluginMetric]: +def forward_transfer_metrics(*, experience=False, stream=False) -> List[PluginMetric]: """ Helper method that can be used to obtain the desired set of plugin metrics. 
diff --git a/avalanche/evaluation/metrics/gpu_usage.py b/avalanche/evaluation/metrics/gpu_usage.py index 4b97d63ed..2e5dae7a7 100644 --- a/avalanche/evaluation/metrics/gpu_usage.py +++ b/avalanche/evaluation/metrics/gpu_usage.py @@ -92,9 +92,7 @@ def _f(self): gpu_perc = GPUtil.getGPUs()[self.gpu_id].load * 100 if gpu_perc > self.max_usage: self.max_usage = gpu_perc - time.sleep( - self.every - ((time.monotonic() - start_time) % self.every) - ) + time.sleep(self.every - ((time.monotonic() - start_time) % self.every)) def start_thread(self): if self.gpu_id is not None: @@ -136,10 +134,7 @@ def __init__(self, gpu_id, every, reset_at, emit_at, mode): self.gpu_id = gpu_id super(GPUPluginMetric, self).__init__( - MaxGPU(gpu_id, every), - reset_at=reset_at, - emit_at=emit_at, - mode=mode + MaxGPU(gpu_id, every), reset_at=reset_at, emit_at=emit_at, mode=mode ) def update(self, strategy): diff --git a/avalanche/evaluation/metrics/images_samples.py b/avalanche/evaluation/metrics/images_samples.py index f689f03c4..8b8fbe964 100644 --- a/avalanche/evaluation/metrics/images_samples.py +++ b/avalanche/evaluation/metrics/images_samples.py @@ -80,9 +80,7 @@ def result(self) -> List[Tensor]: def __str__(self): return "images" - def _make_grid_sample( - self, strategy: "SupervisedTemplate" - ) -> "MetricResult": + def _make_grid_sample(self, strategy: "SupervisedTemplate") -> "MetricResult": self._load_sorted_images(strategy) return [ @@ -95,9 +93,7 @@ def _make_grid_sample( add_task=True, ), value=TensorImage( - make_grid( - list(self.images), normalize=False, nrow=self.n_cols - ) + make_grid(list(self.images), normalize=False, nrow=self.n_cols) ), x_plot=strategy.clock.train_iterations, ) @@ -139,9 +135,7 @@ def _sort_images(self, labels: List[int], tasks: List[int]): ) ] - def _make_dataloader( - self, data: AvalancheDataset, mb_size: int - ) -> DataLoader: + def _make_dataloader(self, data: AvalancheDataset, mb_size: int) -> DataLoader: if self.disable_augmentations: data = data.replace_current_transform_group(_MaybeToTensor()) collate_fn = data.collate_fn if hasattr(data, "collate_fn") else None @@ -195,15 +189,11 @@ def images_samples_metrics( plugins: List[ImagesSamplePlugin] = [] if on_eval: plugins.append( - ImagesSamplePlugin( - mode="eval", n_rows=n_rows, n_cols=n_cols, group=group - ) + ImagesSamplePlugin(mode="eval", n_rows=n_rows, n_cols=n_cols, group=group) ) if on_train: plugins.append( - ImagesSamplePlugin( - mode="train", n_rows=n_rows, n_cols=n_cols, group=group - ) + ImagesSamplePlugin(mode="train", n_rows=n_rows, n_cols=n_cols, group=group) ) return plugins diff --git a/avalanche/evaluation/metrics/labels_repartition.py b/avalanche/evaluation/metrics/labels_repartition.py index c765c022c..5a8cc65c7 100644 --- a/avalanche/evaluation/metrics/labels_repartition.py +++ b/avalanche/evaluation/metrics/labels_repartition.py @@ -40,11 +40,7 @@ def __init__(self): def reset(self) -> None: self.task2label2count = defaultdict(Counter) - def update( - self, - tasks: Sequence[int], - labels: Sequence[int] - ): + def update(self, tasks: Sequence[int], labels: Sequence[int]): for task, label in zip(tasks, labels): self.task2label2count[task][label] += 1 @@ -64,9 +60,7 @@ def result(self) -> Dict[int, Dict[int, int]]: } -LabelsRepartitionImageCreator = Callable[ - [Dict[int, List[int]], List[int]], Figure -] +LabelsRepartitionImageCreator = Callable[[Dict[int, List[int]], List[int]], Figure] class LabelsRepartitionPlugin(GenericPluginMetric[Figure, LabelsRepartition]): @@ -94,7 +88,7 @@ def __init__( 
emit_reset_at: Literal["stream", "experience", "epoch"] = "epoch", ): ... - + @overload def __init__( self, @@ -126,9 +120,7 @@ def __init__( self.mode = mode self.image_creator = image_creator self.steps = [0] - self.task2label2counts: Dict[int, Dict[int, List[int]]] = defaultdict( - dict - ) + self.task2label2counts: Dict[int, Dict[int, List[int]]] = defaultdict(dict) self.strategy: Optional[SupervisedTemplate] = None def before_training(self, strategy: "SupervisedTemplate"): @@ -147,25 +139,19 @@ def reset(self) -> None: def update(self, strategy: "SupervisedTemplate"): assert strategy.experience is not None - if self.mode == 'train': - if strategy.clock.train_exp_epochs and \ - self.emit_reset_at != "epoch": + if self.mode == "train": + if strategy.clock.train_exp_epochs and self.emit_reset_at != "epoch": # Do not update after first epoch return - - self._metric.update( - strategy.mb_task_id.tolist(), - strategy.mb_y.tolist() - ) + + self._metric.update(strategy.mb_task_id.tolist(), strategy.mb_y.tolist()) if hasattr(strategy.experience, "classes_order"): - self._metric.update_order( - strategy.experience.classes_order - ) + self._metric.update_order(strategy.experience.classes_order) def _package_result(self, strategy: "SupervisedTemplate") -> "MetricResult": assert strategy.experience is not None - + self.steps.append(strategy.clock.train_iterations) task2label2count = self._metric.result() @@ -174,11 +160,11 @@ def _package_result(self, strategy: "SupervisedTemplate") -> "MetricResult": self.task2label2counts[task].setdefault( label, [0] * (len(self.steps) - 2) ).extend((count, count)) - + for task, label2counts in self.task2label2counts.items(): for label, counts in label2counts.items(): counts.extend([0] * (len(self.steps) - len(counts))) - + return [ MetricValue( self, diff --git a/avalanche/evaluation/metrics/loss.py b/avalanche/evaluation/metrics/loss.py index cd20d3c4b..c25490cf4 100644 --- a/avalanche/evaluation/metrics/loss.py +++ b/avalanche/evaluation/metrics/loss.py @@ -128,7 +128,7 @@ def result(self) -> Dict[int, float]: :return: A dictionary `{task_label: mean_loss}`. """ return {k: v.result() for k, v in self._mean_loss.items()} - + def result_task_label(self, task_label: int): """ Retrieves the running average loss per pattern for a specific task. @@ -158,9 +158,7 @@ def reset_task_label(self, task_label: int): class LossPluginMetric(GenericPluginMetric[float, LossMetric]): def __init__(self, reset_at, emit_at, mode): self._loss = LossMetric() - super(LossPluginMetric, self).__init__( - self._loss, reset_at, emit_at, mode - ) + super(LossPluginMetric, self).__init__(self._loss, reset_at, emit_at, mode) def reset(self) -> None: self._metric.reset() @@ -172,15 +170,10 @@ def update(self, strategy): self._loss.update(strategy.loss, patterns=len(strategy.mb_y)) -class LossPerTaskPluginMetric( - GenericPluginMetric[ - Dict[int, float], - TaskAwareLoss]): +class LossPerTaskPluginMetric(GenericPluginMetric[Dict[int, float], TaskAwareLoss]): def __init__(self, reset_at, emit_at, mode): self._loss = TaskAwareLoss() - super().__init__( - self._loss, reset_at, emit_at, mode - ) + super().__init__(self._loss, reset_at, emit_at, mode) def reset(self) -> None: self._metric.reset() @@ -236,9 +229,7 @@ def __init__(self): Creates an instance of the EpochLoss metric. 
""" - super(EpochLoss, self).__init__( - reset_at="epoch", emit_at="epoch", mode="train" - ) + super(EpochLoss, self).__init__(reset_at="epoch", emit_at="epoch", mode="train") def __str__(self): return "Loss_Epoch" @@ -307,12 +298,7 @@ def __str__(self): def loss_metrics( - *, - minibatch=False, - epoch=False, - epoch_running=False, - experience=False, - stream=False + *, minibatch=False, epoch=False, epoch_running=False, experience=False, stream=False ) -> List[LossPluginMetric]: """ Helper method that can be used to obtain the desired set of diff --git a/avalanche/evaluation/metrics/mac.py b/avalanche/evaluation/metrics/mac.py index 27e37c83a..550d10e25 100644 --- a/avalanche/evaluation/metrics/mac.py +++ b/avalanche/evaluation/metrics/mac.py @@ -135,9 +135,7 @@ def __init__(self): """ Creates an instance of the EpochMAC metric. """ - super(EpochMAC, self).__init__( - reset_at="epoch", emit_at="epoch", mode="train" - ) + super(EpochMAC, self).__init__(reset_at="epoch", emit_at="epoch", mode="train") def __str__(self): return "MAC_Epoch" diff --git a/avalanche/evaluation/metrics/mean_scores.py b/avalanche/evaluation/metrics/mean_scores.py index feb97c4aa..b82484eb4 100644 --- a/avalanche/evaluation/metrics/mean_scores.py +++ b/avalanche/evaluation/metrics/mean_scores.py @@ -33,7 +33,7 @@ from avalanche.evaluation.metric_results import MetricResult -TAggregation = TypeVar('TAggregation') +TAggregation = TypeVar("TAggregation") LabelCat = Literal["new", "old"] @@ -69,7 +69,6 @@ def result(self) -> Dict[TAggregation, float]: class PerClassMeanScores(MeanScores[int]): - def result(self) -> Dict[int, float]: return {label: m.result() for label, m in self.label2mean.items()} @@ -169,9 +168,7 @@ def __init__( super().__init__() self.mean_scores = MeanNewOldScores() self.image_creator = image_creator - self.label_cat2step2mean: Dict[ - LabelCat, Dict[int, float] - ] = defaultdict(dict) + self.label_cat2step2mean: Dict[LabelCat, Dict[int, float]] = defaultdict(dict) def reset(self) -> None: self.mean_scores.reset() @@ -183,9 +180,7 @@ def update_new_classes(self, strategy: "SupervisedTemplate"): ) def update(self, strategy: "SupervisedTemplate"): - self.mean_scores.update( - predicted_y=strategy.mb_output, true_y=strategy.mb_y - ) + self.mean_scores.update(predicted_y=strategy.mb_output, true_y=strategy.mb_y) def result(self) -> Dict[LabelCat, float]: return self.mean_scores.result() @@ -215,8 +210,7 @@ def _package_result(self, strategy: "SupervisedTemplate") -> "MetricResult": MetricValue( self, name=base_metric_name + f"/new_old_diff", - value=label_cat2mean_score["new"] - - label_cat2mean_score["old"], + value=label_cat2mean_score["new"] - label_cat2mean_score["old"], x_plot=num_it, ) ) @@ -254,9 +248,7 @@ def after_training_iteration(self, strategy: "SupervisedTemplate") -> None: self.update(strategy) super().after_training_iteration(strategy) - def after_training_epoch( - self, strategy: "SupervisedTemplate" - ) -> "MetricResult": + def after_training_epoch(self, strategy: "SupervisedTemplate") -> "MetricResult": if strategy.clock.train_exp_epochs == strategy.train_epochs - 1: return self._package_result(strategy) else: @@ -287,9 +279,7 @@ def mean_scores_metrics( *, on_train: bool = True, on_eval: bool = True, - image_creator: Optional[ - MeanScoresImageCreator - ] = default_mean_scores_image_creator, + image_creator: Optional[MeanScoresImageCreator] = default_mean_scores_image_creator, ) -> List[PluginMetric]: """ Helper to create plugins to show the scores of the true class, averaged by 
diff --git a/avalanche/evaluation/metrics/ram_usage.py b/avalanche/evaluation/metrics/ram_usage.py index af719a3ac..baa6bcc15 100644 --- a/avalanche/evaluation/metrics/ram_usage.py +++ b/avalanche/evaluation/metrics/ram_usage.py @@ -74,9 +74,7 @@ def _f(self): ram_usage = self._process_handle.memory_info().rss / 1024 / 1024 if ram_usage > self.max_usage: self.max_usage = ram_usage - time.sleep( - self.every - ((time.monotonic() - start_time) % self.every) - ) + time.sleep(self.every - ((time.monotonic() - start_time) % self.every)) def result(self) -> Optional[float]: """ @@ -116,10 +114,7 @@ def update(self): class RAMPluginMetric(GenericPluginMetric[float, MaxRAM]): def __init__(self, every, reset_at, emit_at, mode): - - super(RAMPluginMetric, self).__init__( - MaxRAM(every), reset_at, emit_at, mode - ) + super(RAMPluginMetric, self).__init__(MaxRAM(every), reset_at, emit_at, mode) def update(self, strategy): self._metric.update() diff --git a/avalanche/evaluation/metrics/timing.py b/avalanche/evaluation/metrics/timing.py index a8371072d..b9754a4ff 100644 --- a/avalanche/evaluation/metrics/timing.py +++ b/avalanche/evaluation/metrics/timing.py @@ -92,9 +92,7 @@ class TimePluginMetric(GenericPluginMetric[float, ElapsedTime]): def __init__(self, reset_at, emit_at, mode): self._time = ElapsedTime() - super(TimePluginMetric, self).__init__( - self._time, reset_at, emit_at, mode - ) + super(TimePluginMetric, self).__init__(self._time, reset_at, emit_at, mode) def update(self, strategy): self._time.update() @@ -141,9 +139,7 @@ def __init__(self): Creates an instance of the epoch time metric. """ - super(EpochTime, self).__init__( - reset_at="epoch", emit_at="epoch", mode="train" - ) + super(EpochTime, self).__init__(reset_at="epoch", emit_at="epoch", mode="train") def before_training_epoch(self, strategy): super().before_training_epoch(strategy) @@ -178,9 +174,7 @@ def before_training_epoch(self, strategy): self._time_mean.reset() self._time.update() - def after_training_iteration( - self, strategy: "SupervisedTemplate" - ) -> MetricResult: + def after_training_iteration(self, strategy: "SupervisedTemplate") -> MetricResult: super().after_training_iteration(strategy) self._time_mean.update(self._time.result()) self._time.reset() @@ -244,12 +238,7 @@ def __str__(self): def timing_metrics( - *, - minibatch=False, - epoch=False, - epoch_running=False, - experience=False, - stream=False + *, minibatch=False, epoch=False, epoch_running=False, experience=False, stream=False ) -> List[TimePluginMetric]: """ Helper method that can be used to obtain the desired set of diff --git a/avalanche/evaluation/metrics/topk_acc.py b/avalanche/evaluation/metrics/topk_acc.py index b2af79223..5ad7af1da 100644 --- a/avalanche/evaluation/metrics/topk_acc.py +++ b/avalanche/evaluation/metrics/topk_acc.py @@ -46,8 +46,9 @@ def __init__(self, top_k: int): self._topk_acc_dict: Dict[int, Mean] = defaultdict(Mean) self.top_k: int = top_k - self.__torchmetrics_requires_task = \ - version.parse(torchmetrics.__version__) >= version.parse('0.11.0') + self.__torchmetrics_requires_task = version.parse( + torchmetrics.__version__ + ) >= version.parse("0.11.0") @torch.no_grad() def update( @@ -85,41 +86,33 @@ def update( if isinstance(task_labels, int): total_patterns = len(true_y) self._topk_acc_dict[task_labels].update( - self._compute_topk_acc(predicted_y, true_y, top_k=self.top_k), - total_patterns + self._compute_topk_acc(predicted_y, true_y, top_k=self.top_k), + total_patterns, ) elif isinstance(task_labels, Tensor): for 
pred, true, t in zip(predicted_y, true_y, task_labels): self._topk_acc_dict[int(t)].update( - self._compute_topk_acc(pred, true, top_k=self.top_k), - 1 + self._compute_topk_acc(pred, true, top_k=self.top_k), 1 ) else: raise ValueError( f"Task label type: {type(task_labels)}, " f"expected int/float or Tensor" ) - + def _compute_topk_acc(self, pred, gt, top_k): if self.__torchmetrics_requires_task: num_classes = int(torch.max(torch.as_tensor(gt))) + 1 pred_t = torch.as_tensor(pred) if len(pred_t.shape) > 1: num_classes = max(num_classes, pred_t.shape[1]) - + return accuracy( - pred, - gt, - task="multiclass", - num_classes=num_classes, - top_k=self.top_k + pred, gt, task="multiclass", num_classes=num_classes, top_k=self.top_k ) else: - return accuracy( - pred, - gt, - top_k=self.top_k) - + return accuracy(pred, gt, top_k=self.top_k) + def result_task_label(self, task_label: int) -> Dict[int, float]: """ Retrieves the running top-k accuracy. @@ -128,7 +121,7 @@ def result_task_label(self, task_label: int) -> Dict[int, float]: :param task_label: if None, return the entire dictionary of accuracies for each task. Otherwise return the dictionary - + :return: A dictionary `{task_label: topk_accuracy}`, where the accuracy is a float value between 0 and 1. """ @@ -145,7 +138,7 @@ def result(self) -> Dict[int, float]: where each value is a float value between 0 and 1. """ return {k: v.result() for k, v in self._topk_acc_dict.items()} - + def reset(self, task_label=None) -> None: """ Resets the metric. @@ -161,20 +154,14 @@ def reset(self, task_label=None) -> None: self._topk_acc_dict[task_label].reset() -class TopkAccuracyPluginMetric( - GenericPluginMetric[ - Dict[int, float], - TopkAccuracy]): +class TopkAccuracyPluginMetric(GenericPluginMetric[Dict[int, float], TopkAccuracy]): """ Base class for all top-k accuracies plugin metrics """ def __init__(self, reset_at, emit_at, mode, top_k): super(TopkAccuracyPluginMetric, self).__init__( - TopkAccuracy(top_k=top_k), - reset_at=reset_at, - emit_at=emit_at, - mode=mode + TopkAccuracy(top_k=top_k), reset_at=reset_at, emit_at=emit_at, mode=mode ) def reset(self, strategy=None) -> None: @@ -315,7 +302,7 @@ def __init__(self, top_k): self._current_experience = 0 self.top_k = top_k - def after_training_exp(self, strategy): + def after_training_exp(self, strategy): self._current_experience = strategy.experience.current_experience # Reset average after learning from a new experience self.reset(strategy) diff --git a/avalanche/logging/base_logger.py b/avalanche/logging/base_logger.py index 4ce79ad77..20e946f56 100644 --- a/avalanche/logging/base_logger.py +++ b/avalanche/logging/base_logger.py @@ -31,12 +31,13 @@ def __init__(self): super().__init__() if not DistributedHelper.is_main_process: - raise RuntimeError( - 'You are creating a logger in a non-main process during a ' - 'distributed training session. ' - 'Jump to this error for an example on how to fix this.') - + raise RuntimeError( + "You are creating a logger in a non-main process during a " + "distributed training session. " + "Jump to this error for an example on how to fix this." + ) + # You have to create the loggers in the main process only. Otherwise, # metrics will end up duplicated in your log files and consistency # errors may arise.
When creating the EvaluationPlugin in a diff --git a/avalanche/logging/csv_logger.py b/avalanche/logging/csv_logger.py index 3299103cb..9c04e2b69 100644 --- a/avalanche/logging/csv_logger.py +++ b/avalanche/logging/csv_logger.py @@ -75,9 +75,7 @@ def __init__(self, log_folder=None): self.training_file = open( os.path.join(self.log_folder, "training_results.csv"), "w" ) - self.eval_file = open( - os.path.join(self.log_folder, "eval_results.csv"), "w" - ) + self.eval_file = open(os.path.join(self.log_folder, "eval_results.csv"), "w") os.makedirs(self.log_folder, exist_ok=True) # current training experience id @@ -248,6 +246,4 @@ def close(self): self.eval_file.close() -__all__ = [ - 'CSVLogger' -] +__all__ = ["CSVLogger"] diff --git a/avalanche/logging/interactive_logging.py b/avalanche/logging/interactive_logging.py index 63c66366e..731d0d8c8 100644 --- a/avalanche/logging/interactive_logging.py +++ b/avalanche/logging/interactive_logging.py @@ -157,14 +157,12 @@ def _end_progress(self): def __getstate__(self): out = super().__getstate__() - del out['_pbar'] + del out["_pbar"] return out def __setstate__(self, state): - state['_pbar'] = None + state["_pbar"] = None super().__setstate__(state) -__all__ = [ - 'InteractiveLogger' -] +__all__ = ["InteractiveLogger"] diff --git a/avalanche/logging/text_logging.py b/avalanche/logging/text_logging.py index f8151d420..670c20f0d 100644 --- a/avalanche/logging/text_logging.py +++ b/avalanche/logging/text_logging.py @@ -108,10 +108,7 @@ def after_training_epoch( **kwargs, ): super().after_training_epoch(strategy, metric_values, **kwargs) - print( - f"Epoch {strategy.clock.train_exp_epochs} ended.", - file=self.file - ) + print(f"Epoch {strategy.clock.train_exp_epochs} ended.", file=self.file) self.print_current_metrics() self.metric_vals = {} @@ -128,14 +125,14 @@ def after_eval_exp( print( f"> Eval on experience {exp_id} " f"from {stream_type(strategy.experience)} stream ended.", - file=self.file + file=self.file, ) else: print( f"> Eval on experience {exp_id} (Task " f"{task_id}) " f"from {stream_type(strategy.experience)} stream ended.", - file=self.file + file=self.file, ) self.print_current_metrics() self.metric_vals = {} @@ -188,38 +185,38 @@ def _on_exp_start(self, strategy: "SupervisedTemplate"): "-- Starting {} on experience {} from {} stream --".format( action_name, exp_id, stream ), - file=self.file + file=self.file, ) else: print( "-- Starting {} on experience {} (Task {}) from {}" " stream --".format(action_name, exp_id, task_id, stream), - file=self.file + file=self.file, ) def __getstate__(self): # Implementation of pickle serialization out = self.__dict__.copy() - fobject_serialized_def = TextLogger._fobj_serialize(out['file']) + fobject_serialized_def = TextLogger._fobj_serialize(out["file"]) if fobject_serialized_def is not None: - out['file'] = fobject_serialized_def + out["file"] = fobject_serialized_def else: warnings.warn( - f'Cannot properly serialize the file object used for text ' - f'logging: {out["file"]}.') + f"Cannot properly serialize the file object used for text " + f'logging: {out["file"]}.' 
+ ) return out def __setstate__(self, state): # Implementation of pickle deserialization - fobj = TextLogger._fobj_deserialize(state['file']) + fobj = TextLogger._fobj_deserialize(state["file"]) if fobj is not None: - state['file'] = fobj + state["file"] = fobj else: - raise RuntimeError( - f'Cannot deserialize file object {state["file"]}') + raise RuntimeError(f'Cannot deserialize file object {state["file"]}') self.__dict__ = state self.on_checkpoint_resume() @@ -228,34 +225,36 @@ def on_checkpoint_resume(self): utc_dt = datetime.datetime.now(datetime.timezone.utc) # UTC time now_w_timezone = utc_dt.astimezone() # local time print( - f'[{self.__class__.__name__}] Resuming from checkpoint.', - f'Current time is', + f"[{self.__class__.__name__}] Resuming from checkpoint.", + f"Current time is", now_w_timezone.strftime("%Y-%m-%d %H:%M:%S %z"), - file=self.file + file=self.file, ) @staticmethod def _fobj_serialize(file_object) -> Optional[str]: is_notebook = False try: - is_notebook = file_object.__class__.__name__ == 'OutStream' and\ - 'ipykernel' in file_object.__class__.__module__ + is_notebook = ( + file_object.__class__.__name__ == "OutStream" + and "ipykernel" in file_object.__class__.__module__ + ) except Exception: pass if is_notebook: # Running in a notebook out_file_path = None - stream_name = 'stdout' + stream_name = "stdout" else: # Standard file object out_file_path = TextLogger._file_get_real_path(file_object) stream_name = TextLogger._file_get_stream(file_object) - + if out_file_path is not None: - return 'path:' + str(out_file_path) + return "path:" + str(out_file_path) elif stream_name is not None: - return 'stream:' + stream_name + return "stream:" + stream_name else: return None @@ -265,14 +264,14 @@ def _fobj_deserialize(file_def: str) -> Optional[TextIO]: # Custom object (managed by pickle or dill library) return file_def - if file_def.startswith('path:'): - file_def = _remove_prefix(file_def, 'path:') - return open(file_def, 'a') - elif file_def.startswith('stream:'): - file_def = _remove_prefix(file_def, 'stream:') - if file_def == 'stdout': + if file_def.startswith("path:"): + file_def = _remove_prefix(file_def, "path:") + return open(file_def, "a") + elif file_def.startswith("stream:"): + file_def = _remove_prefix(file_def, "stream:") + if file_def == "stdout": return sys.stdout - elif file_def == 'stderr': + elif file_def == "stderr": return sys.stderr return None @@ -280,11 +279,11 @@ def _fobj_deserialize(file_def: str) -> Optional[TextIO]: @staticmethod def _file_get_real_path(file_object) -> Optional[str]: try: - if hasattr(file_object, 'file'): + if hasattr(file_object, "file"): # Manage files created by tempfile file_object = file_object.file fobject_path = file_object.name - if fobject_path in ['<stdout>', '<stderr>']: + if fobject_path in ["<stdout>", "<stderr>"]: return None return fobject_path except AttributeError: @@ -293,19 +292,17 @@ def _file_get_real_path(file_object) -> Optional[str]: @staticmethod def _file_get_stream(file_object) -> Optional[str]: if file_object == sys.stdout or file_object == sys.__stdout__: - return 'stdout' + return "stdout" if file_object == sys.stderr or file_object == sys.__stderr__: - return 'stderr' + return "stderr" return None def _remove_prefix(text, prefix): if text.startswith(prefix): - return text[len(prefix):] + return text[len(prefix) :] return text # or whatever -__all__ = [ - 'TextLogger' -] +__all__ = ["TextLogger"] diff --git a/avalanche/logging/wandb_logger.py b/avalanche/logging/wandb_logger.py index 5ea49336b..00120dbdc 100644 ---
a/avalanche/logging/wandb_logger.py +++ b/avalanche/logging/wandb_logger.py @@ -125,13 +125,13 @@ def before_run(self): if self.init_kwargs is None: self.init_kwargs = dict() - run_id = self.init_kwargs.get('id', None) + run_id = self.init_kwargs.get("id", None) if run_id is None: run_id = os.environ.get("WANDB_RUN_ID", None) if run_id is None: run_id = self.wandb.util.generate_id() - self.init_kwargs['id'] = run_id + self.init_kwargs["id"] = run_id self.wandb.init(**self.init_kwargs) self.wandb.run._label(repo="Avalanche") @@ -164,8 +164,15 @@ def log_single_metric(self, name, value, x_plot): if not isinstance( value, - (Image, TensorImage, Tensor, Figure, float, int, - self.wandb.viz.CustomChart), + ( + Image, + TensorImage, + Tensor, + Figure, + float, + int, + self.wandb.viz.CustomChart, + ), ): # Unsupported type return @@ -179,15 +186,11 @@ def log_single_metric(self, name, value, x_plot): {name: self.wandb.Histogram(np_histogram=value)}, step=self.step ) - elif isinstance( - value, (float, int, Figure, self.wandb.viz.CustomChart) - ): + elif isinstance(value, (float, int, Figure, self.wandb.viz.CustomChart)): self.wandb.log({name: value}, step=self.step) elif isinstance(value, TensorImage): - self.wandb.log( - {name: self.wandb.Image(array(value))}, step=self.step - ) + self.wandb.log({name: self.wandb.Image(array(value))}, step=self.step) elif name.startswith("WeightCheckpoint"): if self.log_artifacts: @@ -212,21 +215,19 @@ def log_single_metric(self, name, value, x_plot): def __getstate__(self): state = self.__dict__.copy() - if 'wandb' in state: - del state['wandb'] + if "wandb" in state: + del state["wandb"] return state def __setstate__(self, state): - print('[W&B logger] Resuming from checkpoint...') + print("[W&B logger] Resuming from checkpoint...") self.__dict__ = state if self.init_kwargs is None: self.init_kwargs = dict() - self.init_kwargs['resume'] = 'allow' + self.init_kwargs["resume"] = "allow" self.wandb = None self.before_run() -__all__ = [ - "WandBLogger" -] +__all__ = ["WandBLogger"] diff --git a/avalanche/models/batch_renorm.py b/avalanche/models/batch_renorm.py index ceca879e7..98ed256b1 100644 --- a/avalanche/models/batch_renorm.py +++ b/avalanche/models/batch_renorm.py @@ -71,13 +71,14 @@ def __init__( self.d_max = d_max def forward(self, x): - device = self.gamma.device - self.r_max = self.r_max if isinstance(self.r_max, float)\ - else self.r_max.to(device) - self.d_max = self.d_max if isinstance(self.d_max, float)\ - else self.d_max.to(device) + self.r_max = ( + self.r_max if isinstance(self.r_max, float) else self.r_max.to(device) + ) + self.d_max = ( + self.d_max if isinstance(self.d_max, float) else self.d_max.to(device) + ) batch_ch_mean = torch.mean(x, dim=(0, 2, 3), keepdim=True).to(device) batch_ch_std = torch.sqrt( @@ -101,8 +102,7 @@ def forward(self, x): ) d = ( torch.clamp( - (batch_ch_mean - self.running_avg_mean) - / self.running_avg_std, + (batch_ch_mean - self.running_avg_mean) / self.running_avg_std, -self.d_max, self.d_max, ) @@ -127,7 +127,6 @@ def forward(self, x): ) else: - x = (x - self.running_avg_mean) / self.running_avg_std x = self.gamma * x + self.beta diff --git a/avalanche/models/bic_model.py b/avalanche/models/bic_model.py index a58b0b83f..e8de493a5 100644 --- a/avalanche/models/bic_model.py +++ b/avalanche/models/bic_model.py @@ -3,18 +3,18 @@ class BiasLayer(torch.nn.Module): """Bias layers with alpha and beta parameters - + Bias layers used in Bias Correction (BiC) plugin. - "Wu, Yue, et al. "Large scale incremental learning." 
Proceedings - of the IEEE/CVF Conference on Computer Vision and Pattern + "Wu, Yue, et al. "Large scale incremental learning." Proceedings + of the IEEE/CVF Conference on Computer Vision and Pattern Recognition. 2019" """ def __init__(self, device, clss): """ :param device: device used by the main model. 'cpu' or 'cuda' - :param clss: list of classes of the current layer. This are use - to identify the columns which are multiplied by the Bias + :param clss: list of classes of the current layer. This are use + to identify the columns which are multiplied by the Bias correction Layer. """ super().__init__() diff --git a/avalanche/models/dynamic_modules.py b/avalanche/models/dynamic_modules.py index 0646d67c7..415cafe2e 100644 --- a/avalanche/models/dynamic_modules.py +++ b/avalanche/models/dynamic_modules.py @@ -78,7 +78,7 @@ def eval_adaptation(self, experience: CLExperience): def _adaptation_device(self): """ The device to use when expanding (or otherwise adapting) - the model. Defaults to the current device of the fist + the model. Defaults to the current device of the fist parameter listed using :meth:`parameters`. """ return next(self.parameters()).device @@ -137,9 +137,7 @@ def train_adaptation(self, experience: CLExperience): set(task_labels) ) - def forward( - self, x: torch.Tensor, task_labels: torch.Tensor - ) -> torch.Tensor: + def forward(self, x: torch.Tensor, task_labels: torch.Tensor) -> torch.Tensor: """compute the output given the input `x` and task labels. :param x: @@ -164,16 +162,13 @@ def forward( x_task = x[task_mask] out_task = self.forward_single_task(x_task, task.item()) assert len(out_task.shape) == 2, ( - "multi-head assumes mini-batches of 2 dimensions " - "" + "multi-head assumes mini-batches of 2 dimensions " "" ) n_labels_head = out_task.shape[1] out[task_mask, :n_labels_head] = out_task return out - def forward_single_task( - self, x: torch.Tensor, task_label: int - ) -> torch.Tensor: + def forward_single_task(self, x: torch.Tensor, task_label: int) -> torch.Tensor: """compute the output given the input `x` and task label. 
:param x: @@ -246,9 +241,8 @@ def adaptation(self, experience: CLExperience): if old_nclasses != new_nclasses: # expand active_units mask old_act_units = self.active_units self.active_units = torch.zeros( - new_nclasses, - dtype=torch.int8, - device=device) + new_nclasses, dtype=torch.int8, device=device + ) self.active_units[: old_act_units.shape[0]] = old_act_units # update with new active classes if self.training: @@ -369,16 +363,12 @@ def adaptation(self, experience: CLExperience): # head adaptation if tid not in self.classifiers: # create new head new_head = IncrementalClassifier( - self.in_features, - self.starting_out_features, - masking=False + self.in_features, self.starting_out_features, masking=False ).to(device) self.classifiers[tid] = new_head au_init = torch.zeros( - self.starting_out_features, - dtype=torch.int8, - device=device + self.starting_out_features, dtype=torch.int8, device=device ) self.register_buffer(f"active_units_T{tid}", au_init) @@ -406,13 +396,9 @@ def adaptation(self, experience: CLExperience): if old_nunits != new_nclasses: # expand active_units mask old_act_units = self._buffers[au_name] self._buffers[au_name] = torch.zeros( - new_nclasses, - dtype=torch.int8, - device=device + new_nclasses, dtype=torch.int8, device=device ) - self._buffers[au_name][ - : old_act_units.shape[0] - ] = old_act_units + self._buffers[au_name][: old_act_units.shape[0]] = old_act_units # update with new active classes if self.training: self._buffers[au_name][curr_classes] = 1 @@ -435,9 +421,8 @@ def forward_single_task(self, x, task_label): if oldsize < nunits: # we have to update the mask old_mask = self._buffers[au_name] self._buffers[au_name] = torch.zeros( - nunits, - dtype=torch.int8, - device=device) + nunits, dtype=torch.int8, device=device + ) self._buffers[au_name][:oldsize] = old_mask curr_au = self._buffers[au_name] out[..., torch.logical_not(curr_au)] = self.mask_value diff --git a/avalanche/models/dynamic_optimizers.py b/avalanche/models/dynamic_optimizers.py index a20ee2dc0..b2c9e5a54 100644 --- a/avalanche/models/dynamic_optimizers.py +++ b/avalanche/models/dynamic_optimizers.py @@ -48,12 +48,7 @@ def reset_optimizer(optimizer, model): return optimized_param_id -def update_optimizer( - optimizer, - new_params, - optimized_params, - reset_state=False -): +def update_optimizer(optimizer, new_params, optimized_params, reset_state=False): """Update the optimizer by adding new parameters, removing removed parameters, and adding new parameters to the optimizer, for instance after model has been adapted @@ -63,7 +58,7 @@ def update_optimizer( Newly added parameters are added by default to parameter group 0 :param new_params: Dict (name, param) of new parameters - :param optimized_params: Dict (name, param) of + :param optimized_params: Dict (name, param) of currently optimized parameters (returned by reset_optimizer) :param reset_state: Wheter to reset the optimizer's state (i.e momentum). Defaults to False. 
@@ -87,8 +82,9 @@ def update_optimizer( optimizer.state[new_p] = {} break if not found: - raise Exception(f"Parameter {key} expected but " - "not found in the optimizer") + raise Exception( + f"Parameter {key} expected but " "not found in the optimizer" + ) # Remove parameters that are not here anymore # This should not happend in most use case @@ -105,11 +101,12 @@ def update_optimizer( optimized_params.pop(key) break if not found: - raise Exception(f"Parameter {key} expected but " - "not found in the optimizer") + raise Exception( + f"Parameter {key} expected but " "not found in the optimizer" + ) for i, idx_list in enumerate(keys_to_remove): - for (j, p) in sorted(idx_list, key=lambda x: x[0], reverse=True): + for j, p in sorted(idx_list, key=lambda x: x[0], reverse=True): del optimizer.param_groups[i]["params"][j] if p in optimizer.state: optimizer.state.pop(p) diff --git a/avalanche/models/expert_gate.py b/avalanche/models/expert_gate.py index 06d8ed6b6..6746d6c7b 100644 --- a/avalanche/models/expert_gate.py +++ b/avalanche/models/expert_gate.py @@ -18,15 +18,14 @@ def AE_loss(target, reconstruction): - """Calculates the MSE loss for the autoencoder by comparing the + """Calculates the MSE loss for the autoencoder by comparing the reconstruction to the pre-processed input. :param target: the target for the autoencoder :param reconstruction: output of the autoencoder :return: mean squared error loss between the target and reconstruction """ - reconstruction_loss = mse_loss( - input=reconstruction, target=target, reduction="sum") + reconstruction_loss = mse_loss(input=reconstruction, target=target, reduction="sum") return reconstruction_loss @@ -35,36 +34,39 @@ class ExpertAutoencoder(nn.Module): for the incoming data. """ - def __init__(self, - shape, - latent_dim, - device, - arch="alexnet", - pretrained_flag=True, - output_layer_name="features"): + def __init__( + self, + shape, + latent_dim, + device, + arch="alexnet", + pretrained_flag=True, + output_layer_name="features", + ): """ :param shape: shape of the input layer :param latent_dim: size of the autoencoder's latent dimension :param device: gpu or cpu - :param arch: the architecture to use from torchvision.models, + :param arch: the architecture to use from torchvision.models, defaults to "alexnet" - :param pretrained_flag: determines if torchvision model is pre-trained, + :param pretrained_flag: determines if torchvision model is pre-trained, defaults to True - :param output_layer_name: output layer of the feature backbone, + :param output_layer_name: output layer of the feature backbone, defaults to "features" """ super().__init__() - # Select pretrained AlexNet for preprocessing input - base_template = (models.__dict__[arch]( - weights=('AlexNet_Weights.IMAGENET1K_V1' - if pretrained_flag - else 'AlexNet_Weights.NONE')) - .to(device)) + # Select pretrained AlexNet for preprocessing input + base_template = models.__dict__[arch]( + weights=( + "AlexNet_Weights.IMAGENET1K_V1" + if pretrained_flag + else "AlexNet_Weights.NONE" + ) + ).to(device) - self.feature_module = FeatureExtractorBackbone( - base_template, output_layer_name) + self.feature_module = FeatureExtractorBackbone(base_template, output_layer_name) self.feature_module.to(device) @@ -79,19 +81,15 @@ def __init__(self, # Encoder Linear -> ReLU flattened_size = torch.Size(shape).numel() self.encoder = nn.Sequential( - Flatten(), - nn.Linear(flattened_size, latent_dim), - nn.ReLU() + Flatten(), nn.Linear(flattened_size, latent_dim), nn.ReLU() ).to(device) # Decoder 
Linear -> Sigmoid self.decoder = nn.Sequential( - nn.Linear(latent_dim, flattened_size), - nn.Sigmoid() + nn.Linear(latent_dim, flattened_size), nn.Sigmoid() ).to(device) def forward(self, x): - # Preprocessing step x = x.to(self.device) x = self.feature_module(x) @@ -107,24 +105,21 @@ def forward(self, x): class ExpertModel(nn.Module): - """The expert classifier which sits behind the autoencoder. - Each expert classifieris usually a pre-trained AlexNet fine-tuned - on a specific task. The final classification layer is replaced and + """The expert classifier which sits behind the autoencoder. + Each expert classifieris usually a pre-trained AlexNet fine-tuned + on a specific task. The final classification layer is replaced and sized based on the number of classes for a task. """ - def __init__(self, - num_classes, - arch, - device, - pretrained_flag, - provided_template=None): + def __init__( + self, num_classes, arch, device, pretrained_flag, provided_template=None + ): """ :param num_classes: number of classes this expert model will classify :param arch: the architecture to use from torchvision.models :param device: gpu or cpu - :param pretrained_flag: determines if torchvision model is pre-trained - :param provided_template: the expert model to copy the backbone from, + :param pretrained_flag: determines if torchvision model is pre-trained + :param provided_template: the expert model to copy the backbone from, defaults to None """ super().__init__() @@ -133,33 +128,36 @@ def __init__(self, self.num_classes = num_classes # Select pretrained AlexNet for feature backbone - base_template = (models.__dict__[arch]( - weights=('AlexNet_Weights.IMAGENET1K_V1' - if pretrained_flag - else 'AlexNet_Weights.NONE')) - .to(device)) + base_template = models.__dict__[arch]( + weights=( + "AlexNet_Weights.IMAGENET1K_V1" + if pretrained_flag + else "AlexNet_Weights.NONE" + ) + ).to(device) - # Set the feature module from provided template - if (provided_template is None): - self.feature_module = deepcopy(base_template._modules['features']) + # Set the feature module from provided template + if provided_template is None: + self.feature_module = deepcopy(base_template._modules["features"]) # Use base template if nothing provided - else: + else: self.feature_module = deepcopy(provided_template.feature_module) # Set avgpool layer - self.avg_pool = deepcopy(base_template._modules['avgpool']) + self.avg_pool = deepcopy(base_template._modules["avgpool"]) # Flattener self.flatten = Flatten() # Classifier module - self.classifier_module = deepcopy(base_template._modules['classifier']) + self.classifier_module = deepcopy(base_template._modules["classifier"]) # Customize final layer for the number of classes in the data original_classifier_input_dim = self.classifier_module[-1].in_features self.classifier_module[-1] = nn.Linear( - original_classifier_input_dim, self.num_classes) + original_classifier_input_dim, self.num_classes + ) for param in self.parameters(): param.requires_grad = True @@ -173,8 +171,8 @@ def forward(self, x): class ExpertGate(nn.Module): - """Overall parent module that holds the dictionary of expert autoencoders - and expert classifiers. + """Overall parent module that holds the dictionary of expert autoencoders + and expert classifiers. 
""" def __init__( @@ -183,16 +181,16 @@ def __init__( device, arch="alexnet", pretrained_flag=True, - output_layer_name="features" + output_layer_name="features", ): """ :param shape: shape of the input layer :param device: gpu or cpu - :param arch: the architecture to use from torchvision.models, + :param arch: the architecture to use from torchvision.models, defaults to "alexnet" - :param pretrained_flag: determines if torchvision model is pre-trained, + :param pretrained_flag: determines if torchvision model is pre-trained, defaults to True - :param output_layer_name: output layer of the feature backbone, + :param output_layer_name: output layer of the feature backbone, defaults to "features" """ super().__init__() @@ -214,15 +212,17 @@ def __init__( # Initialize an expert with pretrained AlexNet self.expert = ( models.__dict__[arch]( - weights=('AlexNet_Weights.IMAGENET1K_V1' - if pretrained_flag - else 'AlexNet_Weights.NONE')) + weights=( + "AlexNet_Weights.IMAGENET1K_V1" + if pretrained_flag + else "AlexNet_Weights.NONE" + ) + ) .to(device) .eval() ) def _get_average_reconstruction_error(self, autoencoder_id, x): - # Select autoencoder with the given ID autoencoder = self.autoencoder_dict[str(autoencoder_id)] @@ -238,18 +238,15 @@ def _get_average_reconstruction_error(self, autoencoder_id, x): return error def forward(self, x): - # If not in training mode, select the best expert for the input data - if (not self.training): - + if not self.training: # Build an error tensor to hold errors for all autoencoders - all_errors = [None]*len(self.autoencoder_dict) + all_errors = [None] * len(self.autoencoder_dict) # Iterate through all autoencoders to populate error tensor for autoencoder_id in self.autoencoder_dict: - error = self._get_average_reconstruction_error( - autoencoder_id, x) - error = -error/self.temp + error = self._get_average_reconstruction_error(autoencoder_id, x) + error = -error / self.temp all_errors[int(autoencoder_id)] = torch.tensor(error) # Softmax to get probabilites diff --git a/avalanche/models/generator.py b/avalanche/models/generator.py index 9b6d848c1..3f0314e2f 100644 --- a/avalanche/models/generator.py +++ b/avalanche/models/generator.py @@ -88,9 +88,7 @@ def __init__(self, shape, nhid=16): MLP([nhid, 64, 128, 256, flattened_size], last_activation=False), nn.Sigmoid(), ) - self.invTrans = transforms.Compose( - [transforms.Normalize((0.1307,), (0.3081,))] - ) + self.invTrans = transforms.Compose([transforms.Normalize((0.1307,), (0.3081,))]) def forward(self, z, y=None): if y is None: @@ -115,11 +113,8 @@ class MlpVAE(Generator, nn.Module): """ def __init__( - self, - shape, - nhid=16, - n_classes=10, - device: Union[str, torch.device] = "cpu"): + self, shape, nhid=16, n_classes=10, device: Union[str, torch.device] = "cpu" + ): """ :param shape: Shape of each input sample :param nhid: Dimension of latent space of Encoder. @@ -129,7 +124,7 @@ def __init__( super(MlpVAE, self).__init__() self.dim = nhid if device is None: - device = 'cpu' + device = "cpu" self.device = torch.device(device) self.encoder = VAEMLPEncoder(shape, latent_dim=128) @@ -173,9 +168,7 @@ def forward(self, x): Forward. 
""" represntations = self.encoder(x) - mean, logvar = self.calc_mean(represntations), self.calc_logvar( - represntations - ) + mean, logvar = self.calc_mean(represntations), self.calc_logvar(represntations) z = self.sampling(mean, logvar) return self.decoder(z), mean, logvar @@ -200,7 +193,7 @@ def VAE_loss(X, forward_output): """ X_hat, mean, logvar = forward_output reconstruction_loss = MSE_loss(X_hat, X) - KL_divergence = 0.5 * torch.sum(-1 - logvar + torch.exp(logvar) + mean ** 2) + KL_divergence = 0.5 * torch.sum(-1 - logvar + torch.exp(logvar) + mean**2) return reconstruction_loss + KL_divergence diff --git a/avalanche/models/helper_method.py b/avalanche/models/helper_method.py index a7d3e07f4..a4df1d1f1 100644 --- a/avalanche/models/helper_method.py +++ b/avalanche/models/helper_method.py @@ -39,17 +39,13 @@ def __init__(self, model: nn.Module, classifier_name: str): if isinstance(old_classifier, nn.Linear): in_size = old_classifier.in_features out_size = old_classifier.out_features - old_params = [ - torch.clone(p.data) for p in old_classifier.parameters() - ] + old_params = [torch.clone(p.data) for p in old_classifier.parameters()] # Replace old classifier by empty block setattr(self.model, classifier_name, nn.Sequential()) elif isinstance(old_classifier, nn.Sequential): in_size = old_classifier[-1].in_features out_size = old_classifier[-1].out_features - old_params = [ - torch.clone(p.data) for p in old_classifier[-1].parameters() - ] + old_params = [torch.clone(p.data) for p in old_classifier[-1].parameters()] del old_classifier[-1] else: raise NotImplementedError( diff --git a/avalanche/models/icarl_resnet.py b/avalanche/models/icarl_resnet.py index b195127b7..bd250bd6d 100644 --- a/avalanche/models/icarl_resnet.py +++ b/avalanche/models/icarl_resnet.py @@ -27,9 +27,7 @@ def forward(self, x: Tensor) -> Tensor: return self.transform_function(x) -def conv3x3( - in_planes: int, out_planes: int, stride: Union[int, Sequence[int]] = 1 -): +def conv3x3(in_planes: int, out_planes: int, stride: Union[int, Sequence[int]] = 1): return Conv2d( in_planes, out_planes, diff --git a/avalanche/models/lenet5.py b/avalanche/models/lenet5.py index b0bd4601c..8d2405b62 100644 --- a/avalanche/models/lenet5.py +++ b/avalanche/models/lenet5.py @@ -23,14 +23,10 @@ def __init__(self, n_classes, input_channels): nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5, stride=1), nn.Tanh(), nn.AvgPool2d(kernel_size=2), - nn.Conv2d( - in_channels=16, out_channels=120, kernel_size=5, stride=1 - ), + nn.Conv2d(in_channels=16, out_channels=120, kernel_size=5, stride=1), nn.Tanh(), ) - self.ff = nn.Sequential( - nn.Linear(in_features=120, out_features=84), nn.Tanh() - ) + self.ff = nn.Sequential(nn.Linear(in_features=120, out_features=84), nn.Tanh()) self.classifier = nn.Sequential( nn.Linear(in_features=84, out_features=n_classes), diff --git a/avalanche/models/mobilenetv1.py b/avalanche/models/mobilenetv1.py index 2be71389a..532901e16 100644 --- a/avalanche/models/mobilenetv1.py +++ b/avalanche/models/mobilenetv1.py @@ -26,7 +26,6 @@ def remove_sequential(network: nn.Module, all_layers: List[nn.Module]): - for layer in network.children(): # if sequential layer, apply recursively to layers in sequential layer if isinstance(layer, nn.Sequential): @@ -38,7 +37,6 @@ def remove_sequential(network: nn.Module, all_layers: List[nn.Module]): def remove_DwsConvBlock(cur_layers): - all_layers = [] for layer in cur_layers: if isinstance(layer, DwsConvBlock): @@ -79,7 +77,6 @@ def __init__(self, pretrained=True, 
latent_layer_num=20): self.output = nn.Linear(1024, 50, bias=False) def forward(self, x, latent_input=None, return_lat_acts=False): - if latent_input is not None: with torch.no_grad(): orig_acts = self.lat_features(x) @@ -99,7 +96,6 @@ def forward(self, x, latent_input=None, return_lat_acts=False): if __name__ == "__main__": - model = MobilenetV1(pretrained=True) for name, param in model.named_parameters(): print(name) diff --git a/avalanche/models/ncm_classifier.py b/avalanche/models/ncm_classifier.py index 68c76e252..869aa4dc4 100644 --- a/avalanche/models/ncm_classifier.py +++ b/avalanche/models/ncm_classifier.py @@ -20,22 +20,20 @@ class NCMClassifier(nn.Module): than `max_class_id_seen` are associated with a 0-vector. """ - def __init__(self, - normalize: bool = True): + def __init__(self, normalize: bool = True): """ :param normalize: whether to normalize the input with 2-norm = 1 before computing the distance. """ super().__init__() # vectorized version of class means - self.register_buffer('class_means', None) + self.register_buffer("class_means", None) self.class_means_dict = {} self.normalize = normalize - def load_state_dict(self, state_dict, - strict: bool = True): - self.class_means = state_dict['class_means'] + def load_state_dict(self, state_dict, strict: bool = True): + self.class_means = state_dict["class_means"] super().load_state_dict(state_dict, strict) # fill dictionary if self.class_means is not None: @@ -83,8 +81,7 @@ def forward(self, x): # (batch_size, num_classes) return (-sqd).T - def update_class_means_dict(self, - class_means_dict: Dict[int, Tensor]): + def update_class_means_dict(self, class_means_dict: Dict[int, Tensor]): """ Update dictionary of class means. If class already exists, the average of the two mean vectors @@ -93,9 +90,9 @@ def update_class_means_dict(self, :param class_means_dict: a dictionary mapping class id to class mean tensor. """ - assert isinstance(class_means_dict, dict), \ - "class_means_dict must be a dictionary mapping class_id " \ - "to mean vector" + assert isinstance(class_means_dict, dict), ( + "class_means_dict must be a dictionary mapping class_id " "to mean vector" + ) for k, v in class_means_dict.items(): if k not in self.class_means_dict: self.class_means_dict[k] = class_means_dict[k].clone() @@ -106,16 +103,14 @@ def update_class_means_dict(self, self._vectorize_means_dict() - def replace_class_means_dict(self, - class_means_dict: Dict[int, Tensor]): + def replace_class_means_dict(self, class_means_dict: Dict[int, Tensor]): """ Replace existing dictionary of means with a given dictionary. """ - assert isinstance(class_means_dict, dict), \ - "class_means_dict must be a dictionary mapping class_id " \ - "to mean vector" - self.class_means_dict = {k: v.clone() - for k, v in class_means_dict.items()} + assert isinstance(class_means_dict, dict), ( + "class_means_dict must be a dictionary mapping class_id " "to mean vector" + ) + self.class_means_dict = {k: v.clone() for k, v in class_means_dict.items()} self._vectorize_means_dict() diff --git a/avalanche/models/pnn.py b/avalanche/models/pnn.py index 169c35575..7d721effe 100644 --- a/avalanche/models/pnn.py +++ b/avalanche/models/pnn.py @@ -61,9 +61,7 @@ def __init__( return # first adapter is empty # Eq. 2 - MLP adapter. Not needed for the first task. 
- self.V = nn.Linear( - in_features * num_prev_modules, out_features_per_column - ) + self.V = nn.Linear(in_features * num_prev_modules, out_features_per_column) self.alphas = nn.Parameter(torch.randn(num_prev_modules)) self.U = nn.Linear(out_features_per_column, out_features_per_column) @@ -152,9 +150,7 @@ def __init__(self, in_features, out_features_per_column, adapter="mlp"): # convert from task label to module list order self.task_to_module_idx = {} - first_col = PNNColumn( - in_features, out_features_per_column, 0, adapter=adapter - ) + first_col = PNNColumn(in_features, out_features_per_column, 0, adapter=adapter) self.columns = nn.ModuleList([first_col]) @property diff --git a/avalanche/models/prompt.py b/avalanche/models/prompt.py index 49ba5f66d..646553f04 100644 --- a/avalanche/models/prompt.py +++ b/avalanche/models/prompt.py @@ -88,11 +88,8 @@ def l2_normalize(self, x, dim=None, epsilon=1e-12): """Normalizes a given vector or matrix.""" square_sum = torch.sum(x**2, dim=dim, keepdim=True) x_inv_norm = torch.rsqrt( - torch.maximum( - square_sum, - torch.tensor( - epsilon, - device=x.device))) + torch.maximum(square_sum, torch.tensor(epsilon, device=x.device)) + ) return x * x_inv_norm def forward(self, x_embed, prompt_mask=None, cls_features=None): @@ -109,8 +106,9 @@ def forward(self, x_embed, prompt_mask=None, cls_features=None): elif self.embedding_key == "max": x_embed_mean = torch.max(x_embed, dim=1)[0] elif self.embedding_key == "mean_max": - x_embed_mean = torch.max(x_embed, dim=1)[ - 0] + 2 * torch.mean(x_embed, dim=1) + x_embed_mean = torch.max(x_embed, dim=1)[0] + 2 * torch.mean( + x_embed, dim=1 + ) elif self.embedding_key == "cls": if cls_features is None: x_embed_mean = torch.max(x_embed, dim=1)[0] # B, C @@ -118,24 +116,24 @@ def forward(self, x_embed, prompt_mask=None, cls_features=None): x_embed_mean = cls_features else: raise NotImplementedError( - "Not supported way of calculating embedding keys!") + "Not supported way of calculating embedding keys!" + ) - prompt_norm = self.l2_normalize( - self.prompt_key, dim=1) # Pool_size, C + prompt_norm = self.l2_normalize(self.prompt_key, dim=1) # Pool_size, C x_embed_norm = self.l2_normalize(x_embed_mean, dim=1) # B, C - similarity = torch.matmul( - x_embed_norm, prompt_norm.t()) # B, Pool_size + similarity = torch.matmul(x_embed_norm, prompt_norm.t()) # B, Pool_size if prompt_mask is None: _, idx = torch.topk(similarity, k=self.top_k, dim=1) # B, top_k if self.batchwise_prompt: prompt_id, id_counts = torch.unique( - idx, return_counts=True, sorted=True) - # In jnp.unique, when the 'size' is specified and there are + idx, return_counts=True, sorted=True + ) + # In jnp.unique, when the 'size' is specified and there are # fewer than the indicated number of elements, - # the remaining elements will be filled with 'fill_value', - # the default is the minimum value along the specified + # the remaining elements will be filled with 'fill_value', + # the default is the minimum value along the specified # dimension. # Unless dimension is specified, this will be flattend if it # is not already 1D. 
@@ -163,8 +161,7 @@ def forward(self, x_embed, prompt_mask=None, cls_features=None): _, major_idx = torch.topk(id_counts, k=self.top_k) # top_k major_prompt_id = prompt_id[major_idx] # top_k # expand to batch - idx = major_prompt_id.expand( - x_embed.shape[0], -1) # B, top_k + idx = major_prompt_id.expand(x_embed.shape[0], -1) # B, top_k else: idx = prompt_mask # B, top_k @@ -191,14 +188,11 @@ def forward(self, x_embed, prompt_mask=None, cls_features=None): out["reduce_sim"] = reduce_sim else: if self.prompt_init == "zero": - self.prompt = nn.Parameter( - torch.zeros(self.length, self.embed_dim)) + self.prompt = nn.Parameter(torch.zeros(self.length, self.embed_dim)) elif self.prompt_init == "uniform": - self.prompt = nn.Parameter( - torch.randn(self.length, self.embed_dim)) + self.prompt = nn.Parameter(torch.randn(self.length, self.embed_dim)) nn.init.uniform_(self.prompt) - batched_prompt = self.prompt.unsqueeze( - 0).expand(x_embed.shape[0], -1, -1) + batched_prompt = self.prompt.unsqueeze(0).expand(x_embed.shape[0], -1, -1) # The input with the prompt concatenated to the front. [B, prompt+token, # C] diff --git a/avalanche/models/pytorchcv_wrapper.py b/avalanche/models/pytorchcv_wrapper.py index 5a893c0d9..34bd56c29 100644 --- a/avalanche/models/pytorchcv_wrapper.py +++ b/avalanche/models/pytorchcv_wrapper.py @@ -41,8 +41,7 @@ def vgg(depth: int, batch_normalization=False, pretrained=False) -> Module: available_depths = [11, 13, 16, 19] if depth not in available_depths: raise ValueError( - f"Depth {depth} not available, " - f"availble depths are {available_depths}" + f"Depth {depth} not available, " f"availble depths are {available_depths}" ) name = f"vgg{depth}" diff --git a/avalanche/models/resnet32.py b/avalanche/models/resnet32.py index 0342a2dd5..c18295f42 100644 --- a/avalanche/models/resnet32.py +++ b/avalanche/models/resnet32.py @@ -1,12 +1,13 @@ import torch.nn as nn -__all__ = ['resnet32'] +__all__ = ["resnet32"] def conv3x3(in_planes, out_planes, stride=1): """3x3 convolution with padding""" - return nn.Conv2d(in_planes, out_planes, kernel_size=3, - stride=stride, padding=1, bias=False) + return nn.Conv2d( + in_planes, out_planes, kernel_size=3, stride=stride, padding=1, bias=False + ) class BasicBlock(nn.Module): @@ -39,11 +40,13 @@ def __init__(self, inplanes, planes, stride=1, downsample=None): super(Bottleneck, self).__init__() self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False) self.bn1 = nn.BatchNorm2d(planes) - self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, - padding=1, bias=False) + self.conv2 = nn.Conv2d( + planes, planes, kernel_size=3, stride=stride, padding=1, bias=False + ) self.bn2 = nn.BatchNorm2d(planes) - self.conv3 = nn.Conv2d(planes, planes * self.expansion, kernel_size=1, - bias=False) + self.conv3 = nn.Conv2d( + planes, planes * self.expansion, kernel_size=1, bias=False + ) self.bn3 = nn.BatchNorm2d(planes * self.expansion) self.relu = nn.ReLU(inplace=True) self.downsample = downsample @@ -65,17 +68,17 @@ class ResNet(nn.Module): Reduced ResNet 32 This network architecture is an adaptation of ResNet for smaller - input size. The number of blocks can be modified in this line by + input size. The number of blocks can be modified in this line by changing n=5 to n=3 for ResNet-20, and n=9 for ResNet-56. 
Implementation based on FACIL, as in: https://github.com/mmasana/FACIL/blob/master/src/networks/resnet32.py """ + def __init__(self, block, layers, num_classes=10): self.inplanes = 16 super(ResNet, self).__init__() - self.conv1 = nn.Conv2d(3, 16, kernel_size=3, stride=1, padding=1, - bias=False) + self.conv1 = nn.Conv2d(3, 16, kernel_size=3, stride=1, padding=1, bias=False) self.bn1 = nn.BatchNorm2d(16) self.relu = nn.ReLU(inplace=True) self.layer1 = self._make_layer(block, 16, layers[0]) @@ -84,14 +87,13 @@ def __init__(self, block, layers, num_classes=10): self.avgpool = nn.AvgPool2d(8, stride=1) # last classifier layer (head) with as many outputs as classes self.fc = nn.Linear(64 * block.expansion, num_classes) - # and `head_var` with the name of the head, so it can be + # and `head_var` with the name of the head, so it can be # removed when doing incremental learning experiments - self.head_var = 'fc' + self.head_var = "fc" for m in self.modules(): if isinstance(m, nn.Conv2d): - nn.init.kaiming_normal_(m.weight, mode='fan_out', - nonlinearity='relu') + nn.init.kaiming_normal_(m.weight, mode="fan_out", nonlinearity="relu") elif isinstance(m, nn.BatchNorm2d): nn.init.constant_(m.weight, 1) nn.init.constant_(m.bias, 0) @@ -100,8 +102,13 @@ def _make_layer(self, block, planes, blocks, stride=1): downsample = None if stride != 1 or self.inplanes != planes * block.expansion: downsample = nn.Sequential( - nn.Conv2d(self.inplanes, planes * block.expansion, - kernel_size=1, stride=stride, bias=False), + nn.Conv2d( + self.inplanes, + planes * block.expansion, + kernel_size=1, + stride=stride, + bias=False, + ), nn.BatchNorm2d(planes * block.expansion), ) layers = [] diff --git a/avalanche/models/scr_model.py b/avalanche/models/scr_model.py index ca7c15e5e..d2f7708de 100644 --- a/avalanche/models/scr_model.py +++ b/avalanche/models/scr_model.py @@ -11,9 +11,8 @@ class SCRModel(nn.Module): The input is passed through a feature extractor and then normalized before being fed to the classifier. 
""" - def __init__(self, - feature_extractor: nn.Module, - projection: nn.Module): + + def __init__(self, feature_extractor: nn.Module, projection: nn.Module): """ :param feature_extractor: a pytorch module that given the input examples extracts the hidden features diff --git a/avalanche/models/slda_resnet.py b/avalanche/models/slda_resnet.py index dc80194f3..80e3ff8cd 100644 --- a/avalanche/models/slda_resnet.py +++ b/avalanche/models/slda_resnet.py @@ -33,9 +33,7 @@ def __init__( super(SLDAResNetModel, self).__init__() feat_extractor = ( - models.__dict__[arch](pretrained=imagenet_pretrained) - .to(device) - .eval() + models.__dict__[arch](pretrained=imagenet_pretrained).to(device).eval() ) self.feature_extraction_wrapper = FeatureExtractorBackbone( feat_extractor, output_layer_name diff --git a/avalanche/models/slim_resnet18.py b/avalanche/models/slim_resnet18.py index 29727c31b..f5cd0fed6 100644 --- a/avalanche/models/slim_resnet18.py +++ b/avalanche/models/slim_resnet18.py @@ -114,9 +114,7 @@ def __init__(self, nclasses, nf=20): self.layer2 = self._make_layer(block, nf * 2, num_blocks[1], stride=2) self.layer3 = self._make_layer(block, nf * 4, num_blocks[2], stride=2) self.layer4 = self._make_layer(block, nf * 8, num_blocks[3], stride=2) - self.linear = MultiHeadClassifier( - nf * 8 * BasicBlock.expansion, nclasses - ) + self.linear = MultiHeadClassifier(nf * 8 * BasicBlock.expansion, nclasses) def _make_layer(self, block, planes, num_blocks, stride): strides = [stride] + [1] * (num_blocks - 1) diff --git a/avalanche/models/timm_vit.py b/avalanche/models/timm_vit.py index 77ef2882f..c2e0bf45a 100644 --- a/avalanche/models/timm_vit.py +++ b/avalanche/models/timm_vit.py @@ -37,7 +37,7 @@ adapt_input_conv, checkpoint_seq, resolve_pretrained_cfg, - build_model_with_cfg + build_model_with_cfg, ) from timm.models.layers import PatchEmbed from timm.models.vision_transformer import VisionTransformer, Block @@ -54,7 +54,7 @@ class ViTWithPrompt(VisionTransformer): """ - Visual Transformer with Prompt. This class add prompts to a visual + Visual Transformer with Prompt. 
This class add prompts to a visual transformer to implement the Method Learning to Prompt (L2P) Implementation based on VisionTransformer from timm library @@ -145,20 +145,20 @@ def __init__( embed_layer=embed_layer, norm_layer=norm_layer, act_layer=act_layer, - block_fn=block_fn + block_fn=block_fn, ) self.class_token = class_token num_patches = self.patch_embed.num_patches if no_embed_class: - embed_len = num_patches + embed_len = num_patches else: embed_len = num_patches + self.num_prefix_tokens if prompt_length is not None and pool_size is not None and prompt_pool: embed_len += prompt_length * top_k - self.pos_embed = nn.Parameter(torch.randn(1, embed_len, embed_dim)*0.02) + self.pos_embed = nn.Parameter(torch.randn(1, embed_len, embed_dim) * 0.02) self.pos_drop = nn.Dropout(p=drop_rate) self.prompt_pool = prompt_pool @@ -180,8 +180,8 @@ def __init__( ) if num_classes > 0: - self.head = nn.Linear(self.embed_dim, num_classes) - else: + self.head = nn.Linear(self.embed_dim, num_classes) + else: self.head = nn.Identity() if weight_init != "skip": @@ -196,18 +196,12 @@ def forward_features(self, x, task_id=-1, cls_features=None, train=False): start = task_id * self.prompt.top_k end = (task_id + 1) * self.prompt.top_k single_prompt_mask = torch.arange(start, end).to(x.device) - prompt_mask = single_prompt_mask.\ - unsqueeze(0).\ - expand(x.shape[0], -1) + prompt_mask = single_prompt_mask.unsqueeze(0).expand(x.shape[0], -1) if end > self.prompt.pool_size: prompt_mask = None else: prompt_mask = None - res = self.prompt( - x, - prompt_mask=prompt_mask, - cls_features=cls_features - ) + res = self.prompt(x, prompt_mask=prompt_mask, cls_features=cls_features) self.total_prompt_len = res["total_prompt_len"] x = res["prompted_embedding"] else: @@ -240,9 +234,7 @@ def forward_head(self, res, pre_logits: bool = False): else x[:, 0 : self.total_prompt_len] ) x = x.mean(dim=1) - elif self.head_type == "token+prompt" \ - and self.prompt_pool \ - and self.class_token: + elif self.head_type == "token+prompt" and self.prompt_pool and self.class_token: x = x[:, 0 : self.total_prompt_len + 1] x = x.mean(dim=1) else: @@ -257,27 +249,21 @@ def forward_head(self, res, pre_logits: bool = False): return res def forward(self, x, task_id=-1, cls_features=None, train=False): - res = self.forward_features( - x, - task_id=task_id, - cls_features=cls_features, - train=train - ) + res = self.forward_features( + x, task_id=task_id, cls_features=cls_features, train=train + ) res = self.forward_head(res) return res @torch.jit.ignore() - def load_pretrained(self, checkpoint_path, prefix=''): + def load_pretrained(self, checkpoint_path, prefix=""): _load_weights(self, checkpoint_path, prefix) @torch.no_grad() -def _load_weights( - model: VisionTransformer, - checkpoint_path: str, - prefix: str = ""): +def _load_weights(model: VisionTransformer, checkpoint_path: str, prefix: str = ""): """ - Load weights from .npz checkpoints for official + Load weights from .npz checkpoints for official Google Brain Flax implementation """ import numpy as np @@ -304,8 +290,9 @@ def _n2p(w, t=True): stem_only = not hasattr(backbone, "stem") stem = backbone if stem_only else backbone.stem stem.conv.weight.copy_( - adapt_input_conv(stem.conv.weight.shape[1], - _n2p(w[f"{prefix}conv_root/kernel"])) + adapt_input_conv( + stem.conv.weight.shape[1], _n2p(w[f"{prefix}conv_root/kernel"]) + ) ) stem.norm.weight.copy_(_n2p(w[f"{prefix}gn_root/scale"])) stem.norm.bias.copy_(_n2p(w[f"{prefix}gn_root/bias"])) @@ -314,40 +301,36 @@ def _n2p(w, t=True): 
for j, block in enumerate(stage.blocks): bp = f"{prefix}block{i + 1}/unit{j + 1}/" for r in range(3): - getattr( - block, - f"conv{r + 1}").\ - weight.copy_(_n2p(w[f"{bp}conv{r + 1}/kernel"])) - getattr( - block, - f"norm{r + 1}").\ - weight.copy_(_n2p(w[f"{bp}gn{r + 1}/scale"])) - getattr( - block, - f"norm{r + 1}").\ - bias.copy_(_n2p(w[f"{bp}gn{r + 1}/bias"])) + getattr(block, f"conv{r + 1}").weight.copy_( + _n2p(w[f"{bp}conv{r + 1}/kernel"]) + ) + getattr(block, f"norm{r + 1}").weight.copy_( + _n2p(w[f"{bp}gn{r + 1}/scale"]) + ) + getattr(block, f"norm{r + 1}").bias.copy_( + _n2p(w[f"{bp}gn{r + 1}/bias"]) + ) if block.downsample is not None: - block.downsample.\ - conv.weight.copy_(_n2p(w[f"{bp}conv_proj/kernel"])) - block.downsample.\ - norm.weight.copy_(_n2p(w[f"{bp}gn_proj/scale"])) - block.downsample.\ - norm.bias.copy_(_n2p(w[f"{bp}gn_proj/bias"])) + block.downsample.conv.weight.copy_( + _n2p(w[f"{bp}conv_proj/kernel"]) + ) + block.downsample.norm.weight.copy_( + _n2p(w[f"{bp}gn_proj/scale"]) + ) + block.downsample.norm.bias.copy_(_n2p(w[f"{bp}gn_proj/bias"])) embed_conv_w = _n2p(w[f"{prefix}embedding/kernel"]) else: embed_conv_w = adapt_input_conv( - model.patch_embed.proj.weight.shape[1], - _n2p(w[f"{prefix}embedding/kernel"]) + model.patch_embed.proj.weight.shape[1], _n2p(w[f"{prefix}embedding/kernel"]) ) model.patch_embed.proj.weight.copy_(embed_conv_w) model.patch_embed.proj.bias.copy_(_n2p(w[f"{prefix}embedding/bias"])) model.cls_token.copy_(_n2p(w[f"{prefix}cls"], t=False)) - pos_embed_w = _n2p(w[f"{prefix}Transformer/posembed_input/pos_embedding"], - t=False) + pos_embed_w = _n2p(w[f"{prefix}Transformer/posembed_input/pos_embedding"], t=False) if pos_embed_w.shape != model.pos_embed.shape: pos_embed_w = ( # resize pos embedding when different size from pretrained weights - resize_pos_embed( + resize_pos_embed( pos_embed_w, model.pos_embed, getattr(model, "num_prefix_tokens", 1), @@ -363,9 +346,9 @@ def _n2p(w, t=True): ): model.head.weight.copy_(_n2p(w[f"{prefix}head/kernel"])) model.head.bias.copy_(_n2p(w[f"{prefix}head/bias"])) - # NOTE representation layer has been removed, + # NOTE representation layer has been removed, # not used in latest 21k/1k pretrained weights - # if isinstance(getattr(model.pre_logits, 'fc', None), nn.Linear) + # if isinstance(getattr(model.pre_logits, 'fc', None), nn.Linear) # and f'{prefix}pre_logits/bias' in w: # model.pre_logits.fc.weight.\ # copy_(_n2p(w[f'{prefix}pre_logits/kernel'])) @@ -392,10 +375,8 @@ def _n2p(w, t=True): ] ) ) - block.attn.\ - proj.weight.copy_(_n2p(w[f"{mha_prefix}out/kernel"]).flatten(1)) - block.attn.\ - proj.bias.copy_(_n2p(w[f"{mha_prefix}out/bias"])) + block.attn.proj.weight.copy_(_n2p(w[f"{mha_prefix}out/kernel"]).flatten(1)) + block.attn.proj.bias.copy_(_n2p(w[f"{mha_prefix}out/bias"])) for r in range(2): getattr(block.mlp, f"fc{r + 1}").weight.copy_( _n2p(w[f"{block_prefix}MlpBlock_3/Dense_{r}/kernel"]) @@ -408,7 +389,7 @@ def _n2p(w, t=True): def resize_pos_embed(posemb, posemb_new, num_prefix_tokens=1, gs_new=()): - # Rescale the grid of position embeddings when loading from state_dict. + # Rescale the grid of position embeddings when loading from state_dict. 
# Adapted from: # https://github.com/google-research/vision_transformer/blob/ # 00883dd691c63a6830751563748663526e811cee/vit_jax/checkpoint.py#L224 @@ -432,12 +413,10 @@ def resize_pos_embed(posemb, posemb_new, num_prefix_tokens=1, gs_new=()): assert len(gs_new) >= 2 posemb_grid = posemb_grid.reshape(1, gs_old, gs_old, -1).permute(0, 3, 1, 2) - posemb_grid = F.interpolate(posemb_grid, - size=gs_new, - mode="bicubic", - align_corners=False) - posemb_grid = posemb_grid.\ - permute(0, 2, 3, 1).reshape(1, gs_new[0] * gs_new[1], -1) + posemb_grid = F.interpolate( + posemb_grid, size=gs_new, mode="bicubic", align_corners=False + ) + posemb_grid = posemb_grid.permute(0, 2, 3, 1).reshape(1, gs_new[0] * gs_new[1], -1) posemb = torch.cat([posemb_prefix, posemb_grid], dim=1) return posemb @@ -458,13 +437,13 @@ def checkpoint_filter_fn(state_dict, model, adapt_layer_scale=False): O, _, H, W = model.patch_embed.proj.weight.shape v = v.reshape(O, -1, H, W) elif k == "pos_embed" and v.shape[1] != model.pos_embed.shape[1]: - # To resize pos embedding when using model at different size + # To resize pos embedding when using model at different size # from pretrained weights v = resize_pos_embed( v, model.pos_embed, - 0 - if getattr(model, "no_embed_class") + 0 + if getattr(model, "no_embed_class") else getattr(model, "num_prefix_tokens", 1), model.patch_embed.grid_size, ) @@ -472,7 +451,7 @@ def checkpoint_filter_fn(state_dict, model, adapt_layer_scale=False): # remap layer-scale gamma into sub-module (deit3 models) k = re.sub(r"gamma_([0-9])", r"ls\1.gamma", k) elif "pre_logits" in k: - # NOTE representation layer removed as not used in latest 21k/1k + # NOTE representation layer removed as not used in latest 21k/1k # pretrained weights continue out_dict[k] = v @@ -481,8 +460,10 @@ def checkpoint_filter_fn(state_dict, model, adapt_layer_scale=False): def _create_vision_transformer(variant, pretrained=False, **kwargs): if kwargs.get("features_only", None): - raise RuntimeError("features_only not implemented for \ - Vision Transformer models.") + raise RuntimeError( + "features_only not implemented for \ + Vision Transformer models." + ) pretrained_cfg = resolve_pretrained_cfg( variant, pretrained_cfg=kwargs.pop("pretrained_cfg", None) diff --git a/avalanche/models/utils.py b/avalanche/models/utils.py index f847bafaa..e45ec6db8 100644 --- a/avalanche/models/utils.py +++ b/avalanche/models/utils.py @@ -8,9 +8,10 @@ def is_multi_task_module(model: nn.Module) -> bool: - return isinstance(model, MultiTaskModule) or \ - (isinstance(model, DistributedDataParallel) and - isinstance(model.module, MultiTaskModule)) + return isinstance(model, MultiTaskModule) or ( + isinstance(model, DistributedDataParallel) + and isinstance(model.module, MultiTaskModule) + ) def avalanche_forward(model, x, task_labels): @@ -22,8 +23,10 @@ def avalanche_forward(model, x, task_labels): def avalanche_model_adaptation(model: nn.Module, experience: CLExperience): if isinstance(model, DistributedDataParallel): - raise RuntimeError('The model is wrapped in DistributedDataParallel. ' - 'Please unwrap it before calling this method.') + raise RuntimeError( + "The model is wrapped in DistributedDataParallel. " + "Please unwrap it before calling this method." 
+ ) for module in model.modules(): if isinstance(module, DynamicModule): module.adaptation(experience) diff --git a/avalanche/models/vit.py b/avalanche/models/vit.py index 5deb21b33..7408a2c79 100644 --- a/avalanche/models/vit.py +++ b/avalanche/models/vit.py @@ -4,9 +4,8 @@ def vit_tiny_patch16_224(pretrained=True, **kwargs): """ViT-Tiny (Vit-Ti/16)""" from avalanche.models.timm_vit import _create_vision_transformer - model_kwargs = dict( - patch_size=16, embed_dim=192, - depth=12, num_heads=3, **kwargs) + + model_kwargs = dict(patch_size=16, embed_dim=192, depth=12, num_heads=3, **kwargs) model = _create_vision_transformer( "vit_tiny_patch16_224", pretrained=pretrained, **model_kwargs ) @@ -14,18 +13,14 @@ def vit_tiny_patch16_224(pretrained=True, **kwargs): def vit_base_patch16_224(pretrained=True, **kwargs): - """ViT-Base (ViT-B/16) from original paper + """ViT-Base (ViT-B/16) from original paper (https://arxiv.org/abs/2010.11929). - ImageNet-1k weights fine-tuned from in21k @ 224x224, + ImageNet-1k weights fine-tuned from in21k @ 224x224, source https://github.com/google-research/vision_transformer. """ from avalanche.models.timm_vit import _create_vision_transformer - model_kwargs = dict( - patch_size=16, - embed_dim=768, - depth=12, - num_heads=12, - **kwargs) + + model_kwargs = dict(patch_size=16, embed_dim=768, depth=12, num_heads=12, **kwargs) model = _create_vision_transformer( "vit_base_patch16_224", pretrained=pretrained, **model_kwargs ) @@ -34,15 +29,14 @@ def vit_base_patch16_224(pretrained=True, **kwargs): def vit_large_patch16_224(pretrained=False, **kwargs): """ - ViT-Large (ViT-L/16) from original paper + ViT-Large (ViT-L/16) from original paper (https://arxiv.org/abs/2010.11929). - ImageNet-1k weights fine-tuned from in21k @ 224x224, + ImageNet-1k weights fine-tuned from in21k @ 224x224, source https://github.com/google-research/vision_transformer. """ from avalanche.models.timm_vit import _create_vision_transformer - model_kwargs = dict( - patch_size=16, embed_dim=1024, - depth=24, num_heads=16, **kwargs) + + model_kwargs = dict(patch_size=16, embed_dim=1024, depth=24, num_heads=16, **kwargs) model = _create_vision_transformer( "vit_large_patch16_224", pretrained=pretrained, **model_kwargs ) @@ -52,9 +46,8 @@ def vit_large_patch16_224(pretrained=False, **kwargs): def vit_small_patch32_224(pretrained=True, **kwargs): """ViT-Small (ViT-S/32)""" from avalanche.models.timm_vit import _create_vision_transformer - model_kwargs = dict( - patch_size=32, embed_dim=384, - depth=12, num_heads=6, **kwargs) + + model_kwargs = dict(patch_size=32, embed_dim=384, depth=12, num_heads=6, **kwargs) model = _create_vision_transformer( "vit_small_patch32_224", pretrained=pretrained, **model_kwargs ) @@ -64,13 +57,12 @@ def vit_small_patch32_224(pretrained=True, **kwargs): def vit_base_patch32_224(pretrained=True, **kwargs): """ ViT-Base (ViT-B/32) from original paper (https://arxiv.org/abs/2010.11929). - ImageNet-1k weights fine-tuned from in21k, + ImageNet-1k weights fine-tuned from in21k, source https://github.com/google-research/vision_transformer. 
""" from avalanche.models.timm_vit import _create_vision_transformer - model_kwargs = dict( - patch_size=32, embed_dim=768, - depth=12, num_heads=12, **kwargs) + + model_kwargs = dict(patch_size=32, embed_dim=768, depth=12, num_heads=12, **kwargs) model = _create_vision_transformer( "vit_base_patch32_224", pretrained=pretrained, **model_kwargs ) @@ -82,9 +74,8 @@ def vit_large_patch32_224(pretrained=True, **kwargs): ViT-Large (ViT-L/32) from original paper (https://arxiv.org/abs/2010.11929) No pretrained weights.""" from avalanche.models.timm_vit import _create_vision_transformer - model_kwargs = dict( - patch_size=32, embed_dim=1024, - depth=24, num_heads=16, **kwargs) + + model_kwargs = dict(patch_size=32, embed_dim=1024, depth=24, num_heads=16, **kwargs) model = _create_vision_transformer( "vit_large_patch32_224", pretrained=pretrained, **model_kwargs ) @@ -93,11 +84,10 @@ def vit_large_patch32_224(pretrained=True, **kwargs): # Only for unittest def simpleMLP(num_classes=10, **kwargs): - model = SimpleMLP(input_size=6, hidden_size=10, - num_classes=num_classes) + model = SimpleMLP(input_size=6, hidden_size=10, num_classes=num_classes) return model -def create_model(model_name='', **kwargs): +def create_model(model_name="", **kwargs): get_model = globals()[model_name] return get_model(**kwargs) diff --git a/avalanche/training/checkpoint.py b/avalanche/training/checkpoint.py index ebeaf95e9..961000fe8 100644 --- a/avalanche/training/checkpoint.py +++ b/avalanche/training/checkpoint.py @@ -46,12 +46,11 @@ def maybe_load_checkpoint(strategy, fname, map_location=None): if not os.path.exists(fname): return strategy, 0 - ckp = torch.load(fname, pickle_module=dill, - map_location=map_location) + ckp = torch.load(fname, pickle_module=dill, map_location=map_location) print(ckp) - strategy.__dict__.update(ckp['strategy'].__dict__) - exp_counter = ckp['exp_counter'] + strategy.__dict__.update(ckp["strategy"].__dict__) + exp_counter = ckp["exp_counter"] return strategy, exp_counter @@ -82,8 +81,8 @@ def save_checkpoint(strategy, fname, exclude=None): delattr(strategy, attr) checkpoint_data = { - 'strategy': strategy, - 'rng_manager': RNGManager, - 'exp_counter': ended_experience_counter + "strategy": strategy, + "rng_manager": RNGManager, + "exp_counter": ended_experience_counter, } torch.save(checkpoint_data, fname, pickle_module=dill) diff --git a/avalanche/training/determinism/cuda_rng.py b/avalanche/training/determinism/cuda_rng.py index 4ebad4aac..c31bd43fe 100644 --- a/avalanche/training/determinism/cuda_rng.py +++ b/avalanche/training/determinism/cuda_rng.py @@ -38,30 +38,31 @@ def cuda_rng_load_state(rng_state): n_states = len(rng_state) if n_states < n_devices: warnings.warn( - 'Problem when reloading the state of torch.cuda RNGs: the given' - 'checkpoint contain a number of RNG states less than the ' - 'number of currently available cuda devices ' - f'(got {n_states}, expected {n_devices}). ' - f'The RNG of cuda devices with ID >= {n_states} will not be ' - f'initialized!') + "Problem when reloading the state of torch.cuda RNGs: the given" + "checkpoint contain a number of RNG states less than the " + "number of currently available cuda devices " + f"(got {n_states}, expected {n_devices}). " + f"The RNG of cuda devices with ID >= {n_states} will not be " + f"initialized!" + ) # The standard situation is n_devices == n_states: just re-load the state # of the RNGs of currently available GPUs. This reasoning also applies if # n_states > n_devices. 
However, this is a bad fit for n_states < n_devices # (and because of this, we show the warning above). for device_id, rng_state in enumerate(rng_state[:n_devices]): - torch.cuda.set_rng_state(rng_state, f'cuda:{device_id}') + torch.cuda.set_rng_state(rng_state, f"cuda:{device_id}") def cuda_rng_step(): for device_id in range(torch.cuda.device_count()): - torch.rand(1, device=f'cuda:{device_id}') + torch.rand(1, device=f"cuda:{device_id}") __all__ = [ - 'cuda_rng_seed', - 'cpu_rng_seed', - 'cuda_rng_save_state', - 'cuda_rng_load_state', - 'cuda_rng_step' + "cuda_rng_seed", + "cpu_rng_seed", + "cuda_rng_save_state", + "cuda_rng_load_state", + "cuda_rng_step", ] diff --git a/avalanche/training/determinism/rng_manager.py b/avalanche/training/determinism/rng_manager.py index 09c418d22..4727854d6 100644 --- a/avalanche/training/determinism/rng_manager.py +++ b/avalanche/training/determinism/rng_manager.py @@ -5,8 +5,13 @@ import numpy as np import torch -from avalanche.training.determinism.cuda_rng import cuda_rng_seed, \ - cuda_rng_save_state, cuda_rng_load_state, cuda_rng_step, cpu_rng_seed +from avalanche.training.determinism.cuda_rng import ( + cuda_rng_seed, + cuda_rng_save_state, + cuda_rng_load_state, + cuda_rng_step, + cpu_rng_seed, +) class _Singleton(type): @@ -14,8 +19,7 @@ class _Singleton(type): def __call__(cls, *args, **kwargs): if cls not in cls._instances: - cls._instances[cls] = super(_Singleton, cls).__call__( - *args, **kwargs) + cls._instances[cls] = super(_Singleton, cls).__call__(*args, **kwargs) return cls._instances[cls] @@ -34,9 +38,7 @@ class _RNGManager: __metaclass__ = _Singleton - RNG_DEF_REQUIRED_FIELDS = { - 'seed', 'save_state', 'load_state', 'step' - } + RNG_DEF_REQUIRED_FIELDS = {"seed", "save_state", "load_state", "step"} def __init__(self): """ @@ -69,38 +71,50 @@ def register_random_generator(self, name: str, rng_def: dict): """ rng_def_keys = set(rng_def.keys()) if not rng_def_keys.issubset(_RNGManager.RNG_DEF_REQUIRED_FIELDS): - raise ValueError('Invalid random number generator definition') + raise ValueError("Invalid random number generator definition") self.random_generators[name] = rng_def def _register_default_generators(self): - self.register_random_generator('torch', { - 'seed': cpu_rng_seed, - 'save_state': torch.random.get_rng_state, - 'load_state': torch.random.set_rng_state, - 'step': lambda: torch.rand(1) - }) - - self.register_random_generator('torch.cuda', { - 'seed': cuda_rng_seed, - 'save_state': cuda_rng_save_state, - 'load_state': cuda_rng_load_state, - 'step': cuda_rng_step - }) - - self.register_random_generator('numpy', { - 'seed': np.random.seed, - 'save_state': np.random.get_state, - 'load_state': np.random.set_state, - 'step': lambda: np.random.rand(1) - }) - - self.register_random_generator('random', { - 'seed': random.seed, - 'save_state': random.getstate, - 'load_state': random.setstate, - 'step': random.random - }) + self.register_random_generator( + "torch", + { + "seed": cpu_rng_seed, + "save_state": torch.random.get_rng_state, + "load_state": torch.random.set_rng_state, + "step": lambda: torch.rand(1), + }, + ) + + self.register_random_generator( + "torch.cuda", + { + "seed": cuda_rng_seed, + "save_state": cuda_rng_save_state, + "load_state": cuda_rng_load_state, + "step": cuda_rng_step, + }, + ) + + self.register_random_generator( + "numpy", + { + "seed": np.random.seed, + "save_state": np.random.get_state, + "load_state": np.random.set_state, + "step": lambda: np.random.rand(1), + }, + ) + + self.register_random_generator( 
+ "random", + { + "seed": random.seed, + "save_state": random.getstate, + "load_state": random.setstate, + "step": random.random, + }, + ) def set_random_seeds(self, random_seed): """ @@ -112,7 +126,7 @@ def set_random_seeds(self, random_seed): """ for gen_name, gen_dict in self.random_generators.items(): - gen_dict['seed'](random_seed) + gen_dict["seed"](random_seed) def align_seeds(self): """ @@ -120,8 +134,7 @@ def align_seeds(self): integer value. """ - reference_seed = torch.randint(0, 2 ** 32 - 1, (1,), - dtype=torch.int64) + reference_seed = torch.randint(0, 2**32 - 1, (1,), dtype=torch.int64) seed = int(reference_seed) self.set_random_seeds(seed) @@ -130,13 +143,13 @@ def __getstate__(self): all_rngs_state = dict() for rng_name, rng_def in self.random_generators.items(): rng_state = dict() - rng_state['current_state'] = rng_def['save_state']() + rng_state["current_state"] = rng_def["save_state"]() all_rngs_state[rng_name] = rng_state return all_rngs_state def step_generators(self): for rng_name, rng_def in self.random_generators.items(): - rng_def['step']() + rng_def["step"]() def __setstate__(self, rngs): # Note on the following: @@ -157,8 +170,8 @@ def __setstate__(self, rngs): # global number generators registered in the singleton. self.random_generators = RNGManager.random_generators for rng_name, rng_def in self.random_generators.items(): - loaded_state = rngs[rng_name]['current_state'] - rng_def['load_state'](loaded_state) + loaded_state = rngs[rng_name]["current_state"] + rng_def["load_state"](loaded_state) def _replace_generators(self, generators): """ @@ -170,6 +183,4 @@ def _replace_generators(self, generators): RNGManager = _RNGManager() -__all__ = [ - 'RNGManager' -] +__all__ = ["RNGManager"] diff --git a/avalanche/training/losses.py b/avalanche/training/losses.py index bbf136fa5..7f40d1325 100644 --- a/avalanche/training/losses.py +++ b/avalanche/training/losses.py @@ -56,9 +56,7 @@ def after_training_exp(self, strategy, **kwargs): self.old_model.load_state_dict(strategy.model.state_dict()) - self.old_classes += np.unique( - strategy.experience.dataset.targets - ).tolist() + self.old_classes += np.unique(strategy.experience.dataset.targets).tolist() class SCRLoss(torch.nn.Module): @@ -71,8 +69,8 @@ class SCRLoss(torch.nn.Module): Original GitHub repository: https://github.com/HobbitLong/SupContrast/ LICENSE: BSD 2-Clause License """ - def __init__(self, temperature=0.07, contrast_mode='all', - base_temperature=0.07): + + def __init__(self, temperature=0.07, contrast_mode="all", base_temperature=0.07): super().__init__() self.temperature = temperature self.contrast_mode = contrast_mode @@ -98,39 +96,41 @@ def forward(self, features, labels=None, mask=None): device = features.device if len(features.shape) < 3: - raise ValueError('`features` needs to be [bsz, n_views, ...],' - 'at least 3 dimensions are required') + raise ValueError( + "`features` needs to be [bsz, n_views, ...]," + "at least 3 dimensions are required" + ) if len(features.shape) > 3: features = features.view(features.shape[0], features.shape[1], -1) batch_size = features.shape[0] if labels is not None and mask is not None: - raise ValueError('Cannot define both `labels` and `mask`') + raise ValueError("Cannot define both `labels` and `mask`") elif labels is None and mask is None: mask = torch.eye(batch_size, dtype=torch.float32).to(device) elif labels is not None: labels = labels.contiguous().view(-1, 1) if labels.shape[0] != batch_size: - raise ValueError('Num of labels does not match num of features') + 
raise ValueError("Num of labels does not match num of features") mask = torch.eq(labels, labels.T).float().to(device) else: mask = mask.float().to(device) contrast_count = features.shape[1] contrast_feature = torch.cat(torch.unbind(features, dim=1), dim=0) - if self.contrast_mode == 'one': + if self.contrast_mode == "one": anchor_feature = features[:, 0] anchor_count = 1 - elif self.contrast_mode == 'all': + elif self.contrast_mode == "all": anchor_feature = contrast_feature anchor_count = contrast_count else: - raise ValueError('Unknown mode: {}'.format(self.contrast_mode)) + raise ValueError("Unknown mode: {}".format(self.contrast_mode)) # compute logits anchor_dot_contrast = torch.div( - torch.matmul(anchor_feature, contrast_feature.T), - self.temperature) + torch.matmul(anchor_feature, contrast_feature.T), self.temperature + ) # for numerical stability logits_max, _ = torch.max(anchor_dot_contrast, dim=1, keepdim=True) @@ -143,7 +143,7 @@ def forward(self, features, labels=None, mask=None): torch.ones_like(mask), 1, torch.arange(batch_size * anchor_count).view(-1, 1).to(device), - 0 + 0, ) mask = mask * logits_mask @@ -155,7 +155,7 @@ def forward(self, features, labels=None, mask=None): mean_log_prob_pos = (mask * log_prob).sum(1) / mask.sum(1) # loss - loss = - (self.temperature / self.base_temperature) * mean_log_prob_pos + loss = -(self.temperature / self.base_temperature) * mean_log_prob_pos loss = loss.view(anchor_count, batch_size).mean() return loss diff --git a/avalanche/training/plugins/agem.py b/avalanche/training/plugins/agem.py index 778e8d53c..bc752d2cd 100644 --- a/avalanche/training/plugins/agem.py +++ b/avalanche/training/plugins/agem.py @@ -92,17 +92,13 @@ def after_backward(self, strategy, **kwargs): alpha2 = dotg / torch.dot( self.reference_gradients, self.reference_gradients ) - grad_proj = ( - current_gradients - self.reference_gradients * alpha2 - ) + grad_proj = current_gradients - self.reference_gradients * alpha2 count = 0 for n, p in strategy.model.named_parameters(): n_param = p.numel() if p.grad is not None: - p.grad.copy_( - grad_proj[count : count + n_param].view_as(p) - ) + p.grad.copy_(grad_proj[count : count + n_param].view_as(p)) count += n_param def after_training_exp(self, strategy, **kwargs): @@ -129,7 +125,7 @@ def update_memory(self, dataset, num_workers=0, **kwargs): if removed_els > 0: indices = list(range(len(dataset))) random.shuffle(indices) - dataset = dataset.subset(indices[:self.patterns_per_experience]) + dataset = dataset.subset(indices[: self.patterns_per_experience]) self.buffers.append(dataset) diff --git a/avalanche/training/plugins/bic.py b/avalanche/training/plugins/bic.py index 457da4476..f057022b5 100644 --- a/avalanche/training/plugins/bic.py +++ b/avalanche/training/plugins/bic.py @@ -14,8 +14,10 @@ from torch.utils.data import DataLoader from torch.optim.lr_scheduler import MultiStepLR -from avalanche.benchmarks.utils import classification_subset, \ - concat_classification_datasets +from avalanche.benchmarks.utils import ( + classification_subset, + concat_classification_datasets, +) from avalanche.benchmarks.utils.data import AvalancheDataset from avalanche.benchmarks.utils.data_loader import ReplayDataLoader from avalanche.training.plugins.strategy_plugin import SupervisedPlugin @@ -36,8 +38,8 @@ class BiCPlugin(SupervisedPlugin): Bias Correction (BiC) plugin. Technique introduced in: - "Wu, Yue, et al. "Large scale incremental learning." 
Proceedings - of the IEEE/CVF Conference on Computer Vision and Pattern + "Wu, Yue, et al. "Large scale incremental learning." Proceedings + of the IEEE/CVF Conference on Computer Vision and Pattern Recognition. 2019" Implementation based on FACIL, as in: @@ -45,17 +47,17 @@ """ def __init__( - self, + self, mem_size: int = 2000, batch_size: Optional[int] = None, batch_size_mem: Optional[int] = None, task_balanced_dataloader: bool = False, storage_policy: Optional["ExemplarsBuffer"] = None, - val_percentage: float = 0.1, - T: int = 2, + val_percentage: float = 0.1, + T: int = 2, stage_2_epochs: int = 200, - lamb: float = -1, + lamb: float = -1, lr: float = 0.1, ): """ @@ -63,22 +64,22 @@ def __init__( :param batch_size: the size of the data batch. If set to `None`, it will be set equal to the strategy's batch size. :param batch_size_mem: the size of the memory batch. If - `task_balanced_dataloader` is set to True, it must be greater than + `task_balanced_dataloader` is set to True, it must be greater than or equal to the number of tasks. If its value is set to `None` (the default value), it will be automatically set equal to the data batch size. :param task_balanced_dataloader: if True, buffer data loaders will be - task-balanced, otherwise it will create a single dataloader for + task-balanced, otherwise it will create a single dataloader for the buffer samples. :param storage_policy: The policy that controls how to add new exemplars in memory - :param val_percentage: hyperparameter used to set the + :param val_percentage: hyperparameter used to set the percentage of exemplars in the val set. - :param T: hyperparameter used to set the temperature + :param T: hyperparameter used to set the temperature used in stage 1. - :param stage_2_epochs: hyperparameter used to set the + :param stage_2_epochs: hyperparameter used to set the amount of epochs of stage 2. - :param lamb: hyperparameter used to balance the distilling + :param lamb: hyperparameter used to balance the distilling loss and the classification loss. :param lr: hyperparameter used as a learning rate for the second phase of training.
@@ -119,25 +120,22 @@ # return self.storage_policy.buffer_groups # a Dict def before_training(self, strategy: "SupervisedTemplate", *args, **kwargs): - assert not isinstance(strategy.model, MultiTaskModule), \ - "BiC only supported for Class Incremetnal Learning (single head)" + assert not isinstance( + strategy.model, MultiTaskModule + ), "BiC only supported for Class Incremental Learning (single head)" - def before_train_dataset_adaptation( - self, - strategy: "SupervisedTemplate", - **kwargs - ): + def before_train_dataset_adaptation(self, strategy: "SupervisedTemplate", **kwargs): assert strategy.experience is not None new_data: AvalancheDataset = strategy.experience.dataset task_id = strategy.clock.train_exp_counter cl_idxs: Dict[int, List[int]] = defaultdict(list) - targets: Sequence[SupportsInt] = getattr(new_data, 'targets') + targets: Sequence[SupportsInt] = getattr(new_data, "targets") for idx, target in enumerate(targets): # Conversion to int may fix issues when target # is a single-element torch.tensor target = int(target) - cl_idxs[target].append(idx) + cl_idxs[target].append(idx) for c in cl_idxs.keys(): self.class_to_tasks[c] = task_id @@ -165,9 +163,7 @@ def before_train_dataset_adaptation( # resize buffers for class_id, class_buf in self.val_buffer.items(): - class_buf.resize( - strategy, class_to_len[class_id] - ) + class_buf.resize(strategy, class_to_len[class_id]) strategy.experience.dataset = concat_classification_datasets(train_data) @@ -186,11 +182,8 @@ def before_training_exp( task_id = strategy.clock.train_exp_counter if task_id not in self.bias_layer: - targets = getattr(strategy.adapted_dataset, 'targets') - self.bias_layer[task_id] = BiasLayer( - strategy.device, - list(targets.uniques) - ) + targets = getattr(strategy.adapted_dataset, "targets") + self.bias_layer[task_id] = BiasLayer(strategy.device, list(targets.uniques)) if len(self.storage_policy.buffer) == 0: # first experience.
We don't use the buffer, no need to change @@ -219,7 +212,7 @@ def before_training_exp( def after_forward(self, strategy, **kwargs): for t in self.bias_layer.keys(): strategy.mb_output = self.bias_layer[t](strategy.mb_output) - + def after_eval_forward(self, strategy, **kwargs): for t in self.bias_layer.keys(): strategy.mb_output = self.bias_layer[t](strategy.mb_output) @@ -236,9 +229,8 @@ def before_backward(self, strategy, **kwargs): for c in self.class_to_tasks.keys(): if self.class_to_tasks[c] < task_id: old_clss.append(c) - - loss_dist = self.cross_entropy(out_new[:, old_clss], - out_old[:, old_clss]) + + loss_dist = self.cross_entropy(out_new[:, old_clss], out_old[:, old_clss]) if self.lamb == -1: lamb = len(old_clss) / len(self.seen_classes) return (1.0 - lamb) * strategy.loss + lamb * loss_dist @@ -248,33 +240,32 @@ def before_backward(self, strategy, **kwargs): def after_training_exp(self, strategy, **kwargs): self.model_old = deepcopy(strategy.model) task_id = strategy.clock.train_exp_counter - + self.storage_policy.update(strategy, **kwargs) if task_id > 0: list_subsets = [] for _, class_buf in self.val_buffer.items(): list_subsets.append(class_buf.buffer) - + stage_set = concat_classification_datasets(list_subsets) stage_loader = DataLoader( - stage_set, - batch_size=strategy.train_mb_size, - shuffle=True, - num_workers=4) - + stage_set, + batch_size=strategy.train_mb_size, + shuffle=True, + num_workers=4, + ) + bic_optimizer = torch.optim.SGD( - self.bias_layer[task_id].parameters(), - lr=self.lr, momentum=0.9) + self.bias_layer[task_id].parameters(), lr=self.lr, momentum=0.9 + ) # verbose here is actually correct # The PyTorch type stubs for MultiStepLR are broken scheduler = MultiStepLR( - bic_optimizer, - milestones=[50, 100, 150], - gamma=0.1, - verbose=False) # type: ignore - + bic_optimizer, milestones=[50, 100, 150], gamma=0.1, verbose=False + ) # type: ignore + # Loop epochs for e in range(self.stage_2_epochs): total, t_acc, t_loss = 0, 0, 0 @@ -286,34 +277,33 @@ def after_training_exp(self, strategy, **kwargs): for t in self.bias_layer.keys(): outputs = self.bias_layer[t](outputs) - loss = torch.nn.functional.cross_entropy( - outputs, - y_real) - + loss = torch.nn.functional.cross_entropy(outputs, y_real) + _, preds = torch.max(outputs, 1) t_acc += torch.sum(preds == y_real.data) t_loss += loss.item() * x.size(0) total += x.size(0) - loss += 0.1 * ((self.bias_layer[task_id].beta.sum() - ** 2) / 2) + loss += 0.1 * ((self.bias_layer[task_id].beta.sum() ** 2) / 2) bic_optimizer.zero_grad() loss.backward() bic_optimizer.step() - + scheduler.step() if (e + 1) % (int(self.stage_2_epochs / 4)) == 0: - print('| E {:3d} | Train: loss={:.3f}, S2 acc={:5.1f}% |' - .format(e + 1, t_loss / total, - 100 * t_acc / total)) - + print( + "| E {:3d} | Train: loss={:.3f}, S2 acc={:5.1f}% |".format( + e + 1, t_loss / total, 100 * t_acc / total + ) + ) + def cross_entropy(self, outputs, targets): """Calculates cross-entropy with temperature scaling""" - logp = torch.nn.functional.log_softmax(outputs/self.T, dim=1) - pre_p = torch.nn.functional.softmax(targets/self.T, dim=1) + logp = torch.nn.functional.log_softmax(outputs / self.T, dim=1) + pre_p = torch.nn.functional.softmax(targets / self.T, dim=1) return -torch.mean(torch.sum(pre_p * logp, dim=1)) * self.T * self.T - + def get_group_lengths(self, num_groups): """Compute groups lengths given the number of groups `num_groups`.""" max_size = int(self.val_percentage * self.mem_size) diff --git a/avalanche/training/plugins/checkpoint.py 
b/avalanche/training/plugins/checkpoint.py index 56487e700..546244ed4 100644 --- a/avalanche/training/plugins/checkpoint.py +++ b/avalanche/training/plugins/checkpoint.py @@ -1,6 +1,5 @@ from abc import ABC, abstractmethod -from typing import Union, Callable, IO, Any, Dict, Optional, Iterable, \ - BinaryIO +from typing import Union, Callable, IO, Any, Dict, Optional, Iterable, BinaryIO import torch @@ -9,8 +8,7 @@ from avalanche.training.templates import BaseSGDTemplate -@deprecated(0.5, "Please use `save_checkpoint` and `maybe_load_checkpoint` " - "instead.") +@deprecated(0.5, "Please use `save_checkpoint` and `maybe_load_checkpoint` " "instead.") class CheckpointStorage(ABC): """ Abstract class for the checkpoint storage component. @@ -19,6 +17,7 @@ class CheckpointStorage(ABC): and load checkpoints from a persistent storage. Instances of this class are used by the :class:`CheckpointPlugin` strategy plugin. """ + def __init__(self): """ Initializes the checkpoint storage. @@ -27,10 +26,10 @@ def __init__(self): @abstractmethod def store_checkpoint( - self, - checkpoint_name: str, - checkpoint_writer: Callable[[Union[BinaryIO, IO[bytes]]], None]) \ - -> None: + self, + checkpoint_name: str, + checkpoint_writer: Callable[[Union[BinaryIO, IO[bytes]]], None], + ) -> None: """ Stores a checkpoint. @@ -71,10 +70,10 @@ def checkpoint_exists(self, checkpoint_name: str) -> bool: @abstractmethod def load_checkpoint( - self, - checkpoint_name: str, - checkpoint_loader: Callable[[Union[BinaryIO, IO[bytes]]], Any]) \ - -> Any: + self, + checkpoint_name: str, + checkpoint_loader: Callable[[Union[BinaryIO, IO[bytes]]], Any], + ) -> Any: """ Loads a checkpoint. @@ -93,8 +92,7 @@ def load_checkpoint( pass -@deprecated(0.5, "Please use `save_checkpoint` and `maybe_load_checkpoint` " - "instead.") +@deprecated(0.5, "Please use `save_checkpoint` and `maybe_load_checkpoint` " "instead.") class CheckpointPlugin(BaseSGDPlugin[BaseSGDTemplate]): """ A checkpointing facility that can be used to persist the entire state of the @@ -135,11 +133,10 @@ class CheckpointPlugin(BaseSGDPlugin[BaseSGDTemplate]): """ def __init__( - self, - storage: CheckpointStorage, - map_location: Optional[Union[str, - torch.device, - Dict[str, str]]] = None): + self, + storage: CheckpointStorage, + map_location: Optional[Union[str, torch.device, Dict[str, str]]] = None, + ): """ Creates an instance of the checkpoint plugin. @@ -156,12 +153,12 @@ def __init__( but it is needed to properly manage things in Avalanche). Defaults to None, which means that no mapping will take place. """ - raise ValueError("Please use `save_checkpoint` and " - "`maybe_load_checkpoint` " - "instead.") + raise ValueError( + "Please use `save_checkpoint` and " "`maybe_load_checkpoint` " "instead." 
+ ) __all__ = [ - 'CheckpointStorage', - 'CheckpointPlugin', + "CheckpointStorage", + "CheckpointPlugin", ] diff --git a/avalanche/training/plugins/checkpoint_common_recipes.py b/avalanche/training/plugins/checkpoint_common_recipes.py index 9872029f7..603f3678b 100644 --- a/avalanche/training/plugins/checkpoint_common_recipes.py +++ b/avalanche/training/plugins/checkpoint_common_recipes.py @@ -19,14 +19,15 @@ def _get_checkpoint_device_map(): def _recreate_pytorch_device(*args): - device_map = globals().get('CHECKPOINT_DEVICE_MAP', None) + device_map = globals().get("CHECKPOINT_DEVICE_MAP", None) device_object = torch.device(*args) mapped_object = device_object if device_map is not None: mapped_object = torch.device( - device_map.get(str(device_object), str(device_object))) - print('Mapping', device_object, 'to', mapped_object) + device_map.get(str(device_object), str(device_object)) + ) + print("Mapping", device_object, "to", mapped_object) return mapped_object @@ -39,14 +40,12 @@ def _save_pytorch_device(pickler, obj: torch.device): else: reduction = (obj.type,) - pickler.save_reduce( - _recreate_pytorch_device, - reduction, obj=obj) + pickler.save_reduce(_recreate_pytorch_device, reduction, obj=obj) __all__ = [ - '_set_checkpoint_device_map', - '_get_checkpoint_device_map', - '_recreate_pytorch_device', - '_save_pytorch_device' + "_set_checkpoint_device_map", + "_get_checkpoint_device_map", + "_recreate_pytorch_device", + "_save_pytorch_device", ] diff --git a/avalanche/training/plugins/clock.py b/avalanche/training/plugins/clock.py index c04bf05ac..b35ad0ba3 100644 --- a/avalanche/training/plugins/clock.py +++ b/avalanche/training/plugins/clock.py @@ -63,6 +63,4 @@ def after_eval_iteration(self, strategy, **kwargs): self.total_iterations += 1 -__all__ = [ - 'Clock' -] +__all__ = ["Clock"] diff --git a/avalanche/training/plugins/cope.py b/avalanche/training/plugins/cope.py index e30666b00..f840cd947 100644 --- a/avalanche/training/plugins/cope.py +++ b/avalanche/training/plugins/cope.py @@ -58,7 +58,7 @@ def __init__( # Operational memory: Prototypical memory # Scales with nb classes * feature size - self.p_mem: Dict[int, Tensor] = {} + self.p_mem: Dict[int, Tensor] = {} self.p_size = p_size # Prototype size determined on runtime self.tmp_p_mem = {} # Intermediate to process batch for multiple times self.alpha = alpha @@ -93,9 +93,7 @@ def before_training(self, strategy, **kwargs): self.initialized = True - def before_training_exp( - self, strategy, num_workers=0, shuffle=True, **kwargs - ): + def before_training_exp(self, strategy, num_workers=0, shuffle=True, **kwargs): """ Random retrieval from a class-balanced memory. Dataloader builds batches containing examples from both memories and @@ -170,15 +168,10 @@ def _update_running_prototypes(self, strategy): c = y_unique[idx].item() idxs = torch.nonzero(strategy.mb_y == c).squeeze(1) p_tmp_batch = ( - strategy.mb_output[idxs] - .sum(dim=0) - .unsqueeze(0) - .to(strategy.device) + strategy.mb_output[idxs].sum(dim=0).unsqueeze(0).to(strategy.device) ) - p_init, cnt_init = ( - self.tmp_p_mem[c] if c in self.tmp_p_mem else (0, 0) - ) + p_init, cnt_init = self.tmp_p_mem[c] if c in self.tmp_p_mem else (0, 0) self.tmp_p_mem[c] = (p_init + p_tmp_batch, cnt_init + len(idxs)) def after_training_exp(self, strategy, **kwargs): @@ -284,9 +277,7 @@ class c vs other-classes k. 
# All prototypes p_y = torch.tensor([c for c in self.p_mem.keys()]).to(x.device).detach() - p_x = ( - torch.cat([self.p_mem[c.item()] for c in p_y]).to(x.device).detach() - ) + p_x = torch.cat([self.p_mem[c.item()] for c in p_y]).to(x.device).detach() for label_idx in range(y_unique.size(0)): # Per-class operation c = y_unique[label_idx] diff --git a/avalanche/training/plugins/cwr_star.py b/avalanche/training/plugins/cwr_star.py index a65833032..08809663e 100644 --- a/avalanche/training/plugins/cwr_star.py +++ b/avalanche/training/plugins/cwr_star.py @@ -58,9 +58,7 @@ def before_training_exp(self, strategy, **kwargs): data = strategy.experience.dataset self.model.cur_j = examples_per_class(data.targets) self.cur_class = [ - cls - for cls in set(self.model.cur_j.keys()) - if self.model.cur_j[cls] > 0 + cls for cls in set(self.model.cur_j.keys()) if self.model.cur_j[cls] > 0 ] self.reset_weights(self.cur_class) @@ -70,8 +68,7 @@ def consolidate_weights(self): with torch.no_grad(): cwr_layer = self.get_cwr_layer() - assert cwr_layer is not None, \ - 'Could not find the CWR layer.' + assert cwr_layer is not None, "Could not find the CWR layer." # calculate the average of the current classes globavg = np.average( cwr_layer.weight.detach().cpu().numpy()[self.cur_class] @@ -83,9 +80,7 @@ def consolidate_weights(self): new_w = w - globavg # if the class has been already seen if c in self.model.saved_weights.keys(): - wpast_j = np.sqrt( - self.model.past_j[c] / self.model.cur_j[c] - ) + wpast_j = np.sqrt(self.model.past_j[c] / self.model.cur_j[c]) # consolidation self.model.saved_weights[c] = ( self.model.saved_weights[c] * wpast_j + new_w @@ -101,20 +96,16 @@ def set_consolidate_weights(self): with torch.no_grad(): cwr_layer = self.get_cwr_layer() - assert cwr_layer is not None, \ - 'Could not find the CWR layer.' - + assert cwr_layer is not None, "Could not find the CWR layer." + for c, w in self.model.saved_weights.items(): - cwr_layer.weight[c].copy_( - torch.from_numpy(self.model.saved_weights[c]) - ) + cwr_layer.weight[c].copy_(torch.from_numpy(self.model.saved_weights[c])) def reset_weights(self, cur_clas): """reset weights""" with torch.no_grad(): cwr_layer = self.get_cwr_layer() - assert cwr_layer is not None, \ - 'Could not find the CWR layer.' + assert cwr_layer is not None, "Could not find the CWR layer." 
cwr_layer.weight.fill_(0.0) for c, w in self.model.saved_weights.items(): diff --git a/avalanche/training/plugins/early_stopping.py b/avalanche/training/plugins/early_stopping.py index 9e11a6a42..bb71681f7 100644 --- a/avalanche/training/plugins/early_stopping.py +++ b/avalanche/training/plugins/early_stopping.py @@ -67,9 +67,7 @@ def __init__( self.margin = margin self.metric_name = metric_name - self.metric_key = ( - f"{self.metric_name}/eval_phase/" f"{self.val_stream_name}" - ) + self.metric_key = f"{self.metric_name}/eval_phase/" f"{self.val_stream_name}" if mode not in ("max", "min"): raise ValueError(f'Mode must be "max" or "min", got {mode}.') diff --git a/avalanche/training/plugins/evaluation.py b/avalanche/training/plugins/evaluation.py index baa51ad76..b151c6a73 100644 --- a/avalanche/training/plugins/evaluation.py +++ b/avalanche/training/plugins/evaluation.py @@ -48,10 +48,13 @@ class EvaluationPlugin: def __init__( self, *metrics: Union["PluginMetric", Sequence["PluginMetric"]], - loggers: Optional[Union[ - "BaseLogger", - Sequence["BaseLogger"], - Callable[[], Sequence["BaseLogger"]]]] = None, + loggers: Optional[ + Union[ + "BaseLogger", + Sequence["BaseLogger"], + Callable[[], Sequence["BaseLogger"]], + ] + ] = None, collect_all=True, strict_checks=False ): @@ -104,7 +107,7 @@ def __init__( self.all_metric_results = defaultdict(_init_metrics_list_lambda) else: self.all_metric_results = dict() - + # Dictionary of last values emitted. Dictionary key # is the full metric name, while dictionary value is # metric value. @@ -217,6 +220,7 @@ def __getattribute__(self, item): # method is a callback. Forward to metrics. def fun(strat, **kwargs): return self._update_metrics_and_loggers(strat, item) + return fun raise @@ -227,7 +231,6 @@ def before_eval(self, strategy: "SupervisedTemplate", **kwargs): "evaluation stream." 
) if self.strict_checks: - curr_stream = next(iter(strategy.current_eval_stream)).origin_stream benchmark = curr_stream[0].origin_stream.benchmark full_stream = benchmark.streams[curr_stream.name] @@ -245,15 +248,10 @@ def default_loggers() -> Sequence["BaseLogger"]: def default_evaluator() -> EvaluationPlugin: return EvaluationPlugin( - accuracy_metrics( - minibatch=False, epoch=True, experience=True, stream=True - ), + accuracy_metrics(minibatch=False, epoch=True, experience=True, stream=True), loss_metrics(minibatch=False, epoch=True, experience=True, stream=True), loggers=default_loggers, ) -__all__ = [ - "EvaluationPlugin", - "default_evaluator" -] +__all__ = ["EvaluationPlugin", "default_evaluator"] diff --git a/avalanche/training/plugins/ewc.py b/avalanche/training/plugins/ewc.py index 6978c9167..f3c2f05a5 100644 --- a/avalanche/training/plugins/ewc.py +++ b/avalanche/training/plugins/ewc.py @@ -8,8 +8,7 @@ from avalanche.models.utils import avalanche_forward from avalanche.training.plugins.strategy_plugin import SupervisedPlugin -from avalanche.training.utils import copy_params_dict, zerolike_params_dict, \ - ParamData +from avalanche.training.utils import copy_params_dict, zerolike_params_dict, ParamData class EWCPlugin(SupervisedPlugin): @@ -88,10 +87,10 @@ def before_backward(self, strategy, **kwargs): saved_param = self.saved_params[experience][k] imp = self.importances[experience][k] new_shape = cur_param.shape - penalty += (imp.expand(new_shape) * - (cur_param - - saved_param.expand(new_shape)) - .pow(2)).sum() + penalty += ( + imp.expand(new_shape) + * (cur_param - saved_param.expand(new_shape)).pow(2) + ).sum() elif self.mode == "online": # may need importance and param expansion prev_exp = exp_counter - 1 for k, cur_param in strategy.model.named_parameters(): @@ -101,9 +100,10 @@ def before_backward(self, strategy, **kwargs): saved_param = self.saved_params[prev_exp][k] imp = self.importances[prev_exp][k] new_shape = cur_param.shape - penalty += (imp.expand(new_shape) * - (cur_param - saved_param.expand(new_shape)) - .pow(2)).sum() + penalty += ( + imp.expand(new_shape) + * (cur_param - saved_param.expand(new_shape)).pow(2) + ).sum() else: raise ValueError("Wrong EWC mode.") @@ -152,12 +152,8 @@ def compute_importances( # list of list importances = zerolike_params_dict(model) - collate_fn = ( - dataset.collate_fn if hasattr(dataset, "collate_fn") else None - ) - dataloader = DataLoader( - dataset, batch_size=batch_size, collate_fn=collate_fn - ) + collate_fn = dataset.collate_fn if hasattr(dataset, "collate_fn") else None + dataloader = DataLoader(dataset, batch_size=batch_size, collate_fn=collate_fn) for i, batch in enumerate(dataloader): # get only input, target and task_id from the batch x, y, task_labels = batch[0], batch[1], batch[-1] @@ -194,7 +190,7 @@ def update_importances(self, importances, t: int): self.importances[t] = importances elif self.mode == "online": for (k1, old_imp), (k2, curr_imp) in itertools.zip_longest( - self.importances[t-1].items(), + self.importances[t - 1].items(), importances.items(), fillvalue=(None, None), ): @@ -212,10 +208,12 @@ def update_importances(self, importances, t: int): # manage expansion of existing layers self.importances[t][k1] = ParamData( - f'imp_{k1}', curr_imp.shape, - init_tensor=self.decay_factor * old_imp.expand( - curr_imp.shape) + curr_imp.data, - device=curr_imp.device) + f"imp_{k1}", + curr_imp.shape, + init_tensor=self.decay_factor * old_imp.expand(curr_imp.shape) + + curr_imp.data, + device=curr_imp.device, + ) # 
clear previous parameter importances if t > 0 and (not self.keep_importance_data): diff --git a/avalanche/training/plugins/from_scratch_training.py b/avalanche/training/plugins/from_scratch_training.py index 5a0fda1f9..ccaf11d17 100644 --- a/avalanche/training/plugins/from_scratch_training.py +++ b/avalanche/training/plugins/from_scratch_training.py @@ -9,7 +9,7 @@ class FromScratchTrainingPlugin(BaseSGDPlugin): - """ From Scratch Training Plugin. + """From Scratch Training Plugin. This plugin resets the strategy's model weights and optimizer state after each experience. It expects the strategy to have a single model and @@ -17,10 +17,7 @@ class FromScratchTrainingPlugin(BaseSGDPlugin): "from-scratch training" baselines. """ - def __init__( - self, - reset_optimizer: bool = True - ): + def __init__(self, reset_optimizer: bool = True): """ Creates a `FromScratchTrainingPlugin` instance. @@ -46,7 +43,7 @@ def before_training_exp(self, strategy: BaseSGDTemplate, *args, **kwargs): init_weights = self.initial_weights assert init_weights is not None - for (n, p) in strategy.model.named_parameters(): + for n, p in strategy.model.named_parameters(): if n in init_weights.keys(): if p.data.shape == init_weights[n].data.shape: p.data.copy_(init_weights[n].data) @@ -59,6 +56,4 @@ def before_training_exp(self, strategy: BaseSGDTemplate, *args, **kwargs): reset_optimizer(strategy.optimizer, strategy.model) -__all__ = [ - 'FromScratchTrainingPlugin' -] +__all__ = ["FromScratchTrainingPlugin"] diff --git a/avalanche/training/plugins/gdumb.py b/avalanche/training/plugins/gdumb.py index 11f7e6fd5..fbd9f8793 100644 --- a/avalanche/training/plugins/gdumb.py +++ b/avalanche/training/plugins/gdumb.py @@ -21,9 +21,7 @@ class GDumbPlugin(SupervisedPlugin, supports_distributed=True): https://www.robots.ox.ac.uk/~tvg/publications/2020/gdumb.pdf """ - def __init__( - self, - mem_size: int = 200): + def __init__(self, mem_size: int = 200): super().__init__() self.mem_size = mem_size @@ -34,9 +32,7 @@ def __init__( ) self.init_model = None - def before_train_dataset_adaptation( - self, strategy: "SupervisedTemplate", **kwargs - ): + def before_train_dataset_adaptation(self, strategy: "SupervisedTemplate", **kwargs): """Reset model.""" if self.init_model is None: self.init_model = copy.deepcopy(strategy.model) @@ -44,18 +40,12 @@ def before_train_dataset_adaptation( strategy.model = copy.deepcopy(self.init_model) strategy.model_adaptation(self.init_model) - def before_eval_dataset_adaptation( - self, strategy: "SupervisedTemplate", **kwargs - ): + def before_eval_dataset_adaptation(self, strategy: "SupervisedTemplate", **kwargs): strategy.model_adaptation(self.init_model) - def after_train_dataset_adaptation( - self, strategy: "SupervisedTemplate", **kwargs - ): + def after_train_dataset_adaptation(self, strategy: "SupervisedTemplate", **kwargs): self.storage_policy.update(strategy, **kwargs) strategy.adapted_dataset = self.storage_policy.buffer -__all__ = [ - 'GDumbPlugin' -] +__all__ = ["GDumbPlugin"] diff --git a/avalanche/training/plugins/gem.py b/avalanche/training/plugins/gem.py index 622281856..f0b01128f 100644 --- a/avalanche/training/plugins/gem.py +++ b/avalanche/training/plugins/gem.py @@ -52,9 +52,7 @@ def before_training_iteration(self, strategy, **kwargs): strategy.optimizer.zero_grad() xref = self.memory_x[t].to(strategy.device) yref = self.memory_y[t].to(strategy.device) - out = avalanche_forward( - strategy.model, xref, self.memory_tid[t] - ) + out = avalanche_forward(strategy.model, xref, 
self.memory_tid[t]) loss = strategy._criterion(out, yref) loss.backward() @@ -100,9 +98,7 @@ def after_backward(self, strategy, **kwargs): for p in strategy.model.parameters(): curr_pars = p.numel() if p.grad is not None: - p.grad.copy_( - v_star[num_pars : num_pars + curr_pars].view(p.size()) - ) + p.grad.copy_(v_star[num_pars : num_pars + curr_pars].view(p.size())) num_pars += curr_pars assert num_pars == v_star.numel(), "Error in projecting gradient" @@ -123,12 +119,9 @@ def update_memory(self, dataset, t, batch_size): """ Update replay memory with patterns from current experience. """ - collate_fn = ( - dataset.collate_fn if hasattr(dataset, "collate_fn") else None - ) + collate_fn = dataset.collate_fn if hasattr(dataset, "collate_fn") else None dataloader = DataLoader( - dataset, batch_size=batch_size, collate_fn=collate_fn, - shuffle=True + dataset, batch_size=batch_size, collate_fn=collate_fn, shuffle=True ) tot = 0 for mbatch in dataloader: @@ -141,9 +134,7 @@ def update_memory(self, dataset, t, batch_size): else: self.memory_x[t] = torch.cat((self.memory_x[t], x), dim=0) self.memory_y[t] = torch.cat((self.memory_y[t], y), dim=0) - self.memory_tid[t] = torch.cat( - (self.memory_tid[t], tid), dim=0 - ) + self.memory_tid[t] = torch.cat((self.memory_tid[t], tid), dim=0) else: diff = self.patterns_per_experience - tot @@ -152,12 +143,8 @@ def update_memory(self, dataset, t, batch_size): self.memory_y[t] = y[:diff].clone() self.memory_tid[t] = tid[:diff].clone() else: - self.memory_x[t] = torch.cat( - (self.memory_x[t], x[:diff]), dim=0 - ) - self.memory_y[t] = torch.cat( - (self.memory_y[t], y[:diff]), dim=0 - ) + self.memory_x[t] = torch.cat((self.memory_x[t], x[:diff]), dim=0) + self.memory_y[t] = torch.cat((self.memory_y[t], y[:diff]), dim=0) self.memory_tid[t] = torch.cat( (self.memory_tid[t], tid[:diff]), dim=0 ) diff --git a/avalanche/training/plugins/gss_greedy.py b/avalanche/training/plugins/gss_greedy.py index a796ca62b..0e4fff0a6 100644 --- a/avalanche/training/plugins/gss_greedy.py +++ b/avalanche/training/plugins/gss_greedy.py @@ -94,9 +94,7 @@ def get_rand_mem_grads(self, strategy, grad_dims, gss_batch_size): grad_dims: gradient dimensions Returns: gradient from memory subsets """ - temp_gss_batch_size = min( - gss_batch_size, self.ext_mem_list_current_index - ) + temp_gss_batch_size = min(gss_batch_size, self.ext_mem_list_current_index) num_mem_subs = min( self.mem_strength, self.ext_mem_list_current_index // gss_batch_size ) @@ -111,8 +109,7 @@ def get_rand_mem_grads(self, strategy, grad_dims, gss_batch_size): ) for i in range(num_mem_subs): random_batch_inds = shuffeled_inds[ - i * temp_gss_batch_size : i * temp_gss_batch_size - + temp_gss_batch_size + i * temp_gss_batch_size : i * temp_gss_batch_size + temp_gss_batch_size ] batch_x = self.ext_mem_list_x[random_batch_inds].to(strategy.device) batch_y = self.ext_mem_list_y[random_batch_inds].to(strategy.device) @@ -152,9 +149,7 @@ def get_each_batch_sample_sim( cosine_sim[i] = max(self.cosine_similarity(mem_grads, this_grad)) return cosine_sim - def before_training_exp( - self, strategy, num_workers=0, shuffle=True, **kwargs - ): + def before_training_exp(self, strategy, num_workers=0, shuffle=True, **kwargs): """ Dataloader to build batches containing examples from both memories and the training dataset @@ -169,8 +164,8 @@ def before_training_exp( memory = list(zip(temp_x_tensors, temp_y_tensors)) memory_dataset = make_classification_dataset( - memory, - targets=temp_y_tensors.tolist()) + memory, 
targets=temp_y_tensors.tolist() + ) strategy.dataloader = ReplayDataLoader( strategy.adapted_dataset, @@ -194,11 +189,8 @@ def after_forward(self, strategy, num_workers=0, shuffle=True, **kwargs): for param in strategy.model.parameters(): grad_dims.append(param.data.numel()) - place_left = ( - self.ext_mem_list_x.size(0) - self.ext_mem_list_current_index - ) + place_left = self.ext_mem_list_x.size(0) - self.ext_mem_list_current_index if place_left <= 0: # buffer full - batch_sim, mem_grads = self.get_batch_sim( strategy, grad_dims, @@ -228,14 +220,12 @@ def after_forward(self, strategy, num_workers=0, shuffle=True, **kwargs): # normalize to [0,1] scaled_batch_item_sim = ((batch_item_sim + 1) / 2).unsqueeze(1) - buffer_repl_batch_sim = ( - (self.buffer_score[index] + 1) / 2 - ).unsqueeze(1) + buffer_repl_batch_sim = ((self.buffer_score[index] + 1) / 2).unsqueeze( + 1 + ) # draw an event to decide on replacement decision outcome = torch.multinomial( - torch.cat( - (scaled_batch_item_sim, buffer_repl_batch_sim), dim=1 - ), + torch.cat((scaled_batch_item_sim, buffer_repl_batch_sim), dim=1), 1, replacement=False, ) @@ -260,9 +250,7 @@ def after_forward(self, strategy, num_workers=0, shuffle=True, **kwargs): # first buffer insertion if self.ext_mem_list_current_index == 0: - batch_sample_memory_cos = ( - torch.zeros(updated_mb_x.size(0)) + 0.1 - ) + batch_sample_memory_cos = torch.zeros(updated_mb_x.size(0)) + 0.1 else: # draw random samples from buffer mem_grads = self.get_rand_mem_grads( @@ -277,12 +265,8 @@ def after_forward(self, strategy, num_workers=0, shuffle=True, **kwargs): ) curr_idx = self.ext_mem_list_current_index - self.ext_mem_list_x[curr_idx : curr_idx + offset].data.copy_( - updated_mb_x - ) - self.ext_mem_list_y[curr_idx : curr_idx + offset].data.copy_( - updated_mb_y - ) + self.ext_mem_list_x[curr_idx : curr_idx + offset].data.copy_(updated_mb_x) + self.ext_mem_list_y[curr_idx : curr_idx + offset].data.copy_(updated_mb_y) self.buffer_score[curr_idx : curr_idx + offset].data.copy_( batch_sample_memory_cos ) diff --git a/avalanche/training/plugins/lr_scheduling.py b/avalanche/training/plugins/lr_scheduling.py index e288ff915..cc194dd8b 100644 --- a/avalanche/training/plugins/lr_scheduling.py +++ b/avalanche/training/plugins/lr_scheduling.py @@ -153,7 +153,6 @@ def before_training_exp(self, strategy, *args, **kwargs): def after_eval(self, strategy: "SupervisedTemplate", **kwargs): if self.metric == "val_loss" and self._was_training: - if not self._executed_train_iteration: # The base strategy may run an evaluation pass on the # validation set before running the training loop. 
In that @@ -175,9 +174,7 @@ def after_eval(self, strategy: "SupervisedTemplate", **kwargs): self._just_validated = True - def after_training_iteration( - self, strategy: "SupervisedTemplate", **kwargs - ): + def after_training_iteration(self, strategy: "SupervisedTemplate", **kwargs): self._executed_train_iteration = True if self.metric == "train_loss": diff --git a/avalanche/training/plugins/lwf.py b/avalanche/training/plugins/lwf.py index ed5c5b8be..28ae33ecd 100644 --- a/avalanche/training/plugins/lwf.py +++ b/avalanche/training/plugins/lwf.py @@ -24,9 +24,7 @@ def before_backward(self, strategy, **kwargs): Add distillation loss """ - strategy.loss += self.lwf( - strategy.mb_x, strategy.mb_output, strategy.model - ) + strategy.loss += self.lwf(strategy.mb_x, strategy.mb_output, strategy.model) def after_training_exp(self, strategy, **kwargs): """ diff --git a/avalanche/training/plugins/mas.py b/avalanche/training/plugins/mas.py index 1dbee7d19..61068008c 100644 --- a/avalanche/training/plugins/mas.py +++ b/avalanche/training/plugins/mas.py @@ -6,8 +6,7 @@ from avalanche.models.utils import avalanche_forward from avalanche.training.plugins.strategy_plugin import SupervisedPlugin -from avalanche.training.utils import copy_params_dict, zerolike_params_dict, \ - ParamData +from avalanche.training.utils import copy_params_dict, zerolike_params_dict, ParamData class MASPlugin(SupervisedPlugin): @@ -29,9 +28,7 @@ class MASPlugin(SupervisedPlugin): https://github.com/mmasana/FACIL/blob/master/src/approach/mas.py """ - def __init__( - self, lambda_reg: float = 1.0, alpha: float = 0.5, verbose=False - ): + def __init__(self, lambda_reg: float = 1.0, alpha: float = 0.5, verbose=False): """ :param lambda_reg: hyperparameter weighting the penalty term in the loss. 
@@ -57,7 +54,6 @@ def __init__( self.verbose = verbose def _get_importance(self, strategy): - # Initialize importance matrix importance = dict(zerolike_params_dict(strategy.model)) @@ -139,8 +135,8 @@ def before_backward(self, strategy, **kwargs): for name, param in strategy.model.named_parameters(): if name in self.importance.keys(): loss_reg += torch.sum( - self.importance[name].expand(param.shape) * - (param - self.params[name].expand(param.shape)).pow(2) + self.importance[name].expand(param.shape) + * (param - self.params[name].expand(param.shape)).pow(2) ) # Update loss @@ -166,9 +162,11 @@ def after_training_exp(self, strategy, **kwargs): new_shape = curr_importance[name].data.shape if name not in self.importance: self.importance[name] = ParamData( - name, curr_importance[name].shape, + name, + curr_importance[name].shape, device=curr_importance[name].device, - init_tensor=curr_importance[name].data.clone()) + init_tensor=curr_importance[name].data.clone(), + ) else: self.importance[name].data = ( self.alpha * self.importance[name].expand(new_shape) diff --git a/avalanche/training/plugins/mir.py b/avalanche/training/plugins/mir.py index 2c35de2b9..ce4172eba 100644 --- a/avalanche/training/plugins/mir.py +++ b/avalanche/training/plugins/mir.py @@ -90,11 +90,9 @@ def before_backward(self, strategy, **kwargs): with torch.no_grad(): _old_red_strategy = strategy._criterion.reduction strategy._criterion.reduction = "none" - old_output = avalanche_forward(strategy.model, - samples_x, samples_tid) + old_output = avalanche_forward(strategy.model, samples_x, samples_tid) old_loss = strategy._criterion(old_output, samples_y) - new_output = avalanche_forward(model_updated, - samples_x, samples_tid) + new_output = avalanche_forward(model_updated, samples_x, samples_tid) new_loss = strategy._criterion(new_output, samples_y) loss_diff = new_loss - old_loss chosen_samples_indexes = torch.argsort(loss_diff)[ @@ -136,6 +134,4 @@ def after_training_exp(self, strategy: "SupervisedTemplate", **kwargs): self.replay_loader = None -__all__ = [ - 'MIRPlugin' -] +__all__ = ["MIRPlugin"] diff --git a/avalanche/training/plugins/replay.py b/avalanche/training/plugins/replay.py index ecefd746f..832a26df6 100644 --- a/avalanche/training/plugins/replay.py +++ b/avalanche/training/plugins/replay.py @@ -50,7 +50,7 @@ def __init__( batch_size: Optional[int] = None, batch_size_mem: Optional[int] = None, task_balanced_dataloader: bool = False, - storage_policy: Optional["ExemplarsBuffer"] = None + storage_policy: Optional["ExemplarsBuffer"] = None, ): super().__init__() self.mem_size = mem_size diff --git a/avalanche/training/plugins/rwalk.py b/avalanche/training/plugins/rwalk.py index bc87e88c0..e6b56dddc 100644 --- a/avalanche/training/plugins/rwalk.py +++ b/avalanche/training/plugins/rwalk.py @@ -5,8 +5,7 @@ import torch.nn.functional as F from avalanche.training.plugins.strategy_plugin import SupervisedPlugin -from avalanche.training.utils import copy_params_dict, zerolike_params_dict, \ - ParamData +from avalanche.training.utils import copy_params_dict, zerolike_params_dict, ParamData from avalanche.models.utils import avalanche_forward @@ -111,8 +110,9 @@ def _update_loss(self, strategy): p_grad = self.iter_grad[k] shape = new_p.shape self.checkpoint_loss[k].expand(shape) - self.checkpoint_loss[k].data -= p_grad.expand( - shape) * (new_p - old_p.expand(shape)) + self.checkpoint_loss[k].data -= p_grad.expand(shape) * ( + new_p - old_p.expand(shape) + ) # Update parameter importance (EWC++, Eq. 
6 of the RWalk paper) def _update_importance(self, strategy): @@ -129,15 +129,16 @@ def _update_importance(self, strategy): for k, new_imp in importance.items(): if k not in old_importance: self.iter_importance[k] = ParamData( - k, device=new_imp.device, - init_tensor=new_imp.data) + k, device=new_imp.device, init_tensor=new_imp.data + ) else: old_imp = old_importance[k] self.iter_importance[k] = ParamData( - k, device=new_imp.device, - init_tensor=self.ewc_alpha * new_imp.data + ( - 1 - self.ewc_alpha) * old_imp.expand( - new_imp.shape)) + k, + device=new_imp.device, + init_tensor=self.ewc_alpha * new_imp.data + + (1 - self.ewc_alpha) * old_imp.expand(new_imp.shape), + ) # Add scores for a single delta_t (referred to as s_t1^t2 in the paper) @torch.no_grad() @@ -154,9 +155,9 @@ def _update_score(self, strategy): shape = new_p.shape eps = torch.finfo(loss.data.dtype).eps self.checkpoint_scores[k].expand(shape) - self.checkpoint_scores[k].data += loss.data / \ - (0.5 * imp.expand(shape) * (new_p - old_p.expand(shape)) - .pow(2) + eps) + self.checkpoint_scores[k].data += loss.data / ( + 0.5 * imp.expand(shape) * (new_p - old_p.expand(shape)).pow(2) + eps + ) # Initialize t_0 checkpoint information def before_training(self, strategy, *args, **kwargs): @@ -184,9 +185,10 @@ def before_backward(self, strategy, *args, **kwargs): continue penalty = self.exp_penalties[k] param_exp = self.exp_params[k] - ewc_loss += (penalty.expand(param.shape) * - (param - param_exp.expand(param.shape)) - .pow(2)).sum() + ewc_loss += ( + penalty.expand(param.shape) + * (param - param_exp.expand(param.shape)).pow(2) + ).sum() strategy.loss += self.ewc_lambda * ewc_loss @@ -217,9 +219,11 @@ def after_training_exp(self, strategy, *args, **kwargs): continue p_score = self.exp_scores[k] shape = p_cp_score.data.shape - exp_scores[k] = ParamData(k, device=p_score.device, - init_tensor=0.5 * (p_score.expand( - shape) + p_cp_score.data)) + exp_scores[k] = ParamData( + k, + device=p_score.device, + init_tensor=0.5 * (p_score.expand(shape) + p_cp_score.data), + ) self.exp_scores = exp_scores # Compute weight penalties once for all successive iterations @@ -229,10 +233,8 @@ def after_training_exp(self, strategy, *args, **kwargs): # Normalize terms in [0,1] interval, as suggested in the paper # (the importance is already > 0, while negative scores are relu-ed # out, hence we scale only the max-values of both terms) - max_score = max(map(lambda x: x.data.max(), - self.exp_scores.values())) - max_imp = max(map(lambda x: x.data.max(), - self.exp_importance.values())) + max_score = max(map(lambda x: x.data.max(), self.exp_scores.values())) + max_imp = max(map(lambda x: x.data.max(), self.exp_importance.values())) for k, score in self.exp_scores.items(): if k not in self.exp_scores: @@ -240,8 +242,10 @@ def after_training_exp(self, strategy, *args, **kwargs): imp = self.exp_importance[k] shape = imp.data.shape self.exp_penalties[k] = ParamData( - k, device=imp.device, - init_tensor=imp.data / max_imp + F.relu( - score.expand(shape)) / max_score) + k, + device=imp.device, + init_tensor=imp.data / max_imp + + F.relu(score.expand(shape)) / max_score, + ) self.checkpoint_scores = zerolike_params_dict(strategy.model) diff --git a/avalanche/training/plugins/synaptic_intelligence.py b/avalanche/training/plugins/synaptic_intelligence.py index fbaf8696e..170316244 100644 --- a/avalanche/training/plugins/synaptic_intelligence.py +++ b/avalanche/training/plugins/synaptic_intelligence.py @@ -128,17 +128,13 @@ def before_backward(self, strategy: 
"SupervisedTemplate", **kwargs): if syn_loss is not None: strategy.loss += syn_loss.to(strategy.device) - def before_training_iteration( - self, strategy: "SupervisedTemplate", **kwargs - ): + def before_training_iteration(self, strategy: "SupervisedTemplate", **kwargs): super().before_training_iteration(strategy, **kwargs) SynapticIntelligencePlugin.pre_update( strategy.model, self.syn_data, self.excluded_parameters ) - def after_training_iteration( - self, strategy: "SupervisedTemplate", **kwargs - ): + def after_training_iteration(self, strategy: "SupervisedTemplate", **kwargs): super().after_training_iteration(strategy, **kwargs) SynapticIntelligencePlugin.post_update( strategy.model, self.syn_data, self.excluded_parameters @@ -177,20 +173,25 @@ def create_syn_data( for param_name, param in params: if param_name not in ewc_data[0]: # new parameter - ewc_data[0][param_name] = ParamData( - param_name, param.flatten().shape) + ewc_data[0][param_name] = ParamData(param_name, param.flatten().shape) ewc_data[1][param_name] = ParamData( - f"imp_{param_name}", param.flatten().shape) + f"imp_{param_name}", param.flatten().shape + ) syn_data["old_theta"][param_name] = ParamData( - f"old_theta_{param_name}", param.flatten().shape) + f"old_theta_{param_name}", param.flatten().shape + ) syn_data["new_theta"][param_name] = ParamData( - f"new_theta_{param_name}", param.flatten().shape) + f"new_theta_{param_name}", param.flatten().shape + ) syn_data["grad"][param_name] = ParamData( - f"grad{param_name}", param.flatten().shape) + f"grad{param_name}", param.flatten().shape + ) syn_data["trajectory"][param_name] = ParamData( - f"trajectory_{param_name}", param.flatten().shape) + f"trajectory_{param_name}", param.flatten().shape + ) syn_data["cum_trajectory"][param_name] = ParamData( - f"cum_trajectory_{param_name}", param.flatten().shape) + f"cum_trajectory_{param_name}", param.flatten().shape + ) elif ewc_data[0][param_name].shape != param.shape: # parameter expansion ewc_data[0][param_name].expand(param.flatten().shape) @@ -198,10 +199,8 @@ def create_syn_data( syn_data["old_theta"][param_name].expand(param.flatten().shape) syn_data["new_theta"][param_name].expand(param.flatten().shape) syn_data["grad"][param_name].expand(param.flatten().shape) - syn_data["trajectory"][param_name]\ - .expand(param.flatten().shape) - syn_data["cum_trajectory"][param_name]\ - .expand(param.flatten().shape) + syn_data["trajectory"][param_name].expand(param.flatten().shape) + syn_data["cum_trajectory"][param_name].expand(param.flatten().shape) @staticmethod @torch.no_grad() @@ -250,9 +249,7 @@ def pre_update(model, syn_data: SynDataType, excluded_parameters: Set[str]): @staticmethod @torch.no_grad() - def post_update( - model, syn_data: SynDataType, excluded_parameters: Set[str] - ): + def post_update(model, syn_data: SynDataType, excluded_parameters: Set[str]): SynapticIntelligencePlugin.extract_weights( model, syn_data["new_theta"], excluded_parameters ) @@ -285,9 +282,9 @@ def compute_ewc_loss( weights = param.to(device).flatten() # Flat, not detached ewc_data0 = ewc_data[0][name].data.to(device) # Flat, detached ewc_data1 = ewc_data[1][name].data.to(device) # Flat, detached - syn_loss: Tensor = torch.dot( - ewc_data1, (weights - ewc_data0) ** 2 - ) * (lambd / 2) + syn_loss: Tensor = torch.dot(ewc_data1, (weights - ewc_data0) ** 2) * ( + lambd / 2 + ) if loss is None: loss = syn_loss @@ -336,8 +333,9 @@ def update_ewc_data( ewc_data[1][param_name].data = torch.clamp( ewc_data[1][param_name].data, max=clip_to ) - 
ewc_data[0][param_name].data = \ - syn_data["new_theta"][param_name].data.clone() + ewc_data[0][param_name].data = syn_data["new_theta"][ + param_name + ].data.clone() @staticmethod def explode_excluded_parameters(excluded: Set[str]) -> Set[str]: @@ -363,10 +361,8 @@ def not_excluded_parameters( ) -> Sequence[Tuple[str, Tensor]]: # Add wildcards ".*" to all excluded parameter names result: List[Tuple[str, Tensor]] = [] - excluded_parameters = ( - SynapticIntelligencePlugin.explode_excluded_parameters( - excluded_parameters - ) + excluded_parameters = SynapticIntelligencePlugin.explode_excluded_parameters( + excluded_parameters ) layers_params = get_layers_and_params(model) @@ -391,7 +387,6 @@ def not_excluded_parameters( def allowed_parameters( model: Module, excluded_parameters: Set[str] ) -> List[Tuple[str, Tensor]]: - allow_list = SynapticIntelligencePlugin.not_excluded_parameters( model, excluded_parameters ) diff --git a/avalanche/training/regularization.py b/avalanche/training/regularization.py index b5e3c464e..c8a9aefb4 100644 --- a/avalanche/training/regularization.py +++ b/avalanche/training/regularization.py @@ -10,8 +10,8 @@ def cross_entropy_with_oh_targets(outputs, targets, eps=1e-5): - """ Calculates cross-entropy with temperature scaling, - targets can also be soft targets but they must sum to 1 """ + """Calculates cross-entropy with temperature scaling, + targets can also be soft targets but they must sum to 1""" outputs = torch.nn.functional.softmax(outputs, dim=1) ce = -(targets * outputs.log()).sum(1) ce = ce.mean() @@ -66,10 +66,10 @@ def _distillation_loss(self, out, prev_out, active_units): au = list(active_units) # some people use the crossentropy instead of the KL - # They are equivalent. We compute - # kl_div(log_p_curr, p_prev) = p_prev * (log (p_prev / p_curr)) = + # They are equivalent. We compute + # kl_div(log_p_curr, p_prev) = p_prev * (log (p_prev / p_curr)) = # p_prev * log(p_prev) - p_prev * log(p_curr). - # Now, the first term is constant (we don't optimize the teacher), + # Now, the first term is constant (we don't optimize the teacher), # so optimizing the crossentropy and kl-div are equivalent. log_p = torch.log_softmax(out[:, au] / self.temperature, dim=1) q = torch.softmax(prev_out[:, au] / self.temperature, dim=1) @@ -146,7 +146,7 @@ def update(self, experience, model): class ACECriterion(RegularizationMethod): """ Asymetric cross-entropy (ACE) Criterion used in - "New Insights on Reducing Abrupt Representation + "New Insights on Reducing Abrupt Representation Change in Online Continual Learning" by Lucas Caccia et. al. 
https://openreview.net/forum?id=N8MaByOzUfb @@ -161,7 +161,7 @@ def __call__(self, out_in, target_in, out_buffer, target_buffer): oh_target_in = F.one_hot(target_in, num_classes=out_in.shape[1]) oh_target_in = oh_target_in[:, current_classes] loss_current = cross_entropy_with_oh_targets( - out_in[:, current_classes], oh_target_in + out_in[:, current_classes], oh_target_in ) return (loss_buffer + loss_current) / 2 diff --git a/avalanche/training/storage_policy.py b/avalanche/training/storage_policy.py index 01967177e..bc78b4432 100644 --- a/avalanche/training/storage_policy.py +++ b/avalanche/training/storage_policy.py @@ -123,7 +123,7 @@ def resize(self, strategy: Any, new_size: int): self._buffer_weights = self._buffer_weights[: self.max_size] -TGroupBuffer = TypeVar('TGroupBuffer', bound=ExemplarsBuffer) +TGroupBuffer = TypeVar("TGroupBuffer", bound=ExemplarsBuffer) class BalancedExemplarsBuffer(ExemplarsBuffer, Generic[TGroupBuffer]): @@ -153,13 +153,11 @@ def __init__( self.total_num_groups = total_num_groups if not self.adaptive_size: assert self.total_num_groups > 0, ( - "You need to specify `total_num_groups` if " - "`adaptive_size=True`." + "You need to specify `total_num_groups` if " "`adaptive_size=True`." ) else: assert self.total_num_groups is None, ( - "`total_num_groups` is not compatible with " - "`adaptive_size=False`." + "`total_num_groups` is not compatible with " "`adaptive_size=False`." ) self.buffer_groups: Dict[int, TGroupBuffer] = {} @@ -180,8 +178,7 @@ def get_group_lengths(self, num_groups): lengths[i] += 1 else: lengths = [ - self.max_size // self.total_num_groups - for _ in range(num_groups) + self.max_size // self.total_num_groups for _ in range(num_groups) ] return lengths @@ -214,9 +211,7 @@ def resize(self, strategy, new_size): buffer.resize(strategy, ll) -class ExperienceBalancedBuffer( - BalancedExemplarsBuffer[ - ReservoirSamplingBuffer]): +class ExperienceBalancedBuffer(BalancedExemplarsBuffer[ReservoirSamplingBuffer]): """Rehearsal buffer with samples balanced over experiences. The number of experiences can be fixed up front or adaptive, based on @@ -224,9 +219,7 @@ class ExperienceBalancedBuffer( divided over all the unique observed experiences so far. """ - def __init__( - self, max_size: int, adaptive_size: bool = True, num_experiences=None - ): + def __init__(self, max_size: int, adaptive_size: bool = True, num_experiences=None): """ :param max_size: max number of total input samples in the replay memory. @@ -277,7 +270,6 @@ def __init__( of classes to divide capacity over. 
""" if not adaptive_size: - assert total_num_classes is not None and ( total_num_classes > 0 ), """When fixed exp mem size, total_num_classes should be > 0.""" @@ -293,14 +285,12 @@ def update(self, strategy: "BaseSGDTemplate", **kwargs): self.update_from_dataset(strategy.experience.dataset, strategy) def update_from_dataset( - self, - new_data: AvalancheDataset, - strategy: Optional["BaseSGDTemplate"] = None + self, new_data: AvalancheDataset, strategy: Optional["BaseSGDTemplate"] = None ): if len(new_data) == 0: return - - targets = getattr(new_data, 'targets', None) + + targets = getattr(new_data, "targets", None) assert targets is not None # Get sample idxs per class @@ -339,9 +329,7 @@ def update_from_dataset( # resize buffers for class_id, class_buf in self.buffer_groups.items(): - self.buffer_groups[class_id].resize( - strategy, class_to_len[class_id] - ) + self.buffer_groups[class_id].resize(strategy, class_to_len[class_id]) class ParametricBuffer(BalancedExemplarsBuffer): @@ -393,20 +381,17 @@ def update(self, strategy: "SupervisedTemplate", **kwargs): old_buffer_g.update_from_dataset(strategy, new_data_g) old_buffer_g.resize(strategy, ll) else: - new_buffer = _ParametricSingleBuffer( - ll, self.selection_strategy - ) + new_buffer = _ParametricSingleBuffer(ll, self.selection_strategy) new_buffer.update_from_dataset(strategy, new_data_g) self.buffer_groups[group_id] = new_buffer # resize buffers for group_id, class_buf in self.buffer_groups.items(): - self.buffer_groups[group_id].resize( - strategy, group_to_len[group_id] - ) + self.buffer_groups[group_id].resize(strategy, group_to_len[group_id]) - def _make_groups(self, strategy, data: AvalancheDataset) -> \ - Dict[int, AvalancheDataset]: + def _make_groups( + self, strategy, data: AvalancheDataset + ) -> Dict[int, AvalancheDataset]: """Split the data by group according to `self.groupby`.""" if self.groupby is None: return {0: data} @@ -419,11 +404,10 @@ def _make_groups(self, strategy, data: AvalancheDataset) -> \ else: assert False, "Invalid groupby key. Should never get here." 
- def _split_by_class(self, data: AvalancheDataset) -> \ - Dict[int, AvalancheDataset]: + def _split_by_class(self, data: AvalancheDataset) -> Dict[int, AvalancheDataset]: # Get sample idxs per class cl_idxs: Dict[int, List[int]] = defaultdict(list) - targets = getattr(data, 'targets') + targets = getattr(data, "targets") for idx, target in enumerate(targets): target = int(target) cl_idxs[target].append(idx) @@ -434,15 +418,15 @@ def _split_by_class(self, data: AvalancheDataset) -> \ new_groups[c] = classification_subset(data, indices=c_idxs) return new_groups - def _split_by_experience(self, strategy, data: AvalancheDataset) -> \ - Dict[int, AvalancheDataset]: + def _split_by_experience( + self, strategy, data: AvalancheDataset + ) -> Dict[int, AvalancheDataset]: exp_id = strategy.clock.train_exp_counter + 1 return {exp_id: data} - def _split_by_task(self, data: AvalancheDataset) -> \ - Dict[int, AvalancheDataset]: + def _split_by_task(self, data: AvalancheDataset) -> Dict[int, AvalancheDataset]: new_groups = {} - task_set = getattr(data, 'task_set') + task_set = getattr(data, "task_set") for task_id in task_set: new_groups[task_id] = task_set[task_id] return new_groups @@ -565,9 +549,7 @@ def make_sorted_indices_from_features(self, features: Tensor) -> List[int]: for i in range(len(features)): # Compute distances with real center - candidate_centers = current_center * i / (i + 1) + features / ( - i + 1 - ) + candidate_centers = current_center * i / (i + 1) + features / (i + 1) distances = pow(candidate_centers - center, 2).sum(dim=1) distances[selected_indices] = inf diff --git a/avalanche/training/supervised/ar1.py b/avalanche/training/supervised/ar1.py index 8c706ad52..85a0955d2 100644 --- a/avalanche/training/supervised/ar1.py +++ b/avalanche/training/supervised/ar1.py @@ -62,8 +62,7 @@ def __init__( device: Union[str, torch.device] = "cpu", plugins: Optional[List[SupervisedPlugin]] = None, evaluator: Union[ - EvaluationPlugin, - Callable[[], EvaluationPlugin] + EvaluationPlugin, Callable[[], EvaluationPlugin] ] = default_evaluator, eval_every=-1, ): @@ -107,9 +106,10 @@ def __init__( learning experience. """ - assert train_epochs > 0, \ - 'train_epochs must be greater than zero so that latent ' + \ - 'activations can be stored in the replay buffer' + assert train_epochs > 0, ( + "train_epochs must be greater than zero so that latent " + + "activations can be stored in the replay buffer" + ) warnings.warn( "The AR1 strategy implementation is in an alpha stage " @@ -136,9 +136,7 @@ def __init__( # Synaptic Intelligence is not applied to the last fully # connected layer (and implicitly to "freeze below" ones. 
plugins.append( - SynapticIntelligencePlugin( - ewc_lambda, excluded_parameters=[fc_name] - ) + SynapticIntelligencePlugin(ewc_lambda, excluded_parameters=[fc_name]) ) self.cwr_plugin = CWRStarPlugin( @@ -146,9 +144,7 @@ def __init__( ) plugins.append(self.cwr_plugin) - optimizer = SGD( - model.parameters(), lr=lr, momentum=momentum, weight_decay=l2 - ) + optimizer = SGD(model.parameters(), lr=lr, momentum=momentum, weight_decay=l2) if criterion is None: criterion = CrossEntropyLoss() @@ -229,9 +225,7 @@ def _before_training_exp(self, **kwargs): for class_id, count in examples_per_class(self.rm[1]).items(): self.model.cur_j[class_id] += count self.cwr_plugin.cur_class = [ - cls - for cls in set(self.model.cur_j.keys()) - if self.model.cur_j[cls] > 0 + cls for cls in set(self.model.cur_j.keys()) if self.model.cur_j[cls] > 0 ] self.cwr_plugin.reset_weights(self.cwr_plugin.cur_class) @@ -284,9 +278,7 @@ def make_train_dataloader(self, num_workers=0, shuffle=True, **kwargs): # AR1 only supports SIT scenarios (no task labels). self.dataloader = DataLoader( - self.adapted_dataset, - collate_fn=collate_fn, - **other_dataloader_args + self.adapted_dataset, collate_fn=collate_fn, **other_dataloader_args ) def training_epoch(self, **kwargs): @@ -297,15 +289,11 @@ def training_epoch(self, **kwargs): self.optimizer.zero_grad() if self.clock.train_exp_counter > 0: lat_mb_x = self.rm[0][ - mb_it - * self.replay_mb_size : (mb_it + 1) - * self.replay_mb_size + mb_it * self.replay_mb_size : (mb_it + 1) * self.replay_mb_size ] lat_mb_x = lat_mb_x.to(self.device) lat_mb_y = self.rm[1][ - mb_it - * self.replay_mb_size : (mb_it + 1) - * self.replay_mb_size + mb_it * self.replay_mb_size : (mb_it + 1) * self.replay_mb_size ] lat_mb_y = lat_mb_y.to(self.device) lat_task_id = torch.zeros(lat_mb_y.shape[0]).to(self.device) @@ -383,6 +371,4 @@ def filter_bn_and_brn(param_def: LayerAndParameter): return not isinstance(param_def.layer, (_NormBase, BatchRenorm2D)) -__all__ = [ - 'AR1' -] +__all__ = ["AR1"] diff --git a/avalanche/training/supervised/cumulative.py b/avalanche/training/supervised/cumulative.py index 875526a37..26b6abfed 100644 --- a/avalanche/training/supervised/cumulative.py +++ b/avalanche/training/supervised/cumulative.py @@ -30,8 +30,7 @@ def __init__( device: Union[str, torch.device] = "cpu", plugins: Optional[List[SupervisedPlugin]] = None, evaluator: Union[ - EvaluationPlugin, - Callable[[], EvaluationPlugin] + EvaluationPlugin, Callable[[], EvaluationPlugin] ] = default_evaluator, eval_every=-1, ): @@ -78,7 +77,5 @@ def train_dataset_adaptation(self, **kwargs): if self.dataset is None: self.dataset = exp.dataset else: - self.dataset = concat_datasets( - [self.dataset, exp.dataset] - ) + self.dataset = concat_datasets([self.dataset, exp.dataset]) self.adapted_dataset = self.dataset diff --git a/avalanche/training/supervised/deep_slda.py b/avalanche/training/supervised/deep_slda.py index def1a49d3..83cfec37c 100644 --- a/avalanche/training/supervised/deep_slda.py +++ b/avalanche/training/supervised/deep_slda.py @@ -41,8 +41,7 @@ def __init__( device: Union[str, torch.device] = "cpu", plugins: Optional[Sequence["SupervisedPlugin"]] = None, evaluator: Union[ - EvaluationPlugin, - Callable[[], EvaluationPlugin] + EvaluationPlugin, Callable[[], EvaluationPlugin] ] = default_evaluator, eval_every=-1, ): @@ -190,8 +189,7 @@ def predict(self, X): # there have been updates to the model, compute Lambda self.Lambda = torch.pinverse( (1 - self.shrinkage_param) * self.Sigma - + self.shrinkage_param - * 
torch.eye(self.input_size, device=self.device) + + self.shrinkage_param * torch.eye(self.input_size, device=self.device) ) self.prev_num_updates = self.num_updates @@ -225,9 +223,7 @@ def fit_base(self, X, y): cov_estimator = OAS(assume_centered=True) cov_estimator.fit((X - self.muK[y]).cpu().numpy()) - self.Sigma = ( - torch.from_numpy(cov_estimator.covariance_).float().to(self.device) - ) + self.Sigma = torch.from_numpy(cov_estimator.covariance_).float().to(self.device) def save_model(self, save_path, save_name): """ diff --git a/avalanche/training/supervised/der.py b/avalanche/training/supervised/der.py index bddd4e86a..2bcdeb161 100644 --- a/avalanche/training/supervised/der.py +++ b/avalanche/training/supervised/der.py @@ -74,8 +74,9 @@ def __init__( :param transforms: transformation to be applied to the buffer """ if not adaptive_size: - assert total_num_classes is not None and total_num_classes > 0, \ - "When fixed exp mem size, total_num_classes should be > 0." + assert ( + total_num_classes is not None and total_num_classes > 0 + ), "When fixed exp mem size, total_num_classes should be > 0." super().__init__(max_size, adaptive_size, total_num_classes) self.adaptive_size = adaptive_size @@ -87,10 +88,7 @@ def update(self, strategy: "SupervisedTemplate", **kwargs): new_data: AvalancheDataset = strategy.experience.dataset logits = compute_dataset_logits( - new_data.eval(), - strategy.model, - strategy.train_mb_size, - strategy.device + new_data.eval(), strategy.model, strategy.train_mb_size, strategy.device ) new_data_with_logits = make_avalanche_dataset( new_data, @@ -100,7 +98,7 @@ def update(self, strategy: "SupervisedTemplate", **kwargs): ) # Get sample idxs per class cl_idxs: Dict[int, List[int]] = defaultdict(list) - targets: Sequence[SupportsInt] = getattr(new_data, 'targets') + targets: Sequence[SupportsInt] = getattr(new_data, "targets") for idx, target in enumerate(targets): # Conversion to int may fix issues when target # is a single-element torch.tensor @@ -136,8 +134,7 @@ def update(self, strategy: "SupervisedTemplate", **kwargs): # resize buffers for class_id, class_buf in self.buffer_groups.items(): - self.buffer_groups[class_id].resize(strategy, - class_to_len[class_id]) + self.buffer_groups[class_id].resize(strategy, class_to_len[class_id]) class DER(SupervisedTemplate): @@ -162,8 +159,7 @@ def __init__( device: Union[str, torch.device] = "cpu", plugins: Optional[List[SupervisedPlugin]] = None, evaluator: Union[ - EvaluationPlugin, - Callable[[], EvaluationPlugin] + EvaluationPlugin, Callable[[], EvaluationPlugin] ] = default_evaluator, eval_every=-1, peval_mode="epoch", @@ -285,21 +281,20 @@ def training_epoch(self, **kwargs): self._after_forward(**kwargs) if self.replay_loader is not None: - # DER Loss computation self.loss += F.cross_entropy( - self.mb_output[self.batch_size_mem:], - self.mb_y[self.batch_size_mem:], + self.mb_output[self.batch_size_mem :], + self.mb_y[self.batch_size_mem :], ) self.loss += self.alpha * F.mse_loss( - self.mb_output[:self.batch_size_mem], + self.mb_output[: self.batch_size_mem], self.batch_logits, ) self.loss += self.beta * F.cross_entropy( - self.mb_output[:self.batch_size_mem], - self.mb_y[:self.batch_size_mem], + self.mb_output[: self.batch_size_mem], + self.mb_y[: self.batch_size_mem], ) # They are a few difference compared to the autors impl: diff --git a/avalanche/training/supervised/er_ace.py b/avalanche/training/supervised/er_ace.py index 6e58dd383..25b90d370 100644 --- a/avalanche/training/supervised/er_ace.py +++ 
b/avalanche/training/supervised/er_ace.py @@ -43,8 +43,7 @@ def __init__( device: Union[str, torch.device] = "cpu", plugins: Optional[List[SupervisedPlugin]] = None, evaluator: Union[ - EvaluationPlugin, - Callable[[], EvaluationPlugin] + EvaluationPlugin, Callable[[], EvaluationPlugin] ] = default_evaluator, eval_every=-1, peval_mode="epoch", @@ -153,8 +152,10 @@ def _before_training_exp(self, **kwargs): # Update buffer before training exp so that we have current data in self.storage_policy.update(self, **kwargs) buffer = self.storage_policy.buffer - if len(buffer) >= self.batch_size_mem and \ - self.experience.current_experience > 0: + if ( + len(buffer) >= self.batch_size_mem + and self.experience.current_experience > 0 + ): self.replay_loader = cycle( torch.utils.data.DataLoader( buffer, diff --git a/avalanche/training/supervised/expert_gate.py b/avalanche/training/supervised/expert_gate.py index 4fac6d65f..223e488e9 100644 --- a/avalanche/training/supervised/expert_gate.py +++ b/avalanche/training/supervised/expert_gate.py @@ -19,15 +19,11 @@ from typing import Optional, List -from avalanche.models.expert_gate import (ExpertAutoencoder, - ExpertModel, - ExpertGate) +from avalanche.models.expert_gate import ExpertAutoencoder, ExpertModel, ExpertGate from avalanche.models.dynamic_optimizers import reset_optimizer from avalanche.training.supervised import AETraining from avalanche.training.templates import SupervisedTemplate -from avalanche.training.plugins import (SupervisedPlugin, - EvaluationPlugin, - LwFPlugin) +from avalanche.training.plugins import SupervisedPlugin, EvaluationPlugin, LwFPlugin from avalanche.training.plugins.evaluation import default_evaluator @@ -35,14 +31,14 @@ class ExpertGateStrategy(SupervisedTemplate): """Expert Gate strategy. New experts are trained and added to the model as tasks are learned sequentially. - Technique introduced in: - 'Aljundi, Rahaf, Punarjay Chakravarty, and Tinne Tuytelaars. - "Expert gate: Lifelong learning with a network of experts." - Proceedings of the IEEE Conference on + Technique introduced in: + 'Aljundi, Rahaf, Punarjay Chakravarty, and Tinne Tuytelaars. + "Expert gate: Lifelong learning with a network of experts." + Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition. 2017.' https://arxiv.org/abs/1611.06194 - To use this strategy you need to instantiate an ExpertGate model. + To use this strategy you need to instantiate an ExpertGate model. See the ExpertGate plugin for more details. """ @@ -86,16 +82,16 @@ def __init__( :param base_kwargs: any additional :param ae_train_mb_size: mini-batch size for training of the autoencoder :param ae_train_epochs: number of training epochs for the autoencoder - :param ae_lr: the learning rate for the autoencoder training + :param ae_lr: the learning rate for the autoencoder training using vanilla SGD - :param temp: the temperature hyperparameter when selecting the + :param temp: the temperature hyperparameter when selecting the expert during the forward method :class:`~avalanche.training.BaseTemplate` constructor arguments. """ # Check that the model has the correct architecture. assert isinstance( - model, - ExpertGate), "ExpertGateStrategy requires an ExpertGate model." + model, ExpertGate + ), "ExpertGateStrategy requires an ExpertGate model." expertgate = _ExpertGatePlugin() @@ -115,7 +111,8 @@ def __init__( "This strategy is currently in the alpha stage and we are still " "working to reproduce the original paper's results. 
You can find " "the code to reproduce the experiments at " - "github.com/continualAI/continual-learning-baselines") + "github.com/continualAI/continual-learning-baselines" + ) super().__init__( model=model, @@ -134,10 +131,10 @@ def __init__( class _ExpertGatePlugin(SupervisedPlugin): """The ExpertGate algorithm is a dynamic architecture algorithm. - For every new task, it trains an autoencoder to reconstruct input data - and then trains an AlexNet classifier. Prior to AlexNet training, - the algorithm searches through existing autoencoders, if there are any, - to find the most related autoencoder and select the expert associated + For every new task, it trains an autoencoder to reconstruct input data + and then trains an AlexNet classifier. Prior to AlexNet training, + the algorithm searches through existing autoencoders, if there are any, + to find the most related autoencoder and select the expert associated to that autoencoder. The new expert is then fine-tuned or trained using Learning without Forgetting (LwF) based on the most related previous expert. """ @@ -152,10 +149,7 @@ def __init__(self): # Initialize instance of the LwF plugin self.lwf_plugin = LwFPlugin(self.alpha, self.temp) - def before_training_exp(self, - strategy: "SupervisedTemplate", - *args, - **kwargs): + def before_training_exp(self, strategy: "SupervisedTemplate", *args, **kwargs): super().before_training_exp(strategy, *args, **kwargs) # Store task label for easy access @@ -182,17 +176,17 @@ def before_training_exp(self, # reset_optimizer(strategy.optimizer, strategy.model.expert) # make_optimizer should be called instead of reset_optimizer # It puts all parameters from strategy.model in the optimizer - # To freeze parameters use param.requires_grad = False + # To freeze parameters use param.requires_grad = False # and param.grad = None strategy.make_optimizer(**kwargs) # Remove LwF plugin in case it is not needed - if (self.lwf_plugin in strategy.plugins): + if self.lwf_plugin in strategy.plugins: strategy.plugins.remove(self.lwf_plugin) print("\nTRAINING EXPERT") # If needed, add a new instance of LwF plugin back - if (relatedness > strategy.rel_thresh): + if relatedness > strategy.rel_thresh: print("WITH LWF") self.lwf_plugin = LwFPlugin(self.alpha, self.temp) strategy.plugins.append(self.lwf_plugin) @@ -202,20 +196,20 @@ def before_training_exp(self, # ############## def _add_expert(self, strategy: "SupervisedTemplate", task_label, expert): - """Adds expert to ExpertGate expert dictionary using the task_label + """Adds expert to ExpertGate expert dictionary using the task_label as a key. """ strategy.model.expert_dict[str(task_label)] = expert def _get_expert(self, strategy: "SupervisedTemplate", key): - """Retrieves expert Alex model from the ExpertGate expert + """Retrieves expert Alex model from the ExpertGate expert dictionary using the task_label as a key. """ return strategy.model.expert_dict[str(key)] def _select_expert(self, strategy: "SupervisedTemplate", task_label): - """Given a task label, calculates the relatedness between - the autoencoder for this task and all other autoencoders. + """Given a task label, calculates the relatedness between + the autoencoder for this task and all other autoencoders. Returns the most related expert and the relatedness value. 
""" print("\nSELECTING EXPERT") @@ -224,15 +218,17 @@ def _select_expert(self, strategy: "SupervisedTemplate", task_label): # Preferably we could use `strategy.experience.benchmark.n_classes` # but this attribute is not enforced for all benchmarks - n_classes = len(strategy.experience.classes_seen_so_far) + \ - len(strategy.experience.future_classes) + n_classes = len(strategy.experience.classes_seen_so_far) + len( + strategy.experience.future_classes + ) - if (len(strategy.model.expert_dict) == 0): + if len(strategy.model.expert_dict) == 0: expert = ExpertModel( num_classes=n_classes, arch=strategy.model.arch, device=strategy.device, - pretrained_flag=strategy.model.pretrained_flag) + pretrained_flag=strategy.model.pretrained_flag, + ) relatedness = 0 # If experts exist, @@ -243,20 +239,17 @@ def _select_expert(self, strategy: "SupervisedTemplate", task_label): # Iterate through all autoencoders to get error values for autoencoder_id in strategy.model.autoencoder_dict: - error_dict[str(autoencoder_id) - ] = self._get_average_reconstruction_error( - strategy, autoencoder_id) + error_dict[ + str(autoencoder_id) + ] = self._get_average_reconstruction_error(strategy, autoencoder_id) # Send error dictionary to get most relevant autoencoder - relatedness_dict = self._task_relatedness( - strategy, error_dict, task_label) + relatedness_dict = self._task_relatedness(strategy, error_dict, task_label) # Retrieve best expert - most_relevant_expert_key = max( - relatedness_dict, key=relatedness_dict.get) + most_relevant_expert_key = max(relatedness_dict, key=relatedness_dict.get) - most_relevant_expert = self._get_expert( - strategy, most_relevant_expert_key) + most_relevant_expert = self._get_expert(strategy, most_relevant_expert_key) # Build expert with feature template expert = ExpertModel( @@ -264,7 +257,8 @@ def _select_expert(self, strategy: "SupervisedTemplate", task_label): arch=strategy.model.arch, device=strategy.device, pretrained_flag=strategy.model.pretrained_flag, - provided_template=most_relevant_expert) + provided_template=most_relevant_expert, + ) relatedness = relatedness_dict[most_relevant_expert_key] print("SELECTED EXPERT FROM TASK ", most_relevant_expert_key) @@ -275,13 +269,10 @@ def _select_expert(self, strategy: "SupervisedTemplate", task_label): # ######################## # EXPERT SELECTION METHODS # ######################## - def _task_relatedness(self, - strategy: "SupervisedTemplate", - error_dict, - task_label): - """Given a task label and error dictionary, returns a dictionary - of relatedness between the autoencoder of the current task - and all other tasks. + def _task_relatedness(self, strategy: "SupervisedTemplate", error_dict, task_label): + """Given a task label and error dictionary, returns a dictionary + of relatedness between the autoencoder of the current task + and all other tasks. 
""" # Build a task relatedness dictionary relatedness_dict = OrderedDict() @@ -291,37 +282,39 @@ def _task_relatedness(self, # Iterate through all reconstruction errros to obtain task_relatedness for task, error_a in error_dict.items(): if task != str(task_label): - relatedness_dict[str(task)] = 1 - ((error_a - error_k)/error_k) + relatedness_dict[str(task)] = 1 - ((error_a - error_k) / error_k) return relatedness_dict - def _get_average_reconstruction_error(self, - strategy: "SupervisedTemplate", - task_label): - """Given a task label, retrieves an autoencoder and + def _get_average_reconstruction_error( + self, strategy: "SupervisedTemplate", task_label + ): + """Given a task label, retrieves an autoencoder and evaluates the reconstruction error on the current batch of data. """ autoencoder = self._get_autoencoder(strategy, task_label) - ae_strategy = AETraining(model=autoencoder, - optimizer=SGD( - autoencoder.parameters(), - lr=strategy.ae_lr), - device=strategy.device, - eval_mb_size=100, - eval_every=-1) + ae_strategy = AETraining( + model=autoencoder, + optimizer=SGD(autoencoder.parameters(), lr=strategy.ae_lr), + device=strategy.device, + eval_mb_size=100, + eval_every=-1, + ) # Run evaluation on autoencoder ae_strategy.eval(strategy.experience) # Build the key for evaluation metrics dictionary - if (strategy.experience.origin_stream.name == "train"): - key = 'Loss_Stream/eval_phase/train_stream/Task' + \ - "{:0>3d}".format(strategy.experience.task_label) + if strategy.experience.origin_stream.name == "train": + key = "Loss_Stream/eval_phase/train_stream/Task" + "{:0>3d}".format( + strategy.experience.task_label + ) - elif (strategy.experience.origin_stream.name == "test"): - key = 'Loss_Stream/eval_phase/test_stream/Task' + \ - "{:0>3d}".format(strategy.experience.task_label) + elif strategy.experience.origin_stream.name == "test": + key = "Loss_Stream/eval_phase/test_stream/Task" + "{:0>3d}".format( + strategy.experience.task_label + ) # Query for reconstruction loss error = ae_strategy.evaluator.get_last_metrics()[key] @@ -331,19 +324,16 @@ def _get_average_reconstruction_error(self, # ################## # AUTENCODER METHODS # ################## - def _add_autoencoder(self, - strategy: "SupervisedTemplate", - task_label): - """Builds a new autoencoder and stores it in the ExpertGate + def _add_autoencoder(self, strategy: "SupervisedTemplate", task_label): + """Builds a new autoencoder and stores it in the ExpertGate autoencoder dictionary. Returns the new autoencoder. """ # Build a new autoencoder # This shape is equivalent to the output shape of # the Alexnet features module new_autoencoder = ExpertAutoencoder( - shape=(256, 6, 6), - latent_dim=strategy.ae_latent_dim, - device=strategy.device) + shape=(256, 6, 6), latent_dim=strategy.ae_latent_dim, device=strategy.device + ) # Store autoencoder with task number strategy.model.autoencoder_dict[str(task_label)] = new_autoencoder @@ -351,24 +341,27 @@ def _add_autoencoder(self, return new_autoencoder def _get_autoencoder(self, strategy: "SupervisedTemplate", task_label): - """Retrieves autoencoder from the ExpertGate autoencoder + """Retrieves autoencoder from the ExpertGate autoencoder dictionary using the task_label as a key. """ return strategy.model.autoencoder_dict[str(task_label)] def _train_autoencoder(self, strategy: "SupervisedTemplate", autoencoder): - """Trains an autoencoder for the ExpertGate plugin. 
- """ + """Trains an autoencoder for the ExpertGate plugin.""" # Setup autoencoder strategy - ae_strategy = AETraining(model=autoencoder, - optimizer=SGD(autoencoder.parameters(), - lr=strategy.ae_lr, - momentum=0.9, - weight_decay=0.0005), - device=strategy.device, - train_mb_size=strategy.ae_train_mb_size, - train_epochs=strategy.ae_train_epochs, - eval_every=-1) + ae_strategy = AETraining( + model=autoencoder, + optimizer=SGD( + autoencoder.parameters(), + lr=strategy.ae_lr, + momentum=0.9, + weight_decay=0.0005, + ), + device=strategy.device, + train_mb_size=strategy.ae_train_mb_size, + train_epochs=strategy.ae_train_epochs, + eval_every=-1, + ) print("\nTRAINING NEW AUTOENCODER") # Train with autoencoder strategy diff --git a/avalanche/training/supervised/icarl.py b/avalanche/training/supervised/icarl.py index 0fa893ef6..9e40ab7df 100644 --- a/avalanche/training/supervised/icarl.py +++ b/avalanche/training/supervised/icarl.py @@ -41,8 +41,7 @@ def __init__( device: Union[str, torch.device] = "cpu", plugins: Optional[List[SupervisedPlugin]] = None, evaluator: Union[ - EvaluationPlugin, - Callable[[], EvaluationPlugin] + EvaluationPlugin, Callable[[], EvaluationPlugin] ] = default_evaluator, eval_every=-1, ): @@ -140,15 +139,11 @@ def __init__(self, memory_size, buffer_transform=None, fixed_memory=True): self.output_size = None self.input_size = None - def after_train_dataset_adaptation( - self, strategy: "SupervisedTemplate", **kwargs - ): + def after_train_dataset_adaptation(self, strategy: "SupervisedTemplate", **kwargs): if strategy.clock.train_exp_counter != 0: memory = make_tensor_classification_dataset( torch.cat(self.x_memory).cpu(), - torch.tensor( - list(itertools.chain.from_iterable(self.y_memory)) - ), + torch.tensor(list(itertools.chain.from_iterable(self.y_memory))), transform=self.buffer_transform, target_transform=None, ) @@ -190,9 +185,10 @@ def after_training_exp(self, strategy: "SupervisedTemplate", **kwargs): def compute_class_means(self, strategy): if self.class_means == {}: n_classes = sum(strategy.experience.benchmark.n_classes_per_exp) - self.class_means = {c_id: torch.zeros(self.embedding_size, - device=strategy.device) - for c_id in range(n_classes)} + self.class_means = { + c_id: torch.zeros(self.embedding_size, device=strategy.device) + for c_id in range(n_classes) + } for i, class_samples in enumerate(self.x_memory): label = self.y_memory[i][0] @@ -225,8 +221,7 @@ def compute_class_means(self, strategy): self.class_means[label] = (m1 + m2) / 2 self.class_means[label] /= torch.norm(self.class_means[label]) - strategy.model.eval_classifier.replace_class_means_dict( - self.class_means) + strategy.model.eval_classifier.replace_class_means_dict(self.class_means) def construct_exemplar_set(self, strategy: SupervisedTemplate): assert strategy.experience is not None @@ -236,9 +231,7 @@ def construct_exemplar_set(self, strategy: SupervisedTemplate): previous_seen_classes = sum(benchmark.n_classes_per_exp[:tid]) if self.fixed_memory: - nb_protos_cl = int( - ceil(self.memory_size / len(self.observed_classes)) - ) + nb_protos_cl = int(ceil(self.memory_size / len(self.observed_classes))) else: nb_protos_cl = self.memory_size new_classes = self.observed_classes[ @@ -254,8 +247,7 @@ def construct_exemplar_set(self, strategy: SupervisedTemplate): collate_fn = cd.collate_fn if hasattr(cd, "collate_fn") else None eval_dataloader = DataLoader( - cd.eval(), collate_fn=collate_fn, - batch_size=strategy.eval_mb_size + cd.eval(), collate_fn=collate_fn, 
batch_size=strategy.eval_mb_size ) class_patterns = [] @@ -264,10 +256,7 @@ def construct_exemplar_set(self, strategy: SupervisedTemplate): class_pt = class_pt.to(strategy.device) class_patterns.append(class_pt) with torch.no_grad(): - mapped_pttp = ( - strategy.model.feature_extractor(class_pt) - .detach() - ) + mapped_pttp = strategy.model.feature_extractor(class_pt).detach() mapped_prototypes.append(mapped_pttp) class_patterns_tensor = torch.cat(class_patterns, dim=0) @@ -294,9 +283,7 @@ def construct_exemplar_set(self, strategy: SupervisedTemplate): i += 1 pick = (order > 0) * (order < nb_protos_cl + 1) * 1.0 - self.x_memory.append( - class_patterns_tensor[torch.where(pick == 1)[0]] - ) + self.x_memory.append(class_patterns_tensor[torch.where(pick == 1)[0]]) self.y_memory.append( [new_classes[iter_dico]] * len(torch.where(pick == 1)[0]) ) @@ -308,16 +295,12 @@ def reduce_exemplar_set(self, strategy: SupervisedTemplate): nb_cl = strategy.experience.benchmark.n_classes_per_exp if self.fixed_memory: - nb_protos_cl = int( - ceil(self.memory_size / len(self.observed_classes)) - ) + nb_protos_cl = int(ceil(self.memory_size / len(self.observed_classes))) else: nb_protos_cl = self.memory_size for i in range(len(self.x_memory) - nb_cl[tid]): pick = (self.order[i] < nb_protos_cl + 1) * 1.0 self.x_memory[i] = self.x_memory[i][torch.where(pick == 1)[0]] - self.y_memory[i] = self.y_memory[i][ - : len(torch.where(pick == 1)[0]) - ] + self.y_memory[i] = self.y_memory[i][: len(torch.where(pick == 1)[0])] self.order[i] = self.order[i][torch.where(pick == 1)[0]] diff --git a/avalanche/training/supervised/joint_training.py b/avalanche/training/supervised/joint_training.py index 112bf91fe..d393f83d1 100644 --- a/avalanche/training/supervised/joint_training.py +++ b/avalanche/training/supervised/joint_training.py @@ -34,17 +34,13 @@ class AlreadyTrainedError(Exception): pass -TDatasetExperience = TypeVar('TDatasetExperience', bound=DatasetExperience) -TPluginType = TypeVar('TPluginType', bound=BasePlugin, contravariant=True) -TMBInput = TypeVar('TMBInput') -TMBOutput = TypeVar('TMBOutput') +TDatasetExperience = TypeVar("TDatasetExperience", bound=DatasetExperience) +TPluginType = TypeVar("TPluginType", bound=BasePlugin, contravariant=True) +TMBInput = TypeVar("TMBInput") +TMBOutput = TypeVar("TMBOutput") -class JointTraining(SupervisedTemplate[ - TDatasetExperience, - TMBInput, - TMBOutput -]): +class JointTraining(SupervisedTemplate[TDatasetExperience, TMBInput, TMBOutput]): """Joint training on the entire stream. JointTraining performs joint training (also called offline training) on @@ -70,8 +66,7 @@ def __init__( device: Union[str, torch.device] = "cpu", plugins: Optional[Sequence[TPluginType]] = None, evaluator: Union[ - EvaluationPlugin, - Callable[[], EvaluationPlugin] + EvaluationPlugin, Callable[[], EvaluationPlugin] ] = default_evaluator, eval_every=-1, ): @@ -110,13 +105,9 @@ def __init__( def train( self, - experiences: Union[ - TDatasetExperience, - Iterable[TDatasetExperience]], + experiences: Union[TDatasetExperience, Iterable[TDatasetExperience]], eval_streams: Optional[ - Sequence[ - Union[TDatasetExperience, - Iterable[TDatasetExperience]]] + Sequence[Union[TDatasetExperience, Iterable[TDatasetExperience]]] ] = None, **kwargs ): @@ -144,9 +135,10 @@ def train( ) # Normalize training and eval data. 
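JointTraining, whose train() method is being reformatted here, is the offline baseline: it concatenates every experience's dataset and fits the model once on the union (the Cumulative strategy earlier in this diff grows the same union incrementally). A minimal PyTorch sketch of that baseline, assuming each experience exposes a dataset yielding (input, target, task-label) tuples as Avalanche classification experiences do; the helper name is illustrative only:

    import torch
    from torch.utils.data import ConcatDataset, DataLoader

    def joint_training_sketch(model, experiences, epochs=1, batch_size=128, lr=0.01):
        # Train once on the concatenation of all experiences (joint/offline baseline).
        full_dataset = ConcatDataset([exp.dataset for exp in experiences])
        loader = DataLoader(full_dataset, batch_size=batch_size, shuffle=True)
        optimizer = torch.optim.SGD(model.parameters(), lr=lr)
        criterion = torch.nn.CrossEntropyLoss()
        model.train()
        for _ in range(epochs):
            for x, y, *_ in loader:  # the trailing element is the task label
                optimizer.zero_grad()
                loss = criterion(model(x), y)
                loss.backward()
                optimizer.step()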
- experiences_list: Iterable[TDatasetExperience] = \ - _experiences_parameter_as_iterable(experiences) - + experiences_list: Iterable[ + TDatasetExperience + ] = _experiences_parameter_as_iterable(experiences) + if eval_streams is None: eval_streams = [experiences_list] diff --git a/avalanche/training/supervised/l2p.py b/avalanche/training/supervised/l2p.py index 1062e7550..715498c20 100644 --- a/avalanche/training/supervised/l2p.py +++ b/avalanche/training/supervised/l2p.py @@ -15,15 +15,15 @@ class LearningToPrompt(SupervisedTemplate): Learning to Prompt (L2P) strategy. Technique introduced in: - "Wang, Zifeng, et al. "Learning to prompt for continual learning." - Proceedings of the IEEE/CVF Conference on Computer Vision and + "Wang, Zifeng, et al. "Learning to prompt for continual learning." + Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition. 2022." Implementation based on: - https://github.com/JH-LEE-KR/l2p-pytorch - And implementations by Dario Salvati - As a model_name, we expect to receive one of the model list in + As a model_name, we expect to receive one of the model list in avalanche.models.vit Those models are based on the library timm. @@ -39,8 +39,7 @@ def __init__( device: Union[str, torch.device] = "cpu", plugins: Optional[List["SupervisedPlugin"]] = None, evaluator: Union[ - EvaluationPlugin, - Callable[[], EvaluationPlugin] + EvaluationPlugin, Callable[[], EvaluationPlugin] ] = default_evaluator, eval_every: int = -1, peval_mode: str = "epoch", @@ -94,7 +93,7 @@ def __init__( if device is None: device = torch.device("cpu") - + self.num_classes = num_classes self.lr = lr self.sim_coefficient = sim_coefficient @@ -117,10 +116,11 @@ def __init__( ) for n, p in model.named_parameters(): - if n.startswith(tuple(["blocks", "patch_embed", - "cls_token", "norm", "pos_embed"])): + if n.startswith( + tuple(["blocks", "patch_embed", "cls_token", "norm", "pos_embed"]) + ): p.requires_grad = False - + model.head = torch.nn.Linear(768, num_classes).to(device) optimizer = torch.optim.Adam( @@ -197,9 +197,7 @@ def forward(self): mask = self.experience.classes_in_this_experience not_mask = np.setdiff1d(np.arange(self.num_classes), mask) not_mask = torch.tensor(not_mask, dtype=torch.int64).to(self.device) - logits = logits.index_fill(dim=1, - index=not_mask, - value=float("-inf")) + logits = logits.index_fill(dim=1, index=not_mask, value=float("-inf")) return logits diff --git a/avalanche/training/supervised/lamaml.py b/avalanche/training/supervised/lamaml.py index 9fa949bb4..7df37787c 100644 --- a/avalanche/training/supervised/lamaml.py +++ b/avalanche/training/supervised/lamaml.py @@ -10,10 +10,12 @@ try: import higher except ImportError: - raise ModuleNotFoundError("higher not found, if you want to use " - "MAML please install avalanche with " - "the extra dependencies: " - "pip install avalanche-lib[extra]") + raise ModuleNotFoundError( + "higher not found, if you want to use " + "MAML please install avalanche with " + "the extra dependencies: " + "pip install avalanche-lib[extra]" + ) from avalanche.training.plugins import SupervisedPlugin, EvaluationPlugin from avalanche.training.plugins.evaluation import default_evaluator @@ -40,8 +42,7 @@ def __init__( device: Union[str, torch.device] = "cpu", plugins: Optional[Sequence["SupervisedPlugin"]] = None, evaluator: Union[ - EvaluationPlugin, - Callable[[], EvaluationPlugin] + EvaluationPlugin, Callable[[], EvaluationPlugin] ] = default_evaluator, eval_every=-1, peval_mode="epoch", @@ -101,7 +102,7 @@ def 
_before_training_exp(self, **kwargs): alpha_param = nn.Parameter( torch.ones(p.shape) * self.alpha_init, requires_grad=True ) - self.alpha_params[n.replace('.', '_')] = alpha_param + self.alpha_params[n.replace(".", "_")] = alpha_param self.alpha_params.to(self.device) # Create optimizer for the alpha_lr parameters @@ -111,27 +112,29 @@ def _before_training_exp(self, **kwargs): # update alpha-lr parameters for n, p in self.model.named_parameters(): - n = n.replace('.', '_') # dict does not support names with '.' + n = n.replace(".", "_") # dict does not support names with '.' if n in self.alpha_params: if self.alpha_params[n].shape != p.shape: old_shape = self.alpha_params[n].shape # parameter expansion expanded = False - assert len(p.shape) == len(old_shape), \ - "Expansion cannot add new dimensions" + assert len(p.shape) == len( + old_shape + ), "Expansion cannot add new dimensions" for i, (snew, sold) in enumerate(zip(p.shape, old_shape)): assert snew >= sold, "Shape cannot decrease." if snew > sold: - assert not expanded, \ - "Expansion cannot occur " \ - "in more than one dimension." + assert not expanded, ( + "Expansion cannot occur " "in more than one dimension." + ) expanded = True exp_idx = i alpha_param = torch.ones(p.shape) * self.alpha_init - idx = [slice(el) if i != exp_idx else - slice(old_shape[exp_idx]) - for i, el in enumerate(p.shape)] + idx = [ + slice(el) if i != exp_idx else slice(old_shape[exp_idx]) + for i, el in enumerate(p.shape) + ] alpha_param[idx] = self.alpha_params[n].detach().clone() alpha_param = nn.Parameter(alpha_param, requires_grad=True) self.alpha_params[n] = alpha_param @@ -215,18 +218,15 @@ def _inner_updates(self, **kwargs): self.meta_losses = [0 for _ in range(self.n_inner_updates)] for i in range(self.n_inner_updates): - batch_x_i = batch_x[i * rough_sz: (i + 1) * rough_sz] - batch_y_i = batch_y[i * rough_sz: (i + 1) * rough_sz] - batch_t_i = batch_t[i * rough_sz: (i + 1) * rough_sz] + batch_x_i = batch_x[i * rough_sz : (i + 1) * rough_sz] + batch_y_i = batch_y[i * rough_sz : (i + 1) * rough_sz] + batch_t_i = batch_t[i * rough_sz : (i + 1) * rough_sz] # We assume that samples for inner update are from the same task - self.inner_update_step(self.fast_model, batch_x_i, batch_y_i, - batch_t_i) + self.inner_update_step(self.fast_model, batch_x_i, batch_y_i, batch_t_i) # Compute meta-loss with the combination of batch and buffer samples - logits_meta = avalanche_forward( - self.fast_model, self.mb_x, self.mb_task_id - ) + logits_meta = avalanche_forward(self.fast_model, self.mb_x, self.mb_task_id) meta_loss = self._criterion(logits_meta, self.mb_y) self.meta_losses[i] = meta_loss @@ -243,9 +243,7 @@ def _outer_update(self, **kwargs): self.apply_grad(self.model, meta_grad_model) # Clip gradients - torch.nn.utils.clip_grad_norm_( - self.model.parameters(), self.grad_clip_norm - ) + torch.nn.utils.clip_grad_norm_(self.model.parameters(), self.grad_clip_norm) if self.learn_lr: # Compute meta-gradient for alpha-lr parameters @@ -266,7 +264,7 @@ def _outer_update(self, **kwargs): self.optimizer.step() else: for p, alpha in zip( - self.model.parameters(), self.alpha_params.parameters() + self.model.parameters(), self.alpha_params.parameters() ): # Use relu on updated LRs to avoid negative values p.data = p.data - p.grad * F.relu(alpha) @@ -288,6 +286,4 @@ def init_kaiming_normal(m): m.bias.data.zero_() -__all__ = [ - 'LaMAML' -] +__all__ = ["LaMAML"] diff --git a/avalanche/training/supervised/lamaml_v2.py b/avalanche/training/supervised/lamaml_v2.py index 
b008539dc..c46a0c270 100644 --- a/avalanche/training/supervised/lamaml_v2.py +++ b/avalanche/training/supervised/lamaml_v2.py @@ -2,8 +2,9 @@ import pkg_resources from pkg_resources import DistributionNotFound, VersionConflict + try: - pkg_resources.require('torch>=2.0.0') + pkg_resources.require("torch>=2.0.0") except (DistributionNotFound, VersionConflict) as e: raise RuntimeError(f"LaMAML requires torch >= 2.0.0.") @@ -44,8 +45,7 @@ def __init__( device: Union[str, torch.device] = "cpu", plugins: Optional[Sequence["SupervisedPlugin"]] = None, evaluator: Union[ - EvaluationPlugin, - Callable[[], EvaluationPlugin] + EvaluationPlugin, Callable[[], EvaluationPlugin] ] = default_evaluator, eval_every=-1, peval_mode="epoch", @@ -66,9 +66,9 @@ def __init__( learning rate. Mutually exclusive with learn_lr and lr_alpha. :param alpha_init: initialization value for learnable LRs. - :param max_buffer_size: maximum buffer size. The default storage + :param max_buffer_size: maximum buffer size. The default storage policy is reservoir-sampling. - :param buffer_mb_size: number of buffer samples in each step. + :param buffer_mb_size: number of buffer samples in each step. """ super().__init__( model, @@ -94,9 +94,11 @@ def __init__( self.alpha_params_initialized: bool = False self.meta_losses: List[Tensor] = [] - self.buffer = Buffer(max_buffer_size=max_buffer_size, - buffer_mb_size=buffer_mb_size, - device=device) + self.buffer = Buffer( + max_buffer_size=max_buffer_size, + buffer_mb_size=buffer_mb_size, + device=device, + ) self.model.apply(init_kaiming_normal) @@ -112,7 +114,7 @@ def _before_training_exp(self, **kwargs): alpha_param = nn.Parameter( torch.ones(p.shape) * self.alpha_init, requires_grad=True ) - self.alpha_params[n.replace('.', '_')] = alpha_param + self.alpha_params[n.replace(".", "_")] = alpha_param self.alpha_params.to(self.device) # Create optimizer for the alpha_lr parameters @@ -122,27 +124,29 @@ def _before_training_exp(self, **kwargs): # update alpha-lr parameters for n, p in self.model.named_parameters(): - n = n.replace('.', '_') # dict does not support names with '.' + n = n.replace(".", "_") # dict does not support names with '.' if n in self.alpha_params: if self.alpha_params[n].shape != p.shape: old_shape = self.alpha_params[n].shape # parameter expansion expanded = False - assert len(p.shape) == len(old_shape), \ - "Expansion cannot add new dimensions" + assert len(p.shape) == len( + old_shape + ), "Expansion cannot add new dimensions" for i, (snew, sold) in enumerate(zip(p.shape, old_shape)): assert snew >= sold, "Shape cannot decrease." if snew > sold: - assert not expanded, \ - "Expansion cannot occur " \ - "in more than one dimension." + assert not expanded, ( + "Expansion cannot occur " "in more than one dimension." 
+ ) expanded = True exp_idx = i alpha_param = torch.ones(p.shape) * self.alpha_init - idx = [slice(el) if i != exp_idx else - slice(old_shape[exp_idx]) - for i, el in enumerate(p.shape)] + idx = [ + slice(el) if i != exp_idx else slice(old_shape[exp_idx]) + for i, el in enumerate(p.shape) + ] alpha_param[idx] = self.alpha_params[n].detach().clone() alpha_param = nn.Parameter(alpha_param, requires_grad=True) self.alpha_params[n] = alpha_param @@ -173,16 +177,21 @@ def inner_update_step(self, fast_params, x, y, t): # Compute gradient with respect to the current fast weights grads = list( - torch.autograd.grad(loss, fast_params.values(), - retain_graph=self.second_order, - create_graph=self.second_order, - allow_unused=True) + torch.autograd.grad( + loss, + fast_params.values(), + retain_graph=self.second_order, + create_graph=self.second_order, + allow_unused=True, + ) ) # Clip grad norms grads = [ torch.clamp(g, min=-self.grad_clip_norm, max=self.grad_clip_norm) - if g is not None else g for g in grads + if g is not None + else g + for g in grads ] # New fast parameters @@ -193,18 +202,19 @@ def inner_update_step(self, fast_params, x, y, t): ) } - return new_fast_params + return new_fast_params def _inner_updates(self, **kwargs): # Make a copy of model parameters for fast updates - self.initial_fast_params = {n: deepcopy(p) for (n, p) in - self.model.named_parameters()} + self.initial_fast_params = { + n: deepcopy(p) for (n, p) in self.model.named_parameters() + } # Keep reference to the initial fast params - fast_params = self.initial_fast_params + fast_params = self.initial_fast_params # Samples from the current batch - batch_x, batch_y, batch_t = self.mb_x, self.mb_y, self.mb_task_id + batch_x, batch_y, batch_t = self.mb_x, self.mb_y, self.mb_task_id # Get batches from the buffer if self.clock.train_exp_counter > 0: @@ -218,42 +228,38 @@ def _inner_updates(self, **kwargs): # Split the current batch into smaller chuncks bsize_data = batch_x.shape[0] rough_sz = math.ceil(bsize_data / self.n_inner_updates) - self.meta_losses = [ - torch.empty(0) for _ in range(self.n_inner_updates) - ] + self.meta_losses = [torch.empty(0) for _ in range(self.n_inner_updates)] # Iterate through the chunks as inner-loops for i in range(self.n_inner_updates): - batch_x_i = batch_x[i * rough_sz: (i + 1) * rough_sz] - batch_y_i = batch_y[i * rough_sz: (i + 1) * rough_sz] - batch_t_i = batch_t[i * rough_sz: (i + 1) * rough_sz] + batch_x_i = batch_x[i * rough_sz : (i + 1) * rough_sz] + batch_y_i = batch_y[i * rough_sz : (i + 1) * rough_sz] + batch_t_i = batch_t[i * rough_sz : (i + 1) * rough_sz] # We assume that samples for inner update are from the same task - fast_params = self.inner_update_step(fast_params, - batch_x_i, - batch_y_i, batch_t_i) + fast_params = self.inner_update_step( + fast_params, batch_x_i, batch_y_i, batch_t_i + ) # Compute meta-loss with the combination of batch and buffer samples - logits_meta = torch.func.functional_call(self.model, fast_params, - (mixed_x, mixed_t)) + logits_meta = torch.func.functional_call( + self.model, fast_params, (mixed_x, mixed_t) + ) meta_loss_i = self._criterion(logits_meta, mixed_y) self.meta_losses[i] = meta_loss_i def _outer_update(self, **kwargs): self.model.zero_grad() self.alpha_params.zero_grad() - + # Compute meta-gradient for the main model meta_loss = sum(self.meta_losses) / len(self.meta_losses) meta_loss.backward() - self.copy_grads(self.model.parameters(), - self.initial_fast_params.values()) + self.copy_grads(self.model.parameters(), 
self.initial_fast_params.values()) # Clip gradients - torch.nn.utils.clip_grad_norm_( - self.model.parameters(), self.grad_clip_norm - ) + torch.nn.utils.clip_grad_norm_(self.model.parameters(), self.grad_clip_norm) if self.learn_lr: # Update lr for the current batch @@ -261,29 +267,30 @@ def _outer_update(self, **kwargs): self.alpha_params.parameters(), self.grad_clip_norm ) self.optimizer_alpha.step() - + # If sync-update: update with self.optimizer # o.w: use the learned LRs to update the model if self.sync_update: self.optimizer.step() else: for p, alpha in zip( - self.model.parameters(), self.alpha_params.parameters() + self.model.parameters(), self.alpha_params.parameters() ): # Use relu on updated LRs to avoid negative values if p.grad is not None: p.data = p.data - p.grad * F.relu(alpha) - + self.loss = meta_loss def _after_training_exp(self, **kwargs): self.buffer.update(self) super()._after_training_exp(**kwargs) - + class Buffer: - def __init__(self, max_buffer_size=100, buffer_mb_size=10, - device=torch.device("cpu")): + def __init__( + self, max_buffer_size=100, buffer_mb_size=10, device=torch.device("cpu") + ): self.storage_policy = ReservoirSamplingBuffer(max_size=max_buffer_size) self.buffer_mb_size = buffer_mb_size self.device = device @@ -295,13 +302,16 @@ def __len__(self): return len(self.storage_policy.buffer) def get_buffer_batch(self): - rnd_ind = torch.randperm(len(self))[:self.buffer_mb_size] - buff_x = torch.cat([self.storage_policy.buffer[i][0].unsqueeze(0) - for i in rnd_ind]).to(self.device) - buff_y = torch.LongTensor([self.storage_policy.buffer[i][1] - for i in rnd_ind]).to(self.device) - buff_t = torch.LongTensor([self.storage_policy.buffer[i][2] - for i in rnd_ind]).to(self.device) + rnd_ind = torch.randperm(len(self))[: self.buffer_mb_size] + buff_x = torch.cat( + [self.storage_policy.buffer[i][0].unsqueeze(0) for i in rnd_ind] + ).to(self.device) + buff_y = torch.LongTensor( + [self.storage_policy.buffer[i][1] for i in rnd_ind] + ).to(self.device) + buff_t = torch.LongTensor( + [self.storage_policy.buffer[i][2] for i in rnd_ind] + ).to(self.device) return buff_x, buff_y, buff_t diff --git a/avalanche/training/supervised/mer.py b/avalanche/training/supervised/mer.py index 37f6234c3..9f739213d 100644 --- a/avalanche/training/supervised/mer.py +++ b/avalanche/training/supervised/mer.py @@ -14,8 +14,9 @@ class MERBuffer: - def __init__(self, max_buffer_size=100, buffer_mb_size=10, - device=torch.device("cpu")): + def __init__( + self, max_buffer_size=100, buffer_mb_size=10, device=torch.device("cpu") + ): self.storage_policy = ReservoirSamplingBuffer(max_size=max_buffer_size) self.buffer_mb_size = buffer_mb_size self.device = device @@ -32,12 +33,15 @@ def get_batch(self, x, y, t): bsize = min(len(self), self.buffer_mb_size) rnd_ind = torch.randperm(len(self))[:bsize] - buff_x = torch.cat([self.storage_policy.buffer[i][0].unsqueeze(0) - for i in rnd_ind]).to(self.device) - buff_y = torch.LongTensor([self.storage_policy.buffer[i][1] - for i in rnd_ind]).to(self.device) - buff_t = torch.LongTensor([self.storage_policy.buffer[i][2] - for i in rnd_ind]).to(self.device) + buff_x = torch.cat( + [self.storage_policy.buffer[i][0].unsqueeze(0) for i in rnd_ind] + ).to(self.device) + buff_y = torch.LongTensor( + [self.storage_policy.buffer[i][1] for i in rnd_ind] + ).to(self.device) + buff_t = torch.LongTensor( + [self.storage_policy.buffer[i][2] for i in rnd_ind] + ).to(self.device) mixed_x = torch.cat([x, buff_x], dim=0) mixed_y = torch.cat([y, buff_y], dim=0) @@ 
-63,8 +67,7 @@ def __init__( device: Union[str, torch.device] = "cpu", plugins: Optional[Sequence["SupervisedPlugin"]] = None, evaluator: Union[ - EvaluationPlugin, - Callable[[], EvaluationPlugin] + EvaluationPlugin, Callable[[], EvaluationPlugin] ] = default_evaluator, eval_every=-1, peval_mode="epoch", @@ -97,9 +100,11 @@ def __init__( peval_mode, ) - self.buffer = MERBuffer(max_buffer_size=max_buffer_size, - buffer_mb_size=buffer_mb_size, - device=self.device) + self.buffer = MERBuffer( + max_buffer_size=max_buffer_size, + buffer_mb_size=buffer_mb_size, + device=self.device, + ) self.n_inner_steps = n_inner_steps self.beta = beta self.gamma = gamma @@ -128,16 +133,19 @@ def _inner_updates(self, **kwargs): # Within-batch Reptile update w_aft_t = self.model.state_dict() self.model.load_state_dict( - {name: w_bef_t[name] + ((w_aft_t[name] - w_bef_t[name]) - * self.beta) - for name in w_bef_t} + { + name: w_bef_t[name] + ((w_aft_t[name] - w_bef_t[name]) * self.beta) + for name in w_bef_t + } ) def _outer_update(self, **kwargs): w_aft = self.model.state_dict() self.model.load_state_dict( - {name: self.w_bef[name] + ((w_aft[name] - self.w_bef[name]) - * self.gamma) for name in self.w_bef} + { + name: self.w_bef[name] + ((w_aft[name] - self.w_bef[name]) * self.gamma) + for name in self.w_bef + } ) with torch.no_grad(): pred = self.forward() diff --git a/avalanche/training/supervised/naive_object_detection.py b/avalanche/training/supervised/naive_object_detection.py index db11f7e8b..3c13d8e17 100644 --- a/avalanche/training/supervised/naive_object_detection.py +++ b/avalanche/training/supervised/naive_object_detection.py @@ -59,8 +59,7 @@ def __init__( device: Union[str, torch.device] = "cpu", plugins: Optional[Sequence["SupervisedPlugin"]] = None, evaluator: Union[ - EvaluationPlugin, - Callable[[], EvaluationPlugin] + EvaluationPlugin, Callable[[], EvaluationPlugin] ] = default_evaluator, eval_every=-1, peval_mode="epoch", @@ -164,19 +163,18 @@ def make_train_dataloader( ) self.dataloader = TaskBalancedDataLoader( - self.adapted_dataset, - oversample_small_groups=True, - **other_dataloader_args + self.adapted_dataset, oversample_small_groups=True, **other_dataloader_args ) def make_eval_dataloader( - self, - num_workers=0, - shuffle=False, - pin_memory=None, - persistent_workers=False, - drop_last=False, - **kwargs): + self, + num_workers=0, + shuffle=False, + pin_memory=None, + persistent_workers=False, + drop_last=False, + **kwargs + ): """ Initializes the eval data loader. :param num_workers: How many subprocesses to use for data loading. @@ -200,14 +198,9 @@ def make_eval_dataloader( **kwargs ) - collate_from_data_or_kwargs( - self.adapted_dataset, - other_dataloader_args) + collate_from_data_or_kwargs(self.adapted_dataset, other_dataloader_args) - self.dataloader = DataLoader( - self.adapted_dataset, - **other_dataloader_args - ) + self.dataloader = DataLoader(self.adapted_dataset, **other_dataloader_args) def criterion(self): """ @@ -255,12 +248,9 @@ def _unpack_minibatch(self): # Unpack minibatch mainly takes care of moving tensors to devices. # In addition, it will prepare the targets in the proper dict format. 
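As the comment above says, _unpack_minibatch prepares the detection targets in the per-image dict format that torchvision detection models expect: one dict per image with a "boxes" tensor of shape [N, 4] in (x1, y1, x2, y2) coordinates and a "labels" tensor with one class id per box. For illustration, with made-up values:

    import torch

    targets = [
        {
            "boxes": torch.tensor([[10.0, 20.0, 50.0, 80.0]]),  # [x1, y1, x2, y2]
            "labels": torch.tensor([3]),                         # one class id per box
        },
        # ... one dict per image in the mini-batch
    ]
    # Move every tensor in every per-image dict to the training device,
    # as _unpack_minibatch does above.
    device = torch.device("cpu")
    targets = [{k: v.to(device) for k, v in t.items()} for t in targets]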
images = list(image.to(self.device) for image in self.mbatch[0]) - targets = [ - {k: v.to(self.device) for k, v in t.items()} for t in self.mbatch[1] - ] - - mbatch = [images, targets, - torch.as_tensor(self.mbatch[2]).to(self.device)] + targets = [{k: v.to(self.device) for k, v in t.items()} for t in self.mbatch[1]] + + mbatch = [images, targets, torch.as_tensor(self.mbatch[2]).to(self.device)] self.mbatch = tuple(mbatch) def backward(self): diff --git a/avalanche/training/supervised/strategy_wrappers.py b/avalanche/training/supervised/strategy_wrappers.py index 773356872..8897b0e9a 100644 --- a/avalanche/training/supervised/strategy_wrappers.py +++ b/avalanche/training/supervised/strategy_wrappers.py @@ -40,7 +40,7 @@ MASPlugin, BiCPlugin, MIRPlugin, - FromScratchTrainingPlugin + FromScratchTrainingPlugin, ) from avalanche.training.templates.base import BaseTemplate from avalanche.training.templates import SupervisedTemplate @@ -73,8 +73,7 @@ def __init__( device: Union[str, torch.device] = "cpu", plugins: Optional[List[SupervisedPlugin]] = None, evaluator: Union[ - EvaluationPlugin, - Callable[[], EvaluationPlugin] + EvaluationPlugin, Callable[[], EvaluationPlugin] ] = default_evaluator, eval_every=-1, **base_kwargs @@ -132,8 +131,7 @@ def __init__( device: Union[str, torch.device] = "cpu", plugins: Optional[Sequence["SupervisedPlugin"]] = None, evaluator: Union[ - EvaluationPlugin, - Callable[[], EvaluationPlugin] + EvaluationPlugin, Callable[[], EvaluationPlugin] ] = default_evaluator, eval_every=-1, **base_kwargs @@ -190,8 +188,7 @@ def __init__( device: Union[str, torch.device] = "cpu", plugins: Optional[List[SupervisedPlugin]] = None, evaluator: Union[ - EvaluationPlugin, - Callable[[], EvaluationPlugin] + EvaluationPlugin, Callable[[], EvaluationPlugin] ] = default_evaluator, eval_every=-1, **base_kwargs @@ -257,8 +254,7 @@ def __init__( device: Union[str, torch.device] = "cpu", plugins: Optional[List[SupervisedPlugin]] = None, evaluator: Union[ - EvaluationPlugin, - Callable[[], EvaluationPlugin] + EvaluationPlugin, Callable[[], EvaluationPlugin] ] = default_evaluator, eval_every=-1, **base_kwargs @@ -335,8 +331,7 @@ def __init__( device: Union[str, torch.device] = "cpu", plugins: Optional[List[SupervisedPlugin]] = None, evaluator: Union[ - EvaluationPlugin, - Callable[[], EvaluationPlugin] + EvaluationPlugin, Callable[[], EvaluationPlugin] ] = default_evaluator, eval_every=-1, generator_strategy: Optional[BaseTemplate] = None, @@ -440,7 +435,7 @@ def __init__( class AETraining(SupervisedTemplate): """AETraining class - This is the training strategy for the AE model. + This is the training strategy for the AE model. We make use of the SupervisedTemplate, even though technically this is not a supervised training. However, this reduces the modification to a minimum. 
@@ -450,10 +445,9 @@ class AETraining(SupervisedTemplate): """ ae_evaluator = EvaluationPlugin( - loss_metrics(minibatch=False, epoch=True, - experience=False, stream=True), + loss_metrics(minibatch=False, epoch=True, experience=False, stream=True), loggers=[InteractiveLogger()], - ) + ) def __init__( self, @@ -506,8 +500,7 @@ def __init__( ) def criterion(self): - modified_mb_x = sigmoid( - self.model.feature_module(self.mb_x).to(self.device)) + modified_mb_x = sigmoid(self.model.feature_module(self.mb_x).to(self.device)) return self._criterion(modified_mb_x, self.mb_output) @@ -539,8 +532,7 @@ def __init__( device: Union[str, torch.device] = "cpu", plugins: Optional[List[SupervisedPlugin]] = None, evaluator: Union[ - EvaluationPlugin, - Callable[[], EvaluationPlugin] + EvaluationPlugin, Callable[[], EvaluationPlugin] ] = get_default_vae_logger, eval_every=-1, **base_kwargs @@ -608,8 +600,7 @@ def __init__( device: Union[str, torch.device] = "cpu", plugins: Optional[List[SupervisedPlugin]] = None, evaluator: Union[ - EvaluationPlugin, - Callable[[], EvaluationPlugin] + EvaluationPlugin, Callable[[], EvaluationPlugin] ] = default_evaluator, eval_every=-1, **base_kwargs @@ -677,8 +668,7 @@ def __init__( device: Union[str, torch.device] = "cpu", plugins: Optional[List[SupervisedPlugin]] = None, evaluator: Union[ - EvaluationPlugin, - Callable[[], EvaluationPlugin] + EvaluationPlugin, Callable[[], EvaluationPlugin] ] = default_evaluator, eval_every=-1, **base_kwargs @@ -745,8 +735,7 @@ def __init__( device: Union[str, torch.device] = "cpu", plugins: Optional[List[SupervisedPlugin]] = None, evaluator: Union[ - EvaluationPlugin, - Callable[[], EvaluationPlugin] + EvaluationPlugin, Callable[[], EvaluationPlugin] ] = default_evaluator, eval_every=-1, **base_kwargs @@ -816,8 +805,7 @@ def __init__( device: Union[str, torch.device] = "cpu", plugins: Optional[List[SupervisedPlugin]] = None, evaluator: Union[ - EvaluationPlugin, - Callable[[], EvaluationPlugin] + EvaluationPlugin, Callable[[], EvaluationPlugin] ] = default_evaluator, eval_every=-1, **base_kwargs @@ -887,8 +875,7 @@ def __init__( device: Union[str, torch.device] = "cpu", plugins: Optional[List[SupervisedPlugin]] = None, evaluator: Union[ - EvaluationPlugin, - Callable[[], EvaluationPlugin] + EvaluationPlugin, Callable[[], EvaluationPlugin] ] = default_evaluator, eval_every=-1, **base_kwargs @@ -960,8 +947,7 @@ def __init__( device: Union[str, torch.device] = "cpu", plugins: Optional[List[SupervisedPlugin]] = None, evaluator: Union[ - EvaluationPlugin, - Callable[[], EvaluationPlugin] + EvaluationPlugin, Callable[[], EvaluationPlugin] ] = default_evaluator, eval_every=-1, **base_kwargs @@ -1049,8 +1035,7 @@ def __init__( device: Union[str, torch.device] = "cpu", plugins: Optional[Sequence["SupervisedPlugin"]] = None, evaluator: Union[ - EvaluationPlugin, - Callable[[], EvaluationPlugin] + EvaluationPlugin, Callable[[], EvaluationPlugin] ] = default_evaluator, eval_every=-1, **base_kwargs @@ -1084,7 +1069,7 @@ def __init__( """ if plugins is None: plugins = [] - + plugins = list(plugins) # This implementation relies on the S.I. 
Plugin, which contains the @@ -1129,8 +1114,7 @@ def __init__( device: Union[str, torch.device] = "cpu", plugins: Optional[List[SupervisedPlugin]] = None, evaluator: Union[ - EvaluationPlugin, - Callable[[], EvaluationPlugin] + EvaluationPlugin, Callable[[], EvaluationPlugin] ] = default_evaluator, eval_every=-1, **base_kwargs @@ -1205,8 +1189,7 @@ def __init__( device: Union[str, torch.device] = "cpu", plugins: Optional[List[SupervisedPlugin]] = None, evaluator: Union[ - EvaluationPlugin, - Callable[[], EvaluationPlugin] + EvaluationPlugin, Callable[[], EvaluationPlugin] ] = default_evaluator, eval_every=-1, **base_kwargs @@ -1275,8 +1258,7 @@ def __init__( device: Union[str, torch.device] = "cpu", plugins: Optional[List[SupervisedPlugin]] = None, evaluator: Union[ - EvaluationPlugin, - Callable[[], EvaluationPlugin] + EvaluationPlugin, Callable[[], EvaluationPlugin] ] = default_evaluator, eval_every=-1, **base_kwargs @@ -1346,9 +1328,9 @@ def __init__( criterion, mem_size: int = 200, val_percentage: float = 0.1, - T: int = 2, + T: int = 2, stage_2_epochs: int = 200, - lamb: float = -1, + lamb: float = -1, lr: float = 0.1, train_mb_size: int = 1, train_epochs: int = 1, @@ -1356,8 +1338,7 @@ def __init__( device: Union[str, torch.device] = "cpu", plugins: Optional[List[SupervisedPlugin]] = None, evaluator: Union[ - EvaluationPlugin, - Callable[[], EvaluationPlugin] + EvaluationPlugin, Callable[[], EvaluationPlugin] ] = default_evaluator, eval_every=-1, **base_kwargs @@ -1368,13 +1349,13 @@ def __init__( :param optimizer: The optimizer to use. :param criterion: The loss criterion to use. :param mem_size: replay buffer size. - :param val_percentage: hyperparameter used to set the + :param val_percentage: hyperparameter used to set the percentage of exemplars in the val set. - :param T: hyperparameter used to set the temperature + :param T: hyperparameter used to set the temperature used in stage 1. - :param stage_2_epochs: hyperparameter used to set the + :param stage_2_epochs: hyperparameter used to set the amount of epochs of stage 2. - :param lamb: hyperparameter used to balance the distilling + :param lamb: hyperparameter used to balance the distilling loss and the classification loss. :param lr: hyperparameter used as a learning rate for the second phase of training. 
@@ -1395,13 +1376,14 @@ def __init__( """ # Instantiate plugin - bic = BiCPlugin(mem_size=mem_size, - val_percentage=val_percentage, - T=T, - stage_2_epochs=stage_2_epochs, - lamb=lamb, - lr=lr, - ) + bic = BiCPlugin( + mem_size=mem_size, + val_percentage=val_percentage, + T=T, + stage_2_epochs=stage_2_epochs, + lamb=lamb, + lr=lr, + ) # Add plugin to the strategy if plugins is None: @@ -1443,8 +1425,7 @@ def __init__( device: Union[str, torch.device] = "cpu", plugins: Optional[List[SupervisedPlugin]] = None, evaluator: Union[ - EvaluationPlugin, - Callable[[], EvaluationPlugin] + EvaluationPlugin, Callable[[], EvaluationPlugin] ] = default_evaluator, eval_every=-1, **base_kwargs @@ -1476,9 +1457,7 @@ def __init__( # Instantiate plugin mir = MIRPlugin( - mem_size=mem_size, - subsample=subsample, - batch_size_mem=batch_size_mem + mem_size=mem_size, subsample=subsample, batch_size_mem=batch_size_mem ) # Add plugin to the strategy @@ -1523,8 +1502,7 @@ def __init__( device: Union[str, torch.device] = "cpu", plugins: Optional[List[SupervisedPlugin]] = None, evaluator: Union[ - EvaluationPlugin, - Callable[[], EvaluationPlugin] + EvaluationPlugin, Callable[[], EvaluationPlugin] ] = default_evaluator, eval_every=-1, **base_kwargs @@ -1592,5 +1570,5 @@ def __init__( "MAS", "BiC", "MIR", - "FromScratchTraining" + "FromScratchTraining", ] diff --git a/avalanche/training/supervised/strategy_wrappers_online.py b/avalanche/training/supervised/strategy_wrappers_online.py index b85555db1..640f5abb4 100644 --- a/avalanche/training/supervised/strategy_wrappers_online.py +++ b/avalanche/training/supervised/strategy_wrappers_online.py @@ -23,10 +23,12 @@ from avalanche._annotations import deprecated -@deprecated(0.5, - "Online strategies are not differentiated" - " from normal strategies anymore." - "Please use Naive strategy instead") +@deprecated( + 0.5, + "Online strategies are not differentiated" + " from normal strategies anymore." + "Please use Naive strategy instead", +) class OnlineNaive(SupervisedTemplate): """Online naive finetuning. @@ -50,8 +52,7 @@ def __init__( device: Union[str, torch.device] = "cpu", plugins: Optional[Sequence[BasePlugin]] = None, evaluator: Union[ - EvaluationPlugin, - Callable[[], EvaluationPlugin] + EvaluationPlugin, Callable[[], EvaluationPlugin] ] = default_evaluator, eval_every=-1, **kwargs diff --git a/avalanche/training/supervised/supervised_contrastive_replay.py b/avalanche/training/supervised/supervised_contrastive_replay.py index af84f2a21..0e18906eb 100644 --- a/avalanche/training/supervised/supervised_contrastive_replay.py +++ b/avalanche/training/supervised/supervised_contrastive_replay.py @@ -31,21 +31,24 @@ class SCR(SupervisedTemplate): at the end of each experience (called review trick, but not mentioned in the paper). This implementation does not implement the review trick. 
""" - def __init__(self, - model: SCRModel, - optimizer: Optimizer, - augmentations=Compose([Lambda(lambda el: el)]), - mem_size: int = 100, - temperature: int = 0.1, - train_mb_size: int = 1, - batch_size_mem: int = 100, - train_epochs: int = 1, - eval_mb_size: Optional[int] = 1, - device="cpu", - plugins: Optional[Sequence["BaseSGDPlugin"]] = None, - evaluator=default_evaluator, - eval_every=-1, - peval_mode="epoch"): + + def __init__( + self, + model: SCRModel, + optimizer: Optimizer, + augmentations=Compose([Lambda(lambda el: el)]), + mem_size: int = 100, + temperature: int = 0.1, + train_mb_size: int = 1, + batch_size_mem: int = 100, + train_epochs: int = 1, + eval_mb_size: Optional[int] = 1, + device="cpu", + plugins: Optional[Sequence["BaseSGDPlugin"]] = None, + evaluator=default_evaluator, + eval_every=-1, + peval_mode="epoch", + ): """ :param model: an Avalanche model like the avalanche.models.SCRModel, where the train classifier uses a projection network (e.g., MLP) @@ -85,13 +88,15 @@ def __init__(self, if not isinstance(model, SCRModel): raise ValueError( "Supervised Contrastive Replay model " - "needs to be an instance of avalanche.models.SCRModel.") + "needs to be an instance of avalanche.models.SCRModel." + ) self.replay_plugin = ReplayPlugin( mem_size, batch_size=train_mb_size, batch_size_mem=batch_size_mem, - storage_policy=ClassBalancedBuffer(max_size=mem_size)) + storage_policy=ClassBalancedBuffer(max_size=mem_size), + ) self.augmentations = augmentations self.temperature = temperature @@ -116,7 +121,8 @@ def __init__(self, plugins, evaluator, eval_every, - peval_mode) + peval_mode, + ) def criterion(self): if self.is_training: @@ -146,9 +152,7 @@ def _after_forward(self, **kwargs): original_examples = self.mb_output[:original_batch_size] augmented_examples = self.mb_output[original_batch_size:] # (original_batch_size, 2, output_size) - self.mb_output = torch.stack( - [original_examples, augmented_examples], - dim=1) + self.mb_output = torch.stack([original_examples, augmented_examples], dim=1) def _after_training_exp(self, **kwargs): """Update NCM means""" @@ -161,8 +165,9 @@ def compute_class_means(self): # for each class for dataset in self.replay_plugin.storage_policy.buffer_datasets: - dl = DataLoader(dataset, shuffle=False, - batch_size=self.eval_mb_size, drop_last=False) + dl = DataLoader( + dataset, shuffle=False, batch_size=self.eval_mb_size, drop_last=False + ) num_els = 0 # for each mini-batch in each class for x, y, _ in dl: diff --git a/avalanche/training/templates/base.py b/avalanche/training/templates/base.py index 1da839ac9..33b1a67e5 100644 --- a/avalanche/training/templates/base.py +++ b/avalanche/training/templates/base.py @@ -9,13 +9,12 @@ from avalanche.benchmarks import CLExperience, CLStream from avalanche.core import BasePlugin from avalanche.distributed.distributed_helper import DistributedHelper -from avalanche.training.templates.strategy_mixin_protocol import \ - BaseStrategyProtocol +from avalanche.training.templates.strategy_mixin_protocol import BaseStrategyProtocol from avalanche.training.utils import trigger_plugins -TExperienceType = TypeVar('TExperienceType', bound=CLExperience) -TPluginType = TypeVar('TPluginType', bound=BasePlugin, contravariant=True) +TExperienceType = TypeVar("TExperienceType", bound=CLExperience) +TPluginType = TypeVar("TPluginType", bound=BasePlugin, contravariant=True) class BaseTemplate(BaseStrategyProtocol[TExperienceType]): @@ -52,15 +51,14 @@ def __init__( if device is None: warnings.warn( - 'When 
instantiating a strategy, please pass a non-None device.' + "When instantiating a strategy, please pass a non-None device." ) - device = 'cpu' + device = "cpu" self.device = torch.device(device) """ PyTorch device where the model will be allocated. """ - self.plugins: List[BasePlugin] = [] \ - if plugins is None else list(plugins) + self.plugins: List[BasePlugin] = [] if plugins is None else list(plugins) """ List of `SupervisedPlugin`s. """ # check plugin compatibility @@ -118,11 +116,11 @@ def train( have different names. """ if not self._distributed_check: - # Checks if the strategy elements are compatible with + # Checks if the strategy elements are compatible with # distributed training self._check_distributed_training_compatibility() self._distributed_check = True - + self.is_training = True self._stop_training = False @@ -130,8 +128,9 @@ def train( self.model.to(self.device) # Normalize training and eval data. - experiences_list: Iterable[TExperienceType] = \ - _experiences_parameter_as_iterable(experiences) + experiences_list: Iterable[ + TExperienceType + ] = _experiences_parameter_as_iterable(experiences) if eval_streams is None: eval_streams = [experiences_list] @@ -172,19 +171,20 @@ def eval( each metric name """ if not self._distributed_check: - # Checks if the strategy elements are compatible with + # Checks if the strategy elements are compatible with # distributed training self._check_distributed_training_compatibility() self._distributed_check = True - + # eval can be called inside the train method. # Save the shared state here to restore before returning. prev_train_state = self._save_train_state() self.is_training = False self.model.eval() - experiences_list: Iterable[TExperienceType] = \ - _experiences_parameter_as_iterable(experiences) + experiences_list: Iterable[ + TExperienceType + ] = _experiences_parameter_as_iterable(experiences) self.current_eval_stream = experiences_list self._before_eval(**kwargs) @@ -268,7 +268,7 @@ def is_callback(x): f"callbacks: {cb_p - cb_supported}", ) return - + def _check_distributed_training_compatibility(self): """ Check if strategy elements (plugins, ...) are compatible with @@ -284,8 +284,10 @@ def _check_distributed_training_compatibility(self): unsupported_plugins.append(plugin) if len(unsupported_plugins) > 0: - warnings.warn('You are using plugins that are not compatible' - 'with distributed training:') + warnings.warn( + "You are using plugins that are not compatible" + "with distributed training:" + ) for plugin in unsupported_plugins: print(type(plugin), file=sys.stderr) @@ -323,7 +325,6 @@ def _after_eval_exp(self, **kwargs): def _group_experiences_by_stream( eval_streams: Iterable[Union[Iterable[CLExperience], CLExperience]] ) -> List[List[CLExperience]]: - exps: List[CLExperience] = [] # First, we unpack the list of experiences. 
for exp in eval_streams: @@ -349,6 +350,4 @@ def _experiences_parameter_as_iterable( return [experiences] -__all__ = [ - 'BaseTemplate' -] +__all__ = ["BaseTemplate"] diff --git a/avalanche/training/templates/base_sgd.py b/avalanche/training/templates/base_sgd.py index d0d65da6c..97b3f9376 100644 --- a/avalanche/training/templates/base_sgd.py +++ b/avalanche/training/templates/base_sgd.py @@ -15,26 +15,23 @@ from avalanche.training.plugins.clock import Clock from avalanche.training.plugins.evaluation import default_evaluator from avalanche.training.templates.base import BaseTemplate -from avalanche.benchmarks.utils.data_loader import TaskBalancedDataLoader, \ - collate_from_data_or_kwargs -from avalanche.training.templates.strategy_mixin_protocol import \ - SGDStrategyProtocol +from avalanche.benchmarks.utils.data_loader import ( + TaskBalancedDataLoader, + collate_from_data_or_kwargs, +) +from avalanche.training.templates.strategy_mixin_protocol import SGDStrategyProtocol from avalanche.training.utils import trigger_plugins -TDatasetExperience = TypeVar('TDatasetExperience', bound=DatasetExperience) -TMBInput = TypeVar('TMBInput') -TMBOutput = TypeVar('TMBOutput') +TDatasetExperience = TypeVar("TDatasetExperience", bound=DatasetExperience) +TMBInput = TypeVar("TMBInput") +TMBOutput = TypeVar("TMBOutput") class BaseSGDTemplate( - SGDStrategyProtocol[ - TDatasetExperience, - TMBInput, - TMBOutput], - BaseTemplate[ - TDatasetExperience] - ): + SGDStrategyProtocol[TDatasetExperience, TMBInput, TMBOutput], + BaseTemplate[TDatasetExperience], +): """Base SGD class for continual learning skeletons. **Training loop** @@ -64,11 +61,10 @@ def __init__( device: Union[str, torch.device] = "cpu", plugins: Optional[Sequence[BasePlugin]] = None, evaluator: Union[ - EvaluationPlugin, - Callable[[], EvaluationPlugin] + EvaluationPlugin, Callable[[], EvaluationPlugin] ] = default_evaluator, eval_every=-1, - peval_mode="epoch" + peval_mode="epoch", ): """Init. @@ -91,11 +87,7 @@ def __init__( """ super().__init__() # type: ignore - BaseTemplate.__init__( - self=self, - model=model, - device=device, - plugins=plugins) + BaseTemplate.__init__(self=self, model=model, device=device, plugins=plugins) self.optimizer: Optimizer = optimizer """ PyTorch optimizer. """ @@ -109,9 +101,7 @@ def __init__( self.train_mb_size: int = train_mb_size """ Training mini-batch size. """ - self.eval_mb_size: int = ( - train_mb_size if eval_mb_size is None else eval_mb_size - ) + self.eval_mb_size: int = train_mb_size if eval_mb_size is None else eval_mb_size """ Eval mini-batch size. """ self.retain_graph: bool = False @@ -167,14 +157,14 @@ def __init__( self._stop_training = False - def train(self, - experiences: Union[TDatasetExperience, - Iterable[TDatasetExperience]], - eval_streams: Optional[ - Sequence[Union[TDatasetExperience, - Iterable[TDatasetExperience]]]] = None, - **kwargs): - + def train( + self, + experiences: Union[TDatasetExperience, Iterable[TDatasetExperience]], + eval_streams: Optional[ + Sequence[Union[TDatasetExperience, Iterable[TDatasetExperience]]] + ] = None, + **kwargs + ): super().train(experiences, eval_streams, **kwargs) return self.evaluator.get_last_metrics() @@ -280,9 +270,7 @@ def _eval_cleanup(self): self.mb_output = None self.loss = self._make_empty_loss() - def _train_exp( - self, experience: CLExperience, eval_streams=None, **kwargs - ): + def _train_exp(self, experience: CLExperience, eval_streams=None, **kwargs): """Training loop over a single Experience object. 
:param experience: CL experience information. @@ -334,7 +322,6 @@ def _load_train_state(self, prev_state): self.dataloader = prev_state["dataloader"] def _before_eval_exp(self, **kwargs): - # Data Adaptation self._before_eval_dataset_adaptation(**kwargs) self.eval_dataset_adaptation(**kwargs) @@ -352,8 +339,8 @@ def _obtain_common_dataloader_parameters(self, **kwargs): to the train and eval dataloaders. This function can be useful when in need to customize the data loading - parameters but no radical changes are needed. When overriding to - add/customize parameters, it is recommended to first call this + parameters but no radical changes are needed. When overriding to + add/customize parameters, it is recommended to first call this implementation (super) to obtain a base dictionary of parameters. However, if a more deep change is needed in the data loading procedure, @@ -370,18 +357,19 @@ def _obtain_common_dataloader_parameters(self, **kwargs): """ other_dataloader_args = {} - if 'persistent_workers' in kwargs: + if "persistent_workers" in kwargs: if parse_version(torch.__version__) >= parse_version("1.7.0"): - other_dataloader_args["persistent_workers"] = \ - kwargs['persistent_workers'] + other_dataloader_args["persistent_workers"] = kwargs[ + "persistent_workers" + ] else: - del kwargs['persistent_workers'] + del kwargs["persistent_workers"] for k, v in kwargs.items(): other_dataloader_args[k] = v - if other_dataloader_args.get('pin_memory', None) is None: - other_dataloader_args['pin_memory'] = self.device.type == 'cuda' + if other_dataloader_args.get("pin_memory", None) is None: + other_dataloader_args["pin_memory"] = self.device.type == "cuda" return other_dataloader_args @@ -419,9 +407,7 @@ def make_train_dataloader( ) self.dataloader = TaskBalancedDataLoader( - self.adapted_dataset, - oversample_small_groups=True, - **other_dataloader_args + self.adapted_dataset, oversample_small_groups=True, **other_dataloader_args ) def make_eval_dataloader( @@ -453,14 +439,9 @@ def make_eval_dataloader( persistent_workers=persistent_workers, ) - collate_from_data_or_kwargs( - self.adapted_dataset, - other_dataloader_args) - - self.dataloader = DataLoader( - self.adapted_dataset, - **other_dataloader_args - ) + collate_from_data_or_kwargs(self.adapted_dataset, other_dataloader_args) + + self.dataloader = DataLoader(self.adapted_dataset, **other_dataloader_args) def eval_dataset_adaptation(self, **kwargs): """Initialize `self.adapted_dataset`.""" @@ -542,11 +523,7 @@ class PeriodicEval(BaseSGDPlugin, supports_distributed=True): This plugin is automatically configured and added by the BaseTemplate. """ - def __init__( - self, - eval_every=-1, - peval_mode="epoch", - do_initial=True): + def __init__(self, eval_every=-1, peval_mode="epoch", do_initial=True): """Init. 
:param eval_every: the frequency of the calls to `eval` inside the @@ -603,21 +580,17 @@ def _maybe_peval(self, strategy, counter, **kwargs): if self.eval_every > 0 and counter % self.eval_every == 0: self._peval(strategy, **kwargs) - def after_training_epoch(self, strategy: "BaseSGDTemplate", - **kwargs): + def after_training_epoch(self, strategy: "BaseSGDTemplate", **kwargs): """Periodic eval controlled by `self.eval_every` and `self.peval_mode`.""" if self.peval_mode == "epoch": - self._maybe_peval(strategy, strategy.clock.train_exp_epochs, - **kwargs) + self._maybe_peval(strategy, strategy.clock.train_exp_epochs, **kwargs) - def after_training_iteration(self, strategy: "BaseSGDTemplate", - **kwargs): + def after_training_iteration(self, strategy: "BaseSGDTemplate", **kwargs): """Periodic eval controlled by `self.eval_every` and `self.peval_mode`.""" if self.peval_mode == "iteration": - self._maybe_peval(strategy, strategy.clock.train_exp_iterations, - **kwargs) + self._maybe_peval(strategy, strategy.clock.train_exp_iterations, **kwargs) # ---> New def after_training_exp(self, strategy, **kwargs): diff --git a/avalanche/training/templates/common_templates.py b/avalanche/training/templates/common_templates.py index cc24a2e6d..ee5d129d6 100644 --- a/avalanche/training/templates/common_templates.py +++ b/avalanche/training/templates/common_templates.py @@ -10,8 +10,9 @@ EvaluationPlugin, default_evaluator, ) -from avalanche.training.templates.strategy_mixin_protocol import \ - SupervisedStrategyProtocol +from avalanche.training.templates.strategy_mixin_protocol import ( + SupervisedStrategyProtocol, +) from .observation_type import * from .problem_type import * @@ -19,25 +20,19 @@ from .base_sgd import BaseSGDTemplate -TDatasetExperience = TypeVar('TDatasetExperience', bound=DatasetExperience) -TMBInput = TypeVar('TMBInput') -TMBOutput = TypeVar('TMBOutput') +TDatasetExperience = TypeVar("TDatasetExperience", bound=DatasetExperience) +TMBInput = TypeVar("TMBInput") +TMBOutput = TypeVar("TMBOutput") class SupervisedTemplate( - BatchObservation, - SupervisedProblem, - SGDUpdate, - SupervisedStrategyProtocol[ - TDatasetExperience, - TMBInput, - TMBOutput], - BaseSGDTemplate[ - TDatasetExperience, - TMBInput, - TMBOutput - ]): - + BatchObservation, + SupervisedProblem, + SGDUpdate, + SupervisedStrategyProtocol[TDatasetExperience, TMBInput, TMBOutput], + BaseSGDTemplate[TDatasetExperience, TMBInput, TMBOutput], +): + """Base class for continual learning strategies. SupervisedTemplate is the super class of all supervised task-based @@ -85,21 +80,20 @@ class SupervisedTemplate( PLUGIN_CLASS = SupervisedPlugin def __init__( - self, - model: Module, - optimizer: Optimizer, - criterion=CrossEntropyLoss(), - train_mb_size: int = 1, - train_epochs: int = 1, - eval_mb_size: Optional[int] = 1, - device: Union[str, torch.device] = "cpu", - plugins: Optional[Sequence[BasePlugin]] = None, - evaluator: Union[ - EvaluationPlugin, - Callable[[], EvaluationPlugin] - ] = default_evaluator, - eval_every=-1, - peval_mode="epoch", + self, + model: Module, + optimizer: Optimizer, + criterion=CrossEntropyLoss(), + train_mb_size: int = 1, + train_epochs: int = 1, + eval_mb_size: Optional[int] = 1, + device: Union[str, torch.device] = "cpu", + plugins: Optional[Sequence[BasePlugin]] = None, + evaluator: Union[ + EvaluationPlugin, Callable[[], EvaluationPlugin] + ] = default_evaluator, + eval_every=-1, + peval_mode="epoch", ): """Init. 
@@ -156,14 +150,11 @@ def __init__( class SupervisedMetaLearningTemplate( - BatchObservation, - SupervisedProblem, - MetaUpdate, - BaseSGDTemplate[ - TDatasetExperience, - TMBInput, - TMBOutput - ]): + BatchObservation, + SupervisedProblem, + MetaUpdate, + BaseSGDTemplate[TDatasetExperience, TMBInput, TMBOutput], +): """Base class for continual learning strategies. SupervisedMetaLearningTemplate is the super class of all supervised @@ -221,8 +212,7 @@ def __init__( device: Union[str, torch.device] = "cpu", plugins: Optional[Sequence[BasePlugin]] = None, evaluator: Union[ - EvaluationPlugin, - Callable[[], EvaluationPlugin] + EvaluationPlugin, Callable[[], EvaluationPlugin] ] = default_evaluator, eval_every=-1, peval_mode="epoch", @@ -280,8 +270,8 @@ def __init__( # want the original data for the current experience # use :attr:`.BaseTemplate.experience`. - + __all__ = [ - 'SupervisedTemplate', - 'SupervisedMetaLearningTemplate', + "SupervisedTemplate", + "SupervisedMetaLearningTemplate", ] diff --git a/avalanche/training/templates/observation_type/batch_observation.py b/avalanche/training/templates/observation_type/batch_observation.py index 56a175ad5..9564bb4d9 100644 --- a/avalanche/training/templates/observation_type/batch_observation.py +++ b/avalanche/training/templates/observation_type/batch_observation.py @@ -6,10 +6,8 @@ from avalanche.benchmarks import OnlineCLExperience from avalanche.models.utils import avalanche_model_adaptation -from avalanche.training.templates.strategy_mixin_protocol import \ - SGDStrategyProtocol -from avalanche.models.dynamic_optimizers import (reset_optimizer, - update_optimizer) +from avalanche.training.templates.strategy_mixin_protocol import SGDStrategyProtocol +from avalanche.models.dynamic_optimizers import reset_optimizer, update_optimizer from avalanche.training.utils import at_task_boundary @@ -31,8 +29,7 @@ def model_adaptation(self, model=None): # For training: if isinstance(self.experience, OnlineCLExperience) and self.is_training: if self.experience.access_task_boundaries: - avalanche_model_adaptation(model, - self.experience.origin_experience) + avalanche_model_adaptation(model, self.experience.origin_experience) else: avalanche_model_adaptation(model, self.experience) else: @@ -45,29 +42,27 @@ def make_optimizer(self, reset_optimizer_state=False, **kwargs): Called before each training experience to configure the optimizer. - :param reset_optimizer_state: bool, whether to reset the + :param reset_optimizer_state: bool, whether to reset the state of the optimizer, defaults to False - Warnings: - - The first time this function is called + Warnings: + - The first time this function is called for a given strategy it will reset the - optimizer to gather the (name, param) + optimizer to gather the (name, param) correspondance of the optimized parameters all the model parameters will be put in the - optimizer, regardless of what parameters are + optimizer, regardless of what parameters are initially put in the optimizer. 
""" if self.optimized_param_id is None: - self.optimized_param_id = \ - reset_optimizer(self.optimizer, self.model) + self.optimized_param_id = reset_optimizer(self.optimizer, self.model) else: - self.optimized_param_id = \ - update_optimizer( - self.optimizer, - dict(self.model.named_parameters()), - self.optimized_param_id, - reset_state=reset_optimizer_state - ) + self.optimized_param_id = update_optimizer( + self.optimizer, + dict(self.model.named_parameters()), + self.optimized_param_id, + reset_state=reset_optimizer_state, + ) def check_model_and_optimizer(self, reset_optimizer_state=False, **kwargs): # If strategy has access to the task boundaries, and the current @@ -80,9 +75,7 @@ def check_model_and_optimizer(self, reset_optimizer_state=False, **kwargs): if at_task_boundary(self.experience): self.model = self.model_adaptation() - self.make_optimizer( - reset_optimizer_state=reset_optimizer_state - ) + self.make_optimizer(reset_optimizer_state=reset_optimizer_state) else: self.model = self.model_adaptation() self.make_optimizer(reset_optimizer_state=reset_optimizer_state) diff --git a/avalanche/training/templates/problem_type/supervised_problem.py b/avalanche/training/templates/problem_type/supervised_problem.py index 9ab7b405e..1b12fd539 100644 --- a/avalanche/training/templates/problem_type/supervised_problem.py +++ b/avalanche/training/templates/problem_type/supervised_problem.py @@ -1,13 +1,13 @@ from avalanche.models import avalanche_forward -from avalanche.training.templates.strategy_mixin_protocol import \ - SupervisedStrategyProtocol +from avalanche.training.templates.strategy_mixin_protocol import ( + SupervisedStrategyProtocol, +) # Types are perfectly ok for MyPy # Also confirmed here: https://stackoverflow.com/a/70907644 # PyLance just does not understand it class SupervisedProblem(SupervisedStrategyProtocol): - @property def mb_x(self): """Current mini-batch input.""" @@ -43,13 +43,11 @@ def _unpack_minibatch(self): mbatch = self.mbatch assert mbatch is not None assert len(mbatch) >= 3 - + if isinstance(mbatch, tuple): mbatch = list(mbatch) for i in range(len(mbatch)): self.mbatch[i] = mbatch[i].to(self.device) # type: ignore -__all__ = [ - 'SupervisedProblem' -] +__all__ = ["SupervisedProblem"] diff --git a/avalanche/training/templates/strategy_mixin_protocol.py b/avalanche/training/templates/strategy_mixin_protocol.py index b675765ca..3f365089c 100644 --- a/avalanche/training/templates/strategy_mixin_protocol.py +++ b/avalanche/training/templates/strategy_mixin_protocol.py @@ -13,18 +13,15 @@ ) from avalanche.core import BasePlugin -TExperienceType = TypeVar('TExperienceType', bound=CLExperience) -TSGDExperienceType = TypeVar('TSGDExperienceType', bound=DatasetExperience) -TMBinput = TypeVar('TMBinput') -TMBoutput = TypeVar('TMBoutput') +TExperienceType = TypeVar("TExperienceType", bound=CLExperience) +TSGDExperienceType = TypeVar("TSGDExperienceType", bound=DatasetExperience) +TMBinput = TypeVar("TMBinput") +TMBoutput = TypeVar("TMBoutput") -class BaseStrategyProtocol( - Protocol[ - TExperienceType]): - +class BaseStrategyProtocol(Protocol[TExperienceType]): model: Module - + device: torch.device plugins: List[BasePlugin] @@ -37,12 +34,9 @@ class BaseStrategyProtocol( class SGDStrategyProtocol( - BaseStrategyProtocol[ - TSGDExperienceType], - Protocol[ - TSGDExperienceType, - TMBinput, - TMBoutput]): + BaseStrategyProtocol[TSGDExperienceType], + Protocol[TSGDExperienceType, TMBinput, TMBoutput], +): """ A protocol for strategies to be used for typing mixin classes. 
""" @@ -63,10 +57,10 @@ class SGDStrategyProtocol( def forward(self) -> TMBoutput: ... - + def criterion(self) -> Tensor: ... - + def backward(self) -> None: ... @@ -75,7 +69,7 @@ def _make_empty_loss(self) -> Tensor: def make_optimizer(self, **kwargs): ... - + def optimizer_step(self) -> None: ... @@ -111,11 +105,8 @@ def _after_training_iteration(self, **kwargs): class SupervisedStrategyProtocol( - SGDStrategyProtocol[ - TSGDExperienceType, - TMBinput, - TMBoutput], Protocol): - + SGDStrategyProtocol[TSGDExperienceType, TMBinput, TMBoutput], Protocol +): mb_x: Tensor mb_y: Tensor @@ -124,11 +115,8 @@ class SupervisedStrategyProtocol( class MetaLearningStrategyProtocol( - SGDStrategyProtocol[ - TSGDExperienceType, - TMBinput, - TMBoutput], Protocol): - + SGDStrategyProtocol[TSGDExperienceType, TMBinput, TMBoutput], Protocol +): def _before_inner_updates(self, **kwargs): ... @@ -149,7 +137,7 @@ def _after_outer_update(self, **kwargs): __all__ = [ - 'SGDStrategyProtocol', - 'SupervisedStrategyProtocol', - 'MetaLearningStrategyProtocol' + "SGDStrategyProtocol", + "SupervisedStrategyProtocol", + "MetaLearningStrategyProtocol", ] diff --git a/avalanche/training/templates/update_type/meta_update.py b/avalanche/training/templates/update_type/meta_update.py index 7d1af3709..79e8a5016 100644 --- a/avalanche/training/templates/update_type/meta_update.py +++ b/avalanche/training/templates/update_type/meta_update.py @@ -1,5 +1,6 @@ -from avalanche.training.templates.strategy_mixin_protocol import \ - MetaLearningStrategyProtocol +from avalanche.training.templates.strategy_mixin_protocol import ( + MetaLearningStrategyProtocol, +) from avalanche.training.utils import trigger_plugins @@ -53,6 +54,4 @@ def _after_outer_update(self, **kwargs): trigger_plugins(self, "after_outer_update", **kwargs) -__all__ = [ - 'MetaUpdate' -] +__all__ = ["MetaUpdate"] diff --git a/avalanche/training/templates/update_type/sgd_update.py b/avalanche/training/templates/update_type/sgd_update.py index e38cdca3d..8014dc409 100644 --- a/avalanche/training/templates/update_type/sgd_update.py +++ b/avalanche/training/templates/update_type/sgd_update.py @@ -1,9 +1,7 @@ -from avalanche.training.templates.strategy_mixin_protocol \ - import SGDStrategyProtocol +from avalanche.training.templates.strategy_mixin_protocol import SGDStrategyProtocol class SGDUpdate(SGDStrategyProtocol): - def training_epoch(self, **kwargs): """Training epoch. @@ -40,6 +38,4 @@ def training_epoch(self, **kwargs): self._after_training_iteration(**kwargs) -__all__ = [ - 'SGDUpdate' -] +__all__ = ["SGDUpdate"] diff --git a/avalanche/training/utils.py b/avalanche/training/utils.py index 9d238c42b..b51a60f7f 100644 --- a/avalanche/training/utils.py +++ b/avalanche/training/utils.py @@ -113,7 +113,7 @@ def load_all_dataset(dataset: Dataset, num_workers: int = 0): return x, y -def zerolike_params_dict(model: Module) -> Dict[str, 'ParamData']: +def zerolike_params_dict(model: Module) -> Dict[str, "ParamData"]: """ Create a list of (name, parameter), where parameter is initalized to zero. The list has as many parameters as model, with the same size. 
@@ -121,11 +121,15 @@ def zerolike_params_dict(model: Module) -> Dict[str, 'ParamData']: :param model: a pytorch model """ - return dict([(k, ParamData(k, p.shape, device=p.device)) - for k, p in model.named_parameters()]) + return dict( + [ + (k, ParamData(k, p.shape, device=p.device)) + for k, p in model.named_parameters() + ] + ) -def copy_params_dict(model, copy_grad=False) -> Dict[str, 'ParamData']: +def copy_params_dict(model, copy_grad=False) -> Dict[str, "ParamData"]: """ Create a list of (name, parameter), where parameter is copied from model. The list has as many parameters as model, with the same size. @@ -138,8 +142,7 @@ def copy_params_dict(model, copy_grad=False) -> Dict[str, 'ParamData']: if copy_grad and p.grad is None: continue init = p.grad.data.clone() if copy_grad else p.data.clone() - out[k] = ParamData(k, p.shape, device=p.device, - init_tensor=init) + out[k] = ParamData(k, p.shape, device=p.device, init_tensor=init) return out @@ -153,9 +156,7 @@ class LayerAndParameter(NamedTuple): def get_layers_and_params(model: Module, prefix="") -> List[LayerAndParameter]: result: List[LayerAndParameter] = [] for param_name, param in model.named_parameters(recurse=False): - result.append( - LayerAndParameter(prefix[:-1], model, prefix + param_name, param) - ) + result.append(LayerAndParameter(prefix[:-1], model, prefix + param_name, param)) layer_name: str layer: Module @@ -195,9 +196,7 @@ def swap_last_fc_layer(model: Module, new_layer: Module) -> None: def adapt_classification_layer( - model: Module, - num_classes: int, - bias: Optional[bool] = None + model: Module, num_classes: int, bias: Optional[bool] = None ) -> Tuple[str, Linear]: last_fc_layer: Linear last_fc_name, last_fc_layer = get_last_fc_layer(model) @@ -348,21 +347,20 @@ def examples_per_class(targets): torch.as_tensor(targets), return_counts=True ) for unique_idx in range(len(unique_classes)): - result[int(unique_classes[unique_idx])] = int( - examples_count[unique_idx] - ) + result[int(unique_classes[unique_idx])] = int(examples_count[unique_idx]) return result class ParamData(object): def __init__( - self, - name: str, - shape: Optional[tuple] = None, - init_function: Callable[[torch.Size], torch.Tensor] = torch.zeros, - init_tensor: Union[torch.Tensor, None] = None, - device: Union[str, torch.device] = 'cpu'): + self, + name: str, + shape: Optional[tuple] = None, + init_function: Callable[[torch.Size], torch.Tensor] = torch.zeros, + init_tensor: Union[torch.Tensor, None] = None, + device: Union[str, torch.device] = "cpu", + ): """ An object that contains a tensor with methods to expand it along a single dimension. @@ -375,7 +373,7 @@ def __init__( on subsequent calls of `reset_like` method. :param init_tensor: value to be used when creating the object. If None, `init_function` will be used. - :param device: pytorch like device specification as a string or + :param device: pytorch like device specification as a string or `torch.device`. 
""" assert isinstance(name, str) @@ -386,11 +384,11 @@ def __init__( self.init_function = init_function self.name = name if shape is not None: - self.shape = torch.Size(shape) + self.shape = torch.Size(shape) else: assert init_tensor is not None self.shape = init_tensor.size() - + self.device = torch.device(device) if init_tensor is not None: self._data: torch.Tensor = init_tensor @@ -425,14 +423,14 @@ def expand(self, new_shape, padding_fn=torch.zeros): :return the expanded tensor or the previous tensor """ - assert len(new_shape) == len(self.shape), \ - "Expansion cannot add new dimensions" + assert len(new_shape) == len(self.shape), "Expansion cannot add new dimensions" expanded = False for i, (snew, sold) in enumerate(zip(new_shape, self.shape)): assert snew >= sold, "Shape cannot decrease." if snew > sold: - assert not expanded, \ - "Expansion cannot occur in more than one dimension." + assert ( + not expanded + ), "Expansion cannot occur in more than one dimension." expanded = True exp_idx = i @@ -440,9 +438,10 @@ def expand(self, new_shape, padding_fn=torch.zeros): old_data = self._data.clone() old_shape_len = self._data.shape[exp_idx] self.reset_like(new_shape, init_function=padding_fn) - idx = [slice(el) if i != exp_idx else - slice(old_shape_len) for i, el in - enumerate(new_shape)] + idx = [ + slice(el) if i != exp_idx else slice(old_shape_len) + for i, el in enumerate(new_shape) + ] self._data[idx] = old_data return self.data @@ -452,10 +451,11 @@ def data(self) -> torch.Tensor: @data.setter def data(self, value): - assert value.shape == self._data.shape, \ - "Shape of new value should be the same of old value. " \ - "Use `expand` method to expand one dimension. " \ + assert value.shape == self._data.shape, ( + "Shape of new value should be the same of old value. " + "Use `expand` method to expand one dimension. " "Use `reset_like` to reset with a different shape." + ) self._data = value def __str__(self): diff --git a/docs/conf.py b/docs/conf.py index a39c0dbe7..c251ed44e 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -30,19 +30,19 @@ from jinja2.filters import FILTERS -sys.path.insert(0, os.path.abspath('..')) +sys.path.insert(0, os.path.abspath("..")) # -- Project information ----------------------------------------------------- -project = u'Avalanche' -copyright = u'2022, ContinualAI' -author = u'ContinualAI' +project = "Avalanche" +copyright = "2022, ContinualAI" +author = "ContinualAI" # The short X.Y version -version = u'' +version = "" # The full version, including alpha/beta/rc tags -release = u'0.1' +release = "0.1" # -- General configuration --------------------------------------------------- @@ -55,16 +55,16 @@ # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom # ones. extensions = [ - 'sphinx.ext.doctest', - 'sphinx.ext.todo', - 'sphinx.ext.mathjax', - 'sphinx.ext.viewcode', - 'sphinx.ext.autodoc', - 'sphinx.ext.autosummary', - 'sphinx.ext.githubpages', - 'sphinx.ext.coverage', - 'sphinx_rtd_theme', - 'sphinx_copybutton' + "sphinx.ext.doctest", + "sphinx.ext.todo", + "sphinx.ext.mathjax", + "sphinx.ext.viewcode", + "sphinx.ext.autodoc", + "sphinx.ext.autosummary", + "sphinx.ext.githubpages", + "sphinx.ext.coverage", + "sphinx_rtd_theme", + "sphinx_copybutton", ] autosummary_generate = True @@ -72,28 +72,28 @@ coverage_show_missing_items = True # Add any paths that contain templates here, relative to this directory. -templates_path = ['./_templates'] +templates_path = ["./_templates"] # The suffix(es) of source filenames. 
# You can specify multiple suffix as a list of string: # # source_suffix = ['.rst', '.md'] -source_suffix = '.rst' +source_suffix = ".rst" # The master toctree document. -master_doc = 'index' +master_doc = "index" # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. # # This is also used if you do content translation via gettext catalogs. # Usually you set "language" from the command line for these cases. -language = 'en' +language = "en" # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. # This pattern also affects html_static_path and html_extra_path. -exclude_patterns = [u'_build', 'Thumbs.db', '.DS_Store', '_templates'] +exclude_patterns = ["_build", "Thumbs.db", ".DS_Store", "_templates"] # The name of the Pygments (syntax highlighting) style to use. pygments_style = None @@ -122,26 +122,26 @@ # The theme to use for HTML and HTML Help pages. See the documentation for # a list of builtin themes. # -html_theme = 'sphinx_rtd_theme' -html_logo = './_static/img/avalanche_logo.png' +html_theme = "sphinx_rtd_theme" +html_logo = "./_static/img/avalanche_logo.png" # Theme options are theme-specific and customize the look and feel of a theme # further. For a list of options available for each theme, see the # documentation. # html_theme_options = { - 'collapse_navigation': False, - 'logo_only': True, - 'display_version': True + "collapse_navigation": False, + "logo_only": True, + "display_version": True, } # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". -html_static_path = ['_static'] +html_static_path = ["_static"] # custom css -html_style = 'css/mystyle.css' +html_style = "css/mystyle.css" # Custom sidebar templates, must be a dictionary that maps document names # to template names. @@ -157,7 +157,7 @@ # -- Options for HTMLHelp output --------------------------------------------- # Output file base name for HTML help builder. -htmlhelp_basename = 'Avalanchedoc' +htmlhelp_basename = "Avalanchedoc" # -- Options for LaTeX output ------------------------------------------------ @@ -166,15 +166,12 @@ # The paper size ('letterpaper' or 'a4paper'). # # 'papersize': 'letterpaper', - # The font size ('10pt', '11pt' or '12pt'). # # 'pointsize': '10pt', - # Additional stuff for the LaTeX preamble. # # 'preamble': '', - # Latex figure (float) alignment # # 'figure_align': 'htbp', @@ -184,8 +181,13 @@ # (source start file, target name, title, # author, documentclass [howto, manual, or own class]). latex_documents = [ - (master_doc, 'Avalanche.tex', u'Avalanche Documentation', - u'ContinualAI Research', 'manual'), + ( + master_doc, + "Avalanche.tex", + "Avalanche Documentation", + "ContinualAI Research", + "manual", + ), ] @@ -193,10 +195,7 @@ # One entry per manual page. List of tuples # (source start file, name, description, authors, manual section). 
-man_pages = [ - (master_doc, 'avalanche', u'Avalanche Documentation', - [author], 1) -] +man_pages = [(master_doc, "avalanche", "Avalanche Documentation", [author], 1)] # -- Options for Texinfo output ---------------------------------------------- @@ -205,9 +204,15 @@ # (source start file, target name, title, author, # dir menu entry, description, category) texinfo_documents = [ - (master_doc, 'Avalanche', u'Avalanche Documentation', - author, 'Avalanche', 'One line description of project.', - 'Miscellaneous'), + ( + master_doc, + "Avalanche", + "Avalanche Documentation", + author, + "Avalanche", + "One line description of project.", + "Miscellaneous", + ), ] @@ -226,7 +231,7 @@ # epub_uid = '' # A list of files that should not be packed into the epub file. -epub_exclude_files = ['search.html'] +epub_exclude_files = ["search.html"] # -- Extension configuration ------------------------------------------------- @@ -251,109 +256,98 @@ # sphinx-build -b coverage . _build undocumented_classes_to_ignore = [ # benchmarks - 'IDataset', - 'TensorMNIST', - 'SpeechCommandsData', - 'ClassAccuracyPluginMetric', - 'MeanScoresTrainPluginMetric', - 'MeanScoresEvalPluginMetric', - 'AMCAPluginMetric', - 'DictLVIS', - 'LvisEvaluator', - 'CocoEvaluator', - 'DetectionEvaluator', - - 'ClassificationDataset', - 'YTransformDef', - 'StreamDef', - 'Flatten', - 'XComposedTransformDef', - 'SubSequence', - 'SimpleDownloadableDataset', - 'LazyDatasetSequence', - 'INATURALIST_DATA', - 'FilelistDataset', - 'ClassificationScenarioStream', - - 'MaskedAttributeError', - 'MultiParamTransform', - 'PixelsPermutation', - 'SubsetWithTargets', - 'IClassificationDataset', - 'StreamUserDef', - 'ClassificationSubSequence', - 'ConstantSequence', - 'SequenceDataset', - 'DownloadableDataset', - 'PathsDataset', - 'Compose', - 'VideoSubSequence', - 'PennFudanDataset', - 'IDatasetWithTargets', - 'ISupportedClassificationDataset', - 'LazyStreamDefinition', - 'ITensorDataset', - 'XTransformDef', - 'LazyClassesInExps', - '_LazyStreamClassesInExps', - - + "IDataset", + "TensorMNIST", + "SpeechCommandsData", + "ClassAccuracyPluginMetric", + "MeanScoresTrainPluginMetric", + "MeanScoresEvalPluginMetric", + "AMCAPluginMetric", + "DictLVIS", + "LvisEvaluator", + "CocoEvaluator", + "DetectionEvaluator", + "ClassificationDataset", + "YTransformDef", + "StreamDef", + "Flatten", + "XComposedTransformDef", + "SubSequence", + "SimpleDownloadableDataset", + "LazyDatasetSequence", + "INATURALIST_DATA", + "FilelistDataset", + "ClassificationScenarioStream", + "MaskedAttributeError", + "MultiParamTransform", + "PixelsPermutation", + "SubsetWithTargets", + "IClassificationDataset", + "StreamUserDef", + "ClassificationSubSequence", + "ConstantSequence", + "SequenceDataset", + "DownloadableDataset", + "PathsDataset", + "Compose", + "VideoSubSequence", + "PennFudanDataset", + "IDatasetWithTargets", + "ISupportedClassificationDataset", + "LazyStreamDefinition", + "ITensorDataset", + "XTransformDef", + "LazyClassesInExps", + "_LazyStreamClassesInExps", # evaluation - 'MACPluginMetric', - 'CPUPluginMetric', - 'TimePluginMetric', - 'RAMPluginMetric', - 'GPUPluginMetric', - 'DiskPluginMetric', - 'TopkAccuracyPluginMetric', - 'AccuracyPluginMetric', - 'MeanScoresPluginMetricABC', - 'GenericStreamForgetting', - 'GenericStreamForwardTransfer', - 'GenericExperienceForwardTransfer', - 'GenericExperienceForgetting', - 'LossPluginMetric', - - 'TensorEncoder', - 'TensorImage', - 'AlternativeValues', - 'LabelsRepartitionPlugin', - + "MACPluginMetric", + "CPUPluginMetric", + 
"TimePluginMetric", + "RAMPluginMetric", + "GPUPluginMetric", + "DiskPluginMetric", + "TopkAccuracyPluginMetric", + "AccuracyPluginMetric", + "MeanScoresPluginMetricABC", + "GenericStreamForgetting", + "GenericStreamForwardTransfer", + "GenericExperienceForwardTransfer", + "GenericExperienceForgetting", + "LossPluginMetric", + "TensorEncoder", + "TensorImage", + "AlternativeValues", + "LabelsRepartitionPlugin", # Training - 'AlreadyTrainedError', - - 'VAETraining', - 'Clock', - 'PeriodicEval', - + "AlreadyTrainedError", + "VAETraining", + "Clock", + "PeriodicEval", # Utils - 'LayerAndParameter', - 'ComposeMaxParamsWarning', - + "LayerAndParameter", + "ComposeMaxParamsWarning", # Models - 'IdentityShortcut', - 'ResidualBlock', - 'Generator', - + "IdentityShortcut", + "ResidualBlock", + "Generator", # Other - 'L2Normalization', - 'PPPloss', - - 'COCO', - 'ClassificationExperience', - 'LVISAnnotationEntry', - 'ExperienceMode', - 'LVISImgEntry', - 'VAEMLPDecoder', - 'MultiTaskDecorator', - 'CLEARMetric', - 'LVISDetectionTargets', - 'LVIS', - 'GenericCLScenario', - 'BatchRenorm2D', - 'OpenLORISDataset', - 'VAEMLPEncoder', - 'LvisDataset', + "L2Normalization", + "PPPloss", + "COCO", + "ClassificationExperience", + "LVISAnnotationEntry", + "ExperienceMode", + "LVISImgEntry", + "VAEMLPDecoder", + "MultiTaskDecorator", + "CLEARMetric", + "LVISDetectionTargets", + "LVIS", + "GenericCLScenario", + "BatchRenorm2D", + "OpenLORISDataset", + "VAEMLPEncoder", + "LvisDataset", ] undocumented_classes_to_ignore = set(undocumented_classes_to_ignore) @@ -368,8 +362,8 @@ def coverage_post_process(app, exception): # we collected what has been already documented by sphinx to compare it # with the full list of classes of Avalanche. - doc_classes = app.env.domaindata['py']['objects'] - doc_classes = set([s.split('.')[-1] for s in doc_classes]) + doc_classes = app.env.domaindata["py"]["objects"] + doc_classes = set([s.split(".")[-1] for s in doc_classes]) # print(doc_classes) # STRONG ASSUMPTION HERE: unique names for classes in different namespaces. # Otherwise, we need to detect the case when mylib.Type is documented but @@ -388,14 +382,17 @@ def is_not_internal(name): try: lib_classes = set() for _, modname, ispkg in pkgutil.walk_packages( - path=avalanche.__path__, - prefix=avalanche.__name__ + '.'): - + path=avalanche.__path__, prefix=avalanche.__name__ + "." + ): # print("MODULE: " + modname) try: for name, obj in inspect.getmembers(sys.modules[modname]): # print(name) - if inspect.isclass(obj) and obj.__module__.startswith('avalanche') and is_not_internal(obj.__module__ + '.' + name): + if ( + inspect.isclass(obj) + and obj.__module__.startswith("avalanche") + and is_not_internal(obj.__module__ + "." + name) + ): # print("CLASS: " + obj.__module__ + '.' + obj.__name__) lib_classes.add(name) except Exception as e: @@ -403,8 +400,10 @@ def is_not_internal(name): # Also seems to crash on module attributes that are # instance variables instead of classes/functions/modules. # No idea why, but we can ignore them for the moment. 
- print(f"KeyError on module {modname}, class {name}, exception " - f"type {type(e)}") + print( + f"KeyError on module {modname}, class {name}, exception " + f"type {type(e)}" + ) except Exception as e: print("ERROR!!!!!!!!!!!!!!!!!!!!!!!!!!!!!") @@ -436,4 +435,4 @@ def get_attributes(item, obj, modulename): return "" -FILTERS["get_attributes"] = get_attributes \ No newline at end of file +FILTERS["get_attributes"] = get_attributes diff --git a/examples/all_mnist.py b/examples/all_mnist.py index 612bc40ae..e85b9ebc8 100644 --- a/examples/all_mnist.py +++ b/examples/all_mnist.py @@ -27,9 +27,7 @@ def main(args): # Device config device = torch.device( - f"cuda:{args.cuda}" - if torch.cuda.is_available() and args.cuda >= 0 - else "cpu" + f"cuda:{args.cuda}" if torch.cuda.is_available() and args.cuda >= 0 else "cpu" ) # model diff --git a/examples/all_mnist_early_stopping.py b/examples/all_mnist_early_stopping.py index 835d8e4c7..08078dc5c 100644 --- a/examples/all_mnist_early_stopping.py +++ b/examples/all_mnist_early_stopping.py @@ -33,9 +33,7 @@ def main(args): # Device config device = torch.device( - f"cuda:{args.cuda}" - if torch.cuda.is_available() and args.cuda >= 0 - else "cpu" + f"cuda:{args.cuda}" if torch.cuda.is_available() and args.cuda >= 0 else "cpu" ) # model diff --git a/examples/ar1.py b/examples/ar1.py index cbdfd2be8..a4d9bf29f 100644 --- a/examples/ar1.py +++ b/examples/ar1.py @@ -26,9 +26,7 @@ def main(args): # Device config device = torch.device( - f"cuda:{args.cuda}" - if torch.cuda.is_available() and args.cuda >= 0 - else "cpu" + f"cuda:{args.cuda}" if torch.cuda.is_available() and args.cuda >= 0 else "cpu" ) # --------- diff --git a/examples/checkpointing.py b/examples/checkpointing.py index cb625dfd1..25bf956f4 100644 --- a/examples/checkpointing.py +++ b/examples/checkpointing.py @@ -39,22 +39,19 @@ def main_with_checkpointing(args): # Nothing new here... device = torch.device( - f"cuda:{args.cuda}" - if torch.cuda.is_available() and args.cuda >= 0 - else "cpu" + f"cuda:{args.cuda}" if torch.cuda.is_available() and args.cuda >= 0 else "cpu" ) - print('Using device', device) + print("Using device", device) # CL Benchmark Creation (as usual) benchmark = SplitMNIST(5) - model = SimpleMLP(input_size=28*28, num_classes=10) + model = SimpleMLP(input_size=28 * 28, num_classes=10) optimizer = SGD(model.parameters(), lr=0.01, momentum=0.9) criterion = CrossEntropyLoss() # Create the evaluation plugin (as usual) evaluation_plugin = EvaluationPlugin( - accuracy_metrics(experience=True, stream=True), - loggers=[InteractiveLogger()] + accuracy_metrics(experience=True, stream=True), loggers=[InteractiveLogger()] ) # Create the strategy (as usual) @@ -66,14 +63,14 @@ def main_with_checkpointing(args): train_epochs=2, eval_mb_size=128, device=device, - evaluator=evaluation_plugin + evaluator=evaluation_plugin, ) # STEP 2: TRY TO LOAD THE LAST CHECKPOINT # if the checkpoint exists, load it into the newly created strategy # the method also loads the experience counter, so we know where to # resume training - fname = './checkpoint.pkl' # name of the checkpoint file + fname = "./checkpoint.pkl" # name of the checkpoint file strategy, initial_exp = maybe_load_checkpoint(strategy, fname) # STEP 3: USE THE "initial_exp" to resume training @@ -91,6 +88,6 @@ def main_with_checkpointing(args): "--cuda", type=int, default=0, - help="Select zero-indexed cuda device. -1 to use CPU." + help="Select zero-indexed cuda device. 
-1 to use CPU.", ) main_with_checkpointing(parser.parse_args()) diff --git a/examples/clear.py b/examples/clear.py index c8963acdf..2c4de81c3 100644 --- a/examples/clear.py +++ b/examples/clear.py @@ -38,8 +38,12 @@ # For CLEAR dataset setup DATASET_NAME = "clear100_cvpr2022" -NUM_CLASSES = {"clear10_neurips_2021": 11, "clear100_cvpr2022": 100, - "clear10": 11, "clear100": 100} +NUM_CLASSES = { + "clear10_neurips_2021": 11, + "clear100_cvpr2022": 100, + "clear10": 11, + "clear100": 100, +} assert DATASET_NAME in NUM_CLASSES.keys() # please refer to paper for discussion on streaming v.s. iid protocol @@ -105,20 +109,15 @@ def main(): interactive_logger = InteractiveLogger() eval_plugin = EvaluationPlugin( - accuracy_metrics(minibatch=True, epoch=True, experience=True, - stream=True), - loss_metrics(minibatch=True, epoch=True, experience=True, - stream=True), + accuracy_metrics(minibatch=True, epoch=True, experience=True, stream=True), + loss_metrics(minibatch=True, epoch=True, experience=True, stream=True), timing_metrics(epoch=True, epoch_running=True), forgetting_metrics(experience=True, stream=True), cpu_usage_metrics(experience=True), confusion_matrix_metrics( - num_classes=NUM_CLASSES[DATASET_NAME], save_image=False, - stream=True - ), - disk_usage_metrics( - minibatch=True, epoch=True, experience=True, stream=True + num_classes=NUM_CLASSES[DATASET_NAME], save_image=False, stream=True ), + disk_usage_metrics(minibatch=True, epoch=True, experience=True, stream=True), loggers=[interactive_logger, text_logger, tb_logger], ) @@ -174,8 +173,7 @@ def main(): print("Current Classes: ", experience.classes_in_this_experience) res = cl_strategy.train(experience) torch.save( - model.state_dict(), - str(MODEL_ROOT / f"model{str(index).zfill(2)}.pth") + model.state_dict(), str(MODEL_ROOT / f"model{str(index).zfill(2)}.pth") ) print("Training completed") print( diff --git a/examples/clear_linear.py b/examples/clear_linear.py index 66bb89d13..eca5b7e3a 100644 --- a/examples/clear_linear.py +++ b/examples/clear_linear.py @@ -37,8 +37,12 @@ # For CLEAR dataset setup DATASET_NAME = "clear10_neurips2021" -NUM_CLASSES = {"clear10_neurips_2021": 11, "clear100_cvpr2022": 100, - "clear10": 11, "clear100": 100} +NUM_CLASSES = { + "clear10_neurips_2021": 11, + "clear100_cvpr2022": 100, + "clear10": 11, + "clear100": 100, +} CLEAR_FEATURE_TYPE = "moco_b0" # MoCo V2 pretrained on bucket 0 # CLEAR_FEATURE_TYPE = "moco_imagenet" # MoCo V2 pretrained on imagenet # CLEAR_FEATURE_TYPE = "byol_imagenet" # BYOL pretrained on imagenet @@ -87,20 +91,15 @@ def make_scheduler(optimizer, step_size, gamma=0.1): interactive_logger = InteractiveLogger() eval_plugin = EvaluationPlugin( - accuracy_metrics(minibatch=True, epoch=True, experience=True, - stream=True), - loss_metrics(minibatch=True, epoch=True, experience=True, - stream=True), + accuracy_metrics(minibatch=True, epoch=True, experience=True, stream=True), + loss_metrics(minibatch=True, epoch=True, experience=True, stream=True), timing_metrics(epoch=True, epoch_running=True), forgetting_metrics(experience=True, stream=True), cpu_usage_metrics(experience=True), confusion_matrix_metrics( - num_classes=NUM_CLASSES[DATASET_NAME], save_image=False, - stream=True - ), - disk_usage_metrics( - minibatch=True, epoch=True, experience=True, stream=True + num_classes=NUM_CLASSES[DATASET_NAME], save_image=False, stream=True ), + disk_usage_metrics(minibatch=True, epoch=True, experience=True, stream=True), loggers=[interactive_logger, text_logger, tb_logger], ) @@ -154,8 +153,7 @@ 
def make_scheduler(optimizer, step_size, gamma=0.1): print("Current Classes: ", experience.classes_in_this_experience) res = cl_strategy.train(experience) torch.save( - model.state_dict(), - str(MODEL_ROOT / f"model{str(index).zfill(2)}.pth") + model.state_dict(), str(MODEL_ROOT / f"model{str(index).zfill(2)}.pth") ) print("Training completed") print( @@ -168,8 +166,10 @@ def make_scheduler(optimizer, step_size, gamma=0.1): accuracy_matrix = np.zeros((num_timestamp, num_timestamp)) for train_idx in range(num_timestamp): for test_idx in range(num_timestamp): - mname = f"Top1_Acc_Exp/eval_phase/test_stream" \ - f"/Task00{test_idx}/Exp00{test_idx}" + mname = ( + f"Top1_Acc_Exp/eval_phase/test_stream" + f"/Task00{test_idx}/Exp00{test_idx}" + ) accuracy_matrix[train_idx][test_idx] = results[train_idx][mname] print("Accuracy_matrix : ") print(accuracy_matrix) diff --git a/examples/confusion_matrix.py b/examples/confusion_matrix.py index cd5be6ca0..1be83c9a6 100644 --- a/examples/confusion_matrix.py +++ b/examples/confusion_matrix.py @@ -39,9 +39,7 @@ def main(args): # --- CONFIG device = torch.device( - f"cuda:{args.cuda}" - if torch.cuda.is_available() and args.cuda >= 0 - else "cpu" + f"cuda:{args.cuda}" if torch.cuda.is_available() and args.cuda >= 0 else "cpu" ) # --------- @@ -71,9 +69,7 @@ def main(args): download=True, transform=test_transform, ) - benchmark = nc_benchmark( - mnist_train, mnist_test, 5, task_labels=False, seed=1234 - ) + benchmark = nc_benchmark(mnist_train, mnist_test, 5, task_labels=False, seed=1234) # --------- # MODEL CREATION @@ -85,9 +81,7 @@ def main(args): # save image should be False to appropriately view # results in Interactive Logger. # a tensor will be printed - confusion_matrix_metrics( - save_image=False, normalize="all", stream=True - ), + confusion_matrix_metrics(save_image=False, normalize="all", stream=True), loggers=InteractiveLogger(), ) diff --git a/examples/continual_sequence_classification.py b/examples/continual_sequence_classification.py index 964878b8d..536b1c0d7 100644 --- a/examples/continual_sequence_classification.py +++ b/examples/continual_sequence_classification.py @@ -25,18 +25,14 @@ def main(): if mfcc: mfcc_preprocess = torchaudio.transforms.MFCC( - sample_rate=16000, n_mfcc=40, - melkwargs={"n_mels": 50, "hop_length": 10} + sample_rate=16000, n_mfcc=40, melkwargs={"n_mels": 50, "hop_length": 10} ) else: mfcc_preprocess = None device = torch.device("cuda" if torch.cuda.is_available() else "cpu") - train_ds = SpeechCommands( - subset="training", - mfcc_preprocessing=mfcc_preprocess - ) + train_ds = SpeechCommands(subset="training", mfcc_preprocessing=mfcc_preprocess) test_ds = SpeechCommands( subset="testing", # you may also use "validation" mfcc_preprocessing=mfcc_preprocess, @@ -71,13 +67,9 @@ def main(): optimizer = torch.optim.Adam(model.parameters(), lr=lr) eval_plugin = EvaluationPlugin( - accuracy_metrics( - epoch=True, experience=True, stream=True - ), - loss_metrics( - epoch=True, experience=True, stream=True - ), - loggers=[InteractiveLogger()] + accuracy_metrics(epoch=True, experience=True, stream=True), + loss_metrics(epoch=True, experience=True, stream=True), + loggers=[InteractiveLogger()], ) strategy = Naive( @@ -88,7 +80,7 @@ def main(): train_epochs=1, eval_mb_size=256, device=device, - evaluator=eval_plugin + evaluator=eval_plugin, ) for exp in benchmark.train_stream: diff --git a/examples/dataset_inspection.py b/examples/dataset_inspection.py index fc35f6a70..c4e77ca07 100644 --- a/examples/dataset_inspection.py +++ 
b/examples/dataset_inspection.py @@ -25,9 +25,7 @@ def main(cuda: int): # --- CONFIG - device = torch.device( - f"cuda:{cuda}" if torch.cuda.is_available() else "cpu" - ) + device = torch.device(f"cuda:{cuda}" if torch.cuda.is_available() else "cpu") # --- BENCHMARK CREATION benchmark = SplitCIFAR10(n_experiences=2, seed=42) # --------- diff --git a/examples/detection.py b/examples/detection.py index 0f21e21b4..3a0c83acd 100644 --- a/examples/detection.py +++ b/examples/detection.py @@ -31,10 +31,7 @@ ObjectDetectionTemplate, ) -from avalanche.evaluation.metrics import ( - timing_metrics, - loss_metrics -) +from avalanche.evaluation.metrics import timing_metrics, loss_metrics from avalanche.evaluation.metrics.detection import DetectionMetrics from avalanche.logging import InteractiveLogger from avalanche.training.plugins import LRSchedulerPlugin, EvaluationPlugin @@ -52,9 +49,7 @@ def main(args): # --- CONFIG device = torch.device( - f"cuda:{args.cuda}" - if torch.cuda.is_available() and args.cuda >= 0 - else "cpu" + f"cuda:{args.cuda}" if torch.cuda.is_available() and args.cuda >= 0 else "cpu" ) # --------- @@ -85,9 +80,7 @@ def main(args): # 1) Get number of input features for the classifier in_features = model.roi_heads.box_predictor.cls_score.in_features # 2) Replace the pre-trained head with a new one - model.roi_heads.box_predictor = FastRCNNPredictor( - in_features, num_classes - ) + model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes) else: # Detection + Segmentation model = obtain_base_model(segmentation=True) @@ -96,9 +89,7 @@ def main(args): # 1) Get number of input features for the classifier in_features = model.roi_heads.box_predictor.cls_score.in_features # 2) Replace the pre-trained head with a new one - model.roi_heads.box_predictor = FastRCNNPredictor( - in_features, num_classes - ) + model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes) # now get the number of input features for the mask classifier in_features_mask = model.roi_heads.mask_predictor.conv5_mask.in_channels @@ -112,9 +103,7 @@ def main(args): # Define the optimizer and the scheduler params = [p for p in model.parameters() if p.requires_grad] - optimizer = torch.optim.SGD( - params, lr=0.005, momentum=0.9, weight_decay=0.0005 - ) + optimizer = torch.optim.SGD(params, lr=0.005, momentum=0.9, weight_decay=0.0005) train_mb_size = 5 warmup_factor = 1.0 / 1000 @@ -163,27 +152,30 @@ def main(args): def obtain_base_model(segmentation: bool): - torchvision_is_old_version = \ - parse_version(torch.__version__) < parse_version("0.13") + torchvision_is_old_version = parse_version(torch.__version__) < parse_version( + "0.13" + ) pretrain_argument = dict() if torchvision_is_old_version: - pretrain_argument['pretrained'] = True + pretrain_argument["pretrained"] = True else: if segmentation: - pretrain_argument['weights'] = \ - torchvision.models.detection.mask_rcnn.\ - MaskRCNN_ResNet50_FPN_Weights.DEFAULT + pretrain_argument[ + "weights" + ] = ( + torchvision.models.detection.mask_rcnn.MaskRCNN_ResNet50_FPN_Weights.DEFAULT + ) else: - pretrain_argument['weights'] = \ - torchvision.models.detection.faster_rcnn.\ - FasterRCNN_ResNet50_FPN_Weights.DEFAULT - + pretrain_argument[ + "weights" + ] = ( + torchvision.models.detection.faster_rcnn.FasterRCNN_ResNet50_FPN_Weights.DEFAULT + ) + if segmentation: - model = torchvision.models.detection.maskrcnn_resnet50_fpn( - **pretrain_argument - ) + model = torchvision.models.detection.maskrcnn_resnet50_fpn(**pretrain_argument) else: model = 
torchvision.models.detection.fasterrcnn_resnet50_fpn( **pretrain_argument @@ -219,9 +211,7 @@ def split_penn_fudan( test_size = 50 train_size = len(dataset) - test_size if shuffle: - train_dataset, test_dataset = random_split( - dataset, [train_size, test_size] - ) + train_dataset, test_dataset = random_split(dataset, [train_size, test_size]) else: indices = list(range(len(dataset))) train_dataset = Subset(dataset, indices[:-test_size]) diff --git a/examples/detection_examples_utils.py b/examples/detection_examples_utils.py index da96dd1c2..488d2d45b 100644 --- a/examples/detection_examples_utils.py +++ b/examples/detection_examples_utils.py @@ -47,19 +47,19 @@ def split_detection_benchmark( remaining = len(train_dataset) % n_experiences # Note: in future versions of Avalanche, the make_classification_dataset - # function will be replaced with a more specific function for object + # function will be replaced with a more specific function for object # detection datasets. train_dataset_avl = make_detection_dataset( train_dataset, transform_groups=transform_groups, initial_transform_group="train", - collate_fn=detection_collate_fn + collate_fn=detection_collate_fn, ) test_dataset_avl = make_detection_dataset( test_dataset, transform_groups=transform_groups, initial_transform_group="eval", - collate_fn=detection_collate_fn + collate_fn=detection_collate_fn, ) exp_sz = [exp_n_imgs for _ in range(n_experiences)] @@ -80,7 +80,7 @@ def split_detection_benchmark( last_slice_idx = 0 for exp_id in range(n_experiences): n_imgs = exp_sz[exp_id] - idx_range = train_indices[last_slice_idx:last_slice_idx + n_imgs] + idx_range = train_indices[last_slice_idx : last_slice_idx + n_imgs] train_exps_datasets.append( detection_subset(train_dataset_avl, indices=idx_range) ) @@ -107,6 +107,4 @@ def split_detection_benchmark( ) -__all__ = [ - "split_detection_benchmark" -] +__all__ = ["split_detection_benchmark"] diff --git a/examples/detection_lvis.py b/examples/detection_lvis.py index ec011e5fa..7db903094 100644 --- a/examples/detection_lvis.py +++ b/examples/detection_lvis.py @@ -27,10 +27,7 @@ ObjectDetectionTemplate, ) -from avalanche.evaluation.metrics import ( - timing_metrics, - loss_metrics -) +from avalanche.evaluation.metrics import timing_metrics, loss_metrics from avalanche.logging import InteractiveLogger from avalanche.training.plugins import LRSchedulerPlugin, EvaluationPlugin import argparse @@ -50,9 +47,7 @@ def main(args): # --- CONFIG device = torch.device( - f"cuda:{args.cuda}" - if torch.cuda.is_available() and args.cuda >= 0 - else "cpu" + f"cuda:{args.cuda}" if torch.cuda.is_available() and args.cuda >= 0 else "cpu" ) # --------- @@ -90,9 +85,7 @@ def main(args): # Define the optimizer and the scheduler params = [p for p in model.parameters() if p.requires_grad] - optimizer = torch.optim.SGD( - params, lr=0.005, momentum=0.9, weight_decay=0.0005 - ) + optimizer = torch.optim.SGD(params, lr=0.005, momentum=0.9, weight_decay=0.0005) train_mb_size = 5 warmup_factor = 1.0 / 1000 @@ -141,27 +134,30 @@ def main(args): def obtain_base_model(segmentation: bool): - torchvision_is_old_version = \ - parse_version(torch.__version__) < parse_version("0.13") + torchvision_is_old_version = parse_version(torch.__version__) < parse_version( + "0.13" + ) pretrain_argument = dict() if torchvision_is_old_version: - pretrain_argument['pretrained'] = True + pretrain_argument["pretrained"] = True else: if segmentation: - pretrain_argument['weights'] = \ - torchvision.models.detection.mask_rcnn.\ - 
MaskRCNN_ResNet50_FPN_Weights.DEFAULT + pretrain_argument[ + "weights" + ] = ( + torchvision.models.detection.mask_rcnn.MaskRCNN_ResNet50_FPN_Weights.DEFAULT + ) else: - pretrain_argument['weights'] = \ - torchvision.models.detection.faster_rcnn.\ - FasterRCNN_ResNet50_FPN_Weights.DEFAULT - + pretrain_argument[ + "weights" + ] = ( + torchvision.models.detection.faster_rcnn.FasterRCNN_ResNet50_FPN_Weights.DEFAULT + ) + if segmentation: - model = torchvision.models.detection.maskrcnn_resnet50_fpn( - **pretrain_argument - ) + model = torchvision.models.detection.maskrcnn_resnet50_fpn(**pretrain_argument) else: model = torchvision.models.detection.fasterrcnn_resnet50_fpn( **pretrain_argument diff --git a/examples/endless_cl_sim.py b/examples/endless_cl_sim.py index c4ded290a..05a4167cd 100644 --- a/examples/endless_cl_sim.py +++ b/examples/endless_cl_sim.py @@ -31,9 +31,7 @@ def main(args): # Config device = torch.device( - f"cuda:{args.cuda}" - if torch.cuda.is_available() and args.cuda >= 0 - else "cpu" + f"cuda:{args.cuda}" if torch.cuda.is_available() and args.cuda >= 0 else "cpu" ) # Model diff --git a/examples/eval_plugin.py b/examples/eval_plugin.py index 3ab09fb60..16e689b98 100644 --- a/examples/eval_plugin.py +++ b/examples/eval_plugin.py @@ -53,9 +53,7 @@ def main(args): # --- CONFIG device = torch.device( - f"cuda:{args.cuda}" - if torch.cuda.is_available() and args.cuda >= 0 - else "cpu" + f"cuda:{args.cuda}" if torch.cuda.is_available() and args.cuda >= 0 else "cpu" ) # --------- @@ -85,9 +83,7 @@ def main(args): download=True, transform=test_transform, ) - benchmark = nc_benchmark( - mnist_train, mnist_test, 5, task_labels=False, seed=1234 - ) + benchmark = nc_benchmark(mnist_train, mnist_test, 5, task_labels=False, seed=1234) # --------- # MODEL CREATION @@ -156,9 +152,7 @@ def main(args): experience=True, stream=True, ), - disk_usage_metrics( - minibatch=True, epoch=True, experience=True, stream=True - ), + disk_usage_metrics(minibatch=True, epoch=True, experience=True, stream=True), MAC_metrics(minibatch=True, epoch=True, experience=True), labels_repartition_metrics(on_train=True, on_eval=True), loggers=[interactive_logger, text_logger, csv_logger, tb_logger], @@ -187,8 +181,7 @@ def main(args): # train returns a dictionary containing last recorded value # for each metric. 
- res = cl_strategy.train(experience, - eval_streams=[benchmark.test_stream]) + res = cl_strategy.train(experience, eval_streams=[benchmark.test_stream]) print("Training completed") print("Computing accuracy on the whole test set") diff --git a/examples/ex_model_cl.py b/examples/ex_model_cl.py index 4bb315158..4e9af8b91 100644 --- a/examples/ex_model_cl.py +++ b/examples/ex_model_cl.py @@ -22,9 +22,7 @@ def main(args): # --- CONFIG device = torch.device( - f"cuda:{args.cuda}" - if torch.cuda.is_available() and args.cuda >= 0 - else "cpu" + f"cuda:{args.cuda}" if torch.cuda.is_available() and args.cuda >= 0 else "cpu" ) # --------- diff --git a/examples/expert_gate.py b/examples/expert_gate.py index 9481db193..ed6dd5174 100644 --- a/examples/expert_gate.py +++ b/examples/expert_gate.py @@ -9,9 +9,7 @@ from sklearn.datasets import make_classification from sklearn.model_selection import train_test_split -from avalanche.models import ( - ExpertGate -) +from avalanche.models import ExpertGate from avalanche.benchmarks.classic import SplitMNIST from avalanche.training.supervised import ExpertGateStrategy from avalanche.models.utils import avalanche_model_adaptation @@ -29,9 +27,7 @@ def main(args): # check if selected GPU is available or use CPU assert args.cuda == -1 or args.cuda >= 0, "cuda must be -1 or >= 0." device = torch.device( - f"cuda:{args.cuda}" - if torch.cuda.is_available() and args.cuda >= 0 - else "cpu" + f"cuda:{args.cuda}" if torch.cuda.is_available() and args.cuda >= 0 else "cpu" ) print(f"Using device: {device}") @@ -39,8 +35,9 @@ def main(args): model = ExpertGate(shape=(3, 227, 227), device=device) # Vanilla optimization - optimizer = SGD(model.expert.parameters(), lr=args.lr, - momentum=0.9, weight_decay=0.0005) + optimizer = SGD( + model.expert.parameters(), lr=args.lr, momentum=0.9, weight_decay=0.0005 + ) # Set up strategy strategy = ExpertGateStrategy( @@ -51,7 +48,7 @@ def main(args): train_epochs=args.epochs, eval_every=-1, ae_train_mb_size=args.minibatch_size, - ae_train_epochs=int(args.epochs*2), + ae_train_epochs=int(args.epochs * 2), ae_lr=1e-3, ) @@ -59,61 +56,69 @@ def main(args): scenario = build_scenario(args.mnist) # Train loop - for experience in (scenario.train_stream): + for experience in scenario.train_stream: t = experience.task_label exp_id = experience.current_experience training_dataset = experience.dataset print() - print(f'Task {t} batch {exp_id}') - print(f'This batch contains {len(training_dataset)} patterns') - print(f'Current Classes: {experience.classes_in_this_experience}') + print(f"Task {t} batch {exp_id}") + print(f"This batch contains {len(training_dataset)} patterns") + print(f"Current Classes: {experience.classes_in_this_experience}") strategy.train(experience) # Evaluation loop print("\nEVALUATION") - for experience in (scenario.test_stream): + for experience in scenario.test_stream: strategy.eval(experience) def build_scenario(mnist=False): - - if (not mnist): + if not mnist: # Fake benchmark is (1,1,6) # Data needs to be transformed for AlexNet # Repeat the "channel" as AlexNet expects 3 channel input # Resize to 227 because AlexNet convolution will reduce the data shape - CustomDataAlexTransform = transforms.Compose([ - transforms.Lambda(lambda x: x.repeat(3, 1, 1)), - transforms.Resize((227, 227)), - ]) + CustomDataAlexTransform = transforms.Compose( + [ + transforms.Lambda(lambda x: x.repeat(3, 1, 1)), + transforms.Resize((227, 227)), + ] + ) scenario = get_custom_benchmark( - use_task_labels=True, 
train_transform=CustomDataAlexTransform, - eval_transform=CustomDataAlexTransform, shuffle=False) + use_task_labels=True, + train_transform=CustomDataAlexTransform, + eval_transform=CustomDataAlexTransform, + shuffle=False, + ) else: # More resource intensive example - MNISTAlexTransform = transforms.Compose([ - transforms.Resize((227, 227)), - transforms.Lambda(lambda x: x.repeat(3, 1, 1)), - ]) + MNISTAlexTransform = transforms.Compose( + [ + transforms.Resize((227, 227)), + transforms.Lambda(lambda x: x.repeat(3, 1, 1)), + ] + ) # Note: Must provide task ID for training - scenario = SplitMNIST(n_experiences=5, - return_task_id=True, - train_transform=MNISTAlexTransform, - eval_transform=MNISTAlexTransform) + scenario = SplitMNIST( + n_experiences=5, + return_task_id=True, + train_transform=MNISTAlexTransform, + eval_transform=MNISTAlexTransform, + ) return scenario -def get_custom_benchmark(use_task_labels=False, - shuffle=False, - n_samples_per_class=100, - train_transform=None, - eval_transform=None - ): - +def get_custom_benchmark( + use_task_labels=False, + shuffle=False, + n_samples_per_class=100, + train_transform=None, + eval_transform=None, +): dataset = make_classification( n_samples=10 * n_samples_per_class, n_classes=10, @@ -145,8 +150,7 @@ def get_custom_benchmark(use_task_labels=False, if __name__ == "__main__": parser = argparse.ArgumentParser() - parser.add_argument("--lr", type=float, default=1e-3, - help="Learning rate.") + parser.add_argument("--lr", type=float, default=1e-3, help="Learning rate.") parser.add_argument( "--epochs", type=int, default=10, help="Number of training epochs." ) @@ -154,8 +158,7 @@ def get_custom_benchmark(use_task_labels=False, "--minibatch_size", type=int, default=256, help="Minibatch size." ) parser.add_argument( - "--mnist", action="store_true", - help="Use the MNIST dataset for the example" + "--mnist", action="store_true", help="Use the MNIST dataset for the example" ) parser.add_argument( "--cuda", diff --git a/examples/from_scratch_training.py b/examples/from_scratch_training.py index 3839a737a..5cecfb214 100644 --- a/examples/from_scratch_training.py +++ b/examples/from_scratch_training.py @@ -13,9 +13,7 @@ from avalanche.training.plugins import EvaluationPlugin from avalanche.benchmarks.classic import SplitMNIST from avalanche.logging import InteractiveLogger -from avalanche.training.supervised import ( - FromScratchTraining -) +from avalanche.training.supervised import FromScratchTraining def main(): @@ -24,16 +22,13 @@ def main(): # create the benchmark benchmark = SplitMNIST( - n_experiences=5, - dataset_root=expanduser("~") + "/.avalanche/data/mnist/" + n_experiences=5, dataset_root=expanduser("~") + "/.avalanche/data/mnist/" ) # choose some metrics and evaluation method interactive_logger = InteractiveLogger() eval_plugin = EvaluationPlugin( - accuracy_metrics( - minibatch=True, epoch=True, experience=True, stream=True - ), + accuracy_metrics(minibatch=True, epoch=True, experience=True, stream=True), loss_metrics(minibatch=True, epoch=True, experience=True, stream=True), loggers=[interactive_logger], ) diff --git a/examples/generative_replay_MNIST_generator.py b/examples/generative_replay_MNIST_generator.py index 86ae60dda..ae9bdb88e 100644 --- a/examples/generative_replay_MNIST_generator.py +++ b/examples/generative_replay_MNIST_generator.py @@ -27,9 +27,7 @@ def main(args): # --- CONFIG device = torch.device( - f"cuda:{args.cuda}" - if torch.cuda.is_available() and args.cuda >= 0 - else "cpu" + f"cuda:{args.cuda}" if 
torch.cuda.is_available() and args.cuda >= 0 else "cpu" ) # --- BENCHMARK CREATION diff --git a/examples/generative_replay_splitMNIST.py b/examples/generative_replay_splitMNIST.py index 2938a298c..7112128ad 100644 --- a/examples/generative_replay_splitMNIST.py +++ b/examples/generative_replay_splitMNIST.py @@ -34,9 +34,7 @@ def main(args): # --- CONFIG device = torch.device( - f"cuda:{args.cuda}" - if torch.cuda.is_available() and args.cuda >= 0 - else "cpu" + f"cuda:{args.cuda}" if torch.cuda.is_available() and args.cuda >= 0 else "cpu" ) # --- BENCHMARK CREATION @@ -50,9 +48,7 @@ def main(args): interactive_logger = InteractiveLogger() eval_plugin = EvaluationPlugin( - accuracy_metrics( - minibatch=True, epoch=True, experience=True, stream=True - ), + accuracy_metrics(minibatch=True, epoch=True, experience=True, stream=True), loss_metrics(minibatch=True, epoch=True, experience=True, stream=True), forgetting_metrics(experience=True), loggers=[interactive_logger], diff --git a/examples/getting_started.py b/examples/getting_started.py index b0acc6f4a..e968b8801 100644 --- a/examples/getting_started.py +++ b/examples/getting_started.py @@ -31,9 +31,7 @@ def main(args): # --- CONFIG device = torch.device( - f"cuda:{args.cuda}" - if torch.cuda.is_available() and args.cuda >= 0 - else "cpu" + f"cuda:{args.cuda}" if torch.cuda.is_available() and args.cuda >= 0 else "cpu" ) # --------- @@ -63,9 +61,7 @@ def main(args): download=True, transform=test_transform, ) - benchmark = nc_benchmark( - mnist_train, mnist_test, 5, task_labels=False, seed=1234 - ) + benchmark = nc_benchmark(mnist_train, mnist_test, 5, task_labels=False, seed=1234) # --------- # MODEL CREATION diff --git a/examples/hf_datawrapper.py b/examples/hf_datawrapper.py index bd228504c..892b3af9c 100644 --- a/examples/hf_datawrapper.py +++ b/examples/hf_datawrapper.py @@ -11,14 +11,16 @@ import numpy as np import torch import torch.nn -from transformers import (AutoTokenizer, DataCollatorForSeq2Seq, - T5ForConditionalGeneration) +from transformers import ( + AutoTokenizer, + DataCollatorForSeq2Seq, + T5ForConditionalGeneration, +) import avalanche import avalanche.training.templates.base from avalanche.benchmarks import CLExperience, CLScenario, CLStream -from avalanche.benchmarks.utils import (AvalancheDataset, ConstantSequence, - DataAttribute) +from avalanche.benchmarks.utils import AvalancheDataset, ConstantSequence, DataAttribute from avalanche.benchmarks.utils.data import AvalancheDataset from avalanche.benchmarks.utils.data_attribute import DataAttribute from avalanche.benchmarks.utils.flat_data import ConstantSequence @@ -38,8 +40,7 @@ def __init__(self, dataset_name, split) -> None: self.collate_fn = None def download_data(self, **kwargs): - self.dataset = ds.load_dataset( - self.dataset_name, split=self.split, **kwargs) + self.dataset = ds.load_dataset(self.dataset_name, split=self.split, **kwargs) def add_collate_function(self, collate_fn): self.collate_fn = collate_fn @@ -49,12 +50,12 @@ def map_preprocess_func(self, preproc_func, batched, columns_to_keep=[]): Applies a preprocessing function to the wrapped Hugging Face Datasets. Args: - - preproc_func: A preprocessing function that will be applied to the + - preproc_func: A preprocessing function that will be applied to the dataset. See H.F. library documentation for details. - - batched: A boolean indicating whether the preprocessing function + - batched: A boolean indicating whether the preprocessing function should be applied to the dataset in batches. 
- - columns_to_keep: A list of column names to keep in the dataset - after the preprocessing function has been applied. If set to an + - columns_to_keep: A list of column names to keep in the dataset + after the preprocessing function has been applied. If set to an empty list (default), ONLY columns added by the preproc_func will be kept. @@ -70,7 +71,7 @@ def map_preprocess_func(self, preproc_func, batched, columns_to_keep=[]): self.dataset = self.dataset.remove_columns(list(old_f)) print(f"Kept columns: {new_f - old_f}") print( - "If the resulting dataset has 0 columns left. Please ensure" + "If the resulting dataset has 0 columns left. Please ensure" "that the preprocessing phase saves the modified features in" "new columns or pass a list of column names" ) @@ -80,14 +81,13 @@ def map_preprocess_func(self, preproc_func, batched, columns_to_keep=[]): to_remove = old_f - columns_to_keep self.dataset = self.dataset.remove_columns(list(to_remove)) print( - f"The following columns have been removed" - "from dataset: {to_remove}") + f"The following columns have been removed" "from dataset: {to_remove}" + ) print("Dataset features: ", self.dataset.features.keys()) def to_avalanche_dataset(self, dataset_index): tl = DataAttribute( - ConstantSequence(dataset_index, len( - self.dataset)), "targets_task_labels" + ConstantSequence(dataset_index, len(self.dataset)), "targets_task_labels" ) return AvalancheDataset( [self.dataset], data_attributes=[tl], collate_fn=self.collate_fn @@ -166,10 +166,8 @@ def main(): """ def t2t_converter(example): - example[ - "input_text" - ] = f"question: {example['question']}" - + f"context: {example['context']} " + example["input_text"] = f"question: {example['question']}" + +f"context: {example['context']} " example["target_text"] = f"{example['answers']['text'][0]} " return example @@ -209,8 +207,7 @@ def preprocess_function( return outputs # define the data collator to pass to the resulting avalanche dataset - data_collator = DataCollatorForSeq2Seq( - AutoTokenizer.from_pretrained("t5-small")) + data_collator = DataCollatorForSeq2Seq(AutoTokenizer.from_pretrained("t5-small")) data_wrap.add_collate_function(data_collator) # download the dataset @@ -219,14 +216,12 @@ def preprocess_function( # Optional: define the columns to keep after applying the preprocessing # function By default, only columns added to dataset by the preprocessing # function are kept - columns_list = ["input_ids", "attention_masks", - "decoder_attention_mask", "labels"] + columns_list = ["input_ids", "attention_masks", "decoder_attention_mask", "labels"] data_wrap.map_preprocess_func( preproc_func=t2t_converter, batched=False, columns_to_keep=columns_list ) data_wrap.map_preprocess_func( - preproc_func=preprocess_function, batched=True, - columns_to_keep=columns_list + preproc_func=preprocess_function, batched=True, columns_to_keep=columns_list ) # Convert to an AvalancheDataset diff --git a/examples/joint_training.py b/examples/joint_training.py index 029150886..c84828df6 100644 --- a/examples/joint_training.py +++ b/examples/joint_training.py @@ -27,12 +27,9 @@ def main(args): - # Config device = torch.device( - f"cuda:{args.cuda}" - if torch.cuda.is_available() and args.cuda >= 0 - else "cpu" + f"cuda:{args.cuda}" if torch.cuda.is_available() and args.cuda >= 0 else "cpu" ) # model model = SimpleMLP(num_classes=10) diff --git a/examples/mean_scores.py b/examples/mean_scores.py index 11a9a33d1..ee415df68 100644 --- a/examples/mean_scores.py +++ b/examples/mean_scores.py @@ -20,9 +20,7 @@ def 
main(cuda: int): # --- CONFIG - device = torch.device( - f"cuda:{cuda}" if torch.cuda.is_available() else "cpu" - ) + device = torch.device(f"cuda:{cuda}" if torch.cuda.is_available() else "cpu") # --- BENCHMARK CREATION benchmark = SplitMNIST(n_experiences=5, seed=42) diff --git a/examples/multihead.py b/examples/multihead.py index 99cb0853f..c4facb554 100644 --- a/examples/multihead.py +++ b/examples/multihead.py @@ -29,12 +29,9 @@ def main(args): - # Config device = torch.device( - f"cuda:{args.cuda}" - if torch.cuda.is_available() and args.cuda >= 0 - else "cpu" + f"cuda:{args.cuda}" if torch.cuda.is_available() and args.cuda >= 0 else "cpu" ) # model model = MTSimpleMLP() @@ -52,9 +49,7 @@ def main(args): interactive_logger = InteractiveLogger() eval_plugin = EvaluationPlugin( - accuracy_metrics( - minibatch=False, epoch=True, experience=True, stream=True - ), + accuracy_metrics(minibatch=False, epoch=True, experience=True, stream=True), forgetting_metrics(experience=True), loggers=[interactive_logger], ) diff --git a/examples/naive.py b/examples/naive.py index abbdf3c93..f03c1fc6f 100644 --- a/examples/naive.py +++ b/examples/naive.py @@ -13,9 +13,7 @@ from avalanche.training.plugins import EvaluationPlugin from avalanche.benchmarks.classic import SplitMNIST from avalanche.logging import InteractiveLogger -from avalanche.training.supervised import ( - Naive -) +from avalanche.training.supervised import Naive def main(): @@ -24,16 +22,13 @@ def main(): # create the benchmark benchmark = SplitMNIST( - n_experiences=5, - dataset_root=expanduser("~") + "/.avalanche/data/mnist/" + n_experiences=5, dataset_root=expanduser("~") + "/.avalanche/data/mnist/" ) # choose some metrics and evaluation method interactive_logger = InteractiveLogger() eval_plugin = EvaluationPlugin( - accuracy_metrics( - minibatch=True, epoch=True, experience=True, stream=True - ), + accuracy_metrics(minibatch=True, epoch=True, experience=True, stream=True), loss_metrics(minibatch=True, epoch=True, experience=True, stream=True), loggers=[interactive_logger], ) diff --git a/examples/nlp.py b/examples/nlp.py index 242f5fb42..288c521fa 100644 --- a/examples/nlp.py +++ b/examples/nlp.py @@ -45,7 +45,6 @@ class CustomDataCollatorSeq2SeqBeta: return_tensors: str = "pt" def __call__(self, features, return_tensors=None): - if return_tensors is None: return_tensors = self.return_tensors labels = ( @@ -99,10 +98,8 @@ def __call__(self, features, return_tensors=None): and self.model is not None and hasattr(self.model, "prepare_decoder_input_ids_from_labels") ): - decoder_input_ids = ( - self.model.prepare_decoder_input_ids_from_labels( - labels=features["labels"] - ) + decoder_input_ids = self.model.prepare_decoder_input_ids_from_labels( + labels=features["labels"] ) features["decoder_input_ids"] = decoder_input_ids @@ -163,9 +160,7 @@ def criterion(self): def main(): tokenizer = AutoTokenizer.from_pretrained("t5-small", padding=True) - tokenizer.save_pretrained( - "./MLDATA/NLP/hf_tokenizers" - ) # CHANGE DIRECTORY + tokenizer.save_pretrained("./MLDATA/NLP/hf_tokenizers") # CHANGE DIRECTORY prefix = "<2en>" source_lang = "de" @@ -173,9 +168,7 @@ def main(): remote_data = load_dataset("news_commentary", "de-en") def preprocess_function(examples): - inputs = [ - prefix + example[source_lang] for example in examples["translation"] - ] + inputs = [prefix + example[source_lang] for example in examples["translation"]] targets = [example[target_lang] for example in examples["translation"]] model_inputs = tokenizer(inputs, 
max_length=128, truncation=True) with tokenizer.as_target_tokenizer(): @@ -187,18 +180,14 @@ def preprocess_function(examples): model = T5ForConditionalGeneration.from_pretrained("t5-small") remote_data = remote_data.remove_columns(["id", "translation"]) remote_data.set_format(type="torch") - data_collator = CustomDataCollatorSeq2SeqBeta( - tokenizer=tokenizer, model=model - ) + data_collator = CustomDataCollatorSeq2SeqBeta(tokenizer=tokenizer, model=model) train_exps = [] for i in range(0, 2): # We use very small experiences only to showcase the library. # Adapt this to your own benchmark exp_data = remote_data["train"].select(range(30 * i, 30 * (i + 1))) - tl = DataAttribute( - ConstantSequence(i, len(exp_data)), "targets_task_labels" - ) + tl = DataAttribute(ConstantSequence(i, len(exp_data)), "targets_task_labels") exp = CLExperience() exp.dataset = AvalancheDataset( diff --git a/examples/nlp_nmt.py b/examples/nlp_nmt.py index 242f5fb42..288c521fa 100644 --- a/examples/nlp_nmt.py +++ b/examples/nlp_nmt.py @@ -45,7 +45,6 @@ class CustomDataCollatorSeq2SeqBeta: return_tensors: str = "pt" def __call__(self, features, return_tensors=None): - if return_tensors is None: return_tensors = self.return_tensors labels = ( @@ -99,10 +98,8 @@ def __call__(self, features, return_tensors=None): and self.model is not None and hasattr(self.model, "prepare_decoder_input_ids_from_labels") ): - decoder_input_ids = ( - self.model.prepare_decoder_input_ids_from_labels( - labels=features["labels"] - ) + decoder_input_ids = self.model.prepare_decoder_input_ids_from_labels( + labels=features["labels"] ) features["decoder_input_ids"] = decoder_input_ids @@ -163,9 +160,7 @@ def criterion(self): def main(): tokenizer = AutoTokenizer.from_pretrained("t5-small", padding=True) - tokenizer.save_pretrained( - "./MLDATA/NLP/hf_tokenizers" - ) # CHANGE DIRECTORY + tokenizer.save_pretrained("./MLDATA/NLP/hf_tokenizers") # CHANGE DIRECTORY prefix = "<2en>" source_lang = "de" @@ -173,9 +168,7 @@ def main(): remote_data = load_dataset("news_commentary", "de-en") def preprocess_function(examples): - inputs = [ - prefix + example[source_lang] for example in examples["translation"] - ] + inputs = [prefix + example[source_lang] for example in examples["translation"]] targets = [example[target_lang] for example in examples["translation"]] model_inputs = tokenizer(inputs, max_length=128, truncation=True) with tokenizer.as_target_tokenizer(): @@ -187,18 +180,14 @@ def preprocess_function(examples): model = T5ForConditionalGeneration.from_pretrained("t5-small") remote_data = remote_data.remove_columns(["id", "translation"]) remote_data.set_format(type="torch") - data_collator = CustomDataCollatorSeq2SeqBeta( - tokenizer=tokenizer, model=model - ) + data_collator = CustomDataCollatorSeq2SeqBeta(tokenizer=tokenizer, model=model) train_exps = [] for i in range(0, 2): # We use very small experiences only to showcase the library. 
# Adapt this to your own benchmark exp_data = remote_data["train"].select(range(30 * i, 30 * (i + 1))) - tl = DataAttribute( - ConstantSequence(i, len(exp_data)), "targets_task_labels" - ) + tl = DataAttribute(ConstantSequence(i, len(exp_data)), "targets_task_labels") exp = CLExperience() exp.dataset = AvalancheDataset( diff --git a/examples/nlp_qa.py b/examples/nlp_qa.py index f39e0af19..7a6192d43 100644 --- a/examples/nlp_qa.py +++ b/examples/nlp_qa.py @@ -85,16 +85,13 @@ def main(): """ def t2t_converter(example): - example[ - "input_text" - ] = f"question: {example['question']}" - + f"context: {example['context']} " + example["input_text"] = f"question: {example['question']}" + +f"context: {example['context']} " example["target_text"] = f"{example['answers']['text'][0]} " return example def preprocess_function( - examples, encoder_max_len=encoder_max_len, - decoder_max_len=decoder_max_len + examples, encoder_max_len=encoder_max_len, decoder_max_len=decoder_max_len ): encoder_inputs = tokenizer( examples["input_text"], @@ -130,8 +127,7 @@ def preprocess_function( squad_tr = squad_tr.map(t2t_converter) squad_tr = squad_tr.map(preprocess_function, batched=True) squad_tr = squad_tr.remove_columns( - ["id", "title", "context", "question", - "answers", "input_text", "target_text"] + ["id", "title", "context", "question", "answers", "input_text", "target_text"] ) squad_val = squad_val.map(t2t_converter) squad_val = squad_val.map(preprocess_function, batched=True) @@ -149,16 +145,14 @@ def preprocess_function( # We use very small experiences only to showcase the library. # Adapt this to your own benchmark exp_data = squad_tr.select(range(30 * i, 30 * (i + 1))) - tl = DataAttribute(ConstantSequence( - i, len(exp_data)), "targets_task_labels") + tl = DataAttribute(ConstantSequence(i, len(exp_data)), "targets_task_labels") exp = CLExperience() exp.dataset = AvalancheDataset( [exp_data], data_attributes=[tl], collate_fn=data_collator ) train_exps.append(exp) - tl = DataAttribute(ConstantSequence( - 2, len(squad_val)), "targets_task_labels") + tl = DataAttribute(ConstantSequence(2, len(squad_val)), "targets_task_labels") val_exp = CLExperience() val_exp.dataset = AvalancheDataset( [squad_val], data_attributes=[tl], collate_fn=data_collator @@ -224,8 +218,7 @@ def preprocess_function( repetition_penalty=2.0, ) - decoded_answer = tokenizer.batch_decode( - generated_answer, skip_special_tokens=True) + decoded_answer = tokenizer.batch_decode(generated_answer, skip_special_tokens=True) print(f"Answer: {decoded_answer}") diff --git a/examples/online_naive.py b/examples/online_naive.py index ca656c5d3..71a68a1a2 100644 --- a/examples/online_naive.py +++ b/examples/online_naive.py @@ -39,9 +39,7 @@ def main(args): # --- CONFIG device = torch.device( - f"cuda:{args.cuda}" - if torch.cuda.is_available() and args.cuda >= 0 - else "cpu" + f"cuda:{args.cuda}" if torch.cuda.is_available() and args.cuda >= 0 else "cpu" ) n_batches = 5 # --------- @@ -84,9 +82,7 @@ def main(args): interactive_logger = InteractiveLogger() eval_plugin = EvaluationPlugin( - accuracy_metrics( - minibatch=True, epoch=True, experience=True, stream=True - ), + accuracy_metrics(minibatch=True, epoch=True, experience=True, stream=True), loss_metrics(minibatch=True, epoch=True, experience=True, stream=True), forgetting_metrics(experience=True), loggers=[interactive_logger], @@ -113,10 +109,12 @@ def main(args): # ocl_benchmark = OnlineCLScenario(batch_streams) for i, exp in enumerate(benchmark.train_stream): # Create online scenario from 
experience exp - ocl_benchmark = OnlineCLScenario(original_streams=batch_streams, - experiences=exp, - experience_size=10, - access_task_boundaries=True) + ocl_benchmark = OnlineCLScenario( + original_streams=batch_streams, + experiences=exp, + experience_size=10, + access_task_boundaries=True, + ) # Train on the online train stream of the scenario cl_strategy.train(ocl_benchmark.train_stream) diff --git a/examples/online_replay.py b/examples/online_replay.py index cfa45e952..94f7ed933 100644 --- a/examples/online_replay.py +++ b/examples/online_replay.py @@ -40,9 +40,7 @@ def main(args): # --- CONFIG device = torch.device( - f"cuda:{args.cuda}" - if torch.cuda.is_available() and args.cuda >= 0 - else "cpu" + f"cuda:{args.cuda}" if torch.cuda.is_available() and args.cuda >= 0 else "cpu" ) n_batches = 5 # --------- @@ -85,9 +83,7 @@ def main(args): interactive_logger = InteractiveLogger() eval_plugin = EvaluationPlugin( - accuracy_metrics( - minibatch=True, epoch=True, experience=True, stream=True - ), + accuracy_metrics(minibatch=True, epoch=True, experience=True, stream=True), loss_metrics(minibatch=True, epoch=True, experience=True, stream=True), forgetting_metrics(experience=True), loggers=[interactive_logger], diff --git a/examples/pytorchcv_models.py b/examples/pytorchcv_models.py index 6821c66ff..e80b6aaf7 100644 --- a/examples/pytorchcv_models.py +++ b/examples/pytorchcv_models.py @@ -37,7 +37,6 @@ def main(args): - # Model getter: specify dataset and depth of the network. model = pytorchcv_wrapper.resnet("cifar10", depth=20, pretrained=False) @@ -47,9 +46,7 @@ def main(args): # --- CONFIG device = torch.device( - f"cuda:{args.cuda}" - if torch.cuda.is_available() and args.cuda >= 0 - else "cpu" + f"cuda:{args.cuda}" if torch.cuda.is_available() and args.cuda >= 0 else "cpu" ) # --- TRANSFORMATIONS @@ -86,9 +83,7 @@ def main(args): interactive_logger = InteractiveLogger() eval_plugin = EvaluationPlugin( - accuracy_metrics( - minibatch=True, epoch=True, experience=True, stream=True - ), + accuracy_metrics(minibatch=True, epoch=True, experience=True, stream=True), loss_metrics(minibatch=True, epoch=True, experience=True, stream=True), forgetting_metrics(experience=True), loggers=[interactive_logger], diff --git a/examples/simple_ctrl.py b/examples/simple_ctrl.py index daa31723f..ca2222f2d 100644 --- a/examples/simple_ctrl.py +++ b/examples/simple_ctrl.py @@ -37,9 +37,7 @@ def main(args): # Device config device = torch.device( - f"cuda:{args.cuda}" - if torch.cuda.is_available() and args.cuda >= 0 - else "cpu" + f"cuda:{args.cuda}" if torch.cuda.is_available() and args.cuda >= 0 else "cpu" ) # Intialize the model, stream and training strategy @@ -60,9 +58,7 @@ def main(args): criterion = CrossEntropyLoss() logger = EvaluationPlugin( - accuracy_metrics( - minibatch=False, epoch=False, experience=True, stream=True - ), + accuracy_metrics(minibatch=False, epoch=False, experience=True, stream=True), loggers=[InteractiveLogger()], ) @@ -124,9 +120,7 @@ def main(args): res = cl_strategy.eval([v_stream]) acc_last_stream = transfer_mat[-1][-1] - acc_last_only = res[ - "Top1_Acc_Exp/eval_phase/test_stream/" "Task005/Exp005" - ] + acc_last_only = res["Top1_Acc_Exp/eval_phase/test_stream/" "Task005/Exp005"] transfer_value = acc_last_stream - acc_last_only print( @@ -134,8 +128,7 @@ def main(args): f"stream: {acc_last_stream}" ) print( - f"Accuracy on probe task after trained " - f"independently: {acc_last_only}" + f"Accuracy on probe task after trained " f"independently: {acc_last_only}" ) 
print(f"T({args.stream})={transfer_value}") @@ -164,8 +157,7 @@ def main(args): "--max-epochs", type=int, default=200, - help="The maximum number of training epochs for each " - "task. Default to 200.", + help="The maximum number of training epochs for each " "task. Default to 200.", ) parser.add_argument( "--cuda", diff --git a/examples/task_incremental.py b/examples/task_incremental.py index 20b47f239..9d5a94ee6 100644 --- a/examples/task_incremental.py +++ b/examples/task_incremental.py @@ -26,12 +26,9 @@ def main(args): - # Config device = torch.device( - f"cuda:{args.cuda}" - if torch.cuda.is_available() and args.cuda >= 0 - else "cpu" + f"cuda:{args.cuda}" if torch.cuda.is_available() and args.cuda >= 0 else "cpu" ) # model model = SimpleMLP(input_size=32 * 32 * 3, num_classes=10) diff --git a/examples/task_incremental_with_checkpointing.py b/examples/task_incremental_with_checkpointing.py index 28cdc21e0..c84d9968a 100644 --- a/examples/task_incremental_with_checkpointing.py +++ b/examples/task_incremental_with_checkpointing.py @@ -25,15 +25,24 @@ from torch.optim import SGD from avalanche.benchmarks import CLExperience, SplitMNIST -from avalanche.evaluation.metrics import accuracy_metrics, loss_metrics, \ - class_accuracy_metrics -from avalanche.logging import InteractiveLogger, TensorboardLogger, \ - WandBLogger, TextLogger +from avalanche.evaluation.metrics import ( + accuracy_metrics, + loss_metrics, + class_accuracy_metrics, +) +from avalanche.logging import ( + InteractiveLogger, + TensorboardLogger, + WandBLogger, + TextLogger, +) from avalanche.models import SimpleMLP, as_multitask from avalanche.training.determinism.rng_manager import RNGManager from avalanche.training.plugins import EvaluationPlugin, ReplayPlugin -from avalanche.training.plugins.checkpoint import CheckpointPlugin, \ - FileSystemCheckpointStorage +from avalanche.training.plugins.checkpoint import ( + CheckpointPlugin, + FileSystemCheckpointStorage, +) from avalanche.training.supervised import Naive @@ -46,26 +55,24 @@ def main(args): # Nothing new here... device = torch.device( - f"cuda:{args.cuda}" - if torch.cuda.is_available() and args.cuda >= 0 - else "cpu" + f"cuda:{args.cuda}" if torch.cuda.is_available() and args.cuda >= 0 else "cpu" ) - print('Using device', device) + print("Using device", device) # CL Benchmark Creation n_experiences = 5 - scenario = SplitMNIST(n_experiences=n_experiences, - return_task_id=True) - input_size = 28*28*1 + scenario = SplitMNIST(n_experiences=n_experiences, return_task_id=True) + input_size = 28 * 28 * 1 train_stream: Sequence[CLExperience] = scenario.train_stream test_stream: Sequence[CLExperience] = scenario.test_stream # Define the model (and load initial weights if necessary) # Again, not checkpoint-related - model = SimpleMLP(input_size=input_size, - num_classes=scenario.n_classes // n_experiences) - model = as_multitask(model, 'classifier') + model = SimpleMLP( + input_size=input_size, num_classes=scenario.n_classes // n_experiences + ) + model = as_multitask(model, "classifier") # Prepare for training & testing: not checkpoint-related optimizer = SGD(model.parameters(), lr=0.01, momentum=0.9) @@ -86,9 +93,9 @@ def main(args): # In brief: CUDA -> CPU (OK), CUDA:0 -> CUDA:1 (OK), CPU -> CUDA (NO!) 
checkpoint_plugin = CheckpointPlugin( FileSystemCheckpointStorage( - directory='./checkpoints/task_incremental', + directory="./checkpoints/task_incremental", ), - map_location=device + map_location=device, ) # THIRD CHANGE: LOAD THE CHECKPOINT IF IT EXISTS @@ -109,32 +116,29 @@ def main(args): ] # Create loggers (as usual) - os.makedirs(f'./logs/checkpointing_{args.checkpoint_at}', - exist_ok=True) + os.makedirs(f"./logs/checkpointing_{args.checkpoint_at}", exist_ok=True) loggers = [ TextLogger( - open(f'./logs/checkpointing_' - f'{args.checkpoint_at}/log.txt', 'w')), + open(f"./logs/checkpointing_" f"{args.checkpoint_at}/log.txt", "w") + ), InteractiveLogger(), - TensorboardLogger(f'./logs/checkpointing_{args.checkpoint_at}') + TensorboardLogger(f"./logs/checkpointing_{args.checkpoint_at}"), ] if args.wandb: - loggers.append(WandBLogger( - project_name='AvalancheCheckpointing', - run_name=f'checkpointing_{args.checkpoint_at}' - )) + loggers.append( + WandBLogger( + project_name="AvalancheCheckpointing", + run_name=f"checkpointing_{args.checkpoint_at}", + ) + ) # Create the evaluation plugin (as usual) evaluation_plugin = EvaluationPlugin( - accuracy_metrics(minibatch=False, epoch=True, - experience=True, stream=True), - loss_metrics(minibatch=False, epoch=True, - experience=True, stream=True), - class_accuracy_metrics( - stream=True - ), - loggers=loggers + accuracy_metrics(minibatch=False, epoch=True, experience=True, stream=True), + loss_metrics(minibatch=False, epoch=True, experience=True, stream=True), + class_accuracy_metrics(stream=True), + loggers=loggers, ) # Create the strategy (as usual) @@ -147,7 +151,7 @@ def main(args): eval_mb_size=128, device=device, plugins=plugins, - evaluator=evaluation_plugin + evaluator=evaluation_plugin, ) # Train and test loop, as usual. @@ -164,7 +168,7 @@ def main(args): strategy.eval(test_stream, num_workers=10) if train_task.current_experience == args.checkpoint_at: - print('Exiting early') + print("Exiting early") break @@ -174,15 +178,8 @@ def main(args): "--cuda", type=int, default=0, - help="Select zero-indexed cuda device. -1 to use CPU." - ) - parser.add_argument( - "--checkpoint_at", - type=int, - default=-1 - ) - parser.add_argument( - "--wandb", - action='store_true' + help="Select zero-indexed cuda device. 
-1 to use CPU.", ) + parser.add_argument("--checkpoint_at", type=int, default=-1) + parser.add_argument("--wandb", action="store_true") main(parser.parse_args()) diff --git a/examples/task_metrics.py b/examples/task_metrics.py index 1e4fa24da..8daa62f9e 100644 --- a/examples/task_metrics.py +++ b/examples/task_metrics.py @@ -44,9 +44,7 @@ def main(args): # --- CONFIG device = torch.device( - f"cuda:{args.cuda}" - if torch.cuda.is_available() and args.cuda >= 0 - else "cpu" + f"cuda:{args.cuda}" if torch.cuda.is_available() and args.cuda >= 0 else "cpu" ) # --------- @@ -131,9 +129,7 @@ def main(args): experience=True, stream=True, ), - disk_usage_metrics( - minibatch=True, epoch=True, experience=True, stream=True - ), + disk_usage_metrics(minibatch=True, epoch=True, experience=True, stream=True), MAC_metrics(minibatch=True, epoch=True, experience=True), loggers=[interactive_logger, text_logger, csv_logger], collect_all=True, diff --git a/examples/tensorboard_logger.py b/examples/tensorboard_logger.py index 67fa6cc15..1778f96df 100644 --- a/examples/tensorboard_logger.py +++ b/examples/tensorboard_logger.py @@ -45,9 +45,7 @@ def main(args): # --- CONFIG device = torch.device( - f"cuda:{args.cuda}" - if torch.cuda.is_available() and args.cuda >= 0 - else "cpu" + f"cuda:{args.cuda}" if torch.cuda.is_available() and args.cuda >= 0 else "cpu" ) # --------- @@ -77,9 +75,7 @@ def main(args): download=True, transform=test_transform, ) - benchmark = nc_benchmark( - mnist_train, mnist_test, 5, task_labels=False, seed=1234 - ) + benchmark = nc_benchmark(mnist_train, mnist_test, 5, task_labels=False, seed=1234) # --------- # MODEL CREATION @@ -105,12 +101,8 @@ def main(args): ), forgetting_metrics(experience=True, stream=True), confusion_matrix_metrics(stream=True), - cpu_usage_metrics( - minibatch=True, epoch=True, experience=True, stream=True - ), - timing_metrics( - minibatch=True, epoch=True, experience=True, stream=True - ), + cpu_usage_metrics(minibatch=True, epoch=True, experience=True, stream=True), + timing_metrics(minibatch=True, epoch=True, experience=True, stream=True), ram_usage_metrics( every=0.5, minibatch=True, epoch=True, experience=True, stream=True ), @@ -122,9 +114,7 @@ def main(args): experience=True, stream=True, ), - disk_usage_metrics( - minibatch=True, epoch=True, experience=True, stream=True - ), + disk_usage_metrics(minibatch=True, epoch=True, experience=True, stream=True), MAC_metrics(minibatch=True, epoch=True, experience=True), loggers=[interactive_logger, tensorboard_logger], ) diff --git a/examples/tvdetection/coco_eval.py b/examples/tvdetection/coco_eval.py index db9350b89..3de67a2f8 100644 --- a/examples/tvdetection/coco_eval.py +++ b/examples/tvdetection/coco_eval.py @@ -32,9 +32,7 @@ def update(self, predictions): for iou_type in self.iou_types: results = self.prepare(predictions, iou_type) with redirect_stdout(io.StringIO()): - coco_dt = ( - COCO.loadRes(self.coco_gt, results) if results else COCO() - ) + coco_dt = COCO.loadRes(self.coco_gt, results) if results else COCO() coco_eval = self.coco_eval[iou_type] coco_eval.cocoDt = coco_dt @@ -45,9 +43,7 @@ def update(self, predictions): def synchronize_between_processes(self): for iou_type in self.iou_types: - self.eval_imgs[iou_type] = np.concatenate( - self.eval_imgs[iou_type], 2 - ) + self.eval_imgs[iou_type] = np.concatenate(self.eval_imgs[iou_type], 2) create_common_coco_eval( self.coco_eval[iou_type], self.img_ids, self.eval_imgs[iou_type] ) @@ -111,9 +107,7 @@ def prepare_for_coco_segmentation(self, 
predictions): rles = [ mask_util.encode( - np.array( - mask[0, :, :, np.newaxis], dtype=np.uint8, order="F" - ) + np.array(mask[0, :, :, np.newaxis], dtype=np.uint8, order="F") )[0] for mask in masks ] diff --git a/examples/tvdetection/coco_utils.py b/examples/tvdetection/coco_utils.py index 74991ce0e..9726b7ff5 100644 --- a/examples/tvdetection/coco_utils.py +++ b/examples/tvdetection/coco_utils.py @@ -106,9 +106,7 @@ def _has_only_empty_bbox(anno): return all(any(o <= 1 for o in obj["bbox"][2:]) for obj in anno) def _count_visible_keypoints(anno): - return sum( - sum(1 for v in ann["keypoints"][2::3] if v > 0) for ann in anno - ) + return sum(sum(1 for v in ann["keypoints"][2::3] if v > 0) for ann in anno) min_keypoints_per_image = 10 @@ -226,9 +224,7 @@ def get_coco(root, image_set, transforms, mode="instances"): PATHS = { "train": ( "train2017", - os.path.join( - "annotations", anno_file_template.format(mode, "train") - ), + os.path.join("annotations", anno_file_template.format(mode, "train")), ), "val": ( "val2017", diff --git a/examples/tvdetection/engine.py b/examples/tvdetection/engine.py index ad172ee06..cae581bdf 100644 --- a/examples/tvdetection/engine.py +++ b/examples/tvdetection/engine.py @@ -26,9 +26,7 @@ def train_one_epoch( ): model.train() metric_logger = MetricLogger(delimiter=" ") - metric_logger.add_meter( - "lr", SmoothedValue(window_size=1, fmt="{value:.6f}") - ) + metric_logger.add_meter("lr", SmoothedValue(window_size=1, fmt="{value:.6f}")) header = f"Epoch: [{epoch}]" lr_scheduler = None @@ -41,9 +39,7 @@ def train_one_epoch( ) # Avalanche: added "*_" - for images, targets, *_ in metric_logger.log_every( - data_loader, print_freq, header - ): + for images, targets, *_ in metric_logger.log_every(data_loader, print_freq, header): images = list(image.to(device) for image in images) targets = [{k: v.to(device) for k, v in t.items()} for t in targets] with torch.cuda.amp.autocast(enabled=scaler is not None): @@ -97,9 +93,7 @@ def evaluate_coco( coco_evaluator = CocoEvaluator(coco, iou_types) header = "Test:" - for images, targets, *_ in metric_logger.log_every( - data_loader, 100, header - ): + for images, targets, *_ in metric_logger.log_every(data_loader, 100, header): images = list(img.to(device) for img in images) if torch.cuda.is_available(): @@ -117,9 +111,7 @@ def evaluate_coco( evaluator_time = time.time() coco_evaluator.update(res) evaluator_time = time.time() - evaluator_time - metric_logger.update( - model_time=model_time, evaluator_time=evaluator_time - ) + metric_logger.update(model_time=model_time, evaluator_time=evaluator_time) # gather the stats from all processes metric_logger.synchronize_between_processes() @@ -135,14 +127,11 @@ def evaluate_coco( def evaluate_lvis( model, data_loader, device, lvis: LVIS, metric_logger, cpu_device, iou_types ): - # Lorenzo: implemented by taking inspiration from COCO code lvis_evaluator = LvisEvaluator(lvis, iou_types) header = "Test:" - for images, targets, *_ in metric_logger.log_every( - data_loader, 100, header - ): + for images, targets, *_ in metric_logger.log_every(data_loader, 100, header): images = list(img.to(device) for img in images) if torch.cuda.is_available(): @@ -159,9 +148,7 @@ def evaluate_lvis( evaluator_time = time.time() lvis_evaluator.update(res) evaluator_time = time.time() - evaluator_time - metric_logger.update( - model_time=model_time, evaluator_time=evaluator_time - ) + metric_logger.update(model_time=model_time, evaluator_time=evaluator_time) # gather the stats from all processes 
metric_logger.synchronize_between_processes() diff --git a/examples/tvdetection/group_by_aspect_ratio.py b/examples/tvdetection/group_by_aspect_ratio.py index caed539af..a06376b29 100644 --- a/examples/tvdetection/group_by_aspect_ratio.py +++ b/examples/tvdetection/group_by_aspect_ratio.py @@ -74,9 +74,7 @@ def __iter__(self): samples_from_group_id = _repeat_to_at_least( samples_per_group[group_id], remaining ) - buffer_per_group[group_id].extend( - samples_from_group_id[:remaining] - ) + buffer_per_group[group_id].extend(samples_from_group_id[:remaining]) assert len(buffer_per_group[group_id]) == self.batch_size yield buffer_per_group[group_id] num_remaining -= 1 diff --git a/examples/tvdetection/lvis_eval.py b/examples/tvdetection/lvis_eval.py index c7607a1ee..9d0e23a30 100644 --- a/examples/tvdetection/lvis_eval.py +++ b/examples/tvdetection/lvis_eval.py @@ -115,9 +115,7 @@ def prepare_for_lvis_detection(self, predictions): masks = masks > 0.5 rles = [ mask_util.encode( - np.array( - mask[0, :, :, np.newaxis], dtype=np.uint8, order="F" - ) + np.array(mask[0, :, :, np.newaxis], dtype=np.uint8, order="F") )[0] for mask in masks ] diff --git a/examples/tvdetection/presets.py b/examples/tvdetection/presets.py index ad3d7f77a..88d8c697d 100644 --- a/examples/tvdetection/presets.py +++ b/examples/tvdetection/presets.py @@ -3,9 +3,7 @@ class DetectionPresetTrain: - def __init__( - self, data_augmentation, hflip_prob=0.5, mean=(123.0, 117.0, 104.0) - ): + def __init__(self, data_augmentation, hflip_prob=0.5, mean=(123.0, 117.0, 104.0)): if data_augmentation == "hflip": self.transforms = T.Compose( [ @@ -35,9 +33,7 @@ def __init__( ] ) else: - raise ValueError( - f'Unknown data augmentation policy "{data_augmentation}"' - ) + raise ValueError(f'Unknown data augmentation policy "{data_augmentation}"') def __call__(self, img, target): return self.transforms(img, target) diff --git a/examples/tvdetection/train.py b/examples/tvdetection/train.py index f5218758c..e73205295 100644 --- a/examples/tvdetection/train.py +++ b/examples/tvdetection/train.py @@ -79,9 +79,7 @@ def get_args_parser(add_help=True): type=str, help="dataset path", ) - parser.add_argument( - "--dataset", default="coco", type=str, help="dataset name" - ) + parser.add_argument("--dataset", default="coco", type=str, help="dataset name") parser.add_argument( "--model", default="maskrcnn_resnet50_fpn", type=str, help="model name" ) @@ -156,18 +154,12 @@ def get_args_parser(add_help=True): type=float, help="decrease lr by a factor of lr-gamma (multisteplr scheduler only)", ) - parser.add_argument( - "--print-freq", default=20, type=int, help="print frequency" - ) + parser.add_argument("--print-freq", default=20, type=int, help="print frequency") parser.add_argument( "--output-dir", default=".", type=str, help="path to save outputs" ) - parser.add_argument( - "--resume", default="", type=str, help="path of checkpoint" - ) - parser.add_argument( - "--start_epoch", default=0, type=int, help="start epoch" - ) + parser.add_argument("--resume", default="", type=str, help="path of checkpoint") + parser.add_argument("--start_epoch", default=0, type=int, help="start epoch") parser.add_argument("--aspect-ratio-group-factor", default=3, type=int) parser.add_argument( "--rpn-score-thresh", @@ -274,9 +266,7 @@ def main(args): print("Creating data loaders") if args.distributed: train_sampler = torch.utils.data.distributed.DistributedSampler(dataset) - test_sampler = torch.utils.data.distributed.DistributedSampler( - dataset_test - ) + test_sampler = 
torch.utils.data.distributed.DistributedSampler(dataset_test) else: train_sampler = torch.utils.data.RandomSampler(dataset) test_sampler = torch.utils.data.SequentialSampler(dataset_test) @@ -327,9 +317,7 @@ def main(args): model_without_ddp = model if args.distributed: - model = torch.nn.parallel.DistributedDataParallel( - model, device_ids=[args.gpu] - ) + model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.gpu]) model_without_ddp = model.module params = [p for p in model.parameters() if p.requires_grad] diff --git a/examples/tvdetection/transforms.py b/examples/tvdetection/transforms.py index 44d10ff2a..31f03eeaf 100644 --- a/examples/tvdetection/transforms.py +++ b/examples/tvdetection/transforms.py @@ -122,17 +122,11 @@ def forward( for _ in range(self.trials): # check the aspect ratio limitations - r = self.min_scale + ( - self.max_scale - self.min_scale - ) * torch.rand(2) + r = self.min_scale + (self.max_scale - self.min_scale) * torch.rand(2) new_w = int(orig_w * r[0]) new_h = int(orig_h * r[1]) aspect_ratio = new_w / new_h - if not ( - self.min_aspect_ratio - <= aspect_ratio - <= self.max_aspect_ratio - ): + if not (self.min_aspect_ratio <= aspect_ratio <= self.max_aspect_ratio): continue # check for 0 area crops @@ -191,9 +185,7 @@ def __init__( self.fill = fill self.side_range = side_range if side_range[0] < 1.0 or side_range[0] > side_range[1]: - raise ValueError( - f"Invalid canvas side range provided {side_range}." - ) + raise ValueError(f"Invalid canvas side range provided {side_range}.") self.p = p @torch.jit.unused @@ -238,9 +230,9 @@ def forward( image = F.pad(image, [left, top, right, bottom], fill=fill) if isinstance(image, torch.Tensor): # PyTorch's pad supports only integers on fill. So we need to overwrite the colour - v = torch.tensor( - self.fill, device=image.device, dtype=image.dtype - ).view(-1, 1, 1) + v = torch.tensor(self.fill, device=image.device, dtype=image.dtype).view( + -1, 1, 1 + ) image[..., :top, :] = image[..., :, :left] = image[ ..., (top + orig_h) :, : ] = image[..., :, (left + orig_w) :] = v diff --git a/examples/tvdetection/utils.py b/examples/tvdetection/utils.py index ca0e54dff..21d164683 100644 --- a/examples/tvdetection/utils.py +++ b/examples/tvdetection/utils.py @@ -32,9 +32,7 @@ def synchronize_between_processes(self): """ if not is_dist_avail_and_initialized(): return - t = torch.tensor( - [self.count, self.total], dtype=torch.float64, device="cuda" - ) + t = torch.tensor([self.count, self.total], dtype=torch.float64, device="cuda") dist.barrier() dist.all_reduce(t) t = t.tolist() diff --git a/examples/wandb_logger.py b/examples/wandb_logger.py index 2d49d5112..5ba9b94df 100644 --- a/examples/wandb_logger.py +++ b/examples/wandb_logger.py @@ -45,9 +45,7 @@ def main(args): # --- CONFIG device = torch.device( - f"cuda:{args.cuda}" - if torch.cuda.is_available() and args.cuda >= 0 - else "cpu" + f"cuda:{args.cuda}" if torch.cuda.is_available() and args.cuda >= 0 else "cpu" ) # --------- @@ -77,9 +75,7 @@ def main(args): download=True, transform=test_transform, ) - benchmark = nc_benchmark( - mnist_train, mnist_test, 5, task_labels=False, seed=1234 - ) + benchmark = nc_benchmark(mnist_train, mnist_test, 5, task_labels=False, seed=1234) # --------- # MODEL CREATION @@ -109,12 +105,8 @@ def main(args): confusion_matrix_metrics( stream=True, wandb=True, class_names=[str(i) for i in range(10)] ), - cpu_usage_metrics( - minibatch=True, epoch=True, experience=True, stream=True - ), - timing_metrics( - minibatch=True, 
epoch=True, experience=True, stream=True - ), + cpu_usage_metrics(minibatch=True, epoch=True, experience=True, stream=True), + timing_metrics(minibatch=True, epoch=True, experience=True, stream=True), ram_usage_metrics( every=0.5, minibatch=True, epoch=True, experience=True, stream=True ), @@ -126,9 +118,7 @@ def main(args): experience=True, stream=True, ), - disk_usage_metrics( - minibatch=True, epoch=True, experience=True, stream=True - ), + disk_usage_metrics(minibatch=True, epoch=True, experience=True, stream=True), MAC_metrics(minibatch=True, epoch=True, experience=True), loggers=[interactive_logger, wandb_logger], ) diff --git a/profiling/concat_data_with_new_attributes.py b/profiling/concat_data_with_new_attributes.py index 1d99beac3..06da2c2bf 100644 --- a/profiling/concat_data_with_new_attributes.py +++ b/profiling/concat_data_with_new_attributes.py @@ -8,8 +8,10 @@ from avalanche.benchmarks import SplitMNIST, SplitCIFAR100 from avalanche.benchmarks.utils import make_avalanche_dataset from avalanche.benchmarks.utils.data_attribute import TensorDataAttribute -from avalanche.training.storage_policy import (BalancedExemplarsBuffer, - ReservoirSamplingBuffer) +from avalanche.training.storage_policy import ( + BalancedExemplarsBuffer, + ReservoirSamplingBuffer, +) class ClassBalancedBufferWithLogits(BalancedExemplarsBuffer): @@ -18,14 +20,14 @@ class ClassBalancedBufferWithLogits(BalancedExemplarsBuffer): """ def __init__( - self, - max_size: int, - adaptive_size: bool = True, - total_num_classes: int = None, + self, + max_size: int, + adaptive_size: bool = True, + total_num_classes: int = None, ): if not adaptive_size: assert ( - total_num_classes > 0 + total_num_classes > 0 ), """When fixed exp mem size, total_num_classes should be > 0.""" super().__init__(max_size, adaptive_size, total_num_classes) @@ -80,11 +82,10 @@ def update(self, dataset, add_attributes=True): # resize buffers for class_id, class_buf in self.buffer_groups.items(): - self.buffer_groups[class_id].resize(None, - class_to_len[class_id]) + self.buffer_groups[class_id].resize(None, class_to_len[class_id]) -if __name__ == '__main__': +if __name__ == "__main__": benchmark = SplitMNIST(n_experiences=1) storage_policy = ClassBalancedBufferWithLogits(max_size=2000) @@ -98,4 +99,4 @@ def update(self, dataset, add_attributes=True): print("Buffer size: ", len(storage_policy.buffer)) end = time.time() duration = end - start - print("Buffer access duration: ", duration) \ No newline at end of file + print("Buffer access duration: ", duration) diff --git a/profiling/data_merging.py b/profiling/data_merging.py index 4e7f9f882..1ac5b5b4a 100644 --- a/profiling/data_merging.py +++ b/profiling/data_merging.py @@ -9,7 +9,11 @@ from tqdm import tqdm -from avalanche.benchmarks import fixed_size_experience_split, SplitMNIST, classification_subset +from avalanche.benchmarks import ( + fixed_size_experience_split, + SplitMNIST, + classification_subset, +) from avalanche.benchmarks.utils.flat_data import _flatdata_depth from avalanche.benchmarks.utils.utils import concat_datasets from avalanche.training import ReservoirSamplingBuffer diff --git a/profiling/online_strategy.py b/profiling/online_strategy.py index 5ed6112c6..a3931e104 100644 --- a/profiling/online_strategy.py +++ b/profiling/online_strategy.py @@ -25,8 +25,7 @@ import pstats from avalanche.benchmarks import SplitMNIST -from avalanche.benchmarks.scenarios.online_scenario import \ - fixed_size_experience_split +from avalanche.benchmarks.scenarios.online_scenario import 
fixed_size_experience_split from avalanche.models import SimpleMLP from avalanche.training.supervised.strategy_wrappers_online import OnlineNaive from avalanche.benchmarks.scenarios import OnlineCLScenario @@ -59,7 +58,7 @@ def profile_online_naive_no_avl(benchmark, device): # Iterate over the dataset and train the model dataloader = DataLoader(experience_0.dataset, batch_size=1) pbar = tqdm(dataloader) - for (x, y, _) in pbar: + for x, y, _ in pbar: x, y = x.to(device), y.to(device) optimizer.zero_grad() pred = model(x) @@ -69,7 +68,7 @@ def profile_online_naive_no_avl(benchmark, device): pbar.set_description(f"Loss: {loss.item():0.4f}") stats = pstats.Stats(pr) - stats.sort_stats('tottime').print_stats(15) + stats.sort_stats("tottime").print_stats(15) ################################################## @@ -105,17 +104,14 @@ def load_all_data(data): optimizer.step() stats = pstats.Stats(pr) - stats.sort_stats('tottime').print_stats(15) + stats.sort_stats("tottime").print_stats(15) ################################################## # Online strategy using Avalanche ################################################## def profile_online_avl( - benchmark, - device, - strategy="naive", - use_interactive_logger: bool = True + benchmark, device, strategy="naive", use_interactive_logger: bool = True ): """ Online strategy using Avalanche. @@ -145,8 +141,9 @@ def profile_online_avl( if strategy == "er": # CREATE THE STRATEGY INSTANCE (ONLINE-REPLAY) storage_policy = ReservoirSamplingBuffer(max_size=100) - replay_plugin = ReplayPlugin(mem_size=100, batch_size=1, - storage_policy=storage_policy) + replay_plugin = ReplayPlugin( + mem_size=100, batch_size=1, storage_policy=storage_policy + ) plugins.append(replay_plugin) # Create OnlineNaive strategy @@ -158,16 +155,15 @@ def profile_online_avl( train_mb_size=1, device=device, evaluator=eval_plugin, - plugins=plugins + plugins=plugins, ) - online_cl_scenario = OnlineCLScenario(benchmark.streams.values(), - experience_0) + online_cl_scenario = OnlineCLScenario(benchmark.streams.values(), experience_0) # Train on the first experience only cl_strategy.train(online_cl_scenario.train_stream) stats = pstats.Stats(pr) - stats.sort_stats('tottime').print_stats(40) + stats.sort_stats("tottime").print_stats(40) def main(args): diff --git a/profiling/replay_buffers.py b/profiling/replay_buffers.py index 4902115f4..5fabaa028 100644 --- a/profiling/replay_buffers.py +++ b/profiling/replay_buffers.py @@ -3,12 +3,16 @@ import time from tqdm import tqdm -from avalanche.benchmarks import fixed_size_experience_split, SplitMNIST, split_online_stream +from avalanche.benchmarks import ( + fixed_size_experience_split, + SplitMNIST, + split_online_stream, +) from avalanche.training import ReservoirSamplingBuffer from avalanche.training import ParametricBuffer -if __name__ == '__main__': +if __name__ == "__main__": benchmark = SplitMNIST(n_experiences=5) experience = benchmark.train_stream[0] diff --git a/profiling/serialization.py b/profiling/serialization.py index d8676a35c..92ea7d873 100644 --- a/profiling/serialization.py +++ b/profiling/serialization.py @@ -17,16 +17,18 @@ from avalanche.training.plugins import EvaluationPlugin from avalanche.training.serialization import maybe_load_checkpoint, save_checkpoint -if __name__ == '__main__': - device = 'cpu' +if __name__ == "__main__": + device = "cpu" RNGManager.set_random_seeds(42) fname = "./checkpoint.pkl" benchmark = SplitCIFAR100(50) - evaluator = EvaluationPlugin(accuracy_metrics(experience=True), 
loggers=[InteractiveLogger()]) + evaluator = EvaluationPlugin( + accuracy_metrics(experience=True), loggers=[InteractiveLogger()] + ) - model = SimpleMLP(input_size=32*32*3, num_classes=benchmark.n_classes) - opt = SGD(model.parameters(), lr=.1) + model = SimpleMLP(input_size=32 * 32 * 3, num_classes=benchmark.n_classes) + opt = SGD(model.parameters(), lr=0.1) strat = Naive(model, opt, train_mb_size=128) if os.path.exists(fname): @@ -41,17 +43,21 @@ strat.eval(exp) start_time = time.time() - save_checkpoint(strat, fname, exclude=[ - # 'optimizer', - # These attributes do not have state. Do not save. - # They are automatically set to None by the strategy templates - # If not, there is a bug... - # 'experience', - # 'adapted_dataset', - # 'dataloader', - # 'mbatch', - # 'mb_output', - # 'current_eval_stream', - # '_eval_streams' - ]) + save_checkpoint( + strat, + fname, + exclude=[ + # 'optimizer', + # These attributes do not have state. Do not save. + # They are automatically set to None by the strategy templates + # If not, there is a bug... + # 'experience', + # 'adapted_dataset', + # 'dataloader', + # 'mbatch', + # 'mb_output', + # 'current_eval_stream', + # '_eval_streams' + ], + ) print("SAVING TIME: ", time.time() - start_time) diff --git a/setup.py b/setup.py index 710152cb4..b753077cd 100644 --- a/setup.py +++ b/setup.py @@ -9,13 +9,13 @@ def read(rel_path): here = os.path.abspath(os.path.dirname(__file__)) - with codecs.open(os.path.join(here, rel_path), 'r') as fp: + with codecs.open(os.path.join(here, rel_path), "r") as fp: return fp.read() def get_version(rel_path): for line in read(rel_path).splitlines(): - if line.startswith('__version__'): + if line.startswith("__version__"): delim = '"' if '"' in line else "'" return line.split(delim)[1] else: @@ -29,17 +29,17 @@ def get_extra_requires(path, add_all=True): with open(path) as fp: extra_deps = defaultdict(set) for line in fp: - if line.strip() and not line.startswith('#'): + if line.strip() and not line.startswith("#"): tags = set() - if ':' in line: - k, v = line.split(':') - tags.update(vv.strip() for vv in v.split(',')) + if ":" in line: + k, v = line.split(":") + tags.update(vv.strip() for vv in v.split(",")) for t in tags: extra_deps[t].add(k) # add tag `all` at the end if add_all: - extra_deps['all'] = set(vv for v in extra_deps.values() for vv in v) + extra_deps["all"] = set(vv for v in extra_deps.values() for vv in v) return extra_deps @@ -50,7 +50,7 @@ def get_extra_requires(path, add_all=True): author="ContinualAI", author_email="contact@continualai.org", description="Avalanche: a Comprehensive Framework for Continual Learning " - "Research", + "Research", long_description=long_description, long_description_content_type="text/markdown", url="https://github.com/ContinualAI/avalanche", @@ -60,28 +60,26 @@ def get_extra_requires(path, add_all=True): "License :: OSI Approved :: MIT License", "Operating System :: OS Independent", ], - python_requires='>=3.7,<3.11', + python_requires=">=3.7,<3.11", install_requires=[ - 'typing-extensions', - 'psutil', - 'gputil', - 'scikit-learn', - 'matplotlib', - 'numpy', - 'pytorchcv', - 'wandb', - 'tensorboard>=1.15', - 'tqdm', - 'torch', - 'torchvision', - 'torchmetrics', - 'gdown', - 'quadprog', - 'dill', - 'setuptools<=59.5.0' + "typing-extensions", + "psutil", + "gputil", + "scikit-learn", + "matplotlib", + "numpy", + "pytorchcv", + "wandb", + "tensorboard>=1.15", + "tqdm", + "torch", + "torchvision", + "torchmetrics", + "gdown", + "quadprog", + "dill", + "setuptools<=59.5.0", ], - 
extras_require=get_extra_requires('extra_dependencies.txt', - add_all=True), - include_package_data=True + extras_require=get_extra_requires("extra_dependencies.txt", add_all=True), + include_package_data=True, ) - diff --git a/tests/benchmarks/scenarios/test_classification_scenario.py b/tests/benchmarks/scenarios/test_classification_scenario.py index bf5afc9ed..8a4353647 100644 --- a/tests/benchmarks/scenarios/test_classification_scenario.py +++ b/tests/benchmarks/scenarios/test_classification_scenario.py @@ -20,18 +20,14 @@ def test_classes_in_exp(self): tensor_y = torch.randint(0, 70, (200,)) tensor_t = torch.randint(0, 5, (200,)) train_exps.append( - make_tensor_classification_dataset( - tensor_x, tensor_y, task_labels=tensor_t - ) + make_tensor_classification_dataset(tensor_x, tensor_y, task_labels=tensor_t) ) tensor_x = torch.rand(200, 3, 28, 28) tensor_y = torch.randint(0, 100, (200,)) tensor_t = torch.randint(0, 5, (200,)) train_exps.append( - make_tensor_classification_dataset( - tensor_x, tensor_y, task_labels=tensor_t - ) + make_tensor_classification_dataset(tensor_x, tensor_y, task_labels=tensor_t) ) test_exps = [] @@ -39,9 +35,7 @@ def test_classes_in_exp(self): test_y = torch.randint(100, 200, (200,)) test_t = torch.randint(0, 5, (200,)) test_exps.append( - make_tensor_classification_dataset( - test_x, test_y, task_labels=test_t - ) + make_tensor_classification_dataset(test_x, test_y, task_labels=test_t) ) other_stream_exps = [] @@ -49,9 +43,7 @@ def test_classes_in_exp(self): other_y = torch.randint(400, 600, (200,)) other_t = torch.randint(0, 5, (200,)) other_stream_exps.append( - make_tensor_classification_dataset( - other_x, other_y, task_labels=other_t - ) + make_tensor_classification_dataset(other_x, other_y, task_labels=other_t) ) benchmark_instance = dataset_benchmark( @@ -103,18 +95,14 @@ def test_classes_in_this_experience(self): tensor_y = torch.randint(0, 70, (200,)) tensor_t = torch.randint(0, 5, (200,)) train_exps.append( - make_tensor_classification_dataset( - tensor_x, tensor_y, task_labels=tensor_t - ) + make_tensor_classification_dataset(tensor_x, tensor_y, task_labels=tensor_t) ) tensor_x = torch.rand(200, 3, 28, 28) tensor_y = torch.randint(0, 100, (200,)) tensor_t = torch.randint(0, 5, (200,)) train_exps.append( - make_tensor_classification_dataset( - tensor_x, tensor_y, task_labels=tensor_t - ) + make_tensor_classification_dataset(tensor_x, tensor_y, task_labels=tensor_t) ) test_exps = [] @@ -122,9 +110,7 @@ def test_classes_in_this_experience(self): test_y = torch.randint(100, 200, (200,)) test_t = torch.randint(0, 5, (200,)) test_exps.append( - make_tensor_classification_dataset( - test_x, test_y, task_labels=test_t - ) + make_tensor_classification_dataset(test_x, test_y, task_labels=test_t) ) other_stream_exps = [] @@ -132,9 +118,7 @@ def test_classes_in_this_experience(self): other_y = torch.randint(400, 600, (200,)) other_t = torch.randint(0, 5, (200,)) other_stream_exps.append( - make_tensor_classification_dataset( - other_x, other_y, task_labels=other_t - ) + make_tensor_classification_dataset(other_x, other_y, task_labels=other_t) ) benchmark_instance = dataset_benchmark( @@ -143,12 +127,8 @@ def test_classes_in_this_experience(self): other_streams_datasets={"other": other_stream_exps}, ) - train_exp_0: ClassificationExperience = ( - benchmark_instance.train_stream[0] - ) - train_exp_1: ClassificationExperience = ( - benchmark_instance.train_stream[1] - ) + train_exp_0: ClassificationExperience = benchmark_instance.train_stream[0] + train_exp_1: 
ClassificationExperience = benchmark_instance.train_stream[1] train_0_classes = train_exp_0.classes_in_this_experience train_1_classes = train_exp_1.classes_in_this_experience train_0_classes_min = min(train_0_classes) @@ -160,18 +140,14 @@ def test_classes_in_this_experience(self): self.assertGreaterEqual(train_1_classes_min, 0) self.assertLess(train_1_classes_max, 100) - test_exp_0: ClassificationExperience = ( - benchmark_instance.test_stream[0] - ) + test_exp_0: ClassificationExperience = benchmark_instance.test_stream[0] test_0_classes = test_exp_0.classes_in_this_experience test_0_classes_min = min(test_0_classes) test_0_classes_max = max(test_0_classes) self.assertGreaterEqual(test_0_classes_min, 100) self.assertLess(test_0_classes_max, 200) - other_exp_0: ClassificationExperience = ( - benchmark_instance.other_stream[0] - ) + other_exp_0: ClassificationExperience = benchmark_instance.other_stream[0] other_0_classes = other_exp_0.classes_in_this_experience other_0_classes_min = min(other_0_classes) other_0_classes_max = max(other_0_classes) @@ -229,9 +205,7 @@ def other_gen(): self.assertIsNone(future_classes) # --- END: Test classes timeline before first experience --- - train_exp_0: ClassificationExperience = ( - benchmark_instance.train_stream[0] - ) + train_exp_0: ClassificationExperience = benchmark_instance.train_stream[0] # --- START: Test classes timeline at first experience --- ( current_classes, @@ -259,9 +233,7 @@ def other_gen(): self.assertSetEqual(set(), set(future_classes)) # --- END: Test classes timeline at first experience --- - train_exp_1: ClassificationExperience = ( - benchmark_instance.train_stream[1] - ) + train_exp_1: ClassificationExperience = benchmark_instance.train_stream[1] # --- START: Test classes timeline at second experience --- # Check if get_classes_timeline(0) is consistent ( @@ -305,13 +277,9 @@ def other_gen(): self.assertLess(train_1_classes_max, 100) with self.assertRaises(IndexError): - train_exp_2: ClassificationExperience = ( - benchmark_instance.train_stream[2] - ) + train_exp_2: ClassificationExperience = benchmark_instance.train_stream[2] - test_exp_0: ClassificationExperience = ( - benchmark_instance.test_stream[0] - ) + test_exp_0: ClassificationExperience = benchmark_instance.test_stream[0] test_0_classes = test_exp_0.classes_in_this_experience test_0_classes_min = min(test_0_classes) test_0_classes_max = max(test_0_classes) @@ -319,13 +287,9 @@ def other_gen(): self.assertLess(test_0_classes_max, 200) with self.assertRaises(IndexError): - test_exp_1: ClassificationExperience = ( - benchmark_instance.test_stream[1] - ) + test_exp_1: ClassificationExperience = benchmark_instance.test_stream[1] - other_exp_0: ClassificationExperience = ( - benchmark_instance.other_stream[0] - ) + other_exp_0: ClassificationExperience = benchmark_instance.other_stream[0] other_0_classes = other_exp_0.classes_in_this_experience other_0_classes_min = min(other_0_classes) other_0_classes_max = max(other_0_classes) @@ -333,9 +297,7 @@ def other_gen(): self.assertLess(other_0_classes_max, 600) with self.assertRaises(IndexError): - other_exp_1: ClassificationExperience = ( - benchmark_instance.other_stream[1] - ) + other_exp_1: ClassificationExperience = benchmark_instance.other_stream[1] def test_lazy_benchmark_drop_old_ones(self): train_exps, test_exps, other_stream_exps = self._make_tensor_datasets() @@ -345,8 +307,7 @@ def test_lazy_benchmark_drop_old_ones(self): train_gen = ClassificationScenarioTests._generate_stream(train_exps) test_gen = 
ClassificationScenarioTests._generate_stream(test_exps) - other_gen = ClassificationScenarioTests._generate_stream( - other_stream_exps) + other_gen = ClassificationScenarioTests._generate_stream(other_stream_exps) benchmark_instance = ClassificationScenario( stream_definitions=dict( @@ -381,9 +342,7 @@ def test_lazy_benchmark_drop_old_ones(self): self.assertIsNone(future_classes) # --- END: Test classes timeline before first experience --- - train_exp_0: ClassificationExperience = ( - benchmark_instance.train_stream[0] - ) + train_exp_0: ClassificationExperience = benchmark_instance.train_stream[0] # --- START: Test classes timeline at first experience --- ( current_classes, @@ -413,9 +372,7 @@ def test_lazy_benchmark_drop_old_ones(self): # Check if it works when the previous experience is dropped benchmark_instance.train_stream.drop_previous_experiences(0) - train_exp_1: ClassificationExperience = ( - benchmark_instance.train_stream[1] - ) + train_exp_1: ClassificationExperience = benchmark_instance.train_stream[1] # --- START: Test classes timeline at second experience --- # Check if get_classes_timeline(0) is consistent ( @@ -459,13 +416,9 @@ def test_lazy_benchmark_drop_old_ones(self): self.assertLess(train_1_classes_max, 100) with self.assertRaises(IndexError): - train_exp_2: ClassificationExperience = ( - benchmark_instance.train_stream[2] - ) + train_exp_2: ClassificationExperience = benchmark_instance.train_stream[2] - test_exp_0: ClassificationExperience = ( - benchmark_instance.test_stream[0] - ) + test_exp_0: ClassificationExperience = benchmark_instance.test_stream[0] test_0_classes = test_exp_0.classes_in_this_experience test_0_classes_min = min(test_0_classes) test_0_classes_max = max(test_0_classes) @@ -473,13 +426,9 @@ def test_lazy_benchmark_drop_old_ones(self): self.assertLess(test_0_classes_max, 200) with self.assertRaises(IndexError): - test_exp_1: ClassificationExperience = ( - benchmark_instance.test_stream[1] - ) + test_exp_1: ClassificationExperience = benchmark_instance.test_stream[1] - other_exp_0: ClassificationExperience = ( - benchmark_instance.other_stream[0] - ) + other_exp_0: ClassificationExperience = benchmark_instance.other_stream[0] other_0_classes = other_exp_0.classes_in_this_experience other_0_classes_min = min(other_0_classes) other_0_classes_max = max(other_0_classes) @@ -487,9 +436,7 @@ def test_lazy_benchmark_drop_old_ones(self): self.assertLess(other_0_classes_max, 600) with self.assertRaises(IndexError): - other_exp_1: ClassificationExperience = ( - benchmark_instance.other_stream[1] - ) + other_exp_1: ClassificationExperience = benchmark_instance.other_stream[1] train_exps = None train_exp_0 = None @@ -526,18 +473,14 @@ def _make_tensor_datasets(self): tensor_y = torch.randint(0, 70, (200,)) tensor_t = torch.randint(0, 5, (200,)) train_exps.append( - make_tensor_classification_dataset( - tensor_x, tensor_y, task_labels=tensor_t - ) + make_tensor_classification_dataset(tensor_x, tensor_y, task_labels=tensor_t) ) tensor_x = torch.rand(200, 3, 28, 28) tensor_y = torch.randint(0, 100, (200,)) tensor_t = torch.randint(0, 5, (200,)) train_exps.append( - make_tensor_classification_dataset( - tensor_x, tensor_y, task_labels=tensor_t - ) + make_tensor_classification_dataset(tensor_x, tensor_y, task_labels=tensor_t) ) test_exps = [] @@ -545,9 +488,7 @@ def _make_tensor_datasets(self): test_y = torch.randint(100, 200, (200,)) test_t = torch.randint(0, 5, (200,)) test_exps.append( - make_tensor_classification_dataset( - test_x, test_y, task_labels=test_t 
- ) + make_tensor_classification_dataset(test_x, test_y, task_labels=test_t) ) other_stream_exps = [] @@ -555,9 +496,7 @@ def _make_tensor_datasets(self): other_y = torch.randint(400, 600, (200,)) other_t = torch.randint(0, 5, (200,)) other_stream_exps.append( - make_tensor_classification_dataset( - other_x, other_y, task_labels=other_t - ) + make_tensor_classification_dataset(other_x, other_y, task_labels=other_t) ) return train_exps, test_exps, other_stream_exps diff --git a/tests/benchmarks/scenarios/test_generic_scenario.py b/tests/benchmarks/scenarios/test_generic_scenario.py index 7a1b78379..9cc1896b4 100644 --- a/tests/benchmarks/scenarios/test_generic_scenario.py +++ b/tests/benchmarks/scenarios/test_generic_scenario.py @@ -24,20 +24,18 @@ def test_exp(self): lambda: CLExperience(5, None).eval().current_experience, ) - print( - "CURRENT_EXPERIENCE: ", CLExperience(5, None).current_experience - ) + print("CURRENT_EXPERIENCE: ", CLExperience(5, None).current_experience) assert CLExperience(5, None).current_experience == 5 class StreamTests(unittest.TestCase): def test_stream_getitem(self): # streams should be indexable - s = EagerCLStream("a", [ - CLExperience(0, None), - CLExperience(1, None), - CLExperience(2, None)], - None) + s = EagerCLStream( + "a", + [CLExperience(0, None), CLExperience(1, None), CLExperience(2, None)], + None, + ) s[0] s[1] @@ -51,11 +49,11 @@ def test_stream_getitem(self): def test_stream_slicing(self): # streams should be sliceable - s = EagerCLStream("a", [ - CLExperience(0, None), - CLExperience(1, None), - CLExperience(2, None)], - None) + s = EagerCLStream( + "a", + [CLExperience(0, None), CLExperience(1, None), CLExperience(2, None)], + None, + ) ss = s[1:2] assert len(ss) == 1 @@ -71,9 +69,10 @@ def test_lazy_stream(self): def ls(): # Also tests if set_stream_info works correctly for el in [ - CLExperience(0, None), - CLExperience(0, None), - CLExperience(0, None)]: + CLExperience(0, None), + CLExperience(0, None), + CLExperience(0, None), + ]: yield el s = CLStream("a", ls(), None, set_stream_info=True) @@ -87,16 +86,10 @@ def test_scenario_streams(self): # streams should be indexable sa = EagerCLStream( "a", - [CLExperience(1, None), - CLExperience(2, None), - CLExperience(3, None)], - None - ) - sb = EagerCLStream( - "b", - [CLExperience(12, None), CLExperience(13, None)], - None + [CLExperience(1, None), CLExperience(2, None), CLExperience(3, None)], + None, ) + sb = EagerCLStream("b", [CLExperience(12, None), CLExperience(13, None)], None) bench = CLScenario([sa, sb]) bench.a_stream diff --git a/tests/benchmarks/scenarios/test_rl_scenario.py b/tests/benchmarks/scenarios/test_rl_scenario.py index 029a3b313..01e8fc823 100644 --- a/tests/benchmarks/scenarios/test_rl_scenario.py +++ b/tests/benchmarks/scenarios/test_rl_scenario.py @@ -15,10 +15,10 @@ class RLScenarioTests(unittest.TestCase): - @unittest.skipIf(skip, reason="Need gym to run these tests") def test_simple_scenario(self): from packaging import version + n_envs = 3 envs = [gym.make("CartPole-v1")] * n_envs rl_scenario = RLScenario( @@ -35,7 +35,7 @@ def test_simple_scenario(self): assert exp.task_label == 0 assert isinstance(env, gym.Env) obs = env.reset() - if version.parse(gym.__version__) >= version.parse('0.26.0'): + if version.parse(gym.__version__) >= version.parse("0.26.0"): self.assertIsInstance(obs[0], np.ndarray) self.assertIsInstance(obs[1], dict) else: @@ -53,7 +53,7 @@ def test_multiple_envs_shuffle(self): n_parallel_envs=1, task_labels=True, eval_envs=envs[:2], - 
shuffle=True + shuffle=True, ) tr_stream = rl_scenario.train_stream assert len(tr_stream) == 3 @@ -62,7 +62,7 @@ def test_multiple_envs_shuffle(self): for i, exp in enumerate(tr_stream): assert exp.current_experience == i all_t_labels.add(exp.task_label) - + self.assertSetEqual(set(range(3)), all_t_labels) assert len(rl_scenario.eval_stream) == 2 diff --git a/tests/benchmarks/scenarios/test_scenarios_typechecks.py b/tests/benchmarks/scenarios/test_scenarios_typechecks.py index f3ddcc707..c60c714c7 100644 --- a/tests/benchmarks/scenarios/test_scenarios_typechecks.py +++ b/tests/benchmarks/scenarios/test_scenarios_typechecks.py @@ -7,8 +7,9 @@ from avalanche.benchmarks import tensors_benchmark from avalanche.benchmarks.generators import nc_benchmark, ni_benchmark -from avalanche.benchmarks.scenarios.classification_scenario import \ - ClassificationExperience +from avalanche.benchmarks.scenarios.classification_scenario import ( + ClassificationExperience, +) class ScenariosTypeChecksTests(unittest.TestCase): @@ -48,9 +49,7 @@ def test_nc_sit_type(self): train=False, download=True, ) - my_nc_benchmark = nc_benchmark( - mnist_train, mnist_test, 5, task_labels=False - ) + my_nc_benchmark = nc_benchmark(mnist_train, mnist_test, 5, task_labels=False) for batch_info in my_nc_benchmark.train_stream: self.assertIsInstance(batch_info, ClassificationExperience) @@ -82,12 +81,10 @@ def test_tensor_benchmark_type(self): tensors_benchmark( train_tensors=[ - (torch.randn(2, 3), torch.zeros(2)) - for _ in range(n_experiences) + (torch.randn(2, 3), torch.zeros(2)) for _ in range(n_experiences) ], test_tensors=[ - (torch.randn(2, 3), torch.zeros(2)) - for _ in range(n_experiences) + (torch.randn(2, 3), torch.zeros(2)) for _ in range(n_experiences) ], task_labels=[0] * n_experiences, complete_test_set_only=False, @@ -95,8 +92,7 @@ def test_tensor_benchmark_type(self): tensors_benchmark( train_tensors=[ - (torch.randn(2, 3), torch.zeros(2)) - for _ in range(n_experiences) + (torch.randn(2, 3), torch.zeros(2)) for _ in range(n_experiences) ], test_tensors=[(torch.randn(2, 3), torch.zeros(2))], task_labels=[0] * n_experiences, @@ -106,12 +102,10 @@ def test_tensor_benchmark_type(self): with self.assertRaises(Exception): tensors_benchmark( train_tensors=[ - (torch.randn(2, 3), torch.zeros(2)) - for _ in range(n_experiences) + (torch.randn(2, 3), torch.zeros(2)) for _ in range(n_experiences) ], test_tensors=[ - (torch.randn(2, 3), torch.zeros(2)) - for _ in range(n_experiences) + (torch.randn(2, 3), torch.zeros(2)) for _ in range(n_experiences) ], task_labels=[0] * n_experiences, complete_test_set_only=True, diff --git a/tests/benchmarks/test_avalanche_dataset.py b/tests/benchmarks/test_avalanche_dataset.py index 3de055fe3..8459cd1fe 100644 --- a/tests/benchmarks/test_avalanche_dataset.py +++ b/tests/benchmarks/test_avalanche_dataset.py @@ -57,7 +57,6 @@ class FrozenTransformGroupsCenterCrop: class AvalancheDatasetTests(unittest.TestCase): - def test_attribute_cat_sub(self): # Create a dataset of 100 data points described by 22 # features + 1 class label @@ -68,22 +67,25 @@ def test_attribute_cat_sub(self): tls = [0 for _ in range(100)] # one task label for each sample sup_data = make_classification_dataset(torch_data, task_labels=tls) print(sup_data.targets.name, len(sup_data.targets._data)) - print(sup_data.targets_task_labels.name, - len(sup_data.targets_task_labels._data)) + print( + sup_data.targets_task_labels.name, len(sup_data.targets_task_labels._data) + ) assert len(sup_data) == 100 # after subsampling 
sub_data = sup_data.subset(range(10)) print(sub_data.targets.name, len(sub_data.targets._data)) - print(sub_data.targets_task_labels.name, - len(sub_data.targets_task_labels._data)) + print( + sub_data.targets_task_labels.name, len(sub_data.targets_task_labels._data) + ) assert len(sub_data) == 10 # after concat cat_data = sup_data.concat(sup_data) print(cat_data.targets.name, len(cat_data.targets._data)) - print(cat_data.targets_task_labels.name, - len(cat_data.targets_task_labels._data)) + print( + cat_data.targets_task_labels.name, len(cat_data.targets_task_labels._data) + ) assert len(cat_data) == 200 def test_avldata_subset_size(self): @@ -113,9 +115,7 @@ def test_disallowed_attribute_name(self): dadata = torch.randint(0, 10, (d_sz,)) da = DataAttribute(torch.zeros(d_sz), "collate_fn") with self.assertRaises(ValueError): - d = make_avalanche_dataset( - TensorDataset(xdata), data_attributes=[da] - ) + d = make_avalanche_dataset(TensorDataset(xdata), data_attributes=[da]) def test_subset_subset_merge(self): d_sz, num_permutations = 3, 4 @@ -143,9 +143,7 @@ def test_subset_subset_merge(self): self.assertEqual(len(curr_dataset), d_sz) print("Check data") - x_curr = torch.stack( - [curr_dataset[idx][0] for idx in range(d_sz)], dim=0 - ) + x_curr = torch.stack([curr_dataset[idx][0] for idx in range(d_sz)], dim=0) x_true = torch.stack([xdata[idx] for idx in true_indices], dim=0) self.assertTrue(torch.equal(x_curr, x_true)) @@ -202,21 +200,15 @@ def test_avalanche_dataset_composition(self): def test_avalanche_dataset_add(self): dataset_mnist = load_image_benchmark() tgs = DefaultTransformGroups((CenterCrop(16), None)) - dataset_mnist = make_avalanche_dataset( - dataset_mnist, transform_groups=tgs - ) + dataset_mnist = make_avalanche_dataset(dataset_mnist, transform_groups=tgs) - taskl = DataAttribute( - ConstantSequence(0, len(dataset_mnist)), "task_labels" - ) + taskl = DataAttribute(ConstantSequence(0, len(dataset_mnist)), "task_labels") tgs = DefaultTransformGroups((ToTensor(), lambda target: -1)) dataset1 = make_avalanche_dataset( dataset_mnist, data_attributes=[taskl], transform_groups=tgs ) - taskl = DataAttribute( - ConstantSequence(2, len(dataset_mnist)), "task_labels" - ) + taskl = DataAttribute(ConstantSequence(2, len(dataset_mnist)), "task_labels") tgs = DefaultTransformGroups((None, lambda target: -2)) dataset2 = make_avalanche_dataset( dataset_mnist, data_attributes=[taskl], transform_groups=tgs @@ -248,9 +240,7 @@ def test_avalanche_dataset_add(self): def test_avalanche_dataset_radd(self): dataset_mnist = load_image_benchmark() tgs = DefaultTransformGroups((CenterCrop(16), None)) - dataset_mnist = make_avalanche_dataset( - dataset_mnist, transform_groups=tgs - ) + dataset_mnist = make_avalanche_dataset(dataset_mnist, transform_groups=tgs) tgs = DefaultTransformGroups((ToTensor(), lambda target: -1)) dataset1 = make_avalanche_dataset(dataset_mnist, transform_groups=tgs) @@ -275,14 +265,10 @@ def test_dataset_add_monkey_patch_vanilla_behaviour(self): self.assertEqual(len(dataset_mnist) * 2, len(dataset)) def test_avalanche_dataset_uniform_task_labels(self): - dataset_mnist = MNIST( - root=default_dataset_location("mnist"), download=True - ) + dataset_mnist = MNIST(root=default_dataset_location("mnist"), download=True) x, y = dataset_mnist[0] - dataset = make_classification_dataset( - dataset_mnist, transform=ToTensor() - ) + dataset = make_classification_dataset(dataset_mnist, transform=ToTensor()) x2, y2, t2 = dataset[0] self.assertIsInstance(x2, Tensor) @@ -359,14 +345,10 @@ def 
test_avalanche_dataset_uniform_task_labels_simple_def(self): subset_task0 = dataset.task_set[0] def test_avalanche_dataset_mixed_task_labels(self): - dataset_mnist = MNIST( - root=default_dataset_location("mnist"), download=True - ) + dataset_mnist = MNIST(root=default_dataset_location("mnist"), download=True) x, y = dataset_mnist[0] - random_task_labels = [ - random.randint(0, 10) for _ in range(len(dataset_mnist)) - ] + random_task_labels = [random.randint(0, 10) for _ in range(len(dataset_mnist))] dataset = make_classification_dataset( dataset_mnist, transform=ToTensor(), task_labels=random_task_labels ) @@ -379,9 +361,7 @@ def test_avalanche_dataset_mixed_task_labels(self): self.assertTrue(torch.equal(ToTensor()(x), x2)) self.assertEqual(y, y2) - self.assertListEqual( - random_task_labels, list(dataset.targets_task_labels) - ) + self.assertListEqual(random_task_labels, list(dataset.targets_task_labels)) u_labels, counts = np.unique(random_task_labels, return_counts=True) for i, task_label in enumerate(u_labels.tolist()): @@ -390,9 +370,7 @@ def test_avalanche_dataset_mixed_task_labels(self): self.assertEqual(int(counts[i]), len(subset_task)) unique_task_labels = list(subset_task.targets_task_labels) - self.assertListEqual( - [task_label] * int(counts[i]), unique_task_labels - ) + self.assertListEqual([task_label] * int(counts[i]), unique_task_labels) with self.assertRaises(KeyError): subset_task11 = dataset.task_set[11] @@ -401,9 +379,7 @@ def test_avalanche_dataset_update_data_attribute(self): dataset_orig = load_image_benchmark() dataset: SupervisedClassificationDataset = make_classification_dataset( - dataset_orig, - transform=ToTensor(), - task_labels=0 + dataset_orig, transform=ToTensor(), task_labels=0 ) self.assertIsInstance(dataset, SupervisedClassificationDataset) @@ -420,35 +396,36 @@ def test_avalanche_dataset_update_data_attribute(self): # --- Test add data attribute --- plain_attribute = torch.arange(len(dataset)) get_item_attribute = DataAttribute( - data=torch.arange(len(dataset)) + 5, - name='gia', - use_in_getitem=True + data=torch.arange(len(dataset)) + 5, name="gia", use_in_getitem=True ) targets_task_labels_not_gia = DataAttribute( data=torch.arange(len(dataset)) + 7, - name='targets_task_labels', - use_in_getitem=False + name="targets_task_labels", + use_in_getitem=False, ) # Test wrong length - for wrong_attr in [torch.arange(len(dataset)-1), - torch.arange(len(dataset)+1)]: + for wrong_attr in [ + torch.arange(len(dataset) - 1), + torch.arange(len(dataset) + 1), + ]: with self.assertRaises(Exception): - dataset.update_data_attribute('wrong_attr', wrong_attr) + dataset.update_data_attribute("wrong_attr", wrong_attr) # Add plain attribute dataset_plus_plain = dataset.update_data_attribute( - 'plain_attr', plain_attribute) + "plain_attr", plain_attribute + ) # check nothing added from plain attribute self.assertEqual(3, len(dataset_plus_plain[0])) # check content - self.assertTrue(torch.equal(torch.as_tensor( - dataset_plus_plain.plain_attr), plain_attribute)) + self.assertTrue( + torch.equal(torch.as_tensor(dataset_plus_plain.plain_attr), plain_attribute) + ) # Add get-item attribute - dataset_plus_gia = dataset.update_data_attribute( - 'gia', get_item_attribute) + dataset_plus_gia = dataset.update_data_attribute("gia", get_item_attribute) # check element added from gia attribute elem = dataset_plus_gia[100] self.assertEqual(4, len(elem)) @@ -456,42 +433,54 @@ def test_avalanche_dataset_update_data_attribute(self): # Name mismatch check with 
self.assertRaises(Exception): dataset_plus_gia = dataset.update_data_attribute( - 'name_mismatch', get_item_attribute) + "name_mismatch", get_item_attribute + ) # DataAttribute must convert tensors to list self.assertIsInstance(elem[3], int) self.assertEqual(105, elem[3]) # check content - self.assertTrue(torch.equal(torch.as_tensor( - dataset_plus_gia.gia), torch.arange(len(dataset)) + 5)) + self.assertTrue( + torch.equal( + torch.as_tensor(dataset_plus_gia.gia), torch.arange(len(dataset)) + 5 + ) + ) # Replace attribute (plain) - dataset_plus_plain = dataset.update_data_attribute( - 'targets', plain_attribute) + dataset_plus_plain = dataset.update_data_attribute("targets", plain_attribute) # check nothing added from plain attribute self.assertEqual(3, len(dataset_plus_plain[0])) # check content - self.assertTrue(torch.equal(torch.as_tensor( - dataset_plus_plain.targets), plain_attribute)) + self.assertTrue( + torch.equal(torch.as_tensor(dataset_plus_plain.targets), plain_attribute) + ) # Replace attribute (get_item) dataset_plus_plain = dataset.update_data_attribute( - 'targets_task_labels', plain_attribute) + "targets_task_labels", plain_attribute + ) # check element no removed self.assertEqual(3, len(dataset_plus_plain[0])) # check content - self.assertTrue(torch.equal(torch.as_tensor( - dataset_plus_plain.targets_task_labels), plain_attribute)) + self.assertTrue( + torch.equal( + torch.as_tensor(dataset_plus_plain.targets_task_labels), plain_attribute + ) + ) # Replace attribute (remove one from get_item) dataset_plus_plain = dataset.update_data_attribute( - 'targets_task_labels', targets_task_labels_not_gia) + "targets_task_labels", targets_task_labels_not_gia + ) # check element removed self.assertEqual(2, len(dataset_plus_plain[0])) # check content - self.assertTrue(torch.equal(torch.as_tensor( - dataset_plus_plain.targets_task_labels), - torch.arange(len(dataset)) + 7)) + self.assertTrue( + torch.equal( + torch.as_tensor(dataset_plus_plain.targets_task_labels), + torch.arange(len(dataset)) + 7, + ) + ) if __name__ == "__main__": diff --git a/tests/benchmarks/test_data_attribute.py b/tests/benchmarks/test_data_attribute.py index 35ea707e3..a86d16040 100644 --- a/tests/benchmarks/test_data_attribute.py +++ b/tests/benchmarks/test_data_attribute.py @@ -3,10 +3,8 @@ import numpy as np import torch -from avalanche.benchmarks.utils import (classification_subset, - make_avalanche_dataset) -from avalanche.benchmarks.utils.data_attribute import (DataAttribute, - TensorDataAttribute) +from avalanche.benchmarks.utils import classification_subset, make_avalanche_dataset +from avalanche.benchmarks.utils.data_attribute import DataAttribute, TensorDataAttribute class DataAttributeTests(unittest.TestCase): @@ -28,8 +26,7 @@ def test_val_to_idx(self): t0 = torch.zeros(10, dtype=torch.int) t1 = torch.ones(10, dtype=torch.int) da = DataAttribute(torch.cat([t0, t1]), "task_labels") - self.assertEqual(da.val_to_idx, {0: list(range(10)), - 1: list(range(10, 20))}) + self.assertEqual(da.val_to_idx, {0: list(range(10)), 1: list(range(10, 20))}) def test_subset(self): """Test that subset is correctly computed.""" @@ -44,8 +41,7 @@ def test_concat(self): t0 = torch.zeros(10, dtype=torch.int) t1 = torch.ones(10, dtype=torch.int) da = DataAttribute(torch.cat([t0, t1]), "task_labels") - self.assertEqual(list(da.concat(da).data), - list(torch.cat([t0, t1, t0, t1]))) + self.assertEqual(list(da.concat(da).data), list(torch.cat([t0, t1, t0, t1]))) class TensorDataAttributeTests(unittest.TestCase): @@ -62,8 +58,7 
@@ def test_concat(self): t0 = torch.zeros(10) t1 = torch.ones(10) da = DataAttribute(torch.cat([t0, t1]), "logits") - self.assertEqual(list(da.concat(da).data), - list(torch.cat([t0, t1, t0, t1]))) + self.assertEqual(list(da.concat(da).data), list(torch.cat([t0, t1, t0, t1]))) def test_swap(self): """Test that data attributes are diff --git a/tests/benchmarks/test_flat_data.py b/tests/benchmarks/test_flat_data.py index a47f63a74..21d74224d 100644 --- a/tests/benchmarks/test_flat_data.py +++ b/tests/benchmarks/test_flat_data.py @@ -1,15 +1,17 @@ +import sys import unittest import random import torch from avalanche.benchmarks import fixed_size_experience_split -from avalanche.benchmarks.utils import AvalancheDataset, \ - concat_datasets -from avalanche.benchmarks.utils.classification_dataset import \ - ClassificationDataset -from avalanche.benchmarks.utils.flat_data import FlatData, \ - _flatten_datasets_and_reindex +from avalanche.benchmarks.utils import AvalancheDataset, concat_datasets +from avalanche.benchmarks.utils.classification_dataset import ClassificationDataset +from avalanche.benchmarks.utils.flat_data import ( + FlatData, + _flatten_datasets_and_reindex, + LazyIndices, +) from avalanche.benchmarks.utils.flat_data import ( _flatdata_depth, _flatdata_print, @@ -55,21 +57,15 @@ def test_flatdata_subset_concat_stack_overflow(self): # print("CONCAT:") # _flatdata_print(curr_dataset) - self.assertEqual( - d_sz * dataset_hierarchy_depth + d_sz, len(curr_dataset) - ) + self.assertEqual(d_sz * dataset_hierarchy_depth + d_sz, len(curr_dataset)) for idx in range(dataset_hierarchy_depth): leaf_range = range(idx * d_sz, (idx + 1) * d_sz) permuted = true_indices[idx] - x_leaf = torch.stack( - [curr_dataset[idx] for idx in leaf_range], dim=0 - ) + x_leaf = torch.stack([curr_dataset[idx] for idx in leaf_range], dim=0) self.assertTrue(torch.equal(x_raw[permuted], x_leaf)) - slice_idxs = list( - range(d_sz * dataset_hierarchy_depth, len(curr_dataset)) - ) + slice_idxs = list(range(d_sz * dataset_hierarchy_depth, len(curr_dataset))) x_slice = torch.stack([curr_dataset[idx] for idx in slice_idxs], dim=0) self.assertTrue(torch.equal(x_raw, x_slice)) @@ -108,15 +104,19 @@ def test_flatten_and_reindex(self): D1 = bm.train_stream[0].dataset ds, idxs = _flatten_datasets_and_reindex([D1, D1, D1], None) - print(f"len-ds: {len(ds)}, max={max(idxs)}, min={min(idxs)}, " - f"lens={[len(d) for d in ds]}") + print( + f"len-ds: {len(ds)}, max={max(idxs)}, min={min(idxs)}, " + f"lens={[len(d) for d in ds]}" + ) assert len(ds) == 1 assert len(idxs) == 3 * len(D1) assert max(idxs) == len(D1) - 1 assert min(idxs) == 0 def test_concat_flattens_same_dataset(self): - D = AvalancheDataset([[1, 2, 3]],) + D = AvalancheDataset( + [[1, 2, 3]], + ) B = concat_datasets([]) B = B.concat(D) print(f"DATA depth={_flatdata_depth(B)}, dsets={len(B._datasets)}") @@ -212,8 +212,9 @@ def test_flattening_replay_ocl(self): benchmark = get_fast_benchmark() buffer = ReservoirSamplingBuffer(100) - for t, exp in enumerate(fixed_size_experience_split( - benchmark.train_stream[0], 1, None)): + for t, exp in enumerate( + fixed_size_experience_split(benchmark.train_stream[0], 1, None) + ): buffer.update_from_dataset(exp.dataset) b = buffer.buffer # depths = _flatdata_depth(b) @@ -232,8 +233,9 @@ def test_flattening_replay_ocl(self): print(f"DATA depth={_flatdata_depth(b)}, dsets={len(b._datasets)}") assert len(b._datasets) <= 2 - for t, exp in enumerate(fixed_size_experience_split( - benchmark.train_stream[1], 1, None)): + for t, exp in 
enumerate( + fixed_size_experience_split(benchmark.train_stream[1], 1, None) + ): buffer.update_from_dataset(exp.dataset) b = buffer.buffer # depths = _flatdata_depth(b) @@ -253,5 +255,32 @@ def test_flattening_replay_ocl(self): assert len(b._datasets) <= 2 +class LazyIndicesTests(unittest.TestCase): + def test_basic(self): + eager = list(range(10)) + li = LazyIndices(eager) + self.assertListEqual(eager, list(li)) + self.assertEqual(len(eager), len(li)) + + li = LazyIndices(eager, eager) + self.assertListEqual(eager + eager, list(li)) + self.assertEqual(len(eager) * 2, len(li)) + + li = LazyIndices(eager, offset=7) + self.assertListEqual(list([el + 7 for el in eager]), list(li)) + self.assertEqual(len(eager), len(li)) + + def test_recursion(self): + eager = list(range(10)) + + li = LazyIndices(eager, offset=0) + for i in range(sys.getrecursionlimit() * 2 + 10): + li = LazyIndices(li, eager, offset=0) + + self.assertEqual(len(eager) * (i + 2), len(li)) + for el in li: # keep this to check recursion error + pass + + if __name__ == "__main__": unittest.main() diff --git a/tests/benchmarks/test_replay_loader.py b/tests/benchmarks/test_replay_loader.py index e389e9a69..4fa05f27a 100644 --- a/tests/benchmarks/test_replay_loader.py +++ b/tests/benchmarks/test_replay_loader.py @@ -27,12 +27,9 @@ def setUp(self): np.arange(len(dataset_for_current)), size=100, replace=False ) - self.big_task_set = \ - AvalancheSubset(dataset_for_current, indices_big_set) - self.small_task_set = \ - AvalancheSubset(dataset_for_current, indices_small_set) - self.tiny_task_set = \ - AvalancheSubset(dataset_for_current, indices_tiny_set) + self.big_task_set = AvalancheSubset(dataset_for_current, indices_big_set) + self.small_task_set = AvalancheSubset(dataset_for_current, indices_small_set) + self.tiny_task_set = AvalancheSubset(dataset_for_current, indices_tiny_set) indices_memory = np.random.choice( np.arange(len(dataset_for_memory)), size=2000, replace=False @@ -44,8 +41,7 @@ def setUp(self): self.memory_set = AvalancheSubset(dataset_for_memory, indices_memory) self.small_memory_set = AvalancheSubset( - dataset_for_memory, - indices_memory_small + dataset_for_memory, indices_memory_small ) self._batch_size = None @@ -69,7 +65,7 @@ def _make_loader(self, memory_set=None, **kwargs): def _test_batch_size(self, loader, expected_size=None): if expected_size is None: expected_size = self._batch_size * 2 - + for batch in loader: self.assertEqual(len(batch[0]), expected_size) @@ -113,17 +109,13 @@ def test_big_batch_size(self): def test_zero_iterations_memory(self): self._batch_size = 256 self._task_dataset = self.big_task_set - loader = self._make_loader( - memory_set=self.small_memory_set - ) + loader = self._make_loader(memory_set=self.small_memory_set) self._launch_test_suite_dropped_memory(loader) def test_zero_iterations_current(self): self._batch_size = 256 self._task_dataset = self.tiny_task_set - loader = self._make_loader( - memory_set=self.memory_set - ) + loader = self._make_loader(memory_set=self.memory_set) self.assertEqual(0, self._length) self._launch_test_suite(loader) diff --git a/tests/checkpointing/check_metrics_aligned.py b/tests/checkpointing/check_metrics_aligned.py index 80c97369d..ca652494d 100644 --- a/tests/checkpointing/check_metrics_aligned.py +++ b/tests/checkpointing/check_metrics_aligned.py @@ -9,7 +9,7 @@ def load_pickles(directory): files.sort() data = [] for f in files: - with open(os.path.join(directory, f), 'rb') as fh: + with open(os.path.join(directory, f), "rb") as fh: 
data.append(pickle.load(fh)) return data @@ -23,11 +23,11 @@ def check_metrics_aligned(directory1, directory2): # Check that the metrics are aligned. for i in range(len(data1)): if data1[i] != data2[i]: - print('Metrics are not aligned for experience {}'.format(i)) + print("Metrics are not aligned for experience {}".format(i)) sys.exit(1) - print('Metrics are aligned') + print("Metrics are aligned") -if __name__ == '__main__': +if __name__ == "__main__": check_metrics_aligned(sys.argv[1], sys.argv[2]) diff --git a/tests/checkpointing/task_incremental_with_checkpointing.py b/tests/checkpointing/task_incremental_with_checkpointing.py index ad0a8c6ce..9e6c21027 100644 --- a/tests/checkpointing/task_incremental_with_checkpointing.py +++ b/tests/checkpointing/task_incremental_with_checkpointing.py @@ -25,24 +25,44 @@ from torch.nn import CrossEntropyLoss from torch.optim import SGD -from avalanche.benchmarks import CLExperience, SplitCIFAR100, CLStream51, \ - SplitOmniglot, SplitMNIST, SplitFMNIST +from avalanche.benchmarks import ( + CLExperience, + SplitCIFAR100, + CLStream51, + SplitOmniglot, + SplitMNIST, + SplitFMNIST, +) from avalanche.benchmarks.classic import SplitCIFAR10 -from avalanche.evaluation.metrics import accuracy_metrics, loss_metrics, \ - class_accuracy_metrics -from avalanche.logging import InteractiveLogger, TensorboardLogger, \ - WandBLogger, TextLogger +from avalanche.evaluation.metrics import ( + accuracy_metrics, + loss_metrics, + class_accuracy_metrics, +) +from avalanche.logging import ( + InteractiveLogger, + TensorboardLogger, + WandBLogger, + TextLogger, +) from avalanche.models import SimpleMLP, as_multitask from avalanche.training.determinism.rng_manager import RNGManager -from avalanche.training.plugins import EvaluationPlugin, CWRStarPlugin, \ - ReplayPlugin, GDumbPlugin, LwFPlugin, SynapticIntelligencePlugin, EWCPlugin +from avalanche.training.plugins import ( + EvaluationPlugin, + CWRStarPlugin, + ReplayPlugin, + GDumbPlugin, + LwFPlugin, + SynapticIntelligencePlugin, + EWCPlugin, +) from avalanche.training.checkpoint import maybe_load_checkpoint, save_checkpoint from avalanche.training.supervised import Naive from tests.unit_tests_utils import get_fast_benchmark def main(args): - fname = './checkpoint.pkl' + fname = "./checkpoint.pkl" # FIRST CHANGE: SET THE RANDOM SEEDS # In fact, you should to this no matter the checkpointing functionality. @@ -59,53 +79,53 @@ def main(args): # Nothing new here... 
device = torch.device( - f"cuda:{args.cuda}" - if torch.cuda.is_available() and args.cuda >= 0 - else "cpu" + f"cuda:{args.cuda}" if torch.cuda.is_available() and args.cuda >= 0 else "cpu" ) - print('Using device', device) + print("Using device", device) # Code used to select the benchmark: not checkpoint-related - use_tasks = 'si' not in args.plugins and 'cwr' not in args.plugins \ - and args.benchmark != 'Stream51' - input_size = 32*32*3 + use_tasks = ( + "si" not in args.plugins + and "cwr" not in args.plugins + and args.benchmark != "Stream51" + ) + input_size = 32 * 32 * 3 # CL Benchmark Creation - if args.benchmark == 'TestBenchmark': + if args.benchmark == "TestBenchmark": input_size = 28 * 28 * 1 scenario = get_fast_benchmark( use_task_labels=True, n_features=input_size, n_samples_per_class=256, - seed=1337 + seed=1337, ) - elif args.benchmark == 'SplitMNIST': + elif args.benchmark == "SplitMNIST": scenario = SplitMNIST(n_experiences=5, return_task_id=True) - input_size = 28*28*1 - elif args.benchmark == 'SplitFMNIST': + input_size = 28 * 28 * 1 + elif args.benchmark == "SplitFMNIST": scenario = SplitFMNIST(n_experiences=5, return_task_id=True) - input_size = 28*28*1 - elif args.benchmark == 'SplitCifar100': + input_size = 28 * 28 * 1 + elif args.benchmark == "SplitCifar100": scenario = SplitCIFAR100(n_experiences=5, return_task_id=use_tasks) - elif args.benchmark == 'SplitCifar10': + elif args.benchmark == "SplitCifar10": scenario = SplitCIFAR10(n_experiences=5, return_task_id=use_tasks) - elif args.benchmark == 'Stream51': + elif args.benchmark == "Stream51": scenario = CLStream51() scenario.n_classes = 51 - input_size = 224*224*3 - elif args.benchmark == 'SplitOmniglot': + input_size = 224 * 224 * 3 + elif args.benchmark == "SplitOmniglot": scenario = SplitOmniglot(n_experiences=4, return_task_id=use_tasks) - input_size = 105*105*1 + input_size = 105 * 105 * 1 else: - raise ValueError('Unrecognized benchmark name from CLI.') + raise ValueError("Unrecognized benchmark name from CLI.") train_stream: Sequence[CLExperience] = scenario.train_stream test_stream: Sequence[CLExperience] = scenario.test_stream # Define the model (and load initial weights if necessary) # Again, not checkpoint-related if use_tasks: - model = SimpleMLP(input_size=input_size, - num_classes=scenario.n_classes // 5) - model = as_multitask(model, 'classifier') + model = SimpleMLP(input_size=input_size, num_classes=scenario.n_classes // 5) + model = as_multitask(model, "classifier") else: model = SimpleMLP(input_size=input_size, num_classes=scenario.n_classes) @@ -116,56 +136,50 @@ def main(args): # Create other plugins # ... 
plugins = [] - cli_plugin_names = '_'.join(args.plugins) + cli_plugin_names = "_".join(args.plugins) for cli_plugin in args.plugins: - if cli_plugin == 'cwr': - plugin_instance = CWRStarPlugin( - model, freeze_remaining_model=False) - elif cli_plugin == 'replay': + if cli_plugin == "cwr": + plugin_instance = CWRStarPlugin(model, freeze_remaining_model=False) + elif cli_plugin == "replay": plugin_instance = ReplayPlugin(mem_size=500) - elif cli_plugin == 'gdumb': + elif cli_plugin == "gdumb": plugin_instance = GDumbPlugin(mem_size=500) - elif cli_plugin == 'lwf': + elif cli_plugin == "lwf": plugin_instance = LwFPlugin() - elif cli_plugin == 'si': + elif cli_plugin == "si": plugin_instance = SynapticIntelligencePlugin(0.001) - elif cli_plugin == 'ewc': + elif cli_plugin == "ewc": plugin_instance = EWCPlugin(0.001) else: - raise ValueError('Unrecognized plugin name from CLI.') - print('Adding plugin', plugin_instance) + raise ValueError("Unrecognized plugin name from CLI.") + print("Adding plugin", plugin_instance) plugins.append(plugin_instance) # Create loggers (as usual) - os.makedirs(f'./logs/checkpointing_{args.checkpoint_at}', - exist_ok=True) + os.makedirs(f"./logs/checkpointing_{args.checkpoint_at}", exist_ok=True) loggers = [ - TextLogger( - open(f'./logs/checkpointing_' - f'{args.checkpoint_at}/log.txt', 'w')), + TextLogger(open(f"./logs/checkpointing_" f"{args.checkpoint_at}/log.txt", "w")), InteractiveLogger(), - TensorboardLogger(f'./logs/checkpointing_{args.checkpoint_at}') + TensorboardLogger(f"./logs/checkpointing_{args.checkpoint_at}"), ] if args.wandb: - loggers.append(WandBLogger( - project_name='AvalancheCheckpointing', - run_name=f'checkpointing_{args.benchmark}_' - f'{args.checkpoint_at}_' - f'{cli_plugin_names}' - )) + loggers.append( + WandBLogger( + project_name="AvalancheCheckpointing", + run_name=f"checkpointing_{args.benchmark}_" + f"{args.checkpoint_at}_" + f"{cli_plugin_names}", + ) + ) # Create the evaluation plugin (when not using the default one) evaluation_plugin = EvaluationPlugin( - accuracy_metrics(minibatch=False, epoch=True, - experience=True, stream=True), - loss_metrics(minibatch=False, epoch=True, - experience=True, stream=True), - class_accuracy_metrics( - stream=True - ), - loggers=loggers + accuracy_metrics(minibatch=False, epoch=True, experience=True, stream=True), + loss_metrics(minibatch=False, epoch=True, experience=True, stream=True), + class_accuracy_metrics(stream=True), + loggers=loggers, ) # Create the strategy @@ -178,7 +192,7 @@ def main(args): eval_mb_size=128, device=device, plugins=plugins, - evaluator=evaluation_plugin + evaluator=evaluation_plugin, ) # THIRD CHANGE: LOAD THE CHECKPOINT IF EXISTS @@ -203,12 +217,15 @@ def main(args): save_checkpoint(strategy, fname) Path(args.log_metrics_to).mkdir(parents=True, exist_ok=True) - with open(Path(args.log_metrics_to) / - f'metrics_exp{train_task.current_experience}.pkl', 'wb') as f: + with open( + Path(args.log_metrics_to) + / f"metrics_exp{train_task.current_experience}.pkl", + "wb", + ) as f: pickle.dump(metrics, f) if train_task.current_experience == args.checkpoint_at: - print('Exiting early') + print("Exiting early") break @@ -218,32 +235,13 @@ def main(args): "--cuda", type=int, default=0, - help="Select zero-indexed cuda device. -1 to use CPU." - ) - parser.add_argument( - "--benchmark", - type=str, - default='SplitCifar100', - help="The benchmark to use." 
- ) - parser.add_argument( - "--checkpoint_at", - type=int, - default=-1 - ) - parser.add_argument( - "--log_metrics_to", - type=str, - default='./metrics' - ) - parser.add_argument( - "--wandb", - action='store_true' + help="Select zero-indexed cuda device. -1 to use CPU.", ) parser.add_argument( - "--plugins", - nargs='*', - required=False, - default=[] + "--benchmark", type=str, default="SplitCifar100", help="The benchmark to use." ) + parser.add_argument("--checkpoint_at", type=int, default=-1) + parser.add_argument("--log_metrics_to", type=str, default="./metrics") + parser.add_argument("--wandb", action="store_true") + parser.add_argument("--plugins", nargs="*", required=False, default=[]) main(parser.parse_args()) diff --git a/tests/distributed/distributed_test_utils.py b/tests/distributed/distributed_test_utils.py index 4e17e8f4b..fe9831ded 100644 --- a/tests/distributed/distributed_test_utils.py +++ b/tests/distributed/distributed_test_utils.py @@ -7,26 +7,25 @@ def common_dst_tests_setup(): - use_gpu_in_tests = os.environ.get('USE_GPU', 'false').lower() in [ - '1', 'true'] + use_gpu_in_tests = os.environ.get("USE_GPU", "false").lower() in ["1", "true"] use_gpu_in_tests = use_gpu_in_tests and torch.cuda.is_available() DistributedHelper.init_distributed(1234, use_cuda=use_gpu_in_tests) return use_gpu_in_tests def check_skip_distributed_test() -> bool: - return os.environ.get('DISTRIBUTED_TESTS', 'false').lower() \ - not in ['1', 'true'] + return os.environ.get("DISTRIBUTED_TESTS", "false").lower() not in ["1", "true"] def check_skip_distributed_slow_test() -> bool: - return check_skip_distributed_test() or \ - os.environ.get('FAST_TEST', 'false').lower() in ['1', 'true'] + return check_skip_distributed_test() or os.environ.get( + "FAST_TEST", "false" + ).lower() in ["1", "true"] @contextlib.contextmanager def suppress_dst_tests_output(): - if os.environ['LOCAL_RANK'] != 0: + if os.environ["LOCAL_RANK"] != 0: with contextlib.redirect_stderr(None): with contextlib.redirect_stdout(None): yield @@ -35,8 +34,8 @@ def suppress_dst_tests_output(): __all__ = [ - 'common_dst_tests_setup', - 'check_skip_distributed_test', - 'check_skip_distributed_slow_test', - 'suppress_dst_tests_output' + "common_dst_tests_setup", + "check_skip_distributed_test", + "check_skip_distributed_slow_test", + "suppress_dst_tests_output", ] diff --git a/tests/distributed/test_distributed_helper.py b/tests/distributed/test_distributed_helper.py index 123c281b5..d57961c3f 100644 --- a/tests/distributed/test_distributed_helper.py +++ b/tests/distributed/test_distributed_helper.py @@ -10,57 +10,59 @@ import torch.distributed as dst from torch.nn import Module from torch.nn.parallel import DistributedDataParallel -from avalanche.benchmarks.generators.benchmark_generators import \ - dataset_benchmark -from avalanche.benchmarks.utils.classification_dataset import \ - make_tensor_classification_dataset +from avalanche.benchmarks.generators.benchmark_generators import dataset_benchmark +from avalanche.benchmarks.utils.classification_dataset import ( + make_tensor_classification_dataset, +) from avalanche.distributed import DistributedHelper -from avalanche.distributed.distributed_helper import \ - RollingSeedContext, BroadcastSeedContext +from avalanche.distributed.distributed_helper import ( + RollingSeedContext, + BroadcastSeedContext, +) from avalanche.models import SimpleMLP, as_multitask from avalanche.models.utils import avalanche_model_adaptation from avalanche.training.determinism.rng_manager import RNGManager 
-from tests.distributed.distributed_test_utils import \ - check_skip_distributed_slow_test, check_skip_distributed_test, \ - suppress_dst_tests_output, common_dst_tests_setup +from tests.distributed.distributed_test_utils import ( + check_skip_distributed_slow_test, + check_skip_distributed_test, + suppress_dst_tests_output, + common_dst_tests_setup, +) class DistributedHelperTests(unittest.TestCase): - def setUp(self) -> None: self.use_gpu_in_tests = common_dst_tests_setup() - @unittest.skipIf(check_skip_distributed_test(), - 'Distributed tests ignored') + @unittest.skipIf(check_skip_distributed_test(), "Distributed tests ignored") def test_device_id(self): if self.use_gpu_in_tests: self.assertEqual(dst.get_rank(), DistributedHelper.get_device_id()) - self.assertEqual(torch.device(f'cuda:{dst.get_rank()}'), - DistributedHelper.make_device()) + self.assertEqual( + torch.device(f"cuda:{dst.get_rank()}"), DistributedHelper.make_device() + ) else: self.assertEqual(-1, DistributedHelper.get_device_id()) - self.assertEqual(torch.device('cpu'), - DistributedHelper.make_device()) + self.assertEqual(torch.device("cpu"), DistributedHelper.make_device()) - @unittest.skipIf(check_skip_distributed_test(), - 'Distributed tests ignored') + @unittest.skipIf(check_skip_distributed_test(), "Distributed tests ignored") def test_wrap_model(self): - mb_size = 1*2*2*3*5 + mb_size = 1 * 2 * 2 * 3 * 5 num_classes = 11 torch.manual_seed(1234 + DistributedHelper.rank) mb_x = torch.randn((mb_size, 32)) mb_y = torch.randint(0, num_classes, (mb_size,)) mb_t = torch.full((mb_size,), 1) model = SimpleMLP(num_classes=num_classes, input_size=32) - model = as_multitask(model, 'classifier') + model = as_multitask(model, "classifier") self.assertIsInstance(model, Module) device = DistributedHelper.make_device() - if device.type == 'cuda': - # Additional test: must raise an error if the model + if device.type == "cuda": + # Additional test: must raise an error if the model # is not already in the correct device with self.assertRaises(Exception): model_wrapped = DistributedHelper.wrap_model(model) @@ -81,12 +83,16 @@ def test_wrap_model(self): model_wrapped.eval() benchmark = dataset_benchmark( - [make_tensor_classification_dataset( - mb_x, mb_y, mb_t, task_labels=mb_t.tolist() - )], - [make_tensor_classification_dataset( - mb_x, mb_y, mb_t, task_labels=mb_t.tolist() - )] + [ + make_tensor_classification_dataset( + mb_x, mb_y, mb_t, task_labels=mb_t.tolist() + ) + ], + [ + make_tensor_classification_dataset( + mb_x, mb_y, mb_t, task_labels=mb_t.tolist() + ) + ], ) avalanche_model_adaptation(model, benchmark.train_stream[0]) @@ -107,13 +113,11 @@ def test_wrap_model(self): start_idx = mb_size * DistributedHelper.rank end_idx = start_idx + mb_size - self.assertTrue(torch.equal(mb_out1, - mb_out_all[start_idx: end_idx])) - + self.assertTrue(torch.equal(mb_out1, mb_out_all[start_idx:end_idx])) + self.assertTrue(model is DistributedHelper.unwrap_model(model_wrapped)) - @unittest.skipIf(check_skip_distributed_test(), - 'Distributed tests ignored') + @unittest.skipIf(check_skip_distributed_test(), "Distributed tests ignored") def test_broadcast_tensor_or_objects(self): ts = torch.full((10,), DistributedHelper.rank, dtype=torch.long) DistributedHelper.broadcast(ts) @@ -122,98 +126,96 @@ def test_broadcast_tensor_or_objects(self): device = DistributedHelper.make_device() ts = ts.to(device) - my_object = {'a': DistributedHelper.rank, 'b': ts} + my_object = {"a": DistributedHelper.rank, "b": ts} my_object_from_main = 
DistributedHelper.broadcast_object(my_object) - expect = { - 'a': 0, - 'b': torch.full((10,), 0, dtype=torch.long).tolist()} - - self.assertEqual(device, my_object_from_main['b'].device) - my_object_from_main['b'] = my_object_from_main['b'].tolist() + expect = {"a": 0, "b": torch.full((10,), 0, dtype=torch.long).tolist()} + + self.assertEqual(device, my_object_from_main["b"].device) + my_object_from_main["b"] = my_object_from_main["b"].tolist() self.assertEqual(expect, my_object_from_main) - @unittest.skipIf(check_skip_distributed_test(), - 'Distributed tests ignored') + @unittest.skipIf(check_skip_distributed_test(), "Distributed tests ignored") def test_gather_all_objects(self): ts = torch.full((10,), DistributedHelper.rank, dtype=torch.long) device = DistributedHelper.make_device() ts = ts.to(device) - my_object = {'a': DistributedHelper.rank, 'b': ts} + my_object = {"a": DistributedHelper.rank, "b": ts} all_objects = DistributedHelper.gather_all_objects(my_object) self.assertIsInstance(all_objects, list) self.assertEqual(DistributedHelper.world_size, len(all_objects)) for rank in range(DistributedHelper.world_size): expect = { - 'a': rank, - 'b': torch.full((10,), rank, dtype=torch.long).tolist()} - - self.assertEqual(device, all_objects[rank]['b'].device) - all_objects[rank]['b'] = all_objects[rank]['b'].tolist() + "a": rank, + "b": torch.full((10,), rank, dtype=torch.long).tolist(), + } + + self.assertEqual(device, all_objects[rank]["b"].device) + all_objects[rank]["b"] = all_objects[rank]["b"].tolist() self.assertEqual(expect, all_objects[rank]) - @unittest.skipIf(check_skip_distributed_test(), - 'Distributed tests ignored') + @unittest.skipIf(check_skip_distributed_test(), "Distributed tests ignored") def test_cat_all(self): if DistributedHelper.rank == 0: - ts = torch.full((10+1, 5), DistributedHelper.rank, dtype=torch.long) + ts = torch.full((10 + 1, 5), DistributedHelper.rank, dtype=torch.long) else: ts = torch.full((10, 5), DistributedHelper.rank, dtype=torch.long) device = DistributedHelper.make_device() - if device.type == 'cuda': + if device.type == "cuda": # Additional test: tensors do not need to be on the default device DistributedHelper.cat_all(ts) - + ts = ts.to(device) concatenated_tensor = DistributedHelper.cat_all(ts) self.assertEqual(device, concatenated_tensor.device) - expect = torch.empty((DistributedHelper.world_size * 10 + 1, 5), - dtype=torch.long).to(device) + expect = torch.empty( + (DistributedHelper.world_size * 10 + 1, 5), dtype=torch.long + ).to(device) for rank in range(DistributedHelper.world_size): if rank == 0: - expect[rank * 10: (rank + 1) * 10 + 1] = rank + expect[rank * 10 : (rank + 1) * 10 + 1] = rank else: - expect[1 + rank * 10: 1 + (rank + 1) * 10] = rank - + expect[1 + rank * 10 : 1 + (rank + 1) * 10] = rank + self.assertTrue(torch.equal(concatenated_tensor, expect)) - @unittest.skipIf(check_skip_distributed_test(), - 'Distributed tests ignored') + @unittest.skipIf(check_skip_distributed_test(), "Distributed tests ignored") def test_gather_all_same_size(self): ts = torch.full((10, 5), DistributedHelper.rank, dtype=torch.long) device = DistributedHelper.make_device() - if device.type == 'cuda': + if device.type == "cuda": # Additional test: tensors do not need to be on the default device DistributedHelper.gather_all(ts) # On the other hand, PyTorch all_gather requires tensors to be on # the default device with self.assertRaises(Exception): - - out_t = [torch.empty_like(ts) - for _ in range(DistributedHelper.world_size)] + out_t = [ + 
torch.empty_like(ts) for _ in range(DistributedHelper.world_size) + ] torch.distributed.all_gather(out_t, ts) - + # ... while this should work - out_t = [torch.empty_like(ts).to(device) - for _ in range(DistributedHelper.world_size)] + out_t = [ + torch.empty_like(ts).to(device) + for _ in range(DistributedHelper.world_size) + ] torch.distributed.all_gather(out_t, ts.to(device)) ts = ts.to(device) for same_shape in [False, True]: - print(f'same_shape={same_shape}') + print(f"same_shape={same_shape}") # with self.subTest(same_shape=same_shape): - tensor_list = DistributedHelper.gather_all( - ts, same_shape=same_shape) + tensor_list = DistributedHelper.gather_all(ts, same_shape=same_shape) self.assertEqual(DistributedHelper.world_size, len(tensor_list)) @@ -224,27 +226,30 @@ def test_gather_all_same_size(self): expect = torch.full((10, 5), rank, dtype=torch.long).to(device) self.assertTrue(torch.equal(tensor_list[rank], expect)) - @unittest.skipIf(check_skip_distributed_slow_test(), - 'Distributed tests ignored') + @unittest.skipIf(check_skip_distributed_slow_test(), "Distributed tests ignored") def test_gather_all_performance_known_same_shape(self): - ts = torch.full((128, 224, 224, 3), - DistributedHelper.rank, - dtype=torch.float32) + ts = torch.full((128, 224, 224, 3), DistributedHelper.rank, dtype=torch.float32) device = DistributedHelper.make_device() ts = ts.to(device) - resulting_tensors = [torch.empty_like(ts).to(device) - for _ in range(DistributedHelper.world_size)] + resulting_tensors = [ + torch.empty_like(ts).to(device) for _ in range(DistributedHelper.world_size) + ] from tqdm import tqdm + n_times = 30 torch.distributed.all_gather(resulting_tensors, ts) start_time = time.time() for _ in tqdm(range(n_times)): torch.distributed.all_gather(resulting_tensors, ts) end_time = time.time() - print('Time taken by PyTorch all_gather', end_time-start_time, - 'avg', (end_time-start_time) / n_times) + print( + "Time taken by PyTorch all_gather", + end_time - start_time, + "avg", + (end_time - start_time) / n_times, + ) start_time = time.time() out_list = [None for _ in range(DistributedHelper.world_size)] @@ -253,15 +258,17 @@ def test_gather_all_performance_known_same_shape(self): for _ in tqdm(range(n_times)): torch.distributed.all_gather_object(out_list, ts) end_time = time.time() - print('Time taken by PyTorch all_gather_object', end_time-start_time, - 'avg', (end_time-start_time) / n_times) - - @unittest.skipIf(check_skip_distributed_slow_test(), - 'Distributed tests ignored') + print( + "Time taken by PyTorch all_gather_object", + end_time - start_time, + "avg", + (end_time - start_time) / n_times, + ) + + @unittest.skipIf(check_skip_distributed_slow_test(), "Distributed tests ignored") def test_gather_all_performance_sync_shape(self): max_shape_size = 10 - shape = [128, 6, DistributedHelper.rank+1] + \ - ([3] * DistributedHelper.rank) + shape = [128, 6, DistributedHelper.rank + 1] + ([3] * DistributedHelper.rank) device = DistributedHelper.make_device() @@ -269,10 +276,11 @@ def shape_all_gather(): ts = torch.zeros((max_shape_size,), dtype=torch.int64) for i in range(len(shape)): ts[i] = shape[i] - + ts = ts.to(device) - all_tensors_shape = [torch.empty_like(ts) - for _ in range(DistributedHelper.world_size)] + all_tensors_shape = [ + torch.empty_like(ts) for _ in range(DistributedHelper.world_size) + ] torch.distributed.all_gather(all_tensors_shape, ts) all_tensors_shape = [t.cpu() for t in all_tensors_shape] @@ -281,7 +289,7 @@ def shape_all_gather(): if t[x] == 0: if x == 0: # 
Tensor with 0-length shape - all_tensors_shape[i] = t[:x+1] + all_tensors_shape[i] = t[: x + 1] else: all_tensors_shape[i] = t[:x] break @@ -291,14 +299,19 @@ def shape_all_gather_objects(): torch.distributed.all_gather_object(out_list, shape) from tqdm import tqdm + n_times = 1000 shape_all_gather() start_time = time.time() for _ in tqdm(range(n_times)): shape_all_gather() end_time = time.time() - print('Time taken by PyTorch all_gather', end_time-start_time, - 'avg', (end_time-start_time) / n_times) + print( + "Time taken by PyTorch all_gather", + end_time - start_time, + "avg", + (end_time - start_time) / n_times, + ) start_time = time.time() shape_all_gather_objects() @@ -306,15 +319,18 @@ def shape_all_gather_objects(): for _ in tqdm(range(n_times)): shape_all_gather_objects() end_time = time.time() - print('Time taken by PyTorch all_gather_object', end_time-start_time, - 'avg', (end_time-start_time) / n_times) - - @unittest.skipIf(check_skip_distributed_test(), - 'Distributed tests ignored') + print( + "Time taken by PyTorch all_gather_object", + end_time - start_time, + "avg", + (end_time - start_time) / n_times, + ) + + @unittest.skipIf(check_skip_distributed_test(), "Distributed tests ignored") def test_gather_all_same_dim0(self): - ts = torch.full((10, DistributedHelper.rank+1), - DistributedHelper.rank, - dtype=torch.long) + ts = torch.full( + (10, DistributedHelper.rank + 1), DistributedHelper.rank, dtype=torch.long + ) device = DistributedHelper.make_device() ts = ts.to(device) @@ -326,17 +342,14 @@ def test_gather_all_same_dim0(self): self.assertEqual(device, t.device) for rank in range(DistributedHelper.world_size): - expect = torch.full((10, rank+1), - rank, - dtype=torch.long).to(device) + expect = torch.full((10, rank + 1), rank, dtype=torch.long).to(device) self.assertTrue(torch.equal(tensor_list[rank], expect)) - @unittest.skipIf(check_skip_distributed_test(), - 'Distributed tests ignored') + @unittest.skipIf(check_skip_distributed_test(), "Distributed tests ignored") def test_gather_all_same_dim1_n(self): - ts = torch.full((10+DistributedHelper.rank, 5), - DistributedHelper.rank, - dtype=torch.long) + ts = torch.full( + (10 + DistributedHelper.rank, 5), DistributedHelper.rank, dtype=torch.long + ) device = DistributedHelper.make_device() ts = ts.to(device) @@ -348,13 +361,10 @@ def test_gather_all_same_dim1_n(self): self.assertEqual(device, t.device) for rank in range(DistributedHelper.world_size): - expect = torch.full((10+rank, 5), - rank, - dtype=torch.long).to(device) + expect = torch.full((10 + rank, 5), rank, dtype=torch.long).to(device) self.assertTrue(torch.equal(tensor_list[rank], expect)) - @unittest.skipIf(check_skip_distributed_test(), - 'Distributed tests ignored') + @unittest.skipIf(check_skip_distributed_test(), "Distributed tests ignored") def test_gather_all_zero_shaped(self): ts = torch.full(tuple(), DistributedHelper.rank, dtype=torch.long) device = DistributedHelper.make_device() @@ -362,11 +372,9 @@ def test_gather_all_zero_shaped(self): ts = ts.to(device) for same_shape in [False, True]: - print(f'same_shape={same_shape}') + print(f"same_shape={same_shape}") # with self.subTest(same_shape=same_shape): - tensor_list = DistributedHelper.gather_all( - ts, - same_shape=same_shape) + tensor_list = DistributedHelper.gather_all(ts, same_shape=same_shape) self.assertEqual(DistributedHelper.world_size, len(tensor_list)) for t in tensor_list: @@ -376,13 +384,13 @@ def test_gather_all_zero_shaped(self): expect = torch.full(tuple(), rank, 
dtype=torch.long).to(device) self.assertTrue(torch.equal(tensor_list[rank], expect)) - @unittest.skipIf(check_skip_distributed_test(), - 'Distributed tests ignored') + @unittest.skipIf(check_skip_distributed_test(), "Distributed tests ignored") def test_check_equal_tensors(self): - if DistributedHelper.world_size == 1 and \ - DistributedHelper.get_device_id() >= 0: - self.skipTest('When using CUDA, there must be at ' - 'least two processes to run this test') + if DistributedHelper.world_size == 1 and DistributedHelper.get_device_id() >= 0: + self.skipTest( + "When using CUDA, there must be at " + "least two processes to run this test" + ) torch.manual_seed(1234) ts = torch.randn((100,)) DistributedHelper.check_equal_tensors(ts) @@ -392,8 +400,7 @@ def test_check_equal_tensors(self): with self.assertRaises(Exception): DistributedHelper.check_equal_tensors(ts) - @unittest.skipIf(check_skip_distributed_test(), - 'Distributed tests ignored') + @unittest.skipIf(check_skip_distributed_test(), "Distributed tests ignored") def test_fields(self): self.assertEqual(dst.get_rank(), DistributedHelper.rank) self.assertEqual(dst.get_world_size(), DistributedHelper.world_size) @@ -401,14 +408,13 @@ def test_fields(self): self.assertEqual(dst.get_rank() == 0, DistributedHelper.is_main_process) if self.use_gpu_in_tests: - self.assertEqual('nccl', DistributedHelper.backend) + self.assertEqual("nccl", DistributedHelper.backend) self.assertTrue(DistributedHelper.forced_cuda_comm) else: - self.assertEqual('gloo', DistributedHelper.backend) + self.assertEqual("gloo", DistributedHelper.backend) self.assertFalse(DistributedHelper.forced_cuda_comm) - @unittest.skipIf(check_skip_distributed_test(), - 'Distributed tests ignored') + @unittest.skipIf(check_skip_distributed_test(), "Distributed tests ignored") def test_set_random_seeds_and_align(self): DistributedHelper.set_random_seeds(5678) @@ -422,79 +428,73 @@ def test_set_random_seeds_and_align(self): random.randint(0, 1000000) DistributedHelper.align_seeds() - + ref_values = ( int(np.random.randint(0, 1000000)), int(torch.randint(0, 1000000, (1,))), - int(random.randint(0, 1000000)) + int(random.randint(0, 1000000)), ) DistributedHelper.check_equal_objects(ref_values) - - @unittest.skipIf(check_skip_distributed_test(), - 'Distributed tests ignored') + + @unittest.skipIf(check_skip_distributed_test(), "Distributed tests ignored") def test_rolling_seed_aligner(self): RNGManager.set_random_seeds(4321) with RollingSeedContext(): RNGManager.set_random_seeds(1234 + DistributedHelper.rank) - random.randint(0, 2 ** 64 - 1) + random.randint(0, 2**64 - 1) - final_value = random.randint(0, 2 ** 64 - 1) + final_value = random.randint(0, 2**64 - 1) self.assertEqual(14732185405572191734, final_value) - @unittest.skipIf(check_skip_distributed_test(), - 'Distributed tests ignored') + @unittest.skipIf(check_skip_distributed_test(), "Distributed tests ignored") def test_broadcast_seed_aligner(self): RNGManager.set_random_seeds(4321) with BroadcastSeedContext(): RNGManager.set_random_seeds(1234 + DistributedHelper.rank) - random.randint(0, 2 ** 64 - 1) + random.randint(0, 2**64 - 1) - final_value = random.randint(0, 2 ** 64 - 1) + final_value = random.randint(0, 2**64 - 1) self.assertEqual(15306775005444441373, final_value) - - @unittest.skipIf(check_skip_distributed_test(), - 'Distributed tests ignored') + + @unittest.skipIf(check_skip_distributed_test(), "Distributed tests ignored") def test_main_process_first(self): - tmpdirname = '' + tmpdirname = "" try: my_rank = 
DistributedHelper.rank if DistributedHelper.is_main_process: tmpdirname = tempfile.mkdtemp() - + tmpdirname = DistributedHelper.broadcast_object(tmpdirname) - + with DistributedHelper.main_process_first(): - for _ in range(2): time.sleep(0.1 + my_rank * 0.05) files = list(os.listdir(tmpdirname)) if DistributedHelper.is_main_process: self.assertEqual(0, len(files)) else: - self.assertIn(f'rank0', files) - self.assertNotIn(f'rank{my_rank}', files) + self.assertIn(f"rank0", files) + self.assertNotIn(f"rank{my_rank}", files) + + with open(os.path.join(tmpdirname, f"rank{my_rank}"), "w") as f: + f.write("ok") - with open(os.path.join(tmpdirname, f'rank{my_rank}'), 'w') \ - as f: - f.write('ok') - for _ in range(2): time.sleep(0.1 + my_rank * 0.05) files = list(os.listdir(tmpdirname)) if DistributedHelper.is_main_process: self.assertEqual(1, len(files)) - self.assertIn(f'rank0', files) + self.assertIn(f"rank0", files) else: - self.assertIn(f'rank0', files) - self.assertIn(f'rank{my_rank}', files) - + self.assertIn(f"rank0", files) + self.assertIn(f"rank{my_rank}", files) + DistributedHelper.barrier() files = set(os.listdir(tmpdirname)) - expect = set([f'rank{rnk}' - for rnk in range(DistributedHelper.world_size)]) + expect = set([f"rank{rnk}" for rnk in range(DistributedHelper.world_size)]) self.assertSetEqual(expect, files) DistributedHelper.barrier() finally: diff --git a/tests/evaluation/test_image_samples.py b/tests/evaluation/test_image_samples.py index 51afd7a8b..2b2561f20 100644 --- a/tests/evaluation/test_image_samples.py +++ b/tests/evaluation/test_image_samples.py @@ -11,9 +11,7 @@ class ImageSamplesTests(unittest.TestCase): def test_image_samples(args): - p_metric = ImagesSamplePlugin( - n_cols=5, n_rows=5, group=True, mode="train" - ) + p_metric = ImagesSamplePlugin(n_cols=5, n_rows=5, group=True, mode="train") scenario = SplitMNIST(5) curr_exp = scenario.train_stream[0] @@ -28,17 +26,13 @@ def test_image_samples(args): # save_image(img_grid, './logs/test_image_grid.png') def test_tensor_samples(args): - p_metric = ImagesSamplePlugin( - n_cols=5, n_rows=5, group=True, mode="train" - ) + p_metric = ImagesSamplePlugin(n_cols=5, n_rows=5, group=True, mode="train") scenario = SplitMNIST(5) curr_exp = scenario.train_stream[0] for mb in DataLoader(curr_exp.dataset, batch_size=32): break - curr_dataset = make_tensor_classification_dataset( - *mb[:2], targets=mb[1] - ) + curr_dataset = make_tensor_classification_dataset(*mb[:2], targets=mb[1]) strategy_mock = MagicMock( eval_mb_size=32, experience=curr_exp, adapted_dataset=curr_dataset diff --git a/tests/run_dist_tests.py b/tests/run_dist_tests.py index 1ef96d7a4..fa8d9f94d 100644 --- a/tests/run_dist_tests.py +++ b/tests/run_dist_tests.py @@ -18,22 +18,25 @@ def get_distributed_test_cases(suite: Union[TestCase, TestSuite]) -> Set[str]: if isinstance(suite, TestCase): case_id = suite.id() - if case_id.startswith('distributed.') or \ - case_id.startswith('tests.distributed.'): + if case_id.startswith("distributed.") or case_id.startswith( + "tests.distributed." 
+ ): found_cases.add(case_id) - if '_FailedTest' in case_id: + if "_FailedTest" in case_id: raise RuntimeError( - f'Errors encountered while listing test cases: {case_id}') + f"Errors encountered while listing test cases: {case_id}" + ) return found_cases @click.command() -@click.argument('test_cases', nargs=-1) +@click.argument("test_cases", nargs=-1) def run_distributed_suites(test_cases): cases_names = get_distributed_test_cases( - unittest.defaultTestLoader.discover('.')) # Don't change the path! + unittest.defaultTestLoader.discover(".") + ) # Don't change the path! cases_names = list(sorted(cases_names)) print(cases_names) if len(test_cases) > 0: @@ -41,41 +44,48 @@ def run_distributed_suites(test_cases): cases_names = [x for x in cases_names if x in test_cases] if set(cases_names) != test_cases: - print('Some cases have not been found!', - test_cases - set(cases_names)) + print("Some cases have not been found!", test_cases - set(cases_names)) sys.exit(1) - print('Running', len(cases_names), 'tests') + print("Running", len(cases_names), "tests") p = None success = True exited = False failed_test_cases = set() - use_gpu_in_tests = os.environ.get('USE_GPU', 'false').lower() in [ - '1', 'true'] + use_gpu_in_tests = os.environ.get("USE_GPU", "false").lower() in ["1", "true"] if use_gpu_in_tests: - print('Running tests using GPUs') + print("Running tests using GPUs") import torch + nproc_per_node = torch.cuda.device_count() else: - print('Running tests using CPU only') + print("Running tests using CPU only") nproc_per_node = 2 for case_name in cases_names: if exited: - print('Exiting due to keyboard interrupt') + print("Exiting due to keyboard interrupt") break - print('Running test:', case_name, flush=True) + print("Running test:", case_name, flush=True) try: my_env = os.environ.copy() - my_env['DISTRIBUTED_TESTS'] = '1' + my_env["DISTRIBUTED_TESTS"] = "1" p = Popen( - ['python', '-m', 'torch.distributed.run', '--nnodes=1', - f'--nproc_per_node={nproc_per_node}', - '-m', 'unittest', case_name], - stdout=sys.stdout, + [ + "python", + "-m", + "torch.distributed.run", + "--nnodes=1", + f"--nproc_per_node={nproc_per_node}", + "-m", + "unittest", + case_name, + ], + stdout=sys.stdout, stderr=sys.stderr, - env=my_env) + env=my_env, + ) p.communicate() except KeyboardInterrupt: success = False @@ -83,7 +93,7 @@ def run_distributed_suites(test_cases): p.send_signal(signal.SIGINT) finally: exit_code = p.wait() - print('Test completed with code', exit_code) + print("Test completed with code", exit_code) success = success and exit_code == 0 p = None @@ -91,15 +101,15 @@ def run_distributed_suites(test_cases): failed_test_cases.add(case_name) if success: - print('Tests completed successfully') + print("Tests completed successfully") sys.exit(0) else: - print('The following tests terminated with errors:') + print("The following tests terminated with errors:") for failed_case in sorted(failed_test_cases): print(failed_case) sys.exit(1) -if __name__ == '__main__': +if __name__ == "__main__": run_distributed_suites() diff --git a/tests/test_avalanche_classification_dataset.py b/tests/test_avalanche_classification_dataset.py index f90aaa0e8..09f2717d3 100644 --- a/tests/test_avalanche_classification_dataset.py +++ b/tests/test_avalanche_classification_dataset.py @@ -48,7 +48,8 @@ _flatdata_print, ) from avalanche.benchmarks.utils.classification_dataset import ( - ClassificationDataset, concat_classification_datasets_sequentially + ClassificationDataset, + concat_classification_datasets_sequentially, ) 
from tests.unit_tests_utils import load_image_data @@ -75,9 +76,7 @@ def get_mbatch(data, batch_size=5): class AvalancheDatasetTests(unittest.TestCase): def test_avalanche_dataset_multi_param_transform(self): - dataset_mnist = MNIST( - root=default_dataset_location("mnist"), download=True - ) + dataset_mnist = MNIST(root=default_dataset_location("mnist"), download=True) ref_instance2_idx = None for instance_idx, (_, instance_y) in enumerate(dataset_mnist): @@ -93,9 +92,7 @@ def test_avalanche_dataset_multi_param_transform(self): break self.assertIsNotNone(ref_instance_idx) - with self.assertWarns( - avalanche.benchmarks.utils.ComposeMaxParamsWarning - ): + with self.assertWarns(avalanche.benchmarks.utils.ComposeMaxParamsWarning): dataset_transform = avalanche.benchmarks.utils.MultiParamCompose( [ToTensor(), zero_if_label_2] ) @@ -105,9 +102,7 @@ def test_avalanche_dataset_multi_param_transform(self): tgs = {"train": dataset_transform, "eval": dataset_transform} x, y = dataset_mnist[ref_instance_idx] - dataset = make_classification_dataset( - dataset_mnist, transform_groups=tgs - ) + dataset = make_classification_dataset(dataset_mnist, transform_groups=tgs) x2, y2, t2 = dataset[ref_instance_idx] self.assertIsInstance(x2, Tensor) @@ -193,9 +188,7 @@ def test_avalanche_dataset_mixed_task_labels(self): ) x, y = dataset_mnist[0] - random_task_labels = [ - random.randint(0, 10) for _ in range(len(dataset_mnist)) - ] + random_task_labels = [random.randint(0, 10) for _ in range(len(dataset_mnist))] dataset = make_classification_dataset( dataset_mnist, transform=ToTensor(), task_labels=random_task_labels ) @@ -208,9 +201,7 @@ def test_avalanche_dataset_mixed_task_labels(self): self.assertTrue(torch.equal(ToTensor()(x), x2)) self.assertEqual(y, y2) - self.assertListEqual( - random_task_labels, list(dataset.targets_task_labels) - ) + self.assertListEqual(random_task_labels, list(dataset.targets_task_labels)) u_labels, counts = np.unique(random_task_labels, return_counts=True) for i, task_label in enumerate(u_labels.tolist()): @@ -219,9 +210,7 @@ def test_avalanche_dataset_mixed_task_labels(self): self.assertEqual(int(counts[i]), len(subset_task)) unique_task_labels = list(subset_task.targets_task_labels) - self.assertListEqual( - [task_label] * int(counts[i]), unique_task_labels - ) + self.assertListEqual([task_label] * int(counts[i]), unique_task_labels) with self.assertRaises(KeyError): subset_task11 = dataset.task_set[11] @@ -265,9 +254,7 @@ def test_avalanche_dataset_task_labels_inheritance(self): dataset_mnist = MNIST( root=expanduser("~") + "/.avalanche/data/mnist/", download=True ) - random_task_labels = [ - random.randint(0, 10) for _ in range(len(dataset_mnist)) - ] + random_task_labels = [random.randint(0, 10) for _ in range(len(dataset_mnist))] dataset_orig = make_classification_dataset( dataset_mnist, transform=ToTensor(), task_labels=random_task_labels ) @@ -282,9 +269,7 @@ def test_avalanche_dataset_task_labels_inheritance(self): self.assertIsInstance(t3, int) self.assertEqual(random_task_labels[0], t3) - self.assertListEqual( - random_task_labels, list(dataset_orig.targets_task_labels) - ) + self.assertListEqual(random_task_labels, list(dataset_orig.targets_task_labels)) self.assertListEqual( random_task_labels, list(dataset_child.targets_task_labels) @@ -530,9 +515,7 @@ def my_collate_fn(patterns): return x_values, y_values, z_values, t_values whole_dataset = TensorDataset(tensor_x, tensor_y, tensor_z) - dataset = make_classification_dataset( - whole_dataset, collate_fn=my_collate_fn 
- ) + dataset = make_classification_dataset(whole_dataset, collate_fn=my_collate_fn) x, y, z, t = dataset[0] self.assertIsInstance(x, Tensor) @@ -576,9 +559,7 @@ def my_collate_fn2(patterns): return x_values, y_values, z_values, t_values whole_dataset = TensorDataset(tensor_x, tensor_y, tensor_z) - dataset = make_classification_dataset( - whole_dataset, collate_fn=my_collate_fn - ) + dataset = make_classification_dataset(whole_dataset, collate_fn=my_collate_fn) inherited = make_classification_dataset( dataset, collate_fn=my_collate_fn2 ) # Ok @@ -889,9 +870,7 @@ def transform_target_plus_one(target_value): self.assertFalse(torch.equal(ToTensor()(x2), x3)) def test_avalanche_avalanche_subset_recursion_transform(self): - dataset_mnist = MNIST( - root=default_dataset_location("mnist"), download=True - ) + dataset_mnist = MNIST(root=default_dataset_location("mnist"), download=True) x, y = dataset_mnist[3000] x2, y2 = dataset_mnist[1010] @@ -927,9 +906,7 @@ def transform_target_plus_one(target_value): self.assertFalse(pil_images_equal(x2, x3)) def test_avalanche_avalanche_subset_recursion_frozen_transform(self): - dataset_mnist = MNIST( - root=default_dataset_location("mnist"), download=True - ) + dataset_mnist = MNIST(root=default_dataset_location("mnist"), download=True) x, y = dataset_mnist[3000] x2, y2 = dataset_mnist[1010] @@ -983,9 +960,7 @@ def transform_target_plus_two(target_value): self.assertFalse(pil_images_equal(x2, x5)) def test_avalanche_avalanche_subset_recursion_sub_class_mapping(self): - dataset_mnist = MNIST( - root=default_dataset_location("mnist"), download=True - ) + dataset_mnist = MNIST(root=default_dataset_location("mnist"), download=True) x, y = dataset_mnist[3000] x2, y2 = dataset_mnist[1010] @@ -1016,18 +991,14 @@ def test_avalanche_avalanche_subset_recursion_sub_class_mapping(self): self.assertFalse(pil_images_equal(x2, x3)) def test_avalanche_avalanche_subset_recursion_up_class_mapping(self): - dataset_mnist = MNIST( - root=default_dataset_location("mnist"), download=True - ) + dataset_mnist = MNIST(root=default_dataset_location("mnist"), download=True) x, y = dataset_mnist[3000] x2, y2 = dataset_mnist[1010] class_mapping = list(range(10)) random.shuffle(class_mapping) - subset = classification_subset( - dataset_mnist, indices=[3000, 8, 4, 1010, 12] - ) + subset = classification_subset(dataset_mnist, indices=[3000, 8, 4, 1010, 12]) dataset = classification_subset( subset, indices=[0, 3, 1], class_mapping=class_mapping @@ -1049,9 +1020,7 @@ def test_avalanche_avalanche_subset_recursion_up_class_mapping(self): self.assertFalse(pil_images_equal(x2, x3)) def test_avalanche_avalanche_subset_recursion_mix_class_mapping(self): - dataset_mnist = MNIST( - root=default_dataset_location("mnist"), download=True - ) + dataset_mnist = MNIST(root=default_dataset_location("mnist"), download=True) x, y = dataset_mnist[3000] x2, y2 = dataset_mnist[1010] @@ -1109,9 +1078,7 @@ def test_avalanche_avalanche_subset_concat_stack_overflow(self): true_indices: List[List[int]] = [] true_indices.append(list(current_indices)) for idx in range(dataset_hierarchy_depth): - current_indices = [ - current_indices[x] for x in random_permutations[idx] - ] + current_indices = [current_indices[x] for x in random_permutations[idx]] true_indices.append(current_indices) true_indices = list(reversed(true_indices)) @@ -1136,9 +1103,7 @@ def test_avalanche_avalanche_subset_concat_stack_overflow(self): for idx_internal in range(idx + 1): # curr_dataset is the concat of idx+1 datasets. 
# Check all of them are permuted correctly - leaf_range = range( - idx_internal * d_sz, (idx_internal + 1) * d_sz - ) + leaf_range = range(idx_internal * d_sz, (idx_internal + 1) * d_sz) permuted = true_indices[idx_internal + intermediate_idx_test] self.assertTrue( torch.equal(tensor_y[permuted], curr_targets[leaf_range]) @@ -1146,9 +1111,7 @@ def test_avalanche_avalanche_subset_concat_stack_overflow(self): self.assertTrue(torch.equal(tensor_y, curr_targets[-d_sz:])) - self.assertEqual( - d_sz * dataset_hierarchy_depth + d_sz, len(curr_dataset) - ) + self.assertEqual(d_sz * dataset_hierarchy_depth + d_sz, len(curr_dataset)) def collect_permuted_data(dataset, indices): x, y, t = [], [], [] @@ -1163,9 +1126,7 @@ def collect_permuted_data(dataset, indices): leaf_range = range(idx * d_sz, (idx + 1) * d_sz) permuted = true_indices[idx] - x_leaf, y_leaf, t_leaf = collect_permuted_data( - curr_dataset, leaf_range - ) + x_leaf, y_leaf, t_leaf = collect_permuted_data(curr_dataset, leaf_range) self.assertTrue(torch.equal(tensor_x[permuted], x_leaf)) self.assertTrue(torch.equal(tensor_y[permuted], y_leaf)) self.assertTrue(torch.equal(tensor_t[permuted], t_leaf)) @@ -1173,19 +1134,13 @@ def collect_permuted_data(dataset, indices): trg_leaf = torch.tensor(curr_dataset.targets)[leaf_range] self.assertTrue(torch.equal(tensor_y[permuted], trg_leaf)) - slice_idxs = list( - range(d_sz * dataset_hierarchy_depth, len(curr_dataset)) - ) - x_slice, y_slice, t_slice = collect_permuted_data( - curr_dataset, slice_idxs - ) + slice_idxs = list(range(d_sz * dataset_hierarchy_depth, len(curr_dataset))) + x_slice, y_slice, t_slice = collect_permuted_data(curr_dataset, slice_idxs) self.assertTrue(torch.equal(tensor_x, x_slice)) self.assertTrue(torch.equal(tensor_y, y_slice)) self.assertTrue(torch.equal(tensor_t, t_slice)) - trg_slice = torch.tensor(curr_dataset.targets)[ - d_sz * dataset_hierarchy_depth : - ] + trg_slice = torch.tensor(curr_dataset.targets)[d_sz * dataset_hierarchy_depth :] self.assertTrue(torch.equal(tensor_y, trg_slice)) # If you broke this test it means that dataset merging is not working @@ -1228,8 +1183,9 @@ def test_avalanche_concat_classification_datasets_sequentially(self): ] # concatenate datasets - final_train, _, classes = \ - concat_classification_datasets_sequentially(train, test) + final_train, _, classes = concat_classification_datasets_sequentially( + train, test + ) # merge all classes into a single list classes_all = [] @@ -1245,9 +1201,7 @@ def test_avalanche_concat_classification_datasets_sequentially(self): class TransformationSubsetTests(unittest.TestCase): def test_avalanche_subset_transform(self): - dataset_mnist = MNIST( - root=default_dataset_location("mnist"), download=True - ) + dataset_mnist = MNIST(root=default_dataset_location("mnist"), download=True) x, y = dataset_mnist[0] dataset = classification_subset(dataset_mnist, transform=ToTensor()) x2, y2, t2 = dataset[0] @@ -1284,9 +1238,7 @@ def test_avalanche_subset_composition(self): self.assertEqual(0, t2) def test_avalanche_subset_indices(self): - dataset_mnist = MNIST( - root=default_dataset_location("mnist"), download=True - ) + dataset_mnist = MNIST(root=default_dataset_location("mnist"), download=True) x, y = dataset_mnist[1000] x2, y2 = dataset_mnist[1007] @@ -1302,9 +1254,7 @@ def test_avalanche_subset_indices(self): self.assertFalse(pil_images_equal(x2, x3)) def test_avalanche_subset_mapping(self): - dataset_mnist = MNIST( - root=default_dataset_location("mnist"), download=True - ) + dataset_mnist = 
MNIST(root=default_dataset_location("mnist"), download=True) _, y = dataset_mnist[1000] mapping = list(range(10)) @@ -1322,9 +1272,7 @@ def test_avalanche_subset_mapping(self): self.assertEqual(y2, swap_y) def test_avalanche_subset_uniform_task_labels(self): - dataset_mnist = MNIST( - root=default_dataset_location("mnist"), download=True - ) + dataset_mnist = MNIST(root=default_dataset_location("mnist"), download=True) x, y = dataset_mnist[1000] x2, y2 = dataset_mnist[1007] @@ -1355,9 +1303,7 @@ def test_avalanche_subset_uniform_task_labels(self): self.assertEqual(1, t4) def test_avalanche_subset_mixed_task_labels(self): - dataset_mnist = MNIST( - root=default_dataset_location("mnist"), download=True - ) + dataset_mnist = MNIST(root=default_dataset_location("mnist"), download=True) x, y = dataset_mnist[1000] x2, y2 = dataset_mnist[1007] @@ -1365,8 +1311,7 @@ def test_avalanche_subset_mixed_task_labels(self): full_task_labels[1000] = 2 # First, test by passing len(task_labels) == len(dataset_mnist) dataset = classification_subset( - dataset_mnist, indices=[1000, 1007], - task_labels=full_task_labels + dataset_mnist, indices=[1000, 1007], task_labels=full_task_labels ) x3, y3, t3 = dataset[0] @@ -1378,8 +1323,7 @@ def test_avalanche_subset_mixed_task_labels(self): # Secondly, test by passing len(task_labels) == len(indices) dataset = classification_subset( - dataset_mnist, indices=[1000, 1007], - task_labels=[3, 5] + dataset_mnist, indices=[1000, 1007], task_labels=[3, 5] ) x3, y3, t3 = dataset[0] @@ -1390,19 +1334,13 @@ def test_avalanche_subset_mixed_task_labels(self): self.assertEqual(5, t4) def test_avalanche_subset_task_labels_inheritance(self): - dataset_mnist = MNIST( - root=default_dataset_location("mnist"), download=True - ) - random_task_labels = [ - random.randint(0, 10) for _ in range(len(dataset_mnist)) - ] + dataset_mnist = MNIST(root=default_dataset_location("mnist"), download=True) + random_task_labels = [random.randint(0, 10) for _ in range(len(dataset_mnist))] dataset_orig = make_classification_dataset( dataset_mnist, transform=ToTensor(), task_labels=random_task_labels ) - dataset_child = classification_subset( - dataset_orig, indices=[1000, 1007] - ) + dataset_child = classification_subset(dataset_orig, indices=[1000, 1007]) _, _, t2 = dataset_orig[1000] _, _, t5 = dataset_orig[1007] _, _, t3 = dataset_child[0] @@ -1413,9 +1351,7 @@ def test_avalanche_subset_task_labels_inheritance(self): self.assertEqual(random_task_labels[1000], t3) self.assertEqual(random_task_labels[1007], t6) - self.assertListEqual( - random_task_labels, list(dataset_orig.targets_task_labels) - ) + self.assertListEqual(random_task_labels, list(dataset_orig.targets_task_labels)) self.assertListEqual( [random_task_labels[1000], random_task_labels[1007]], @@ -1442,9 +1378,7 @@ def my_collate_fn2(patterns): return x_values, y_values, z_values, t_values whole_dataset = TensorDataset(tensor_x, tensor_y, tensor_z) - dataset = make_classification_dataset( - whole_dataset, collate_fn=my_collate_fn - ) + dataset = make_classification_dataset(whole_dataset, collate_fn=my_collate_fn) inherited = classification_subset( dataset, indices=list(range(5, 150)), collate_fn=my_collate_fn2 ) # Ok @@ -1465,14 +1399,11 @@ def my_collate_fn2(patterns): class TransformationTensorDatasetTests(unittest.TestCase): def test_tensor_dataset_helper_tensor_y(self): - train_exps = [ - [torch.rand(50, 32, 32), torch.randint(0, 100, (50,))] - for _ in range(5) + [torch.rand(50, 32, 32), torch.randint(0, 100, (50,))] for _ in range(5) ] 
test_exps = [ - [torch.rand(23, 32, 32), torch.randint(0, 100, (23,))] - for _ in range(5) + [torch.rand(23, 32, 32), torch.randint(0, 100, (23,))] for _ in range(5) ] cl_benchmark = create_generic_benchmark_from_tensor_lists( @@ -1505,12 +1436,8 @@ def test_tensor_dataset_helper_tensor_y(self): ) self.assertEqual(0, cl_benchmark.train_stream[exp_id].task_label) - self.assertTrue( - torch.all(torch.eq(test_exps[exp_id][0], benchmark_test_x)) - ) - self.assertTrue( - torch.all(torch.eq(test_exps[exp_id][1], benchmark_test_y)) - ) + self.assertTrue(torch.all(torch.eq(test_exps[exp_id][0], benchmark_test_x))) + self.assertTrue(torch.all(torch.eq(test_exps[exp_id][1], benchmark_test_y))) self.assertSequenceEqual( test_exps[exp_id][1].tolist(), cl_benchmark.test_stream[exp_id].dataset.targets, @@ -1519,12 +1446,10 @@ def test_tensor_dataset_helper_tensor_y(self): def test_tensor_dataset_helper_list_y(self): train_exps = [ - (torch.rand(50, 32, 32), torch.randint(0, 100, (50,))) - for _ in range(5) + (torch.rand(50, 32, 32), torch.randint(0, 100, (50,))) for _ in range(5) ] test_exps = [ - (torch.rand(23, 32, 32), torch.randint(0, 100, (23,))) - for _ in range(5) + (torch.rand(23, 32, 32), torch.randint(0, 100, (23,))) for _ in range(5) ] cl_benchmark = create_generic_benchmark_from_tensor_lists( @@ -1548,21 +1473,15 @@ def test_tensor_dataset_helper_list_y(self): self.assertTrue( torch.all(torch.eq(train_exps[exp_id][0], benchmark_train_x)) ) - self.assertSequenceEqual( - train_exps[exp_id][1], benchmark_train_y.tolist() - ) + self.assertSequenceEqual(train_exps[exp_id][1], benchmark_train_y.tolist()) self.assertSequenceEqual( train_exps[exp_id][1], cl_benchmark.train_stream[exp_id].dataset.targets, ) self.assertEqual(0, cl_benchmark.train_stream[exp_id].task_label) - self.assertTrue( - torch.all(torch.eq(test_exps[exp_id][0], benchmark_test_x)) - ) - self.assertSequenceEqual( - test_exps[exp_id][1], benchmark_test_y.tolist() - ) + self.assertTrue(torch.all(torch.eq(test_exps[exp_id][0], benchmark_test_x))) + self.assertSequenceEqual(test_exps[exp_id][1], benchmark_test_y.tolist()) self.assertSequenceEqual( test_exps[exp_id][1], cl_benchmark.test_stream[exp_id].dataset.targets, @@ -1572,16 +1491,12 @@ def test_tensor_dataset_helper_list_y(self): class AvalancheDatasetTransformOpsTests(unittest.TestCase): def test_avalanche_inherit_groups(self): - original_dataset = MNIST( - root=default_dataset_location("mnist"), download=True - ) + original_dataset = MNIST(root=default_dataset_location("mnist"), download=True) def plus_one_target(target): return target + 1 - transform_groups = dict( - train=(ToTensor(), None), eval=(None, plus_one_target) - ) + transform_groups = dict(train=(ToTensor(), None), eval=(None, plus_one_target)) x, y = original_dataset[0] dataset = make_classification_dataset( original_dataset, transform_groups=transform_groups @@ -1634,13 +1549,9 @@ def plus_one_target(target): # self.assertEqual(y, y7) def test_avalanche_inherit_groups_freeze_transforms(self): - original_dataset = MNIST( - root=default_dataset_location("mnist"), download=True - ) + original_dataset = MNIST(root=default_dataset_location("mnist"), download=True) - transform_groups = dict( - train=(RandomCrop(16), None), eval=(None, None) - ) + transform_groups = dict(train=(RandomCrop(16), None), eval=(None, None)) dataset = make_classification_dataset( original_dataset, transform_groups=transform_groups ) @@ -1652,14 +1563,10 @@ def test_avalanche_inherit_groups_freeze_transforms(self): dataset_frozen = 
dataset_inherit.freeze_transforms() x2, *_ = dataset_frozen[0] - dataset_frozen_reset = dataset_frozen.replace_current_transform_group( - None - ) + dataset_frozen_reset = dataset_frozen.replace_current_transform_group(None) x3, *_ = dataset_frozen_reset[0] - dataset_reset = dataset_inherit.replace_current_transform_group( - None - ) + dataset_reset = dataset_inherit.replace_current_transform_group(None) x4, *_ = dataset_reset[0] self.assertEqual(x.size, (16, 16)) @@ -1668,13 +1575,9 @@ def test_avalanche_inherit_groups_freeze_transforms(self): self.assertEqual(x4.size, (28, 28)) def test_freeze_transforms(self): - original_dataset = MNIST( - root=default_dataset_location("mnist"), download=True - ) + original_dataset = MNIST(root=default_dataset_location("mnist"), download=True) x, y = original_dataset[0] - dataset = make_classification_dataset( - original_dataset, transform=ToTensor() - ) + dataset = make_classification_dataset(original_dataset, transform=ToTensor()) dataset_frozen = dataset.freeze_transforms() x2, y2, _ = dataset_frozen[0] @@ -1684,40 +1587,32 @@ def test_freeze_transforms(self): self.assertEqual(y, y2) def test_freeze_transforms_subset(self): - original_dataset = MNIST( - root=default_dataset_location("mnist"), download=True - ) + original_dataset = MNIST(root=default_dataset_location("mnist"), download=True) x, y = original_dataset[0] dataset: AvalancheDataset = make_classification_dataset( original_dataset, transform=ToTensor() ) - dataset_subset = dataset.subset( - (1, 2, 3) - ) - + dataset_subset = dataset.subset((1, 2, 3)) + dataset_frozen = dataset_subset.freeze_transforms() x, *_ = dataset_frozen[0] self.assertIsInstance(x, Tensor) - dataset_frozen = dataset_frozen.replace_current_transform_group( - None - ) + dataset_frozen = dataset_frozen.replace_current_transform_group(None) x, *_ = dataset_frozen[0] self.assertIsInstance(x, Tensor) - dataset_frozen_derivative = \ - dataset_frozen.replace_current_transform_group( - ToPILImage()) + dataset_frozen_derivative = dataset_frozen.replace_current_transform_group( + ToPILImage() + ) x, *_ = dataset_frozen[0] x2, *_ = dataset_frozen_derivative[0] self.assertIsInstance(x, Tensor) self.assertIsInstance(x2, Image) - dataset_frozen = dataset_frozen.replace_current_transform_group( - ToPILImage() - ) + dataset_frozen = dataset_frozen.replace_current_transform_group(ToPILImage()) x, *_ = dataset_frozen[0] x2, *_ = dataset_frozen_derivative[0] @@ -1742,15 +1637,11 @@ def test_freeze_transforms_chain(self): self.assertIsInstance(dataset_frozen[0][0], Image) self.assertIsInstance(dataset_transform[0][0], Image) - dataset_transform = dataset_transform.replace_current_transform_group( - None - ) + dataset_transform = dataset_transform.replace_current_transform_group(None) self.assertIsInstance(dataset_transform[0][0], Tensor) self.assertIsInstance(dataset_frozen[0][0], Image) - dataset_frozen = dataset_frozen.replace_current_transform_group( - ToTensor() - ) + dataset_frozen = dataset_frozen.replace_current_transform_group(ToTensor()) self.assertIsInstance(dataset_transform[0][0], Tensor) self.assertIsInstance(dataset_frozen[0][0], Tensor) @@ -1765,13 +1656,9 @@ def test_freeze_transforms_chain(self): self.assertIsInstance(x2, Image) def test_replace_transforms(self): - original_dataset = MNIST( - root=default_dataset_location("mnist"), download=True - ) + original_dataset = MNIST(root=default_dataset_location("mnist"), download=True) x, y = original_dataset[0] - dataset = make_classification_dataset( - original_dataset, 
transform=ToTensor() - ) + dataset = make_classification_dataset(original_dataset, transform=ToTensor()) x2, *_ = dataset[0] dataset_reset = dataset.replace_current_transform_group(None) x3, *_ = dataset_reset[0] @@ -1780,9 +1667,7 @@ def test_replace_transforms(self): self.assertIsInstance(x2, Tensor) self.assertIsInstance(x3, Image) - dataset_reset = dataset_reset.replace_current_transform_group( - ToTensor() - ) + dataset_reset = dataset_reset.replace_current_transform_group(ToTensor()) x4, *_ = dataset_reset[0] self.assertIsInstance(x4, Tensor) @@ -1800,13 +1685,9 @@ def test_replace_transforms(self): self.assertEqual(y + 1, y6) def test_transforms_replace_freeze_mix(self): - original_dataset = MNIST( - root=default_dataset_location("mnist"), download=True - ) + original_dataset = MNIST(root=default_dataset_location("mnist"), download=True) x, _ = original_dataset[0] - dataset = make_classification_dataset( - original_dataset, transform=ToTensor() - ) + dataset = make_classification_dataset(original_dataset, transform=ToTensor()) x2, *_ = dataset[0] dataset_reset = dataset.replace_current_transform_group((None, None)) x3, *_ = dataset_reset[0] @@ -1828,9 +1709,7 @@ def test_transforms_replace_freeze_mix(self): self.assertIsInstance(x5, Tensor) def test_transforms_groups_base_usage(self): - original_dataset = MNIST( - root=default_dataset_location("mnist"), download=True - ) + original_dataset = MNIST(root=default_dataset_location("mnist"), download=True) dataset = make_classification_dataset( original_dataset, transform_groups=dict( @@ -1891,9 +1770,7 @@ def test_transforms_groups_constructor_error(self): ) def test_transforms_groups_alternative_default_group(self): - original_dataset = MNIST( - root=default_dataset_location("mnist"), download=True - ) + original_dataset = MNIST(root=default_dataset_location("mnist"), download=True) dataset = make_classification_dataset( original_dataset, transform_groups=dict(train=(ToTensor(), None), eval=(None, None)), @@ -1911,9 +1788,7 @@ def test_transforms_groups_alternative_default_group(self): self.assertIsInstance(x3, Image) def test_transforms_groups_partial_constructor(self): - original_dataset = MNIST( - root=default_dataset_location("mnist"), download=True - ) + original_dataset = MNIST(root=default_dataset_location("mnist"), download=True) dataset = make_classification_dataset( original_dataset, transform_groups=dict(train=(ToTensor(), None)) ) @@ -1926,18 +1801,14 @@ def test_transforms_groups_partial_constructor(self): self.assertIsInstance(x2, Tensor) def test_transforms_groups_multiple_groups(self): - original_dataset = MNIST( - root=default_dataset_location("mnist"), download=True - ) + original_dataset = MNIST(root=default_dataset_location("mnist"), download=True) dataset = make_classification_dataset( original_dataset, transform_groups=dict( train=(ToTensor(), None), eval=(None, None), other=( - Compose( - [ToTensor(), Lambda(lambda tensor: tensor.numpy())] - ), + Compose([ToTensor(), Lambda(lambda tensor: tensor.numpy())]), None, ), ), @@ -1955,17 +1826,11 @@ def test_transforms_groups_multiple_groups(self): self.assertIsInstance(x3, np.ndarray) def test_transformation_concat_dataset(self): - original_dataset = MNIST( - root=default_dataset_location("mnist"), download=True - ) - original_dataset2 = MNIST( - root=default_dataset_location("mnist"), download=True - ) + original_dataset = MNIST(root=default_dataset_location("mnist"), download=True) + original_dataset2 = MNIST(root=default_dataset_location("mnist"), download=True) 
dataset = concat_datasets([original_dataset, original_dataset2]) - self.assertEqual( - len(original_dataset) + len(original_dataset2), len(dataset) - ) + self.assertEqual(len(original_dataset) + len(original_dataset2), len(dataset)) def test_transformation_concat_dataset_groups(self): original_dataset = make_classification_dataset( @@ -1979,9 +1844,7 @@ def test_transformation_concat_dataset_groups(self): dataset = original_dataset.concat(original_dataset2) - self.assertEqual( - len(original_dataset) + len(original_dataset2), len(dataset) - ) + self.assertEqual(len(original_dataset) + len(original_dataset2), len(dataset)) x, *_ = dataset[0] x2, *_ = dataset[len(original_dataset)] diff --git a/tests/test_cifar100_benchmarks.py b/tests/test_cifar100_benchmarks.py index 7fdd9eac4..80d3265f1 100644 --- a/tests/test_cifar100_benchmarks.py +++ b/tests/test_cifar100_benchmarks.py @@ -36,7 +36,6 @@ def count_downloads_c100(*args, **kwargs): class CIFAR100BenchmarksTests(unittest.TestCase): def setUp(self): - global CIFAR10_DOWNLOAD_METHOD, CIFAR100_DOWNLOAD_METHOD CIFAR10_DOWNLOAD_METHOD = cifar_download.get_cifar10_dataset CIFAR100_DOWNLOAD_METHOD = cifar_download.get_cifar100_dataset diff --git a/tests/test_cifar10_benchmarks.py b/tests/test_cifar10_benchmarks.py index 50a4e6cda..76bceb9ab 100644 --- a/tests/test_cifar10_benchmarks.py +++ b/tests/test_cifar10_benchmarks.py @@ -30,7 +30,6 @@ def setUp(self): cifar_benchmark.get_cifar10_dataset = count_downloads def tearDown(self): - global CIFAR10_DOWNLOAD_METHOD if CIFAR10_DOWNLOAD_METHOD is not None: cifar_download.get_cifar10_dataset = CIFAR10_DOWNLOAD_METHOD diff --git a/tests/test_ctrl.py b/tests/test_ctrl.py index 10c651474..a05dd51e3 100644 --- a/tests/test_ctrl.py +++ b/tests/test_ctrl.py @@ -86,9 +86,7 @@ def test_determinism(self): bench_1 = CTrL(stream, seed=1) bench_2 = CTrL(stream, seed=1) - for exp1, exp2 in zip( - bench_1.train_stream, bench_2.train_stream - ): + for exp1, exp2 in zip(bench_1.train_stream, bench_2.train_stream): for sample1, sample2 in zip(exp1.dataset, exp2.dataset): self.assertTrue(custom_equals(sample1, sample2)) diff --git a/tests/test_custom_streams.py b/tests/test_custom_streams.py index 3746af861..b5a8c61e1 100644 --- a/tests/test_custom_streams.py +++ b/tests/test_custom_streams.py @@ -10,7 +10,6 @@ class CustomStreamsTests(unittest.TestCase): def test_custom_streams_name_and_length(self): - train_exps = [] test_exps = [] valid_exps = [] diff --git a/tests/test_dataloaders.py b/tests/test_dataloaders.py index 87498f8c9..77da7e6dd 100644 --- a/tests/test_dataloaders.py +++ b/tests/test_dataloaders.py @@ -29,7 +29,7 @@ ReplayDataLoader, TaskBalancedDataLoader, GroupBalancedDataLoader, - GroupBalancedInfiniteDataLoader + GroupBalancedInfiniteDataLoader, ) @@ -52,9 +52,7 @@ def get_fast_benchmark(): train_dataset = TensorDataset(train_X, train_y) test_dataset = TensorDataset(test_X, test_y) - my_nc_benchmark = nc_benchmark( - train_dataset, test_dataset, 5, task_labels=True - ) + my_nc_benchmark = nc_benchmark(train_dataset, test_dataset, 5, task_labels=True) return my_nc_benchmark @@ -159,8 +157,14 @@ def test_dataloader_with_multiple_workers(self): # Continual learning strategy cl_strategy = Naive( - model, optimizer, criterion, train_mb_size=32, train_epochs=1, - eval_mb_size=32, device=device) + model, + optimizer, + criterion, + train_mb_size=32, + train_epochs=1, + eval_mb_size=32, + device=device, + ) # test training for one experience train_exp = train_stream[0] diff --git a/tests/test_endless_cl_sim.py 
b/tests/test_endless_cl_sim.py index e9b899bc1..98e524517 100644 --- a/tests/test_endless_cl_sim.py +++ b/tests/test_endless_cl_sim.py @@ -25,7 +25,6 @@ class EndlessCLSimTest(unittest.TestCase): "We don't want to download large datasets in github actions.", ) def test_endless_cl_classification(self): - if "FAST_TEST" in os.environ: pass else: diff --git a/tests/test_fmnist_benckmarks.py b/tests/test_fmnist_benckmarks.py index 3675e9f71..1864c8249 100644 --- a/tests/test_fmnist_benckmarks.py +++ b/tests/test_fmnist_benckmarks.py @@ -16,8 +16,9 @@ class FMNISTBenchmarksTests(unittest.TestCase): def setUp(self): import avalanche.benchmarks.classic.cfashion_mnist as cfashion_mnist - from avalanche.benchmarks.datasets.external_datasets.fmnist import \ - get_fmnist_dataset + from avalanche.benchmarks.datasets.external_datasets.fmnist import ( + get_fmnist_dataset, + ) global MNIST_DOWNLOAD_METHOD MNIST_DOWNLOAD_METHOD = get_fmnist_dataset @@ -27,16 +28,18 @@ def count_downloads(*args, **kwargs): MNIST_DOWNLOADS += 1 return MNIST_DOWNLOAD_METHOD(*args, **kwargs) - avalanche.benchmarks.datasets.external_datasets.fmnist.\ - get_fmnist_dataset = count_downloads + avalanche.benchmarks.datasets.external_datasets.fmnist.get_fmnist_dataset = ( + count_downloads + ) def tearDown(self): global MNIST_DOWNLOAD_METHOD if MNIST_DOWNLOAD_METHOD is not None: import avalanche.benchmarks.classic.cfashion_mnist as cfashion_mnist - avalanche.benchmarks.datasets.external_datasets.fmnist.\ - get_fmnist_dataset = MNIST_DOWNLOAD_METHOD + avalanche.benchmarks.datasets.external_datasets.fmnist.get_fmnist_dataset = ( + MNIST_DOWNLOAD_METHOD + ) MNIST_DOWNLOAD_METHOD = None @unittest.skipIf( diff --git a/tests/test_helper_method.py b/tests/test_helper_method.py index 1d8abbef8..71705cd7c 100644 --- a/tests/test_helper_method.py +++ b/tests/test_helper_method.py @@ -41,12 +41,8 @@ def test_integration(self): modules = [(SimpleMLP(input_size=6), "classifier")] for m, name in modules: self._test_integration(copy.deepcopy(m), name) - self._test_integration( - copy.deepcopy(m), name, plugins=[LwFPlugin()] - ) - self._test_integration( - copy.deepcopy(m), name, plugins=[EWCPlugin(0.5)] - ) + self._test_integration(copy.deepcopy(m), name, plugins=[LwFPlugin()]) + self._test_integration(copy.deepcopy(m), name, plugins=[EWCPlugin(0.5)]) def test_initialisation(self): module = SimpleMLP() @@ -61,9 +57,7 @@ def test_initialisation(self): new_classifier_bias = torch.clone( module.classifier.classifiers["0"].classifier.bias ) - self.assertTrue( - torch.equal(old_classifier_weight, new_classifier_weight) - ) + self.assertTrue(torch.equal(old_classifier_weight, new_classifier_weight)) self.assertTrue(torch.equal(old_classifier_bias, new_classifier_bias)) def _test_outputs(self, module, clf_name): @@ -129,9 +123,7 @@ def _test_modules(self, module, clf_name): def _test_integration(self, module, clf_name, plugins=[]): module = as_multitask(module, clf_name) module = module.to(self.device) - optimizer = SGD( - module.parameters(), lr=0.05, momentum=0.9, weight_decay=0.0002 - ) + optimizer = SGD(module.parameters(), lr=0.05, momentum=0.9, weight_decay=0.0002) strategy = Naive( module, diff --git a/tests/test_high_level_generators.py b/tests/test_high_level_generators.py index c01474907..27003fd29 100644 --- a/tests/test_high_level_generators.py +++ b/tests/test_high_level_generators.py @@ -102,13 +102,9 @@ def test_filelist_benchmark(self): archive_name = os.path.join( expanduser("~") + "/.avalanche/data", "cats_and_dogs_filtered.zip" ) - 
extract_archive( - archive_name, to_path=expanduser("~") + "/.avalanche/data/" - ) + extract_archive(archive_name, to_path=expanduser("~") + "/.avalanche/data/") - dirpath = ( - expanduser("~") + "/.avalanche/data/cats_and_dogs_filtered/train" - ) + dirpath = expanduser("~") + "/.avalanche/data/cats_and_dogs_filtered/train" with tempfile.TemporaryDirectory() as tmpdirname: list_paths = [] @@ -123,9 +119,7 @@ def test_filelist_benchmark(self): list_paths.append(filelist_path) with open(filelist_path, "w") as wf: for name in filenames_list: - wf.write( - "{} {}\n".format(os.path.join(rel_dir, name), label) - ) + wf.write("{} {}\n".format(os.path.join(rel_dir, name), label)) generic_benchmark = filelist_benchmark( dirpath, @@ -150,13 +144,9 @@ def test_paths_benchmark(self): archive_name = os.path.join( expanduser("~") + "/.avalanche/data", "cats_and_dogs_filtered.zip" ) - extract_archive( - archive_name, to_path=expanduser("~") + "/.avalanche/data/" - ) + extract_archive(archive_name, to_path=expanduser("~") + "/.avalanche/data/") - dirpath = ( - expanduser("~") + "/.avalanche/data/cats_and_dogs_filtered/train" - ) + dirpath = expanduser("~") + "/.avalanche/data/cats_and_dogs_filtered/train" train_experiences = [] for rel_dir, label in zip(["cats", "dogs"], [0, 1]): @@ -264,9 +254,7 @@ def test_data_incremental_benchmark(self): for x, y, *_ in exp.dataset: self.assertTrue(torch.equal(ref_tensor_x[tensor_idx], x)) - self.assertTrue( - torch.equal(ref_tensor_y[tensor_idx], torch.tensor(y)) - ) + self.assertTrue(torch.equal(ref_tensor_y[tensor_idx], torch.tensor(y))) tensor_idx += 1 exp = data_incremental_instance.test_stream[0] @@ -346,9 +334,7 @@ def test_gen(): for x, y, *_ in exp.dataset: self.assertTrue(torch.equal(ref_tensor_x[tensor_idx], x)) - self.assertTrue( - torch.equal(ref_tensor_y[tensor_idx], torch.tensor(y)) - ) + self.assertTrue(torch.equal(ref_tensor_y[tensor_idx], torch.tensor(y))) tensor_idx += 1 exp = data_incremental_instance.test_stream[0] @@ -502,8 +488,7 @@ def test_benchmark_with_validation_stream_rel_size(self): # Regression test for #1371 self.assertEquals( - [0], - valid_benchmark.train_stream[0].classes_in_this_experience + [0], valid_benchmark.train_stream[0].classes_in_this_experience ) def test_lazy_benchmark_with_validation_stream_fixed_size(self): @@ -530,9 +515,7 @@ def test_lazy_benchmark_with_validation_stream_fixed_size(self): # Test experience test_x = torch.zeros(50, *pattern_shape) test_y = torch.zeros(50, dtype=torch.long) - experience_test = make_tensor_classification_dataset( - test_x, test_y - ) + experience_test = make_tensor_classification_dataset(test_x, test_y) def train_gen(): # Lazy generator of the training stream @@ -545,9 +528,7 @@ def test_gen(): yield dataset initial_benchmark_instance = create_lazy_generic_benchmark( - train_generator=LazyStreamDefinition( - train_gen(), 2, [0, 0] - ), + train_generator=LazyStreamDefinition(train_gen(), 2, [0, 0]), test_generator=LazyStreamDefinition(test_gen(), 1, [0]), complete_test_set_only=True, ) @@ -579,36 +560,28 @@ def test_gen(): ].exps_data.get_experience_if_loaded(0) self.assertEqual(expect_laziness, maybe_exp is None) - self.assertEqual( - 80, len(valid_benchmark.train_stream[0].dataset) - ) + self.assertEqual(80, len(valid_benchmark.train_stream[0].dataset)) maybe_exp = valid_benchmark.stream_definitions[ "train" ].exps_data.get_experience_if_loaded(1) self.assertEqual(expect_laziness, maybe_exp is None) - self.assertEqual( - 60, len(valid_benchmark.train_stream[1].dataset) - ) + 
self.assertEqual(60, len(valid_benchmark.train_stream[1].dataset)) maybe_exp = valid_benchmark.stream_definitions[ "valid" ].exps_data.get_experience_if_loaded(0) self.assertEqual(expect_laziness, maybe_exp is None) - self.assertEqual( - 20, len(valid_benchmark.valid_stream[0].dataset) - ) + self.assertEqual(20, len(valid_benchmark.valid_stream[0].dataset)) maybe_exp = valid_benchmark.stream_definitions[ "valid" ].exps_data.get_experience_if_loaded(1) self.assertEqual(expect_laziness, maybe_exp is None) - self.assertEqual( - 20, len(valid_benchmark.valid_stream[1].dataset) - ) + self.assertEqual(20, len(valid_benchmark.valid_stream[1].dataset)) self.assertIsNotNone( valid_benchmark.stream_definitions[ diff --git a/tests/test_loggers.py b/tests/test_loggers.py index 7d8215c17..36f3403e3 100644 --- a/tests/test_loggers.py +++ b/tests/test_loggers.py @@ -49,14 +49,10 @@ def test_csv_logger(self): def _test_logger(self, logp): evalp = EvaluationPlugin( - loss_metrics( - minibatch=True, epoch=True, experience=True, stream=True - ), + loss_metrics(minibatch=True, epoch=True, experience=True, stream=True), loggers=[logp], ) - strat = Naive( - self.model, self.optimizer, evaluator=evalp, train_mb_size=32 - ) + strat = Naive(self.model, self.optimizer, evaluator=evalp, train_mb_size=32) for e in self.benchmark.train_stream: strat.train(e) strat.eval(self.benchmark.train_stream) diff --git a/tests/test_metrics.py b/tests/test_metrics.py index dad836723..4eda28233 100644 --- a/tests/test_metrics.py +++ b/tests/test_metrics.py @@ -23,7 +23,7 @@ Forgetting, ForwardTransfer, CumulativeAccuracy, - LabelsRepartition + LabelsRepartition, ) from tests.unit_tests_utils import FAST_TEST, is_github_action @@ -96,25 +96,18 @@ def test_topk_accuracy(self): test_out[6][2] = 0.8 test_out[6][3] = 0.7 - expected_per_k = [ - 2/7, # top-1 - 4/7, # top-2 - 6/7, # top-3 - 1.0 # top-4 - ] + expected_per_k = [2 / 7, 4 / 7, 6 / 7, 1.0] # top-1 # top-2 # top-3 # top-4 for k in range(1, 5): with self.subTest(k=k): test_t_label = k % 2 metric = TopkAccuracy(k) - expected_result = expected_per_k[k-1] - + expected_result = expected_per_k[k - 1] + self.assertEqual(metric.result(), {}) metric.update(test_out, test_y, test_t_label) - self.assertAlmostEqual( - expected_result, - metric.result()[test_t_label]) + self.assertAlmostEqual(expected_result, metric.result()[test_t_label]) metric.reset() self.assertEqual(metric.result(), {}) @@ -383,9 +376,7 @@ def test_amca_two_task_static(self): self.assertDictEqual(metric.result(), {0: my_amca, 1: my_amca2}) metric.next_experience() - self.assertDictEqual( - metric.result(), {0: my_amca * 2 / 3, 1: my_amca2 * 2 / 3} - ) + self.assertDictEqual(metric.result(), {0: my_amca * 2 / 3, 1: my_amca2 * 2 / 3}) metric.reset() self.assertDictEqual(metric.result(), {0: 0.0, 1: 0.0}) @@ -425,9 +416,7 @@ def test_multistream_amca_two_task_dynamic(self): self.assertDictEqual(metric.result(), {"test": {0: my_amca}}) metric.set_stream("train") - self.assertDictEqual( - metric.result(), {"test": {0: my_amca}, "train": {}} - ) + self.assertDictEqual(metric.result(), {"test": {0: my_amca}, "train": {}}) metric.update(my_out2, my_y2, 1) self.assertDictEqual( @@ -447,9 +436,7 @@ def test_multistream_amca_two_task_dynamic(self): ) metric.reset() - self.assertDictEqual( - metric.result(), {"test": {0: 0.0}, "train": {1: 0.0}} - ) + self.assertDictEqual(metric.result(), {"test": {0: 0.0}, "train": {1: 0.0}}) def test_loss(self): metric = TaskAwareLoss() @@ -626,54 +613,30 @@ def test_cumulative_accuracy(self): 
for id in expected_results: self.assertEqual(result[id], expected_results[id]) metric.reset() - + def test_labels_repartition(self): metric = LabelsRepartition() f = metric.result() self.assertEqual(f, {}) - metric.update( - [0, 0, 1, 0, 2, 1, 2], - [1, 1, 2, 2, 3, 3, 5]) - - metric.update( - [0, 3], - [7, 8]) - + metric.update([0, 0, 1, 0, 2, 1, 2], [1, 1, 2, 2, 3, 3, 5]) + + metric.update([0, 3], [7, 8]) + f = metric.result() reference_dict = { - 0: { - 1: 2, - 2: 1, - 7: 1 - }, - 1: { - 2: 1, - 3: 1 - }, - 2: { - 3: 1, - 5: 1 - }, - 3: { - 8: 1 - } + 0: {1: 2, 2: 1, 7: 1}, + 1: {2: 1, 3: 1}, + 2: {3: 1, 5: 1}, + 3: {8: 1}, } self.assertDictEqual(reference_dict, f) metric.update_order([7, 8, 9, 10, 0, 2, 1, 5, 3]) f = metric.result() self.assertDictEqual(reference_dict, f) - self.assertSequenceEqual( - list(f[0].keys()), [7, 2, 1] - ) - self.assertSequenceEqual( - list(f[1].keys()), [2, 3] - ) - self.assertSequenceEqual( - list(f[2].keys()), [5, 3] - ) - self.assertSequenceEqual( - list(f[3].keys()), [8] - ) + self.assertSequenceEqual(list(f[0].keys()), [7, 2, 1]) + self.assertSequenceEqual(list(f[1].keys()), [2, 3]) + self.assertSequenceEqual(list(f[2].keys()), [5, 3]) + self.assertSequenceEqual(list(f[3].keys()), [8]) # Should not return a defaultdict with self.assertRaises(Exception): @@ -683,20 +646,12 @@ def test_labels_repartition(self): metric.update_order([7, 8, 9, 10, 0, 2, 1, 5]) f2 = metric.result() reference_dict2 = { - 0: { - 1: 2, - 2: 1, - 7: 1 - }, + 0: {1: 2, 2: 1, 7: 1}, 1: { 2: 1, }, - 2: { - 5: 1 - }, - 3: { - 8: 1 - } + 2: {5: 1}, + 3: {8: 1}, } self.assertDictEqual(reference_dict2, f2) diff --git a/tests/test_models.py b/tests/test_models.py index b6014b93a..5103d39bc 100644 --- a/tests/test_models.py +++ b/tests/test_models.py @@ -52,9 +52,7 @@ def setUp(self): def test_vgg(self): model = vgg(depth=19, batch_normalization=True, pretrained=False) # Batch norm is activated - self.assertIsInstance( - model.features.stage1.unit1.bn, torch.nn.BatchNorm2d - ) + self.assertIsInstance(model.features.stage1.unit1.bn, torch.nn.BatchNorm2d) # Check correct depth is loaded self.assertEqual(len(model.features.stage5), 5) @@ -72,24 +70,18 @@ def test_resnet(self): def test_pyramidnet(self): model = pyramidnet("cifar10", depth=110) - self.assertIsInstance( - model, pytorchcv.models.pyramidnet_cifar.CIFARPyramidNet - ) + self.assertIsInstance(model, pytorchcv.models.pyramidnet_cifar.CIFARPyramidNet) model = pyramidnet("imagenet", depth=101) self.assertIsInstance(model, pytorchcv.models.pyramidnet.PyramidNet) def test_densenet(self): model = densenet("svhn", depth=40) - self.assertIsInstance( - model, pytorchcv.models.densenet_cifar.CIFARDenseNet - ) + self.assertIsInstance(model, pytorchcv.models.densenet_cifar.CIFARDenseNet) def test_get_model(self): # Check general wrapper and whether downloading pretrained model works model = get_model("resnet10", pretrained=True) - self.assertIsInstance( - model, pytorchcv.models.resnet.ResNet - ) + self.assertIsInstance(model, pytorchcv.models.resnet.ResNet) class DynamicOptimizersTests(unittest.TestCase): @@ -154,9 +146,7 @@ def test_optimizer_update(self): # check new_param is in optimizer # check old_param is NOT in optimizer p_new = torch.nn.Parameter(torch.zeros(10, 10)) - optimized = update_optimizer(optimizer, - {"new_param": p_new}, - {"old_param": p}) + optimized = update_optimizer(optimizer, {"new_param": p_new}, {"old_param": p}) self.assertTrue("new_param" in optimized) self.assertFalse("old_param" in optimized) 
self.assertTrue(self._is_param_in_optimizer(p_new, strategy.optimizer)) @@ -166,7 +156,8 @@ def test_optimizers(self): # SIT scenario model, criterion, benchmark = self.init_scenario(multi_task=True) for optimizer in self._iterate_optimizers( - model, "SGDmom", "Adam", "SGD", "AdamW"): + model, "SGDmom", "Adam", "SGD", "AdamW" + ): strategy = Naive( model, optimizer, @@ -215,11 +206,11 @@ def test_checkpointing(self): # Check that the state has been well serialized self.assertEqual(len(strategy.optimizer.state), len(old_state)) - for (key_new, value_new_dict), (key_old, value_old_dict) in \ - zip(strategy.optimizer.state.items(), old_state.items()): - + for (key_new, value_new_dict), (key_old, value_old_dict) in zip( + strategy.optimizer.state.items(), old_state.items() + ): self.assertTrue(torch.equal(key_new, key_old)) - + value_new = value_new_dict["momentum_buffer"] value_old = value_old_dict["momentum_buffer"] @@ -252,18 +243,15 @@ def _test_optimizer(self, strategy): module = torch.nn.Linear(10, 10) param1 = list(module.parameters())[0] strategy.make_optimizer() - self.assertFalse(self._is_param_in_optimizer(param1, - strategy.optimizer)) + self.assertFalse(self._is_param_in_optimizer(param1, strategy.optimizer)) strategy.model.add_module("new_module", module) strategy.make_optimizer() - self.assertTrue(self._is_param_in_optimizer(param1, - strategy.optimizer)) + self.assertTrue(self._is_param_in_optimizer(param1, strategy.optimizer)) # Remove a parameter del strategy.model.new_module strategy.make_optimizer() - self.assertFalse(self._is_param_in_optimizer(param1, - strategy.optimizer)) + self.assertFalse(self._is_param_in_optimizer(param1, strategy.optimizer)) def get_model(self, multi_task=False): if multi_task: @@ -276,9 +264,7 @@ def get_model(self, multi_task=False): class DynamicModelsTests(unittest.TestCase): def setUp(self): common_setups() - self.benchmark = get_fast_benchmark( - use_task_labels=False, shuffle=False - ) + self.benchmark = get_fast_benchmark(use_task_labels=False, shuffle=False) def test_incremental_classifier(self): model = SimpleMLP(input_size=6, hidden_size=10) @@ -311,9 +297,7 @@ def test_incremental_classifier(self): w_ptr = model.classifier.classifier.weight.data_ptr() b_ptr = model.classifier.classifier.bias.data_ptr() opt_params_ptrs = [ - w.data_ptr() - for group in optimizer.param_groups - for w in group["params"] + w.data_ptr() for group in optimizer.param_groups for w in group["params"] ] # classifier params should be optimized assert w_ptr in opt_params_ptrs @@ -332,9 +316,7 @@ def test_incremental_classifier(self): old_w_ptr, old_b_ptr = w_ptr, b_ptr strategy.train(benchmark.train_stream[4]) opt_params_ptrs = [ - w.data_ptr() - for group in optimizer.param_groups - for w in group["params"] + w.data_ptr() for group in optimizer.param_groups for w in group["params"] ] new_w_ptr = model.classifier.classifier.weight.data_ptr() new_b_ptr = model.classifier.classifier.bias.data_ptr() @@ -421,27 +403,19 @@ def test_multihead_head_creation(self): w_ptr = model.classifier.classifiers["0"].classifier.weight.data_ptr() b_ptr = model.classifier.classifiers["0"].classifier.bias.data_ptr() opt_params_ptrs = [ - w.data_ptr() - for group in optimizer.param_groups - for w in group["params"] + w.data_ptr() for group in optimizer.param_groups for w in group["params"] ] assert w_ptr in opt_params_ptrs assert b_ptr in opt_params_ptrs # head update strategy.train(benchmark.train_stream[4]) - w_ptr_t0 = model.classifier.classifiers[ - "0" - 
].classifier.weight.data_ptr() + w_ptr_t0 = model.classifier.classifiers["0"].classifier.weight.data_ptr() b_ptr_t0 = model.classifier.classifiers["0"].classifier.bias.data_ptr() - w_ptr_new = model.classifier.classifiers[ - "4" - ].classifier.weight.data_ptr() + w_ptr_new = model.classifier.classifiers["4"].classifier.weight.data_ptr() b_ptr_new = model.classifier.classifiers["4"].classifier.bias.data_ptr() opt_params_ptrs = [ - w.data_ptr() - for group in optimizer.param_groups - for w in group["params"] + w.data_ptr() for group in optimizer.param_groups for w in group["params"] ] assert w_ptr not in opt_params_ptrs # head0 has been updated @@ -560,9 +534,7 @@ def test_multi_head_classifier_masking(self): mb, _, tmb = get_mbatch(exp.dataset, batch_size=7) out = model(mb, tmb) assert torch.all(out[:, curr_au] != model.mask_value) - assert torch.all( - out[:, :nunits][:, curr_mask == 0] == model.mask_value - ) + assert torch.all(out[:, :nunits][:, curr_mask == 0] == model.mask_value) # check masking after adaptation on the entire stream for tid, exp in enumerate(benchmark.train_stream): curr_au = exp.classes_in_this_experience @@ -574,18 +546,14 @@ def test_multi_head_classifier_masking(self): mb, _, tmb = get_mbatch(exp.dataset) out = model(mb, tmb) assert torch.all(out[:, curr_au] != model.mask_value) - assert torch.all( - out[:, :nunits][:, curr_mask == 0] == model.mask_value - ) + assert torch.all(out[:, :nunits][:, curr_mask == 0] == model.mask_value) class TrainEvalModelTests(unittest.TestCase): def test_classifier_selection(self): base_model = SimpleCNN() - feature_extractor = torch.nn.Sequential( - base_model.features, - torch.nn.Flatten()) + feature_extractor = torch.nn.Sequential(base_model.features, torch.nn.Flatten()) classifier1 = base_model.classifier classifier2 = torch.nn.Linear(64, 7) @@ -635,11 +603,23 @@ def test_ncm_class_expansion(self): classifier = NCMClassifier() classifier.update_class_means_dict(class_means_dict) assert classifier.class_means.shape == (3, 4) - new_mean = torch.randn(4,) + new_mean = torch.randn( + 4, + ) classifier.update_class_means_dict({5: new_mean.clone()}) assert classifier.class_means.shape == (6, 4) - assert torch.all(classifier.class_means[3] == torch.zeros(4,)) - assert torch.all(classifier.class_means[4] == torch.zeros(4,)) + assert torch.all( + classifier.class_means[3] + == torch.zeros( + 4, + ) + ) + assert torch.all( + classifier.class_means[4] + == torch.zeros( + 4, + ) + ) assert torch.all(classifier.class_means[5] == new_mean) def test_ncm_replace_means(self): @@ -660,12 +640,20 @@ def test_ncm_replace_means(self): def test_ncm_save_load(self): classifier = NCMClassifier() - classifier.update_class_means_dict({1: torch.randn(5,), - 2: torch.randn(5,)}) - torch.save(classifier.state_dict(), 'ncm.pt') + classifier.update_class_means_dict( + { + 1: torch.randn( + 5, + ), + 2: torch.randn( + 5, + ), + } + ) + torch.save(classifier.state_dict(), "ncm.pt") del classifier classifier = NCMClassifier() - check = torch.load('ncm.pt') + check = torch.load("ncm.pt") classifier.load_state_dict(check) assert classifier.class_means.shape == (3, 5) assert (classifier.class_means[0] == 0).all() diff --git a/tests/test_nc_mt_scenario.py b/tests/test_nc_mt_scenario.py index 16fff928d..d63c0a131 100644 --- a/tests/test_nc_mt_scenario.py +++ b/tests/test_nc_mt_scenario.py @@ -46,9 +46,7 @@ def test_mt_single_dataset(self): all_classes = set() all_original_classes = set() for task_id in range(5): - all_classes.update( - 
my_nc_benchmark.classes_in_experience["train"][task_id] - ) + all_classes.update(my_nc_benchmark.classes_in_experience["train"][task_id]) all_original_classes.update( my_nc_benchmark.original_classes_in_exp[task_id] ) @@ -86,9 +84,7 @@ def test_mt_single_dataset_without_class_id_remap(self): all_classes = set() for task_id in range(my_nc_benchmark.n_experiences): - all_classes.update( - my_nc_benchmark.classes_in_experience["train"][task_id] - ) + all_classes.update(my_nc_benchmark.classes_in_experience["train"][task_id]) self.assertEqual(10, len(all_classes)) @@ -115,9 +111,7 @@ def test_mt_single_dataset_fixed_order(self): all_classes = [] for task_id in range(5): - all_classes.extend( - my_nc_benchmark.classes_in_experience["train"][task_id] - ) + all_classes.extend(my_nc_benchmark.classes_in_experience["train"][task_id]) self.assertEqual(order, all_classes) @@ -153,9 +147,7 @@ def test_sit_single_dataset_fixed_order_subset(self): set(order[task_id * 2 : (task_id + 1) * 2]), my_nc_benchmark.original_classes_in_exp[task_id], ) - all_classes.extend( - my_nc_benchmark.classes_in_experience["train"][task_id] - ) + all_classes.extend(my_nc_benchmark.classes_in_experience["train"][task_id]) self.assertEqual([0, 1] * 4, all_classes) @@ -191,9 +183,7 @@ def test_sit_single_dataset_fixed_subset_no_remap_idx(self): set(order[task_id * 4 : (task_id + 1) * 4]), my_nc_benchmark.original_classes_in_exp[task_id], ) - all_classes.update( - my_nc_benchmark.classes_in_experience["train"][task_id] - ) + all_classes.update(my_nc_benchmark.classes_in_experience["train"][task_id]) self.assertEqual(set(order), all_classes) @@ -260,20 +250,12 @@ def test_mt_single_dataset_task_size(self): all_classes = set() for task_id in range(3): - all_classes.update( - my_nc_benchmark.classes_in_experience["train"][task_id] - ) + all_classes.update(my_nc_benchmark.classes_in_experience["train"][task_id]) self.assertEqual(5, len(all_classes)) - self.assertEqual( - 5, len(my_nc_benchmark.classes_in_experience["train"][0]) - ) - self.assertEqual( - 3, len(my_nc_benchmark.classes_in_experience["train"][1]) - ) - self.assertEqual( - 2, len(my_nc_benchmark.classes_in_experience["train"][2]) - ) + self.assertEqual(5, len(my_nc_benchmark.classes_in_experience["train"][0])) + self.assertEqual(3, len(my_nc_benchmark.classes_in_experience["train"][1])) + self.assertEqual(2, len(my_nc_benchmark.classes_in_experience["train"][2])) def test_mt_multi_dataset_one_task_per_set(self): split_mapping = [0, 1, 2, 0, 1, 2, 3, 4, 5, 6] @@ -288,25 +270,15 @@ def test_mt_multi_dataset_one_task_per_set(self): download=True, ) - train_part1 = make_nc_transformation_subset( - mnist_train, None, None, range(3) - ) + train_part1 = make_nc_transformation_subset(mnist_train, None, None, range(3)) train_part2 = make_nc_transformation_subset( mnist_train, None, None, range(3, 10) ) - train_part2 = classification_subset( - train_part2, class_mapping=split_mapping - ) + train_part2 = classification_subset(train_part2, class_mapping=split_mapping) - test_part1 = make_nc_transformation_subset( - mnist_test, None, None, range(3) - ) - test_part2 = make_nc_transformation_subset( - mnist_test, None, None, range(3, 10) - ) - test_part2 = classification_subset( - test_part2, class_mapping=split_mapping - ) + test_part1 = make_nc_transformation_subset(mnist_test, None, None, range(3)) + test_part2 = make_nc_transformation_subset(mnist_test, None, None, range(3, 10)) + test_part2 = classification_subset(test_part2, class_mapping=split_mapping) my_nc_benchmark = 
nc_benchmark( [train_part1, train_part2], [test_part1, test_part2], @@ -352,10 +324,8 @@ def test_mt_multi_dataset_one_task_per_set(self): == set(range(0, 7)) ) or ( - my_nc_benchmark.classes_in_experience["train"][0] - == set(range(0, 7)) - and my_nc_benchmark.classes_in_experience["train"][1] - == {0, 1, 2} + my_nc_benchmark.classes_in_experience["train"][0] == set(range(0, 7)) + and my_nc_benchmark.classes_in_experience["train"][1] == {0, 1, 2} ) ) @@ -384,12 +354,11 @@ def test_nc_utils_corner_cases(self): download=True, ) - unique_train_targets, train_targets_count = \ - torch.as_tensor(mnist_train.targets).unique(return_counts=True) + unique_train_targets, train_targets_count = torch.as_tensor( + mnist_train.targets + ).unique(return_counts=True) - train_part1 = make_nc_transformation_subset( - mnist_train, None, None, None - ) + train_part1 = make_nc_transformation_subset(mnist_train, None, None, None) test_part1 = make_nc_transformation_subset( mnist_test, None, None, None, bucket_classes=True ) @@ -416,25 +385,20 @@ def test_nc_utils_corner_cases(self): self.assertEqual(len(mnist_test), len(test_exp.dataset)) other_split = make_nc_transformation_subset( - mnist_train, None, None, None, - bucket_classes=False, - sort_indexes=True + mnist_train, None, None, None, bucket_classes=False, sort_indexes=True ) for b, s in zip([False, True], [False, True]): other_split = make_nc_transformation_subset( - mnist_train, None, None, None, - bucket_classes=b, - sort_indexes=s + mnist_train, None, None, None, bucket_classes=b, sort_indexes=s ) self.assertEqual(len(mnist_train), len(other_split)) - unique_other_targets, other_targets_count = \ - torch.as_tensor(other_split.targets).unique(return_counts=True) - self.assertTrue(torch.equal(unique_train_targets, - unique_other_targets)) - self.assertTrue(torch.equal(train_targets_count, - other_targets_count)) + unique_other_targets, other_targets_count = torch.as_tensor( + other_split.targets + ).unique(return_counts=True) + self.assertTrue(torch.equal(unique_train_targets, unique_other_targets)) + self.assertTrue(torch.equal(train_targets_count, other_targets_count)) def test_nc_mt_slicing(self): mnist_train = MNIST( @@ -472,9 +436,7 @@ def test_nc_mt_slicing(self): self.assertEqual("train", sliced_stream.name) for batch_id, experience in enumerate(sliced_stream): - self.assertEqual( - iterable_slice[batch_id], experience.current_experience - ) + self.assertEqual(iterable_slice[batch_id], experience.current_experience) self.assertIsInstance(experience, NCExperience) sliced_stream = my_nc_benchmark.test_stream[iterable_slice] @@ -483,9 +445,7 @@ def test_nc_mt_slicing(self): self.assertEqual("test", sliced_stream.name) for batch_id, experience in enumerate(sliced_stream): - self.assertEqual( - iterable_slice[batch_id], experience.current_experience - ) + self.assertEqual(iterable_slice[batch_id], experience.current_experience) self.assertIsInstance(experience, NCExperience) diff --git a/tests/test_nc_sit_scenario.py b/tests/test_nc_sit_scenario.py index 1044bfef8..df158f61a 100644 --- a/tests/test_nc_sit_scenario.py +++ b/tests/test_nc_sit_scenario.py @@ -49,9 +49,7 @@ def test_sit_single_dataset(self): all_classes = set() for batch_id in range(5): - all_classes.update( - my_nc_benchmark.classes_in_experience["train"][batch_id] - ) + all_classes.update(my_nc_benchmark.classes_in_experience["train"][batch_id]) self.assertEqual(10, len(all_classes)) @@ -77,9 +75,7 @@ def test_sit_single_dataset_fixed_order(self): all_classes = [] for batch_id in 
range(5): - all_classes.extend( - my_nc_benchmark.classes_in_experience["train"][batch_id] - ) + all_classes.extend(my_nc_benchmark.classes_in_experience["train"][batch_id]) self.assertEqual(order, all_classes) @@ -110,9 +106,7 @@ def test_sit_single_dataset_fixed_order_subset(self): self.assertEqual( 2, len(my_nc_benchmark.classes_in_experience["train"][batch_id]) ) - all_classes.update( - my_nc_benchmark.classes_in_experience["train"][batch_id] - ) + all_classes.update(my_nc_benchmark.classes_in_experience["train"][batch_id]) self.assertEqual(set(order), all_classes) @@ -144,9 +138,7 @@ def test_sit_single_dataset_remap_indexes(self): self.assertEqual( 2, len(my_nc_benchmark.classes_in_experience["train"][batch_id]) ) - all_classes.extend( - my_nc_benchmark.classes_in_experience["train"][batch_id] - ) + all_classes.extend(my_nc_benchmark.classes_in_experience["train"][batch_id]) self.assertEqual(list(range(8)), all_classes) # Regression test for issue #258 @@ -154,9 +146,7 @@ def test_sit_single_dataset_remap_indexes(self): unique_dataset_classes = sorted(set(experience.dataset.targets)) expected_dataset_classes = list(range(2 * i, 2 * (i + 1))) - self.assertListEqual( - expected_dataset_classes, unique_dataset_classes - ) + self.assertListEqual(expected_dataset_classes, unique_dataset_classes) self.assertListEqual( sorted(order[2 * i : 2 * (i + 1)]), sorted(my_nc_benchmark.original_classes_in_exp[i]), @@ -204,9 +194,7 @@ def test_sit_single_dataset_remap_indexes_each_exp(self): self.assertEqual( 2, len(my_nc_benchmark.classes_in_experience["train"][batch_id]) ) - all_classes.extend( - my_nc_benchmark.classes_in_experience["train"][batch_id] - ) + all_classes.extend(my_nc_benchmark.classes_in_experience["train"][batch_id]) self.assertEqual(8, len(all_classes)) self.assertListEqual([0, 1], sorted(set(all_classes))) @@ -214,9 +202,7 @@ def test_sit_single_dataset_remap_indexes_each_exp(self): for i, experience in enumerate(my_nc_benchmark.train_stream): unique_dataset_classes = sorted(set(experience.dataset.targets)) expected_dataset_classes = [0, 1] - self.assertListEqual( - expected_dataset_classes, unique_dataset_classes - ) + self.assertListEqual(expected_dataset_classes, unique_dataset_classes) self.assertListEqual( sorted(order[2 * i : 2 * (i + 1)]), sorted(my_nc_benchmark.original_classes_in_exp[i]), @@ -285,20 +271,12 @@ def test_sit_single_dataset_batch_size(self): all_classes = set() for batch_id in range(3): - all_classes.update( - my_nc_benchmark.classes_in_experience["train"][batch_id] - ) + all_classes.update(my_nc_benchmark.classes_in_experience["train"][batch_id]) self.assertEqual(10, len(all_classes)) - self.assertEqual( - 5, len(my_nc_benchmark.classes_in_experience["train"][0]) - ) - self.assertEqual( - 3, len(my_nc_benchmark.classes_in_experience["train"][1]) - ) - self.assertEqual( - 2, len(my_nc_benchmark.classes_in_experience["train"][2]) - ) + self.assertEqual(5, len(my_nc_benchmark.classes_in_experience["train"][0])) + self.assertEqual(3, len(my_nc_benchmark.classes_in_experience["train"][1])) + self.assertEqual(2, len(my_nc_benchmark.classes_in_experience["train"][2])) def test_sit_multi_dataset_one_batch_per_set(self): split_mapping = [0, 1, 2, 0, 1, 2, 3, 4, 5, 6] @@ -313,25 +291,15 @@ def test_sit_multi_dataset_one_batch_per_set(self): download=True, ) - train_part1 = make_nc_transformation_subset( - mnist_train, None, None, range(3) - ) + train_part1 = make_nc_transformation_subset(mnist_train, None, None, range(3)) train_part2 = make_nc_transformation_subset( 
mnist_train, None, None, range(3, 10) ) - train_part2 = classification_subset( - train_part2, class_mapping=split_mapping - ) + train_part2 = classification_subset(train_part2, class_mapping=split_mapping) - test_part1 = make_nc_transformation_subset( - mnist_test, None, None, range(3) - ) - test_part2 = make_nc_transformation_subset( - mnist_test, None, None, range(3, 10) - ) - test_part2 = classification_subset( - test_part2, class_mapping=split_mapping - ) + test_part1 = make_nc_transformation_subset(mnist_test, None, None, range(3)) + test_part2 = make_nc_transformation_subset(mnist_test, None, None, range(3, 10)) + test_part2 = classification_subset(test_part2, class_mapping=split_mapping) my_nc_benchmark = nc_benchmark( [train_part1, train_part2], [test_part1, test_part2], @@ -347,9 +315,7 @@ def test_sit_multi_dataset_one_batch_per_set(self): all_classes = set() for batch_id in range(2): - all_classes.update( - my_nc_benchmark.classes_in_experience["train"][batch_id] - ) + all_classes.update(my_nc_benchmark.classes_in_experience["train"][batch_id]) self.assertEqual(10, len(all_classes)) @@ -360,10 +326,8 @@ def test_sit_multi_dataset_one_batch_per_set(self): == set(range(3, 10)) ) or ( - my_nc_benchmark.classes_in_experience["train"][0] - == set(range(3, 10)) - and my_nc_benchmark.classes_in_experience["train"][1] - == {0, 1, 2} + my_nc_benchmark.classes_in_experience["train"][0] == set(range(3, 10)) + and my_nc_benchmark.classes_in_experience["train"][1] == {0, 1, 2} ) ) @@ -380,25 +344,15 @@ def test_sit_multi_dataset_merge(self): download=True, ) - train_part1 = make_nc_transformation_subset( - mnist_train, None, None, range(5) - ) + train_part1 = make_nc_transformation_subset(mnist_train, None, None, range(5)) train_part2 = make_nc_transformation_subset( mnist_train, None, None, range(5, 10) ) - train_part2 = classification_subset( - train_part2, class_mapping=split_mapping - ) + train_part2 = classification_subset(train_part2, class_mapping=split_mapping) - test_part1 = make_nc_transformation_subset( - mnist_test, None, None, range(5) - ) - test_part2 = make_nc_transformation_subset( - mnist_test, None, None, range(5, 10) - ) - test_part2 = classification_subset( - test_part2, class_mapping=split_mapping - ) + test_part1 = make_nc_transformation_subset(mnist_test, None, None, range(5)) + test_part2 = make_nc_transformation_subset(mnist_test, None, None, range(5, 10)) + test_part2 = classification_subset(test_part2, class_mapping=split_mapping) my_nc_benchmark = nc_benchmark( [train_part1, train_part2], [test_part1, test_part2], @@ -417,9 +371,7 @@ def test_sit_multi_dataset_merge(self): all_classes = set() for batch_id in range(5): - all_classes.update( - my_nc_benchmark.classes_in_experience["train"][batch_id] - ) + all_classes.update(my_nc_benchmark.classes_in_experience["train"][batch_id]) self.assertEqual(10, len(all_classes)) @@ -459,9 +411,7 @@ def test_nc_sit_slicing(self): self.assertEqual("train", sliced_stream.name) for batch_id, experience in enumerate(sliced_stream): - self.assertEqual( - iterable_slice[batch_id], experience.current_experience - ) + self.assertEqual(iterable_slice[batch_id], experience.current_experience) self.assertIsInstance(experience, NCExperience) sliced_stream = my_nc_benchmark.test_stream[iterable_slice] @@ -470,9 +420,7 @@ def test_nc_sit_slicing(self): self.assertEqual("test", sliced_stream.name) for batch_id, experience in enumerate(sliced_stream): - self.assertEqual( - iterable_slice[batch_id], experience.current_experience - ) + 
self.assertEqual(iterable_slice[batch_id], experience.current_experience) self.assertIsInstance(experience, NCExperience) def test_nc_benchmark_transformations_basic(self): @@ -554,13 +502,13 @@ def test_nc_benchmark_classes_in_exp_range(self): fixed_class_order=random_class_order, shuffle=False, ) - + cie_data = benchmark_instance.classes_in_exp_range(0, None) self.assertEqual(100, len(cie_data)) for i in range(5): expected = set(random_class_order[i * 20 : (i + 1) * 20]) - self.assertSetEqual(expected, set(cie_data[i*20: (i+1)*20])) + self.assertSetEqual(expected, set(cie_data[i * 20 : (i + 1) * 20])) if __name__ == "__main__": diff --git a/tests/test_ni_sit_scenario.py b/tests/test_ni_sit_scenario.py index 9b0c6ed91..d80495248 100644 --- a/tests/test_ni_sit_scenario.py +++ b/tests/test_ni_sit_scenario.py @@ -45,9 +45,7 @@ def test_ni_sit_single_dataset(self): torch.as_tensor(mnist_train.targets), return_counts=True ) - min_batch_size = torch.sum( - unique_count // my_ni_benchmark.n_experiences - ).item() + min_batch_size = torch.sum(unique_count // my_ni_benchmark.n_experiences).item() max_batch_size = min_batch_size + my_ni_benchmark.n_classes pattern_count = 0 @@ -87,9 +85,7 @@ def test_ni_sit_single_dataset_fixed_assignment(self): mnist_train, mnist_test, 5, shuffle=True, seed=1234 ) - reference_assignment = ( - ni_benchmark_reference.train_exps_patterns_assignment - ) + reference_assignment = ni_benchmark_reference.train_exps_patterns_assignment my_ni_benchmark = ni_benchmark( mnist_train, @@ -160,25 +156,15 @@ def test_ni_sit_multi_dataset_merge(self): download=True, ) - train_part1 = make_nc_transformation_subset( - mnist_train, None, None, range(5) - ) + train_part1 = make_nc_transformation_subset(mnist_train, None, None, range(5)) train_part2 = make_nc_transformation_subset( mnist_train, None, None, range(5, 10) ) - train_part2 = classification_subset( - train_part2, class_mapping=split_mapping - ) + train_part2 = classification_subset(train_part2, class_mapping=split_mapping) - test_part1 = make_nc_transformation_subset( - mnist_test, None, None, range(5) - ) - test_part2 = make_nc_transformation_subset( - mnist_test, None, None, range(5, 10) - ) - test_part2 = classification_subset( - test_part2, class_mapping=split_mapping - ) + test_part1 = make_nc_transformation_subset(mnist_test, None, None, range(5)) + test_part2 = make_nc_transformation_subset(mnist_test, None, None, range(5, 10)) + test_part2 = classification_subset(test_part2, class_mapping=split_mapping) my_ni_benchmark = ni_benchmark( [train_part1, train_part2], [test_part1, test_part2], @@ -198,9 +184,7 @@ def test_ni_sit_multi_dataset_merge(self): all_classes = set() for batch_id in range(5): - all_classes.update( - my_ni_benchmark.classes_in_experience["train"][batch_id] - ) + all_classes.update(my_ni_benchmark.classes_in_experience["train"][batch_id]) self.assertEqual(10, len(all_classes)) @@ -236,9 +220,7 @@ def test_ni_sit_slicing(self): self.assertEqual("train", sliced_stream.name) for batch_id, experience in enumerate(sliced_stream): - self.assertEqual( - iterable_slice[batch_id], experience.current_experience - ) + self.assertEqual(iterable_slice[batch_id], experience.current_experience) self.assertIsInstance(experience, NIExperience) with self.assertRaises(IndexError): @@ -252,9 +234,7 @@ def test_ni_sit_slicing(self): self.assertEqual("test", sliced_stream.name) for batch_id, experience in enumerate(sliced_stream): - self.assertEqual( - iterable_slice[batch_id], experience.current_experience - ) + 
self.assertEqual(iterable_slice[batch_id], experience.current_experience) self.assertIsInstance(experience, NIExperience) diff --git a/tests/test_tinyimagenet.py b/tests/test_tinyimagenet.py index c2f85ad7e..54a64828c 100644 --- a/tests/test_tinyimagenet.py +++ b/tests/test_tinyimagenet.py @@ -26,7 +26,6 @@ class TinyImagenetTest(unittest.TestCase): "We don't want to download large datasets in github actions.", ) def test_tinyimagenet_default_loader(self): - logger = logging.getLogger("avalanche") logger.setLevel(logging.INFO) logger.addHandler(logging.StreamHandler()) diff --git a/tests/training/test_ar1.py b/tests/training/test_ar1.py index de0b6a8e0..3c9afcf1a 100644 --- a/tests/training/test_ar1.py +++ b/tests/training/test_ar1.py @@ -22,9 +22,7 @@ class AR1Test(unittest.TestCase): def test_ar1(self): my_nc_benchmark = self.load_ar1_benchmark() - strategy = AR1( - train_epochs=1, train_mb_size=10, eval_mb_size=10, rm_sz=20 - ) + strategy = AR1(train_epochs=1, train_mb_size=10, eval_mb_size=10, rm_sz=20) run_strategy(my_nc_benchmark, strategy) def load_ar1_benchmark(self): diff --git a/tests/training/test_dictionary_mbatches.py b/tests/training/test_dictionary_mbatches.py index 51b8c9f9c..43fcd4514 100644 --- a/tests/training/test_dictionary_mbatches.py +++ b/tests/training/test_dictionary_mbatches.py @@ -83,15 +83,13 @@ def test_dictionary_train_replay(self): exp.dataset = av_data train_exps.append(exp) test_exps.append(exp) - + train_stream = CLStream("train", train_exps, None) test_stream = CLStream("test", test_exps, None) - benchmark = CLScenario( - [train_stream, test_stream] - ) + benchmark = CLScenario([train_stream, test_stream]) train_stream.benchmark = benchmark test_stream.benchmark = benchmark - + eval_plugin = avalanche.training.plugins.EvaluationPlugin( avalanche.evaluation.metrics.loss_metrics( epoch=True, experience=True, stream=True diff --git a/tests/training/test_online_strategies.py b/tests/training/test_online_strategies.py index 14021764c..4c6db0b23 100644 --- a/tests/training/test_online_strategies.py +++ b/tests/training/test_online_strategies.py @@ -55,8 +55,7 @@ def test_naive(self): eval_mb_size=50, evaluator=default_evaluator, ) - ocl_benchmark = OnlineCLScenario(benchmark_streams, - access_task_boundaries=True) + ocl_benchmark = OnlineCLScenario(benchmark_streams, access_task_boundaries=True) self.run_strategy_boundaries(ocl_benchmark, strategy) # Without task boundaries @@ -70,8 +69,9 @@ def test_naive(self): eval_mb_size=50, evaluator=default_evaluator, ) - ocl_benchmark = OnlineCLScenario(benchmark_streams, - access_task_boundaries=False) + ocl_benchmark = OnlineCLScenario( + benchmark_streams, access_task_boundaries=False + ) self.run_strategy_no_boundaries(ocl_benchmark, strategy) def load_benchmark(self, use_task_labels=False): diff --git a/tests/training/test_plugins.py b/tests/training/test_plugins.py index c98a40ee0..04e591f99 100644 --- a/tests/training/test_plugins.py +++ b/tests/training/test_plugins.py @@ -312,7 +312,6 @@ def _test_scheduler_multi_step_lr_plugin( first_epoch_only=False, first_exp_only=False, ): - benchmark = PluginTests.create_benchmark(n_samples_per_class=20) model = _PlainMLP(input_size=6, hidden_size=10) optim = SGD(model.parameters(), lr=base_lr) @@ -341,15 +340,13 @@ def assert_model_equals(self, model1, model2): self.assertSetEqual(set(dict1.keys()), set(dict2.keys())) # compare params - for (k, v) in dict1.items(): + for k, v in dict1.items(): self.assertTrue(torch.equal(v, dict2[k])) def assert_benchmark_equals( self, 
bench1: GenericCLScenario, bench2: GenericCLScenario ): - self.assertSetEqual( - set(bench1.streams.keys()), set(bench2.streams.keys()) - ) + self.assertSetEqual(set(bench1.streams.keys()), set(bench2.streams.keys())) def get_mbatch(data, batch_size=5): dl = DataLoader( @@ -367,19 +364,11 @@ def get_mbatch(data, batch_size=5): dataset1 = exp1.dataset dataset2 = exp2.dataset for t_idx in range(3): - dataset1_content = get_mbatch(dataset1, len(dataset1))[ - t_idx - ] - dataset2_content = get_mbatch(dataset2, len(dataset2))[:][ - t_idx - ] - self.assertTrue( - torch.equal(dataset1_content, dataset2_content) - ) + dataset1_content = get_mbatch(dataset1, len(dataset1))[t_idx] + dataset2_content = get_mbatch(dataset2, len(dataset2))[:][t_idx] + self.assertTrue(torch.equal(dataset1_content, dataset2_content)) - def _verify_rop_tests_reproducibility( - self, init_strategy, n_epochs, criterion - ): + def _verify_rop_tests_reproducibility(self, init_strategy, n_epochs, criterion): # This doesn't actually test the support for the specific scheduler # (ReduceLROnPlateau), but it's only used to check if: # - the same model+benchmark pair can be instantiated in a @@ -440,9 +429,7 @@ def test_scheduler_reduce_on_plateau_plugin(self): def _prepare_rng_critical_parts(seed=1234): torch.random.manual_seed(seed) return ( - PluginTests.create_benchmark( - seed=seed, n_samples_per_class=100 - ), + PluginTests.create_benchmark(seed=seed, n_samples_per_class=100), _PlainMLP(input_size=6, hidden_size=10), ) @@ -565,8 +552,7 @@ def _prepare_rng_critical_parts(seed=1234): @unittest.skipIf( FAST_TEST, - "skip test because it is extremely slow " - "and should not be broken easily.", + "skip test because it is extremely slow " "and should not be broken easily.", ) def test_scheduler_reduce_on_plateau_plugin_with_val_stream(self): # Regression test for issue #858 (part 2) @@ -631,7 +617,6 @@ def _prepare_rng_critical_parts(seed=1234): scheduler = ReduceLROnPlateau(optimizer) for epoch in range(n_epochs): - val_exp = benchmark.valid_stream[exp_idx] for x, y, t in TaskBalancedDataLoader( @@ -858,7 +843,6 @@ def __init__( hidden_size=512, hidden_layers=1, ): - super().__init__() layers = nn.Sequential( diff --git a/tests/training/test_replay.py b/tests/training/test_replay.py index 77a2f7f4d..6f736e462 100644 --- a/tests/training/test_replay.py +++ b/tests/training/test_replay.py @@ -21,7 +21,8 @@ ExemplarsSelectionStrategy, HerdingSelectionStrategy, ClosestToCenterSelectionStrategy, - ParametricBuffer, ReservoirSamplingBuffer, + ParametricBuffer, + ReservoirSamplingBuffer, ) from avalanche.training.supervised import Naive from avalanche.training.templates import SupervisedTemplate @@ -43,9 +44,7 @@ def test_replay_balanced_memory(self): def _test_replay_balanced_memory(self, storage_policy, mem_size): benchmark = get_fast_benchmark(use_task_labels=True) model = SimpleMLP(input_size=6, hidden_size=10) - replayPlugin = ReplayPlugin( - mem_size=mem_size, storage_policy=storage_policy - ) + replayPlugin = ReplayPlugin(mem_size=mem_size, storage_policy=storage_policy) cl_strategy = Naive( model, SGD(model.parameters(), lr=0.001, momentum=0.9, weight_decay=0.001), diff --git a/tests/training/test_strategies.py b/tests/training/test_strategies.py index 00626a2da..859b861ec 100644 --- a/tests/training/test_strategies.py +++ b/tests/training/test_strategies.py @@ -48,7 +48,7 @@ ER_ACE, DER, LearningToPrompt, - ExpertGateStrategy + ExpertGateStrategy, ) from avalanche.training.supervised.cumulative import Cumulative from 
avalanche.training.supervised.icarl import ICaRL @@ -77,13 +77,15 @@ def test_eval_streams_normalization(self): assert len(res[0]) == test_len res = _group_experiences_by_stream( - [*benchmark.test_stream, *benchmark.train_stream]) + [*benchmark.test_stream, *benchmark.train_stream] + ) assert len(res) == 2 assert len(res[0]) == test_len assert len(res[1]) == train_len res = _group_experiences_by_stream( - [benchmark.test_stream, benchmark.train_stream]) + [benchmark.test_stream, benchmark.train_stream] + ) assert len(res) == 2 assert len(res[0]) == test_len assert len(res[1]) == train_len @@ -172,9 +174,7 @@ def test_plugins_compatibility_checks(self): criterion = CrossEntropyLoss() evalp = EvaluationPlugin( - loss_metrics( - minibatch=True, epoch=True, experience=True, stream=True - ), + loss_metrics(minibatch=True, epoch=True, experience=True, stream=True), loggers=[InteractiveLogger()], strict_checks=None, ) @@ -195,9 +195,7 @@ def test_forward_hooks(self): optimizer = SGD(model.parameters(), lr=1e-3) criterion = CrossEntropyLoss() - strategy = Naive( - model, optimizer, criterion, train_epochs=2, eval_every=0 - ) + strategy = Naive(model, optimizer, criterion, train_epochs=2, eval_every=0) was_hook_called = False def hook(a, b, c): @@ -267,9 +265,7 @@ def init_scenario(self, multi_task=False): def test_naive(self): # SIT scenario - model, optimizer, criterion, benchmark = self.init_scenario( - multi_task=False - ) + model, optimizer, criterion, benchmark = self.init_scenario(multi_task=False) strategy = Naive( model, optimizer, @@ -282,9 +278,7 @@ def test_naive(self): run_strategy(benchmark, strategy) # MT scenario - model, optimizer, criterion, benchmark = self.init_scenario( - multi_task=True - ) + model, optimizer, criterion, benchmark = self.init_scenario(multi_task=True) strategy = Naive( model, optimizer, @@ -309,15 +303,11 @@ def after_train_dataset_adaptation( Check that the dataset used for training contains the correct number of samples. 
""" - cum_len = sum( - [len(exp.dataset) for exp in self.benchmark.train_stream] - ) + cum_len = sum([len(exp.dataset) for exp in self.benchmark.train_stream]) assert len(strategy.adapted_dataset) == cum_len # SIT scenario - model, optimizer, criterion, benchmark = self.init_scenario( - multi_task=False - ) + model, optimizer, criterion, benchmark = self.init_scenario(multi_task=False) strategy = JointTraining( model, optimizer, @@ -332,9 +322,7 @@ def after_train_dataset_adaptation( strategy.train(benchmark.train_stream) # MT scenario - model, optimizer, criterion, benchmark = self.init_scenario( - multi_task=True - ) + model, optimizer, criterion, benchmark = self.init_scenario(multi_task=True) strategy = JointTraining( model, optimizer, @@ -356,9 +344,7 @@ def after_train_dataset_adaptation( def test_cwrstar(self): # SIT scenario - model, optimizer, criterion, benchmark = self.init_scenario( - multi_task=False - ) + model, optimizer, criterion, benchmark = self.init_scenario(multi_task=False) last_fc_name, _ = get_last_fc_layer(model) strategy = CWRStar( model, @@ -385,9 +371,7 @@ def test_cwrstar(self): model.past_j[cls] = 0 # MT scenario - model, optimizer, criterion, benchmark = self.init_scenario( - multi_task=True - ) + model, optimizer, criterion, benchmark = self.init_scenario(multi_task=True) strategy = CWRStar( model, optimizer, @@ -411,9 +395,7 @@ def test_cwrstar(self): def test_replay(self): # SIT scenario - model, optimizer, criterion, benchmark = self.init_scenario( - multi_task=False - ) + model, optimizer, criterion, benchmark = self.init_scenario(multi_task=False) strategy = Replay( model, optimizer, @@ -427,9 +409,7 @@ def test_replay(self): run_strategy(benchmark, strategy) # MT scenario - model, optimizer, criterion, benchmark = self.init_scenario( - multi_task=True - ) + model, optimizer, criterion, benchmark = self.init_scenario(multi_task=True) strategy = Replay( model, optimizer, @@ -444,9 +424,7 @@ def test_replay(self): def test_gdumb(self): # SIT scenario - model, optimizer, criterion, benchmark = self.init_scenario( - multi_task=False - ) + model, optimizer, criterion, benchmark = self.init_scenario(multi_task=False) strategy = GDumb( model, optimizer, @@ -460,9 +438,7 @@ def test_gdumb(self): run_strategy(benchmark, strategy) # MT scenario - model, optimizer, criterion, benchmark = self.init_scenario( - multi_task=True - ) + model, optimizer, criterion, benchmark = self.init_scenario(multi_task=True) strategy = GDumb( model, optimizer, @@ -477,9 +453,7 @@ def test_gdumb(self): def test_cumulative(self): # SIT scenario - model, optimizer, criterion, benchmark = self.init_scenario( - multi_task=False - ) + model, optimizer, criterion, benchmark = self.init_scenario(multi_task=False) strategy = Cumulative( model, optimizer, @@ -492,9 +466,7 @@ def test_cumulative(self): run_strategy(benchmark, strategy) # MT scenario - model, optimizer, criterion, benchmark = self.init_scenario( - multi_task=True - ) + model, optimizer, criterion, benchmark = self.init_scenario(multi_task=True) strategy = Cumulative( model, optimizer, @@ -535,9 +507,7 @@ def test_warning_slda_lwf(self): def test_lwf(self): # SIT scenario - model, optimizer, criterion, benchmark = self.init_scenario( - multi_task=False - ) + model, optimizer, criterion, benchmark = self.init_scenario(multi_task=False) strategy = LwF( model, optimizer, @@ -552,9 +522,7 @@ def test_lwf(self): run_strategy(benchmark, strategy) # MT scenario - model, optimizer, criterion, benchmark = self.init_scenario( - 
multi_task=True - ) + model, optimizer, criterion, benchmark = self.init_scenario(multi_task=True) strategy = LwF( model, optimizer, @@ -570,9 +538,7 @@ def test_lwf(self): def test_agem(self): # SIT scenario - model, optimizer, criterion, benchmark = self.init_scenario( - multi_task=False - ) + model, optimizer, criterion, benchmark = self.init_scenario(multi_task=False) strategy = AGEM( model, optimizer, @@ -586,9 +552,7 @@ def test_agem(self): run_strategy(benchmark, strategy) # MT scenario - model, optimizer, criterion, benchmark = self.init_scenario( - multi_task=True - ) + model, optimizer, criterion, benchmark = self.init_scenario(multi_task=True) strategy = AGEM( model, optimizer, @@ -603,9 +567,7 @@ def test_agem(self): def test_gem(self): # SIT scenario - model, optimizer, criterion, benchmark = self.init_scenario( - multi_task=False - ) + model, optimizer, criterion, benchmark = self.init_scenario(multi_task=False) strategy = GEM( model, optimizer, @@ -619,9 +581,7 @@ def test_gem(self): run_strategy(benchmark, strategy) # MT scenario - model, optimizer, criterion, benchmark = self.init_scenario( - multi_task=True - ) + model, optimizer, criterion, benchmark = self.init_scenario(multi_task=True) strategy = GEM( model, optimizer, @@ -636,9 +596,7 @@ def test_gem(self): def test_ewc(self): # SIT scenario - model, optimizer, criterion, benchmark = self.init_scenario( - multi_task=False - ) + model, optimizer, criterion, benchmark = self.init_scenario(multi_task=False) strategy = EWC( model, optimizer, @@ -653,9 +611,7 @@ def test_ewc(self): run_strategy(benchmark, strategy) # MT scenario - model, optimizer, criterion, benchmark = self.init_scenario( - multi_task=True - ) + model, optimizer, criterion, benchmark = self.init_scenario(multi_task=True) strategy = EWC( model, optimizer, @@ -670,9 +626,7 @@ def test_ewc(self): def test_ewc_online(self): # SIT scenario - model, optimizer, criterion, benchmark = self.init_scenario( - multi_task=False - ) + model, optimizer, criterion, benchmark = self.init_scenario(multi_task=False) strategy = EWC( model, optimizer, @@ -687,8 +641,7 @@ def test_ewc_online(self): run_strategy(benchmark, strategy) # # MT scenario - model, optimizer, criterion, benchmark = self.init_scenario( - multi_task=True) + model, optimizer, criterion, benchmark = self.init_scenario(multi_task=True) strategy = EWC( model, optimizer, @@ -704,9 +657,7 @@ def test_ewc_online(self): def test_rwalk(self): # SIT scenario - model, optimizer, criterion, benchmark = self.init_scenario( - multi_task=False - ) + model, optimizer, criterion, benchmark = self.init_scenario(multi_task=False) strategy = Naive( model, optimizer, @@ -725,8 +676,7 @@ def test_rwalk(self): run_strategy(benchmark, strategy) # # MT scenario - model, optimizer, criterion, benchmark = self.init_scenario( - multi_task=True) + model, optimizer, criterion, benchmark = self.init_scenario(multi_task=True) strategy = Naive( model, optimizer, @@ -746,9 +696,7 @@ def test_rwalk(self): def test_synaptic_intelligence(self): # SIT scenario - model, optimizer, criterion, benchmark = self.init_scenario( - multi_task=False - ) + model, optimizer, criterion, benchmark = self.init_scenario(multi_task=False) strategy = SynapticIntelligence( model, optimizer, @@ -761,8 +709,7 @@ def test_synaptic_intelligence(self): run_strategy(benchmark, strategy) # MT scenario - model, optimizer, criterion, benchmark = self.init_scenario( - multi_task=True) + model, optimizer, criterion, benchmark = self.init_scenario(multi_task=True) 
         strategy = SynapticIntelligence(
             model,
             optimizer,
@@ -780,9 +727,7 @@ def test_cope(self):
         emb_size = n_classes  # Embedding size
 
         # SIT scenario
-        model, optimizer, criterion, benchmark = self.init_scenario(
-            multi_task=False
-        )
+        model, optimizer, criterion, benchmark = self.init_scenario(multi_task=False)
         strategy = CoPE(
             model,
             optimizer,
@@ -845,31 +790,32 @@ def test_expertgate(self):
             train_epochs=2,
             eval_mb_size=50,
             ae_train_mb_size=10,
-            ae_train_epochs=2, 
+            ae_train_epochs=2,
             ae_lr=5e-4,
         )
 
         # Mandatory transform for AlexNet
         # 3 Channels and input size should be a minimum of 227
-        AlexNetTransform = transforms.Compose([
-            transforms.Lambda(lambda x: x.repeat(3, 1, 1)),
-            transforms.Resize((227, 227)),
-        ])
+        AlexNetTransform = transforms.Compose(
+            [
+                transforms.Lambda(lambda x: x.repeat(3, 1, 1)),
+                transforms.Resize((227, 227)),
+            ]
+        )
 
         # train and test loop
         benchmark = self.load_benchmark(
-            use_task_labels=True, 
-            train_transform=AlexNetTransform, 
+            use_task_labels=True,
+            train_transform=AlexNetTransform,
             eval_transform=AlexNetTransform,
-            shuffle=False)
+            shuffle=False,
+        )
 
         for experience in benchmark.train_stream:
             strategy.train(experience)
 
     def test_icarl(self):
-        model, optimizer, criterion, benchmark = self.init_scenario(
-            multi_task=False
-        )
+        model, optimizer, criterion, benchmark = self.init_scenario(multi_task=False)
 
         strategy = ICaRL(
             model.features,
@@ -888,11 +834,8 @@ def test_icarl(self):
         run_strategy(benchmark, strategy)
 
     def test_lfl(self):
-
         # SIT scenario
-        model, optimizer, criterion, benchmark = self.init_scenario(
-            multi_task=False
-        )
+        model, optimizer, criterion, benchmark = self.init_scenario(multi_task=False)
         strategy = LFL(
             model,
             optimizer,
@@ -922,9 +865,7 @@ def test_lfl(self):
 
     def test_mas(self):
         # SIT scenario
-        model, optimizer, criterion, benchmark = self.init_scenario(
-            multi_task=False
-        )
+        model, optimizer, criterion, benchmark = self.init_scenario(multi_task=False)
         strategy = MAS(
             model,
             optimizer,
@@ -953,12 +894,10 @@ def test_mas(self):
         #     train_epochs=2,
         # )
         # run_strategy(benchmark, strategy)
-    
+
     def test_bic(self):
         # SIT scenario
-        model, optimizer, criterion, benchmark = self.init_scenario(
-            multi_task=False
-        )
+        model, optimizer, criterion, benchmark = self.init_scenario(multi_task=False)
         strategy = BiC(
             model,
             optimizer,
@@ -978,9 +917,7 @@ def test_bic(self):
 
     def test_mir(self):
         # SIT scenario
-        model, optimizer, criterion, benchmark = self.init_scenario(
-            multi_task=False
-        )
+        model, optimizer, criterion, benchmark = self.init_scenario(multi_task=False)
         strategy = MIR(
             model,
             optimizer,
@@ -996,8 +933,7 @@ def test_mir(self):
         run_strategy(benchmark, strategy)
 
         # MT scenario
-        model, optimizer, criterion, benchmark = self.init_scenario(
-            multi_task=True)
+        model, optimizer, criterion, benchmark = self.init_scenario(multi_task=True)
         strategy = MIR(
             model,
             optimizer,
@@ -1014,9 +950,7 @@ def test_mir(self):
 
     def test_erace(self):
         # SIT scenario
-        model, optimizer, criterion, benchmark = self.init_scenario(
-            multi_task=False
-        )
+        model, optimizer, criterion, benchmark = self.init_scenario(multi_task=False)
         strategy = ER_ACE(
             model,
             optimizer,
@@ -1029,11 +963,9 @@ def test_erace(self):
             train_epochs=2,
         )
         run_strategy(benchmark, strategy)
-    
+
     def test_l2p(self):
-        _, _, _, benchmark = self.init_scenario(
-            multi_task=False
-        )
+        _, _, _, benchmark = self.init_scenario(multi_task=False)
 
         strategy = LearningToPrompt(
             model_name="simpleMLP",
@@ -1052,9 +984,7 @@ def test_l2p(self):
 
     def test_der(self):
         # SIT scenario
-        model, optimizer, criterion, benchmark = self.init_scenario(
-            multi_task=False
-        )
+        model, optimizer, criterion, benchmark = self.init_scenario(multi_task=False)
         strategy = DER(
             model,
             optimizer,
@@ -1068,20 +998,25 @@ def test_der(self):
         )
         run_strategy(benchmark, strategy)
 
-    def load_benchmark(self, use_task_labels=False,
-                       train_transform=None,
-                       eval_transform=None,
-                       shuffle=True):
+    def load_benchmark(
+        self,
+        use_task_labels=False,
+        train_transform=None,
+        eval_transform=None,
+        shuffle=True,
+    ):
         """
        Returns a NC benchmark from a fake dataset of 10 classes, 5 experiences,
        2 classes per experience.

        :param fast_test: if True loads fake data, MNIST otherwise.
        """
-        return get_fast_benchmark(use_task_labels=use_task_labels,
-                                  train_transform=train_transform,
-                                  eval_transform=eval_transform,
-                                  shuffle=shuffle)
+        return get_fast_benchmark(
+            use_task_labels=use_task_labels,
+            train_transform=train_transform,
+            eval_transform=eval_transform,
+            shuffle=shuffle,
+        )
 
     def get_model(self, fast_test=False, multi_task=False):
         if fast_test:
diff --git a/tests/training/test_strategy_utils.py b/tests/training/test_strategy_utils.py
index baa44877d..2bd569dd7 100644
--- a/tests/training/test_strategy_utils.py
+++ b/tests/training/test_strategy_utils.py
@@ -16,6 +16,4 @@ def run_strategy(benchmark, cl_strategy):
         results.append(cl_strategy.eval(benchmark.test_stream[:]))
 
 
-__all__ = [
-    'run_strategy'
-]
+__all__ = ["run_strategy"]
diff --git a/tests/training/test_supervised_regression.py b/tests/training/test_supervised_regression.py
index 64f7bb437..9ccb1c766 100644
--- a/tests/training/test_supervised_regression.py
+++ b/tests/training/test_supervised_regression.py
@@ -65,9 +65,7 @@ def __init__(
         self.optimizer = optimizer
         self.train_epochs: int = train_epochs
         self.train_mb_size: int = train_mb_size
-        self.eval_mb_size: int = (
-            train_mb_size if eval_mb_size is None else eval_mb_size
-        )
+        self.eval_mb_size: int = train_mb_size if eval_mb_size is None else eval_mb_size
         self.device = device
 
         self.plugins = [] if plugins is None else plugins
@@ -134,9 +132,7 @@ def train(
         ],
         eval_streams: Optional[
             Sequence[
-                Union[
-                    ClassificationExperience, Sequence[ClassificationExperience]
-                ]
+                Union[ClassificationExperience, Sequence[ClassificationExperience]]
             ]
         ] = None,
         **kwargs
@@ -240,9 +236,7 @@ def train_dataset_adaptation(self, **kwargs):
     @torch.no_grad()
     def eval(
         self,
-        exp_list: Union[
-            ClassificationExperience, Sequence[ClassificationExperience]
-        ],
+        exp_list: Union[ClassificationExperience, Sequence[ClassificationExperience]],
         **kwargs
     ):
         """
@@ -560,9 +554,7 @@ def test_reproduce_old_base_strategy(self):
         print(old_losses)
         print(new_losses)
         np.testing.assert_allclose(old_losses, new_losses)
-        for par_old, par_new in zip(
-            old_model.parameters(), new_model.parameters()
-        ):
+        for par_old, par_new in zip(old_model.parameters(), new_model.parameters()):
             np.testing.assert_allclose(par_old.detach(), par_new.detach())
 
     def test_reproduce_old_cumulative_strategy(self):
@@ -607,9 +599,7 @@ def test_reproduce_old_cumulative_strategy(self):
         print(old_losses)
         print(new_losses)
         np.testing.assert_allclose(old_losses, new_losses)
-        for par_old, par_new in zip(
-            old_model.parameters(), new_model.parameters()
-        ):
+        for par_old, par_new in zip(old_model.parameters(), new_model.parameters()):
             np.testing.assert_allclose(par_old.detach(), par_new.detach())
 
 
diff --git a/tests/training/test_training_utils.py b/tests/training/test_training_utils.py
index 65aa55222..e2adcd69e 100644
--- a/tests/training/test_training_utils.py
+++ b/tests/training/test_training_utils.py
@@ -9,32 +9,27 @@ def test_init(self):
         with self.assertRaises(AssertionError):
             # different shapes between input shape
             # and init tensor
-            p = ParamData('test', shape=(3, 3),
-                          device=device,
-                          init_tensor=torch.randn(3, 2))
+            p = ParamData(
+                "test", shape=(3, 3), device=device, init_tensor=torch.randn(3, 2)
+            )
 
         with self.assertRaises(AssertionError):
             # missing either shape or init tensor
-            p = ParamData('test', device=device)
+            p = ParamData("test", device=device)
 
-        p = ParamData('test', device=device,
-                      init_tensor=torch.randn(2, 3))
+        p = ParamData("test", device=device, init_tensor=torch.randn(2, 3))
         self.assertEqual(p.data.shape, p.shape)
 
-        p = ParamData('test', device=device,
-                      shape=(2, 3))
+        p = ParamData("test", device=device, shape=(2, 3))
         self.assertEqual(p.data.shape, p.shape)
 
-        p = ParamData('test', device=device,
-                      shape=(2, 3),
-                      init_function=torch.ones)
+        p = ParamData("test", device=device, shape=(2, 3), init_function=torch.ones)
         self.assertEqual(p.data.shape, p.shape)
         self.assertTrue((torch.ones(2, 3) == p.data).all())
 
     def test_expand(self):
-        device = 'cpu'
-        p = ParamData('test', device=device, shape=(2, 3),
-                      init_function=torch.ones)
+        device = "cpu"
+        p = ParamData("test", device=device, shape=(2, 3), init_function=torch.ones)
         with self.assertRaises(AssertionError):
             p.expand((3, 4))
         with self.assertRaises(AssertionError):
@@ -49,22 +44,21 @@ def test_expand(self):
         self.assertTrue(new_p.shape == (2, 5))
         self.assertTrue(p.data.shape == (2, 5))
         self.assertTrue(p.shape == (2, 5))
-        p = ParamData('test', device=device, shape=(2, 3),
-                      init_function=torch.ones)
+        p = ParamData("test", device=device, shape=(2, 3), init_function=torch.ones)
         new_p = p.expand((5, 3))
         self.assertTrue((p.data[:2, :3] == 1).all())
         self.assertTrue((p.data[2:, 3:] == 0).all())
         self.assertTrue(new_p.shape == (5, 3))
         self.assertTrue(p.data.shape == (5, 3))
         self.assertTrue(p.shape == (5, 3))
-        p = ParamData('test', device=device, shape=(2, 3))
+        p = ParamData("test", device=device, shape=(2, 3))
         p.expand((2, 5), padding_fn=torch.ones)
         self.assertTrue((p.data[:2, :3] == 0).all())
         self.assertTrue((p.data[2:, 3:] == 1).all())
 
     def test_reset(self):
-        device = 'cpu'
-        p = ParamData('test', device=device, shape=(2, 3))
+        device = "cpu"
+        p = ParamData("test", device=device, shape=(2, 3))
         p.reset_like((3, 4))
         self.assertTrue(p.shape == (3, 4))
         self.assertTrue(p.data.shape == (3, 4))
diff --git a/tests/unit_tests_utils.py b/tests/unit_tests_utils.py
index 7e6d232aa..2eb74b551 100644
--- a/tests/unit_tests_utils.py
+++ b/tests/unit_tests_utils.py
@@ -114,9 +114,14 @@ def load_tensor_benchmark():
 
 
 def get_fast_benchmark(
-    use_task_labels=False, shuffle=True, n_samples_per_class=100,
-    n_classes=10, n_features=6, seed=None,
-    train_transform=None, eval_transform=None,
+    use_task_labels=False,
+    shuffle=True,
+    n_samples_per_class=100,
+    n_classes=10,
+    n_features=6,
+    seed=None,
+    train_transform=None,
+    eval_transform=None,
 ):
     dataset = make_classification(
         n_samples=n_classes * n_samples_per_class,
@@ -124,7 +129,7 @@ def get_fast_benchmark(
         n_features=n_features,
         n_informative=6,
         n_redundant=0,
-        random_state=seed
+        random_state=seed,
     )
 
     X = torch.from_numpy(dataset[0]).float()
@@ -144,7 +149,7 @@ def get_fast_benchmark(
         shuffle=shuffle,
         train_transform=train_transform,
         eval_transform=eval_transform,
-        seed=seed
+        seed=seed,
     )
     return my_nc_benchmark
 
@@ -156,8 +161,7 @@ def __init__(self, n_elements=10000, n_classes=100):
         super().__init__()
         self.targets = list(range(n_classes))
         self.targets += [
-            random.randint(0, n_classes-1) for _
-            in range(n_elements - n_classes)
+            random.randint(0, n_classes - 1) for _ in range(n_elements - n_classes)
         ]
 
     def __getitem__(self, index):