Merge pull request #39 from aimagelab/dev
Small fixes
loribonna authored Apr 30, 2024
2 parents 174a263 + 43f8c8f commit 10446ef
Showing 10 changed files with 22 additions and 25 deletions.
7 changes: 5 additions & 2 deletions README.md
@@ -127,8 +127,11 @@ We have created a [WIKI](https://aimagelab.github.io/mammoth/)! Check it out for

### Other Awesome CL works using Mammoth

-***Get in touch if we missed your awesome work!***
+**_Get in touch if we missed your awesome work!_**

+- Prediction Error-based Classification for Class-Incremental Learning (**ICLR2024**) [[paper](https://arxiv.org/pdf/2305.18806)] [[code](https://github.com/michalzajac-ml/pec)]
+- TriRE: A Multi-Mechanism Learning Paradigm for Continual Knowledge Retention and Promotion (**NeurIPS2023**) [[paper](https://arxiv.org/pdf/2310.08217.pdf)] [[code](https://github.com/NeurAI-Lab/TriRE)]
+- Overcoming Recency Bias of Normalization Statistics in Continual Learning: Balance and Adaptation (**NeurIPS2023**) [[paper](https://arxiv.org/pdf/2310.08855.pdf)] [[code](https://github.com/lvyilin/AdaB2N)]
- A Unified and General Framework for Continual Learning (**ICLR2024**) [[paper](https://arxiv.org/pdf/2403.13249.pdf)] [[code](https://github.com/joey-wang123/CL-refresh-learning)]
- Decoupling Learning and Remembering: a Bilevel Memory Framework with Knowledge Projection for Task-Incremental Learning (**CVPR2023**) [[paper](https://openaccess.thecvf.com/content/CVPR2023/papers/Sun_Decoupling_Learning_and_Remembering_A_Bilevel_Memory_Framework_With_Knowledge_CVPR_2023_paper.pdf)] [[code](https://github.com/SunWenJu123/BMKP)]
- Regularizing Second-Order Influences for Continual Learning (**CVPR2023**) [[paper](https://openaccess.thecvf.com/content/CVPR2023/papers/Sun_Regularizing_Second-Order_Influences_for_Continual_Learning_CVPR_2023_paper.pdf)] [[code](https://github.com/feifeiobama/InfluenceCL)]
@@ -141,6 +144,7 @@ We have created a [WIKI](https://aimagelab.github.io/mammoth/)! Check it out for
- ConSlide: Asynchronous Hierarchical Interaction Transformer with Breakup-Reorganize Rehearsal for Continual Whole Slide Image Analysis (**ICCV2023**) [[paper](https://openaccess.thecvf.com/content/ICCV2023/papers/Huang_ConSlide_Asynchronous_Hierarchical_Interaction_Transformer_with_Breakup-Reorganize_Rehearsal_for_Continual_ICCV_2023_paper.pdf)] [[code](https://github.com/HKU-MedAI/ConSlide)]
- CBA: Improving Online Continual Learning via Continual Bias Adaptor (**ICCV2023**) [[paper](https://arxiv.org/pdf/2308.06925.pdf)] [[code](https://github.com/wqza/CBA-online-CL)]
- Neuro-Symbolic Continual Learning: Knowledge, Reasoning Shortcuts and Concept Rehearsal (**ICML2023**) [[paper](https://arxiv.org/pdf/2302.01242.pdf)] [[code](https://github.com/ema-marconato/NeSy-CL)]
+- Learnability and Algorithm for Continual Learning (**ICML2023**) [[paper](https://arxiv.org/pdf/2306.12646.pdf)] [[code](https://github.com/k-gyuhak/CLOOD)]
- Pretrained Language Model in Continual Learning: a Comparative Study (**ICLR2022**) [[paper](https://openreview.net/pdf?id=figzpGMrdD)] [[code](https://github.com/wutong8023/PLM4CL)]
- Representational continuity for unsupervised continual learning (**ICLR2022**) [[paper](https://openreview.net/pdf?id=9Hrka5PA7LW)] [[code](https://github.com/divyam3897/UCL)]
- Continual Normalization: Rethinking Batch Normalization for Online Continual Learning (**ICLR2022**) [[paper](https://arxiv.org/abs/2203.16102)] [[code](https://github.com/phquang/Continual-Normalization)]
@@ -152,7 +156,6 @@ We have created a [WIKI](https://aimagelab.github.io/mammoth/)! Check it out for
- Self-supervised models are continual learners (**CVPR2022**) [[paper](https://arxiv.org/abs/2112.04215)] [[code](https://github.com/DonkeyShot21/cassle)]
- Learning from Students: Online Contrastive Distillation Network for General Continual Learning (**IJCAI2022**) [[paper](https://www.ijcai.org/proceedings/2022/0446.pdf)] [[code](https://github.com/lijincm/OCD-Net)]


### Contributing

Pull requests welcome!
6 changes: 3 additions & 3 deletions datasets/perm_mnist.py
@@ -5,6 +5,7 @@

from typing import Tuple

+import numpy as np
import torch
import torch.nn.functional as F
import torchvision.transforms as transforms
@@ -16,7 +17,6 @@
from datasets.utils.continual_dataset import ContinualDataset, store_masked_loaders
from utils.conf import base_path


class MyMNIST(MNIST):
"""
Overrides the MNIST dataset to change the getitem function.
@@ -73,7 +73,7 @@ class PermutedMNIST(ContinualDataset):
SIZE = (28, 28)

def get_data_loaders(self) -> Tuple[torch.utils.data.DataLoader, torch.utils.data.DataLoader]:
-transform = transforms.Compose((transforms.ToTensor(), Permutation()))
+transform = transforms.Compose((transforms.ToTensor(), Permutation(np.prod(PermutedMNIST.SIZE))))

train_dataset = MyMNIST(base_path() + 'MNIST',
train=True, download=True, transform=transform)
@@ -85,7 +85,7 @@ def get_data_loaders(self) -> Tuple[torch.utils.data.DataLoader, torch.utils.data.DataLoader]:

@staticmethod
def get_backbone():
-return MNISTMLP(28 * 28, PermutedMNIST.N_CLASSES_PER_TASK)
+return MNISTMLP(np.prod(PermutedMNIST.SIZE), PermutedMNIST.N_CLASSES_PER_TASK)

@staticmethod
def get_transform():
6 changes: 2 additions & 4 deletions datasets/transforms/permutation.py
@@ -11,11 +11,11 @@ class Permutation(object):
Defines a fixed permutation for a numpy array.
"""

-def __init__(self) -> None:
+def __init__(self, size) -> None:
"""
Initializes the permutation.
"""
-self.perm = None
+self.perm = np.random.permutation(size)

def __call__(self, sample: np.ndarray) -> np.ndarray:
"""
@@ -28,8 +28,6 @@ def __call__(self, sample: np.ndarray) -> np.ndarray:
permuted image
"""
old_shape = sample.shape
-if self.perm is None:
-    self.perm = np.random.permutation(len(sample.flatten()))

return sample.flatten()[self.perm].reshape(old_shape)

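For context, the sketch below re-implements the updated `Permutation` transform exactly as it appears in the diff above: the pixel permutation is now drawn once in `__init__` (from the flattened image size supplied by the dataset) and reused on every call, instead of being sampled lazily on the first image. The 28×28 shape is just an example value.

```python
import numpy as np

class Permutation:
    """Fixed pixel permutation for a numpy array (as in the updated transform)."""

    def __init__(self, size: int) -> None:
        # Sampled eagerly: the permutation is fixed for the lifetime of the
        # transform, i.e. one permutation per PermutedMNIST task.
        self.perm = np.random.permutation(size)

    def __call__(self, sample: np.ndarray) -> np.ndarray:
        old_shape = sample.shape
        return sample.flatten()[self.perm].reshape(old_shape)

# Example usage mirroring PermutedMNIST.get_data_loaders():
size = (28, 28)
transform = Permutation(np.prod(size))
img = np.arange(np.prod(size), dtype=np.float32).reshape(size)
assert np.array_equal(transform(img), transform(img))  # same permutation on every call
```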
7 changes: 4 additions & 3 deletions datasets/utils/continual_dataset.py
@@ -249,7 +249,8 @@ def store_masked_loaders(train_dataset: Dataset, test_dataset: Dataset,
batch_size=setting.args.batch_size, shuffle=False)
setting.test_loaders.append(test_loader)
setting.train_loader = train_loader

-setting.i += setting.N_CLASSES_PER_TASK
-setting.c_task += 1

+if setting.SETTING == 'task-il' or setting.SETTING == 'class-il':
+    setting.i += setting.N_CLASSES_PER_TASK
+    setting.c_task += 1
return train_loader, test_loader
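
The new guard means the class offset (`setting.i`) and task counter (`setting.c_task`) only advance for class- and task-incremental settings; in a domain-incremental dataset such as Permuted MNIST every task reuses the same label space, so the offset must stay put. A toy illustration of that bookkeeping (not the actual `store_masked_loaders` code; the attribute values below are made up) follows.

```python
from types import SimpleNamespace

def advance_task(setting) -> None:
    # Mirrors the guarded bookkeeping from the diff above.
    if setting.SETTING == 'task-il' or setting.SETTING == 'class-il':
        setting.i += setting.N_CLASSES_PER_TASK
        setting.c_task += 1

class_il = SimpleNamespace(SETTING='class-il', N_CLASSES_PER_TASK=2, i=0, c_task=0)
domain_il = SimpleNamespace(SETTING='domain-il', N_CLASSES_PER_TASK=10, i=0, c_task=0)

for _ in range(3):  # three tasks
    advance_task(class_il)
    advance_task(domain_il)

print(class_il.i, class_il.c_task)    # 6 3  -> a new slice of classes per task
print(domain_il.i, domain_il.c_task)  # 0 0  -> same classes every task (e.g. Permuted MNIST)
```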
2 changes: 1 addition & 1 deletion docs/getting_started/index.rst
@@ -19,7 +19,7 @@ Mammoth logs all the results and metrics under the ``data/results`` directory (b
WandB
~~~~~

-For advanced logging, including loss values, metrics, and hyperparameters, you can use [WandB](https://wandb.ai/) by providing both ``--wandb_project`` and ``--wandb_entity`` arguments. If you don't want to use WandB, you can simply omit these arguments.
+For advanced logging, including loss values, metrics, and hyperparameters, you can use `WandB <https://wandb.ai/>`_ by providing both ``--wandb_project`` and ``--wandb_entity`` arguments. If you don't want to use WandB, you can simply omit these arguments.

.. tip::
By default, all arguments, loss values, and metrics are logged. Thanks to the **autolog_wandb** (:ref:`module-models`), all the variables created in the **observe** that contain *loss* or start with *_wandb_* will be logged. Thus, in order to loss all the separate loss values, you can simply add ``loss = loss + loss1 + loss2`` to the **observe** function.
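As a rough illustration of the logging behaviour the tip describes (with `--wandb_project` and `--wandb_entity` set): any local variable created inside `observe` whose name contains *loss* is picked up by the autologger, so keeping partial losses as separate locals is enough to log them individually. The model below is a schematic stand-in, not code from the repository.

```python
import torch
from torch import nn

class ToyModel:
    """Schematic continual model; only the observe() naming pattern matters here."""

    def __init__(self) -> None:
        self.net = nn.Linear(784, 10)
        self.loss = nn.CrossEntropyLoss()
        self.opt = torch.optim.SGD(self.net.parameters(), lr=0.03)

    def observe(self, inputs, labels, not_aug_inputs=None, epoch=None) -> float:
        outputs = self.net(inputs)
        # Locals named *loss* (loss_ce, loss_reg, loss) would each be auto-logged.
        loss_ce = self.loss(outputs, labels)
        loss_reg = 1e-4 * sum(p.pow(2).sum() for p in self.net.parameters())
        loss = loss_ce + loss_reg
        self.opt.zero_grad()
        loss.backward()
        self.opt.step()
        return loss.item()

model = ToyModel()
x, y = torch.randn(8, 784), torch.randint(0, 10, (8,))
model.observe(x, y)
```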
3 changes: 2 additions & 1 deletion docs/getting_started/parseval.rst
@@ -3,6 +3,7 @@
Mammoth parseval
================

-.. todo::
+.. admonition:: TO BE CONTINUED!
+   :class: tip

To be written
5 changes: 4 additions & 1 deletion docs/readme.rst
@@ -165,7 +165,10 @@ Other Awesome CL works using Mammoth

**Get in touch if we missed your awesome work!**


`- Prediction Error-based Classification for Class-Incremental Learning (ICLR2024) <(https://arxiv.org/pdf/2305.18806)>`_ (`code <(https://github.com/michalzajac-ml/pec)>`_)
`- TriRE: A Multi-Mechanism Learning Paradigm for Continual Knowledge Retention and Promotion (NeurIPS2023) <(https://arxiv.org/pdf/2310.08217.pdf)>`_ (`code <(https://github.com/NeurAI-Lab/TriRE)>`_)
`- Overcoming Recency Bias of Normalization Statistics in Continual Learning: Balance and Adaptation (NeurIPS2023) <(https://arxiv.org/pdf/2310.08855.pdf)>`_ (`code <(https://github.com/lvyilin/AdaB2N)>`_)
`- A Unified and General Framework for Continual Learning (ICLR2024) <(https://arxiv.org/pdf/2403.13249.pdf)>`_ (`code <(https://github.com/joey-wang123/CL-refresh-learning)>`_)
`- Decoupling Learning and Remembering: a Bilevel Memory Framework with Knowledge Projection for Task-Incremental Learning (CVPR2023) <(https://openaccess.thecvf.com/content/CVPR2023/papers/Sun_Decoupling_Learning_and_Remembering_A_Bilevel_Memory_Framework_With_Knowledge_CVPR_2023_paper.pdf)>`_ (`code <(https://github.com/SunWenJu123/BMKP)>`_)
`- Regularizing Second-Order Influences for Continual Learning (CVPR2023) <(https://openaccess.thecvf.com/content/CVPR2023/papers/Sun_Regularizing_Second-Order_Influences_for_Continual_Learning_CVPR_2023_paper.pdf)>`_ (`code <(https://github.com/feifeiobama/InfluenceCL)>`_)
`- Sparse Coding in a Dual Memory System for Lifelong Learning (CVPR2023) <(https://arxiv.org/pdf/2301.05058.pdf)>`_ (`code <(https://github.com/NeurAI-Lab/SCoMMER)>`_)
5 changes: 0 additions & 5 deletions models/coda_prompt.py
@@ -66,13 +66,8 @@ def begin_task(self, dataset):
self.opt = self.get_optimizer()

self.scheduler = CosineSchedule(self.opt, K=self.args.n_epochs)
-self.old_epoch = 0

def observe(self, inputs, labels, not_aug_inputs, epoch=0):
-if self.scheduler and self.old_epoch != epoch:
-    self.scheduler.step()
-    self.old_epoch = epoch
-    self.iteration = 0
labels = labels.long()
self.opt.zero_grad()
logits, loss_prompt = self.net(inputs, train=True)
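The removed lines stepped the learning-rate schedule from inside `observe` by manually tracking epoch changes. The usual PyTorch pattern, sketched below with `CosineAnnealingLR` as a stand-in for Mammoth's custom `CosineSchedule`, is to step an epoch-based scheduler once per epoch in the outer training loop and keep the per-batch update free of scheduler bookkeeping. This is a generic illustration, not the Mammoth training loop.

```python
import torch
from torch import nn

model = nn.Linear(10, 2)
opt = torch.optim.SGD(model.parameters(), lr=0.1)
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(opt, T_max=5)

for epoch in range(5):
    for _ in range(3):  # per-batch updates (observe()-style), no scheduler logic here
        opt.zero_grad()
        loss = model(torch.randn(4, 10)).sum()
        loss.backward()
        opt.step()
    scheduler.step()  # advance the cosine schedule once per epoch
    print(epoch, scheduler.get_last_lr())
```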
4 changes: 0 additions & 4 deletions models/slca.py
@@ -82,13 +82,9 @@ def begin_task(self, dataset):
self.opt, self.scheduler = self.net.get_optimizer()
self.net._network.train()

-self.old_epoch = 0
self.opt.zero_grad()

def observe(self, inputs, labels, not_aug_inputs, epoch=0):
-if self.old_epoch != epoch:
-    self.old_epoch = epoch
-    self.scheduler.step()

labels = labels.long()
logits = self.net._network(inputs, bcb_no_grad=self.net.fix_bcb)['logits']
2 changes: 1 addition & 1 deletion models/slca_utils/convs/vits.py
@@ -643,7 +643,7 @@ def _create_vision_transformer(variant, pretrained=False, **kwargs):
if 'siglip' in variant and kwargs.get('global_pool', None) != 'map':
strict = False

-pretrained_cfg = resolve_pretrained_cfg(variant, pretrained_cfg=kwargs.pop('pretrained_cfg', None))
+pretrained_cfg = resolve_pretrained_cfg(variant, pretrained_cfg=kwargs.pop('pretrained_cfg', None) or default_cfgs[variant])
pretrained_cfg.custom_load = True

return build_model_with_cfg(
