diff --git a/README.md b/README.md
index f7eca005..52e12d65 100644
--- a/README.md
+++ b/README.md
@@ -127,8 +127,11 @@ We have created a [WIKI](https://aimagelab.github.io/mammoth/)! Check it out for
 
 ### Other Awesome CL works using Mammoth
 
-***Get in touch if we missed your awesome work!***
+**_Get in touch if we missed your awesome work!_**
 
+- Prediction Error-based Classification for Class-Incremental Learning (**ICLR2024**) [[paper](https://arxiv.org/pdf/2305.18806)] [[code](https://github.com/michalzajac-ml/pec)]
+- TriRE: A Multi-Mechanism Learning Paradigm for Continual Knowledge Retention and Promotion (**NeurIPS2023**) [[paper](https://arxiv.org/pdf/2310.08217.pdf)] [[code](https://github.com/NeurAI-Lab/TriRE)]
+- Overcoming Recency Bias of Normalization Statistics in Continual Learning: Balance and Adaptation (**NeurIPS2023**) [[paper](https://arxiv.org/pdf/2310.08855.pdf)] [[code](https://github.com/lvyilin/AdaB2N)]
 - A Unified and General Framework for Continual Learning (**ICLR2024**) [[paper](https://arxiv.org/pdf/2403.13249.pdf)] [[code](https://github.com/joey-wang123/CL-refresh-learning)]
 - Decoupling Learning and Remembering: a Bilevel Memory Framework with Knowledge Projection for Task-Incremental Learning (**CVPR2023**) [[paper](https://openaccess.thecvf.com/content/CVPR2023/papers/Sun_Decoupling_Learning_and_Remembering_A_Bilevel_Memory_Framework_With_Knowledge_CVPR_2023_paper.pdf)] [[code](https://github.com/SunWenJu123/BMKP)]
 - Regularizing Second-Order Influences for Continual Learning (**CVPR2023**) [[paper](https://openaccess.thecvf.com/content/CVPR2023/papers/Sun_Regularizing_Second-Order_Influences_for_Continual_Learning_CVPR_2023_paper.pdf)] [[code](https://github.com/feifeiobama/InfluenceCL)]
@@ -141,6 +144,7 @@ We have created a [WIKI](https://aimagelab.github.io/mammoth/)! Check it out for
 - ConSlide: Asynchronous Hierarchical Interaction Transformer with Breakup-Reorganize Rehearsal for Continual Whole Slide Image Analysis (**ICCV2023**) [[paper](https://openaccess.thecvf.com/content/ICCV2023/papers/Huang_ConSlide_Asynchronous_Hierarchical_Interaction_Transformer_with_Breakup-Reorganize_Rehearsal_for_Continual_ICCV_2023_paper.pdf)] [[code](https://github.com/HKU-MedAI/ConSlide)]
 - CBA: Improving Online Continual Learning via Continual Bias Adaptor (**ICCV2023**) [[paper](https://arxiv.org/pdf/2308.06925.pdf)] [[code](https://github.com/wqza/CBA-online-CL)]
 - Neuro-Symbolic Continual Learning: Knowledge, Reasoning Shortcuts and Concept Rehearsal (**ICML2023**) [[paper](https://arxiv.org/pdf/2302.01242.pdf)] [[code](https://github.com/ema-marconato/NeSy-CL)]
+- Learnability and Algorithm for Continual Learning (**ICML2023**) [[paper](https://arxiv.org/pdf/2306.12646.pdf)] [[code](https://github.com/k-gyuhak/CLOOD)]
 - Pretrained Language Model in Continual Learning: a Comparative Study (**ICLR2022**) [[paper](https://openreview.net/pdf?id=figzpGMrdD)] [[code](https://github.com/wutong8023/PLM4CL)]
 - Representational continuity for unsupervised continual learning (**ICLR2022**) [[paper](https://openreview.net/pdf?id=9Hrka5PA7LW)] [[code](https://github.com/divyam3897/UCL)]
 - Continual Normalization: Rethinking Batch Normalization for Online Continual Learning (**ICLR2022**) [[paper](https://arxiv.org/abs/2203.16102)] [[code](https://github.com/phquang/Continual-Normalization)]
@@ -152,7 +156,6 @@ We have created a [WIKI](https://aimagelab.github.io/mammoth/)! Check it out for
 - Self-supervised models are continual learners (**CVPR2022**) [[paper](https://arxiv.org/abs/2112.04215)] [[code](https://github.com/DonkeyShot21/cassle)]
 - Learning from Students: Online Contrastive Distillation Network for General Continual Learning (**IJCAI2022**) [[paper](https://www.ijcai.org/proceedings/2022/0446.pdf)] [[code](https://github.com/lijincm/OCD-Net)]
 
-
 ### Contributing
 
 Pull requests welcome!
diff --git a/datasets/perm_mnist.py b/datasets/perm_mnist.py
index 28d98c11..194cd12d 100644
--- a/datasets/perm_mnist.py
+++ b/datasets/perm_mnist.py
@@ -5,6 +5,7 @@
 
 from typing import Tuple
 
+import numpy as np
 import torch
 import torch.nn.functional as F
 import torchvision.transforms as transforms
@@ -16,7 +17,6 @@
 from datasets.utils.continual_dataset import ContinualDataset, store_masked_loaders
 from utils.conf import base_path
 
-
 class MyMNIST(MNIST):
     """
     Overrides the MNIST dataset to change the getitem function.
     """
@@ -73,7 +73,7 @@ class PermutedMNIST(ContinualDataset):
     SIZE = (28, 28)
 
     def get_data_loaders(self) -> Tuple[torch.utils.data.DataLoader, torch.utils.data.DataLoader]:
-        transform = transforms.Compose((transforms.ToTensor(), Permutation()))
+        transform = transforms.Compose((transforms.ToTensor(), Permutation(np.prod(PermutedMNIST.SIZE))))
 
         train_dataset = MyMNIST(base_path() + 'MNIST',
                                 train=True, download=True, transform=transform)
@@ -85,7 +85,7 @@ def get_data_loaders(self) -> Tuple[torch.utils.data.DataLoader, torch.utils.dat
 
     @staticmethod
     def get_backbone():
-        return MNISTMLP(28 * 28, PermutedMNIST.N_CLASSES_PER_TASK)
+        return MNISTMLP(np.prod(PermutedMNIST.SIZE), PermutedMNIST.N_CLASSES_PER_TASK)
 
     @staticmethod
     def get_transform():
diff --git a/datasets/transforms/permutation.py b/datasets/transforms/permutation.py
index 4d24592a..1b8cd7b7 100644
--- a/datasets/transforms/permutation.py
+++ b/datasets/transforms/permutation.py
@@ -11,11 +11,11 @@ class Permutation(object):
     Defines a fixed permutation for a numpy array.
     """
 
-    def __init__(self) -> None:
+    def __init__(self, size) -> None:
         """
         Initializes the permutation.
""" - self.perm = None + self.perm = np.random.permutation(size) def __call__(self, sample: np.ndarray) -> np.ndarray: """ @@ -28,8 +28,6 @@ def __call__(self, sample: np.ndarray) -> np.ndarray: permuted image """ old_shape = sample.shape - if self.perm is None: - self.perm = np.random.permutation(len(sample.flatten())) return sample.flatten()[self.perm].reshape(old_shape) diff --git a/datasets/utils/continual_dataset.py b/datasets/utils/continual_dataset.py index 5849d796..f78f2098 100644 --- a/datasets/utils/continual_dataset.py +++ b/datasets/utils/continual_dataset.py @@ -249,7 +249,8 @@ def store_masked_loaders(train_dataset: Dataset, test_dataset: Dataset, batch_size=setting.args.batch_size, shuffle=False) setting.test_loaders.append(test_loader) setting.train_loader = train_loader - - setting.i += setting.N_CLASSES_PER_TASK - setting.c_task += 1 + + if setting.SETTING == 'task-il' or setting.SETTING == 'class-il': + setting.i += setting.N_CLASSES_PER_TASK + setting.c_task += 1 return train_loader, test_loader diff --git a/docs/getting_started/index.rst b/docs/getting_started/index.rst index 2823af45..d9ba6c76 100644 --- a/docs/getting_started/index.rst +++ b/docs/getting_started/index.rst @@ -19,7 +19,7 @@ Mammoth logs all the results and metrics under the ``data/results`` directory (b WandB ~~~~~ -For advanced logging, including loss values, metrics, and hyperparameters, you can use [WandB](https://wandb.ai/) by providing both ``--wandb_project`` and ``--wandb_entity`` arguments. If you don't want to use WandB, you can simply omit these arguments. +For advanced logging, including loss values, metrics, and hyperparameters, you can use `WandB `_ by providing both ``--wandb_project`` and ``--wandb_entity`` arguments. If you don't want to use WandB, you can simply omit these arguments. .. tip:: By default, all arguments, loss values, and metrics are logged. Thanks to the **autolog_wandb** (:ref:`module-models`), all the variables created in the **observe** that contain *loss* or start with *_wandb_* will be logged. Thus, in order to loss all the separate loss values, you can simply add ``loss = loss + loss1 + loss2`` to the **observe** function. diff --git a/docs/getting_started/parseval.rst b/docs/getting_started/parseval.rst index e4891d8a..df387737 100644 --- a/docs/getting_started/parseval.rst +++ b/docs/getting_started/parseval.rst @@ -3,6 +3,7 @@ Mammoth parseval ================ -.. todo:: +.. admonition:: TO BE CONTINUED! 
+    :class: tip
 
     To be written
\ No newline at end of file
diff --git a/docs/readme.rst b/docs/readme.rst
index e3cf2e20..d74cf12e 100644
--- a/docs/readme.rst
+++ b/docs/readme.rst
@@ -165,7 +165,10 @@ Other Awesome CL works using Mammoth
 
 **Get in touch if we missed your awesome work!**
 
-
+`- Prediction Error-based Classification for Class-Incremental Learning (ICLR2024) <(https://arxiv.org/pdf/2305.18806)>`_ (`code <(https://github.com/michalzajac-ml/pec)>`_)
+`- TriRE: A Multi-Mechanism Learning Paradigm for Continual Knowledge Retention and Promotion (NeurIPS2023) <(https://arxiv.org/pdf/2310.08217.pdf)>`_ (`code <(https://github.com/NeurAI-Lab/TriRE)>`_)
+`- Overcoming Recency Bias of Normalization Statistics in Continual Learning: Balance and Adaptation (NeurIPS2023) <(https://arxiv.org/pdf/2310.08855.pdf)>`_ (`code <(https://github.com/lvyilin/AdaB2N)>`_)
+`- A Unified and General Framework for Continual Learning (ICLR2024) <(https://arxiv.org/pdf/2403.13249.pdf)>`_ (`code <(https://github.com/joey-wang123/CL-refresh-learning)>`_)
 `- Decoupling Learning and Remembering: a Bilevel Memory Framework with Knowledge Projection for Task-Incremental Learning (CVPR2023) <(https://openaccess.thecvf.com/content/CVPR2023/papers/Sun_Decoupling_Learning_and_Remembering_A_Bilevel_Memory_Framework_With_Knowledge_CVPR_2023_paper.pdf)>`_ (`code <(https://github.com/SunWenJu123/BMKP)>`_)
 `- Regularizing Second-Order Influences for Continual Learning (CVPR2023) <(https://openaccess.thecvf.com/content/CVPR2023/papers/Sun_Regularizing_Second-Order_Influences_for_Continual_Learning_CVPR_2023_paper.pdf)>`_ (`code <(https://github.com/feifeiobama/InfluenceCL)>`_)
 `- Sparse Coding in a Dual Memory System for Lifelong Learning (CVPR2023) <(https://arxiv.org/pdf/2301.05058.pdf)>`_ (`code <(https://github.com/NeurAI-Lab/SCoMMER)>`_)
diff --git a/models/coda_prompt.py b/models/coda_prompt.py
index d2429a9c..e43ac038 100644
--- a/models/coda_prompt.py
+++ b/models/coda_prompt.py
@@ -66,13 +66,8 @@ def begin_task(self, dataset):
         self.opt = self.get_optimizer()
         self.scheduler = CosineSchedule(self.opt, K=self.args.n_epochs)
-        self.old_epoch = 0
 
     def observe(self, inputs, labels, not_aug_inputs, epoch=0):
-        if self.scheduler and self.old_epoch != epoch:
-            self.scheduler.step()
-            self.old_epoch = epoch
-            self.iteration = 0
         labels = labels.long()
         self.opt.zero_grad()
         logits, loss_prompt = self.net(inputs, train=True)
diff --git a/models/slca.py b/models/slca.py
index b04669aa..177e116a 100644
--- a/models/slca.py
+++ b/models/slca.py
@@ -82,13 +82,9 @@ def begin_task(self, dataset):
         self.opt, self.scheduler = self.net.get_optimizer()
         self.net._network.train()
-        self.old_epoch = 0
 
         self.opt.zero_grad()
 
     def observe(self, inputs, labels, not_aug_inputs, epoch=0):
-        if self.old_epoch != epoch:
-            self.old_epoch = epoch
-            self.scheduler.step()
         labels = labels.long()
 
         logits = self.net._network(inputs, bcb_no_grad=self.net.fix_bcb)['logits']
diff --git a/models/slca_utils/convs/vits.py b/models/slca_utils/convs/vits.py
index 370655b9..3615ee94 100644
--- a/models/slca_utils/convs/vits.py
+++ b/models/slca_utils/convs/vits.py
@@ -643,7 +643,7 @@ def _create_vision_transformer(variant, pretrained=False, **kwargs):
     if 'siglip' in variant and kwargs.get('global_pool', None) != 'map':
         strict = False
 
-    pretrained_cfg = resolve_pretrained_cfg(variant, pretrained_cfg=kwargs.pop('pretrained_cfg', None))
+    pretrained_cfg = resolve_pretrained_cfg(variant, pretrained_cfg=kwargs.pop('pretrained_cfg', None) or default_cfgs[variant])
     pretrained_cfg.custom_load = True
 
     return build_model_with_cfg(
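
For reference, the functional core of this patch is the eager `Permutation` transform: it now receives the flattened image size at construction time (hence the new `np.prod(PermutedMNIST.SIZE)` arguments in `perm_mnist.py`) and samples its permutation immediately instead of lazily on the first call. Below is a minimal, self-contained sketch of the patched behaviour; the class body follows the diff above, while the dummy input and the final print are illustrative only and not part of the patch.

```python
# Sanity-check sketch (not part of the patch) of the eager Permutation transform.
import numpy as np
import torchvision.transforms as transforms


class Permutation:
    """Applies a fixed random permutation to the flattened input, as in the patched transform."""

    def __init__(self, size) -> None:
        # Drawn once here and fixed for the lifetime of the transform; PermutedMNIST
        # builds a new Permutation in each get_data_loaders call, so every task
        # gets its own fixed permutation.
        self.perm = np.random.permutation(size)

    def __call__(self, sample):
        old_shape = sample.shape
        return sample.flatten()[self.perm].reshape(old_shape)


# Illustrative usage mirroring the patched PermutedMNIST.get_data_loaders;
# the dummy 28x28 image stands in for an MNIST sample.
transform = transforms.Compose((transforms.ToTensor(), Permutation(np.prod((28, 28)))))
dummy = np.random.randint(0, 256, size=(28, 28, 1), dtype=np.uint8)
out = transform(dummy)
print(out.shape)  # torch.Size([1, 28, 28]), pixels shuffled by the same permutation on every call
```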