Commit d8de400

Merge remote-tracking branch 'origin/dev' into HEAD

loribonna committed Feb 3, 2024
2 parents c44de18 + 01439c0
Showing 17 changed files with 311 additions and 229 deletions.
1 change: 1 addition & 0 deletions .gitignore
@@ -2,6 +2,7 @@
*.gif
*.pdf
data/
data
checkpoints/
*.png
*.pt
36 changes: 22 additions & 14 deletions README.md
@@ -127,27 +127,35 @@ We have created a [WIKI](https://aimagelab.github.io/mammoth/)! Check it out for

### Other Awesome CL works using Mammoth

- New Insights on Reducing Abrupt Representation Change in Online Continual Learning (**ICLR2022**) [[paper](https://openreview.net/pdf?id=N8MaByOzUfb)] [[code](https://github.com/pclucas14/AML)]
- Learning fast, learning slow: A general continual learning method based on complementary learning system (**ICLR2022**) [[paper](https://openreview.net/pdf?id=uxxFrDwrE7Y)] [[code](https://github.com/NeurAI-Lab/CLS-ER)]
- Self-supervised models are continual learners (**CVPR2022**) [[paper](https://arxiv.org/abs/2112.04215)] [[code](https://github.com/DonkeyShot21/cassle)]
***Get in touch if we missed your awesome work!***

- Decoupling Learning and Remembering: a Bilevel Memory Framework with Knowledge Projection for Task-Incremental Learning (**CVPR2023**) [[paper](https://openaccess.thecvf.com/content/CVPR2023/papers/Sun_Decoupling_Learning_and_Remembering_A_Bilevel_Memory_Framework_With_Knowledge_CVPR_2023_paper.pdf)] [[code](https://github.com/SunWenJu123/BMKP)]
- Regularizing Second-Order Influences for Continual Learning (**CVPR2023**) [[paper](https://openaccess.thecvf.com/content/CVPR2023/papers/Sun_Regularizing_Second-Order_Influences_for_Continual_Learning_CVPR_2023_paper.pdf)] [[code](https://github.com/feifeiobama/InfluenceCL)]
- Sparse Coding in a Dual Memory System for Lifelong Learning (**CVPR2023**) [[paper](https://arxiv.org/pdf/2301.05058.pdf)] [[code](https://github.com/NeurAI-Lab/SCoMMER)]
- A Unified Approach to Domain Incremental Learning with Memory: Theory and Algorithm (**CVPR2023**) [[paper](https://arxiv.org/pdf/2310.12244.pdf)] [[code](https://github.com/Wang-ML-Lab/unified-continual-learning)]
- A Multi-Head Model for Continual Learning via Out-of-Distribution Replay (**CVPR2023**) [[paper](https://arxiv.org/pdf/2208.09734.pdf)] [[code](https://github.com/k-gyuhak/MORE)]
- Preserving Linear Separability in Continual Learning by Backward Feature Projection (**CVPR2023**) [[paper](https://arxiv.org/pdf/2303.14595.pdf)] [[code](https://github.com/rvl-lab-utoronto/BFP)]
- Complementary Calibration: Boosting General Continual Learning With Collaborative Distillation and Self-Supervision (**TIP2023**) [[paper](https://ieeexplore.ieee.org/document/10002397)] [[code](https://github.com/lijincm/CoCa)]
- Continual Learning by Modeling Intra-Class Variation (**TMLR2023**) [[paper](https://arxiv.org/abs/2210.05398)] [[code](https://github.com/yulonghui/MOCA)]
- ConSlide: Asynchronous Hierarchical Interaction Transformer with Breakup-Reorganize Rehearsal for Continual Whole Slide Image Analysis (**ICCV2023**) [[paper](https://openaccess.thecvf.com/content/ICCV2023/papers/Huang_ConSlide_Asynchronous_Hierarchical_Interaction_Transformer_with_Breakup-Reorganize_Rehearsal_for_Continual_ICCV_2023_paper.pdf)] [[code](https://github.com/HKU-MedAI/ConSlide)]
- CBA: Improving Online Continual Learning via Continual Bias Adaptor (**ICCV2023**) [[paper](https://arxiv.org/pdf/2308.06925.pdf)] [[code](https://github.com/wqza/CBA-online-CL)]
- Neuro-Symbolic Continual Learning: Knowledge, Reasoning Shortcuts and Concept Rehearsal (**ICML2023**) [[paper](https://arxiv.org/pdf/2302.01242.pdf)] [[code](https://github.com/ema-marconato/NeSy-CL)]
- Pretrained Language Model in Continual Learning: a Comparative Study (**ICLR2022**) [[paper](https://openreview.net/pdf?id=figzpGMrdD)] [[code](https://github.com/wutong8023/PLM4CL)]
- Representational continuity for unsupervised continual learning (**ICLR2022**) [[paper](https://openreview.net/pdf?id=9Hrka5PA7LW)] [[code](https://github.com/divyam3897/UCL)]
- Continual Learning by Modeling Intra-Class Variation (**TMLR 2023**) [[paper](https://arxiv.org/abs/2210.05398)] [[code](https://github.com/yulonghui/MOCA)]
- Consistency is the key to further Mitigating Catastrophic Forgetting in Continual Learning (**CoLLAs2022**) [[paper](https://arxiv.org/pdf/2207.04998.pdf)] [[code](https://github.com/NeurAI-Lab/ConsistencyCL)]
- Continual Normalization: Rethinking Batch Normalization for Online Continual Learning (**ICLR2022**) [[paper](https://arxiv.org/abs/2203.16102)] [[code](https://github.com/phquang/Continual-Normalization)]
- NISPA: Neuro-Inspired Stability-Plasticity Adaptation for Continual Learning in Sparse Networks (**ICML2022**) [[paper](https://arxiv.org/abs/2206.09117)]
- Learning from Students: Online Contrastive Distillation Network for General Continual Learning (**IJCAI2022**) [[paper](https://www.ijcai.org/proceedings/2022/0446.pdf)] [[code](https://github.com/lijincm/OCD-Net)]
- Learning Fast, Learning Slow: A General Continual Learning Method based on Complementary Learning System (**ICLR2022**) [[paper](https://arxiv.org/pdf/2201.12604.pdf)] [[code](https://github.com/NeurAI-Lab/CLS-ER)]
- New Insights on Reducing Abrupt Representation Change in Online Continual Learning (**ICLR2022**) [[paper](https://openreview.net/pdf?id=N8MaByOzUfb)] [[code](https://github.com/pclucas14/AML)]
- Looking Back on Learned Experiences for Class/Task Incremental Learning (**ICLR2022**) [[paper](https://openreview.net/pdf?id=RxplU3vmBx)] [[code](https://github.com/MozhganPourKeshavarz/Cost-Free-Incremental-Learning)]
- Task Agnostic Representation Consolidation: a Self-supervised based Continual Learning Approach (**CoLLAs2022**) [[paper](https://arxiv.org/pdf/2207.06267.pdf)] [[code](https://github.com/NeurAI-Lab/TARC)]
- Consistency is the key to further Mitigating Catastrophic Forgetting in Continual Learning (**CoLLAs2022**) [[paper](https://arxiv.org/pdf/2207.04998.pdf)] [[code](https://github.com/NeurAI-Lab/ConsistencyCL)]
- Self-supervised models are continual learners (**CVPR2022**) [[paper](https://arxiv.org/abs/2112.04215)] [[code](https://github.com/DonkeyShot21/cassle)]
- Learning from Students: Online Contrastive Distillation Network for General Continual Learning (**IJCAI2022**) [[paper](https://www.ijcai.org/proceedings/2022/0446.pdf)] [[code](https://github.com/lijincm/OCD-Net)]

## Update Roadmap

In the near future, we plan to incorporate the following improvements into this master repository:

- ER+Tricks (_Rethinking Experience Replay: a Bag of Tricks for Continual Learning_)

Pull requests welcome! [Get in touch](mailto:[email protected])

### Contributing

Pull requests welcome!

Please use `autopep8` with parameters:

- `--aggressive`
1 change: 0 additions & 1 deletion datasets/seq_cifar100.py
@@ -152,7 +152,6 @@ def get_batch_size():

@staticmethod
def get_scheduler(model, args: Namespace) -> torch.optim.lr_scheduler:
model.opt = model.get_optimizer()
scheduler = ContinualDataset.get_scheduler(model, args)
if scheduler is None:
scheduler = torch.optim.lr_scheduler.MultiStepLR(model.opt, [35, 45], gamma=0.1, verbose=False)
2 changes: 2 additions & 0 deletions datasets/utils/continual_dataset.py
@@ -134,11 +134,13 @@ def get_denormalization_transform() -> nn.Module:
def get_scheduler(model, args: Namespace) -> torch.optim.lr_scheduler._LRScheduler:
"""Returns the scheduler to be used for the current dataset."""
if args.lr_scheduler is not None:
model.opt = model.get_optimizer()
# check if lr_scheduler is in torch.optim.lr_scheduler
supported_scheds = {sched_name.lower(): sched_name for sched_name in dir(scheds) if sched_name.lower() in ContinualDataset.AVAIL_SCHEDS}
sched = None
if args.lr_scheduler.lower() in supported_scheds:
if args.lr_scheduler.lower() == 'multisteplr':
assert args.lr_milestones is not None, 'MultiStepLR requires `--lr_milestones`'
sched = getattr(scheds, supported_scheds[args.lr_scheduler.lower()])(model.opt,
milestones=args.lr_milestones,
gamma=args.sched_multistep_lr_gamma)
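
The hunk above moves `model.opt = model.get_optimizer()` into the scheduler setup and makes `MultiStepLR` insist on `--lr_milestones`. For reference, a minimal standalone sketch of what that scheduler does in plain PyTorch (toy model and dummy loss; the `[35, 45]` milestones and `gamma=0.1` mirror the `seq_cifar100` defaults shown earlier, everything else here is illustrative, not Mammoth code):

```python
import torch
from torch import nn

# Standalone sketch (not Mammoth code): MultiStepLR multiplies the learning rate
# by `gamma` each time the epoch counter passes a milestone.
model = nn.Linear(10, 2)  # toy stand-in for the backbone
opt = torch.optim.SGD(model.parameters(), lr=0.1)
scheduler = torch.optim.lr_scheduler.MultiStepLR(opt, milestones=[35, 45], gamma=0.1)

for epoch in range(50):
    opt.zero_grad()
    loss = model(torch.randn(4, 10)).pow(2).mean()  # dummy loss for illustration
    loss.backward()
    opt.step()
    scheduler.step()  # lr: 0.1 for epochs 0-34, 0.01 for 35-44, 0.001 from epoch 45 on
```
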
2 changes: 1 addition & 1 deletion docs/getting_started/index.rst
@@ -22,7 +22,7 @@ WandB
For advanced logging, including loss values, metrics, and hyperparameters, you can use [WandB](https://wandb.ai/) by providing both ``--wandb_project`` and ``--wandb_entity`` arguments. If you don't want to use WandB, you can simply omit these arguments.

.. tip::
By default, all arguments, loss values, and metrics are logged. Thanks to the **autolog_wandb** (:ref:`module-models`), all the variables created in the **observe** that start with *loss* or *_wandb_* will be logged. Thus, in order to loss all the separate loss values, you can simply add ``loss = loss + loss1 + loss2`` to the **observe** function.
By default, all arguments, loss values, and metrics are logged. Thanks to the **autolog_wandb** (:ref:`module-models`), all the variables created in the **observe** method that contain *loss* or start with *_wandb_* will be logged. Thus, in order to log all the separate loss values, you can simply add ``loss = loss + loss1 + loss2`` to the **observe** function.
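
As a concrete illustration of that naming rule, here is a hedged sketch of an `observe` method (not taken from Mammoth; `self.net`, `self.loss`, and `self.opt` follow the pattern visible in `models/der.py` further down, and the extra regularization term is purely illustrative):

```python
def observe(self, inputs, labels, not_aug_inputs, epoch=None):
    # Sketch only: every local created here whose name contains "loss" is assumed
    # to be picked up by autolog_wandb and logged as its own WandB series.
    self.opt.zero_grad()
    outputs = self.net(inputs)
    loss_stream = self.loss(outputs, labels)   # logged as "loss_stream"
    loss_reg = 0.01 * outputs.pow(2).mean()    # illustrative extra term, logged as "loss_reg"
    loss = loss_stream + loss_reg              # combined total, logged as "loss"
    loss.backward()
    self.opt.step()
    return loss.item()
```
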

Metrics are logged on WandB both in a raw form, separated for each task and class. This allows further analysis (e.g., with the Mammoth :ref:`Parseval <module-parseval>`). To differentiate between raw metrics logged on WandB and other aggregated metrics that may have been logged, all the raw metrics are prefixed with **RESULTS_**. This behavior can be changed by changing the prefix in the **log_accs** function in :ref:`module-loggers`.

47 changes: 28 additions & 19 deletions docs/readme.rst
@@ -161,29 +161,38 @@ Our Papers
Other Awesome CL works using Mammoth
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

- `New Insights on Reducing Abrupt Representation Change in Online Continual Learning (ICLR2022) <https://openreview.net/pdf?id=N8MaByOzUfb>`_ (`code <https://github.com/pclucas14/AML>`_)
- `Learning fast, learning slow: A general continual learning method based on complementary learning system (ICLR2022) <https://openreview.net/pdf?id=uxxFrDwrE7Y>`_ (`code <https://github.com/NeurAI-Lab/CLS-ER>`_)
- `Self-supervised models are continual learners (CVPR2022) <https://arxiv.org/abs/2112.04215>`_ (`code <https://github.com/DonkeyShot21/cassle>`_)
- `Representational continuity for unsupervised continual learning (ICLR2022) <https://openreview.net/pdf?id=9Hrka5PA7LW>`_ (`code <https://github.com/divyam3897/UCL>`_)
- `Continual Learning by Modeling Intra-Class Variation (TMLR 2023) <https://arxiv.org/abs/2210.05398>`_ (`code <https://github.com/yulonghui/MOCA>`_)
- `Consistency is the key to further Mitigating Catastrophic Forgetting in Continual Learning (CoLLAs2022) <https://arxiv.org/pdf/2207.04998.pdf>`_ (`code <https://github.com/NeurAI-Lab/ConsistencyCL>`_)
- `Continual Normalization: Rethinking Batch Normalization for Online Continual Learning (ICLR2022) <https://arxiv.org/abs/2203.16102>`_ (`code <https://github.com/phquang/Continual-Normalization>`_)
- `NISPA: Neuro-Inspired Stability-Plasticity Adaptation for Continual Learning in Sparse Networks (ICML2022) <https://arxiv.org/abs/2206.09117>`_
- `Learning from Students: Online Contrastive Distillation Network for General Continual Learning (IJCAI2022) <https://www.ijcai.org/proceedings/2022/0446.pdf>`_ (`code <https://github.com/lijincm/OCD-Net>`_)
- `Learning Fast, Learning Slow: A General Continual Learning Method based on Complementary Learning System (ICLR2022) <https://arxiv.org/pdf/2201.12604.pdf>`_ (`code <https://github.com/NeurAI-Lab/CLS-ER>`_)

Update Roadmap
--------------

In the near future, we plan to incorporate the following improvements into this master repository:

- ER+Tricks (*Rethinking Experience Replay: a Bag of Tricks for Continual Learning*)

Pull requests welcome! `Get in touch <mailto:[email protected]>`_
.. admonition:: Get in touch if we missed your awesome work!


- `Decoupling Learning and Remembering: a Bilevel Memory Framework with Knowledge Projection for Task-Incremental Learning (CVPR2023) <https://openaccess.thecvf.com/content/CVPR2023/papers/Sun_Decoupling_Learning_and_Remembering_A_Bilevel_Memory_Framework_With_Knowledge_CVPR_2023_paper.pdf>`_ (`code <https://github.com/SunWenJu123/BMKP>`_)
- `Regularizing Second-Order Influences for Continual Learning (CVPR2023) <https://openaccess.thecvf.com/content/CVPR2023/papers/Sun_Regularizing_Second-Order_Influences_for_Continual_Learning_CVPR_2023_paper.pdf>`_ (`code <https://github.com/feifeiobama/InfluenceCL>`_)
- `Sparse Coding in a Dual Memory System for Lifelong Learning (CVPR2023) <https://arxiv.org/pdf/2301.05058.pdf>`_ (`code <https://github.com/NeurAI-Lab/SCoMMER>`_)
- `A Unified Approach to Domain Incremental Learning with Memory: Theory and Algorithm (CVPR2023) <https://arxiv.org/pdf/2310.12244.pdf>`_ (`code <https://github.com/Wang-ML-Lab/unified-continual-learning>`_)
- `A Multi-Head Model for Continual Learning via Out-of-Distribution Replay (CVPR2023) <https://arxiv.org/pdf/2208.09734.pdf>`_ (`code <https://github.com/k-gyuhak/MORE>`_)
- `Preserving Linear Separability in Continual Learning by Backward Feature Projection (CVPR2023) <https://arxiv.org/pdf/2303.14595.pdf>`_ (`code <https://github.com/rvl-lab-utoronto/BFP>`_)
- `Complementary Calibration: Boosting General Continual Learning With Collaborative Distillation and Self-Supervision (TIP2023) <https://ieeexplore.ieee.org/document/10002397>`_ (`code <https://github.com/lijincm/CoCa>`_)
- `Continual Learning by Modeling Intra-Class Variation (TMLR2023) <https://arxiv.org/abs/2210.05398>`_ (`code <https://github.com/yulonghui/MOCA>`_)
- `ConSlide: Asynchronous Hierarchical Interaction Transformer with Breakup-Reorganize Rehearsal for Continual Whole Slide Image Analysis (ICCV2023) <https://openaccess.thecvf.com/content/ICCV2023/papers/Huang_ConSlide_Asynchronous_Hierarchical_Interaction_Transformer_with_Breakup-Reorganize_Rehearsal_for_Continual_ICCV_2023_paper.pdf>`_ (`code <https://github.com/HKU-MedAI/ConSlide>`_)
- `CBA: Improving Online Continual Learning via Continual Bias Adaptor (ICCV2023) <https://arxiv.org/pdf/2308.06925.pdf>`_ (`code <https://github.com/wqza/CBA-online-CL>`_)
- `Neuro-Symbolic Continual Learning: Knowledge, Reasoning Shortcuts and Concept Rehearsal (ICML2023) <https://arxiv.org/pdf/2302.01242.pdf>`_ (`code <https://github.com/ema-marconato/NeSy-CL>`_)
- `Pretrained Language Model in Continual Learning: a Comparative Study (ICLR2022) <https://openreview.net/pdf?id=figzpGMrdD>`_ (`code <https://github.com/wutong8023/PLM4CL>`_)
- `Representational continuity for unsupervised continual learning (ICLR2022) <https://openreview.net/pdf?id=9Hrka5PA7LW>`_ (`code <https://github.com/divyam3897/UCL>`_)
- `Continual Normalization: Rethinking Batch Normalization for Online Continual Learning (ICLR2022) <https://arxiv.org/abs/2203.16102>`_ (`code <https://github.com/phquang/Continual-Normalization>`_)
- `Learning Fast, Learning Slow: A General Continual Learning Method based on Complementary Learning System (ICLR2022) <https://arxiv.org/pdf/2201.12604.pdf>`_ (`code <https://github.com/NeurAI-Lab/CLS-ER>`_)
- `New Insights on Reducing Abrupt Representation Change in Online Continual Learning (ICLR2022) <https://openreview.net/pdf?id=N8MaByOzUfb>`_ (`code <https://github.com/pclucas14/AML>`_)
- `Looking Back on Learned Experiences for Class/Task Incremental Learning (ICLR2022) <https://openreview.net/pdf?id=RxplU3vmBx>`_ (`code <https://github.com/MozhganPourKeshavarz/Cost-Free-Incremental-Learning>`_)
- `Task Agnostic Representation Consolidation: a Self-supervised based Continual Learning Approach (CoLLAs2022) <https://arxiv.org/pdf/2207.06267.pdf>`_ (`code <https://github.com/NeurAI-Lab/TARC>`_)
- `Consistency is the key to further Mitigating Catastrophic Forgetting in Continual Learning (CoLLAs2022) <https://arxiv.org/pdf/2207.04998.pdf>`_ (`code <https://github.com/NeurAI-Lab/ConsistencyCL>`_)
- `Self-supervised models are continual learners (CVPR2022) <https://arxiv.org/abs/2112.04215>`_ (`code <https://github.com/DonkeyShot21/cassle>`_)
- `Learning from Students: Online Contrastive Distillation Network for General Continual Learning (IJCAI2022) <https://www.ijcai.org/proceedings/2022/0446.pdf>`_ (`code <https://github.com/lijincm/OCD-Net>`_)

Contributing
------------

Pull requests welcome!

Please use `autopep8` with parameters:

- `--aggressive`
8 changes: 4 additions & 4 deletions models/der.py
@@ -34,16 +34,16 @@ def observe(self, inputs, labels, not_aug_inputs, epoch=None):

outputs = self.net(inputs)
loss = self.loss(outputs, labels)
tot_loss += loss.item()
loss.backward()
tot_loss += loss.item()

if not self.buffer.is_empty():
buf_inputs, buf_logits = self.buffer.get_data(
self.args.minibatch_size, transform=self.transform, device=self.device)
buf_outputs = self.net(buf_inputs)
loss = self.args.alpha * F.mse_loss(buf_outputs, buf_logits)
tot_loss += loss.item()
loss.backward()
loss_mse = self.args.alpha * F.mse_loss(buf_outputs, buf_logits)
loss_mse.backward()
tot_loss += loss_mse.item()

self.opt.step()
self.buffer.add_data(examples=not_aug_inputs, logits=outputs.data)
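
The reordering above only affects bookkeeping: `tot_loss` still sums the detached loss values, and splitting the replay term into its own `loss_mse` with a separate `backward()` leaves the gradients untouched, because successive `backward()` calls accumulate into `.grad` until `opt.step()`. A small self-contained check of that equivalence (toy tensors and an illustrative replay weight, not Mammoth code):

```python
import torch
from torch import nn

# Toy check (not Mammoth code): one backward() per loss term accumulates the same
# gradients as a single backward() on the summed loss, provided each term comes
# from its own forward pass -- exactly the situation in der.py / derpp.py.
torch.manual_seed(0)
x1, y1 = torch.randn(8, 4), torch.randn(8, 2)  # stands in for the stream batch
x2, y2 = torch.randn(8, 4), torch.randn(8, 2)  # stands in for the buffer batch
alpha = 0.3                                    # illustrative replay weight

net_a = nn.Linear(4, 2)
net_b = nn.Linear(4, 2)
net_b.load_state_dict(net_a.state_dict())      # identical weights

# Per-term backward, DER-style.
nn.functional.mse_loss(net_a(x1), y1).backward()
(alpha * nn.functional.mse_loss(net_a(x2), y2)).backward()

# Single backward on the summed loss.
(nn.functional.mse_loss(net_b(x1), y1) + alpha * nn.functional.mse_loss(net_b(x2), y2)).backward()

for p_a, p_b in zip(net_a.parameters(), net_b.parameters()):
    assert torch.allclose(p_a.grad, p_b.grad, atol=1e-6)
print("per-term backward matches backward on the sum")
```
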
14 changes: 7 additions & 7 deletions models/derpp.py
@@ -37,23 +37,23 @@ def observe(self, inputs, labels, not_aug_inputs, epoch=None):
outputs = self.net(inputs)

loss = self.loss(outputs, labels)
tot_loss = loss.item()
loss.backward()
tot_loss = loss.item()

if not self.buffer.is_empty():
buf_inputs, _, buf_logits = self.buffer.get_data(self.args.minibatch_size, transform=self.transform, device=self.device)

buf_outputs = self.net(buf_inputs)
loss = self.args.alpha * F.mse_loss(buf_outputs, buf_logits)
tot_loss += loss.item()
loss.backward()
loss_mse = self.args.alpha * F.mse_loss(buf_outputs, buf_logits)
loss_mse.backward()
tot_loss += loss_mse.item()

buf_inputs, buf_labels, _ = self.buffer.get_data(self.args.minibatch_size, transform=self.transform, device=self.device)

buf_outputs = self.net(buf_inputs)
loss = self.args.beta * self.loss(buf_outputs, buf_labels)
tot_loss += loss.item()
loss.backward()
loss_ce = self.args.beta * self.loss(buf_outputs, buf_labels)
loss_ce.backward()
tot_loss += loss_ce.item()

self.opt.step()

18 changes: 0 additions & 18 deletions models/gdumb_lider.py
@@ -7,24 +7,6 @@
from utils.status import progress_bar


def get_parser() -> ArgumentParser:
parser = ArgumentParser(description='GDumb learns an empty model only on the buffer.'
'Treated with LiDER!')
add_management_args(parser)
add_rehearsal_args(parser)
parser.add_argument('--maxlr', type=float, default=5e-2,
help='Max learning rate.')
parser.add_argument('--minlr', type=float, default=5e-4,
help='Min learning rate.')
parser.add_argument('--fitting_epochs', type=int, default=256,
help='Number of epochs to fit the buffer.')
parser.add_argument('--cutmix_alpha', type=float, default=1.0,
help='Alpha parameter for cutmix')
add_experiment_args(parser)
add_lipschitz_args(parser)

return parser


def fit_buffer(self: LiderOptimizer, epochs):
optimizer = SGD(self.get_parameters(), lr=self.args.maxlr, momentum=self.args.optim_mom, weight_decay=self.args.optim_wd, nesterov=self.args.optim_nesterov)
11 changes: 0 additions & 11 deletions models/lwf_mc.py
@@ -11,17 +11,6 @@
from models.utils.continual_model import ContinualModel


def get_parser() -> ArgumentParser:
parser = ArgumentParser(description='Learning without Forgetting - Multi-Class.')

add_management_args(parser)
add_experiment_args(parser)

parser.add_argument('--wd_reg', type=float, default=0.0,
help='L2 regularization applied to the parameters.')
return parser


class LwFMC(ContinualModel):
NAME = 'lwf_mc'
COMPATIBILITY = ['class-il', 'task-il']