Custom scheduler and optimizer classes
JSabadin committed Nov 14, 2024
1 parent 5c3a3a7 commit cefe3a6
Showing 6 changed files with 159 additions and 104 deletions.
2 changes: 1 addition & 1 deletion luxonis_train/config/config.py
@@ -328,7 +328,6 @@ class CallbackConfig(BaseModelExtraForbid):

 class OptimizerConfig(BaseModelExtraForbid):
     name: str = "Adam"
-    apply_custom_lr: bool = False
     params: Params = {}


@@ -356,6 +355,7 @@ class TrainerConfig(BaseModelExtraForbid):
     profiler: Literal["simple", "advanced"] | None = None
     matmul_precision: Literal["medium", "high", "highest"] | None = None
     verbose: bool = True
+    apply_custom_lr: bool = False

     seed: int | None = None
     n_validation_batches: PositiveInt | None = None
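
For orientation, a minimal sketch (not part of the diff) of the trainer section this change implies, written as the plain dict the Pydantic models above would validate. The nested optimizer/scheduler blocks follow self.cfg.trainer.optimizer and self.cfg.trainer.scheduler as used in luxonis_lightning.py below; the parameter values are illustrative placeholders.

# Hypothetical config fragment: `apply_custom_lr` now lives on the trainer,
# not on the optimizer, and selects the custom classes registered below.
trainer = {
    "apply_custom_lr": True,  # moved here from OptimizerConfig
    "optimizer": {
        "name": "TripleLRSGD",
        "params": {
            "lr": 0.02,
            "momentum": 0.937,
            "nesterov": True,
            "weight_decay": 0.0005,
        },
    },
    "scheduler": {
        "name": "TripleLRScheduler",
        "params": {
            "warmup_epochs": 3,
            "warmup_bias_lr": 0.1,
            "warmup_momentum": 0.8,
            "lre": 0.0002,
        },
    },
}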
125 changes: 22 additions & 103 deletions luxonis_train/models/luxonis_lightning.py
@@ -6,7 +6,6 @@
 from typing import Literal, cast

 import lightning.pytorch as pl
-import numpy as np
 import torch
 from lightning.pytorch.callbacks import ModelCheckpoint, RichModelSummary
 from lightning.pytorch.utilities import rank_zero_only  # type: ignore
@@ -865,70 +864,32 @@ def configure_optimizers(
         cfg_optimizer = self.cfg.trainer.optimizer
         cfg_scheduler = self.cfg.trainer.scheduler

-        apply_custom_lr = cfg_optimizer.apply_custom_lr
+        if self.cfg.trainer.apply_custom_lr:
+            assert (
+                cfg_optimizer.name == "TripleLRSGD"
+            ), "Custom learning rate is only supported for TripleLRSGD optimizer."
+            assert (
+                cfg_scheduler.name == "TripleLRScheduler"
+            ), "Custom learning rate is only supported for TripleLRScheduler scheduler."

-        if apply_custom_lr:
-            assert cfg_optimizer.name == "SGD", (
-                "Custom learning rates are supported only for SGD optimizer. "
-                f"Got {cfg_optimizer.name}."
-            )
-            self.max_stepnum = math.ceil(
+            max_stepnum = math.ceil(
                 len(self._core.loaders["train"]) / self.cfg.trainer.batch_size
             )
-            self.warmup_stepnum = max(
-                round(
-                    self.cfg.trainer.optimizer.params["warmup_epochs"]
-                    * self.max_stepnum
-                ),
-                1000,
-            )
-            self.step = 0
-            batch_norm_weights, regular_weights, biases = [], [], []
-            for module in self.modules():
-                if hasattr(module, "bias") and isinstance(
-                    module.bias, torch.nn.Parameter
-                ):
-                    biases.append(module.bias)
-                if isinstance(module, torch.nn.BatchNorm2d):
-                    batch_norm_weights.append(module.weight)
-                elif hasattr(module, "weight") and isinstance(
-                    module.weight, torch.nn.Parameter
-                ):
-                    regular_weights.append(module.weight)

-            optimizer = torch.optim.SGD(
-                [
-                    {
-                        "params": batch_norm_weights,
-                        "lr": cfg_optimizer.params["lr"],
-                        "momentum": cfg_optimizer.params["momentum"],
-                        "nesterov": True,
-                    },
-                    {
-                        "params": regular_weights,
-                        "weight_decay": cfg_optimizer.params["weight_decay"],
-                    },
-                    {"params": biases},
-                ],
-                lr=cfg_optimizer.params["lr"],
-                momentum=cfg_optimizer.params["momentum"],
-                nesterov=cfg_optimizer.params["nesterov"],
+            custom_optimizer = OPTIMIZERS.get(cfg_optimizer.name)(
+                self, cfg_optimizer.params
             )
+            optimizer = custom_optimizer.create_optimizer()

-            lrf = (
-                self.cfg.trainer.optimizer.params["lre"]
-                / self.cfg.trainer.optimizer.params["lr"]
-            )
-            self.lf = (
-                lambda x: (
-                    (1 - math.cos(x * math.pi / self.cfg.trainer.epochs)) / 2
-                )
-                * (lrf - 1)
-                + 1
-            )
-            scheduler = torch.optim.lr_scheduler.LambdaLR(
-                optimizer, lr_lambda=self.lf
+            custom_scheduler = SCHEDULERS.get(cfg_scheduler.name)(
+                optimizer,
+                cfg_scheduler.params,
+                self.cfg.trainer.epochs,
+                max_stepnum,
             )
+            scheduler = custom_scheduler.create_scheduler()

+            self.custom_scheduler = custom_scheduler

             return [optimizer], [scheduler]

         else:

GitHub Actions type-check annotations on the new lines:
• line 876: "loaders" is not a known attribute of "None" (reportOptionalMemberAccess)
• line 879: Argument of type "Self@LuxonisLightningModule" cannot be assigned to parameter "params" of type "ParamsT" in function "__init__" (reportArgumentType)
• line 881: Cannot access attribute "create_optimizer" for class "Optimizer" (reportAttributeAccessIssue)
• line 887: Expected 3 positional arguments (reportCallIssue)
• line 889: Cannot access attribute "create_scheduler" for class "LRScheduler" (reportAttributeAccessIssue)
@@ -969,50 +930,8 @@ def get_scheduler(scheduler_cfg, optimizer):
     def on_after_backward(self):
         """Custom logic to adjust learning rates and momentum after
         loss.backward."""
-        if self.cfg.trainer.optimizer.apply_custom_lr:
-            self.custom_logic()

-    def custom_logic(self):
-        """Custom logic to adjust learning rates and momentum after
-        loss.backward."""

-        # Increment step counter
-        self.step = (
-            self.step % self.max_stepnum
-        )  # Reset step counter after each epoch
-        curr_step = self.step + self.max_stepnum * self.current_epoch

-        # Warm-up phase adjustments
-        if curr_step <= self.warmup_stepnum:
-            optimizer = self.optimizers()
-            for k, param in enumerate(optimizer.param_groups):
-                warmup_bias_lr = (
-                    self.cfg.trainer.optimizer.params["warmup_bias_lr"]
-                    if k == 2
-                    else 0.0
-                )
-                param["lr"] = np.interp(
-                    curr_step,
-                    [0, self.warmup_stepnum],
-                    [
-                        warmup_bias_lr,
-                        self.cfg.trainer.optimizer.params["lr"]
-                        * self.lf(self.current_epoch),
-                    ],
-                )
-                if "momentum" in param:
-                    param["momentum"] = np.interp(
-                        curr_step,
-                        [0, self.warmup_stepnum],
-                        [
-                            self.cfg.trainer.optimizer.params[
-                                "warmup_momentum"
-                            ],
-                            self.cfg.trainer.optimizer.params["momentum"],
-                        ],
-                    )

-        self.step += 1
+        if self.cfg.trainer.apply_custom_lr:
+            self.custom_scheduler.update_learning_rate(self.current_epoch)

     def load_checkpoint(self, path: str | Path | None) -> None:
         """Loads checkpoint weights from provided path.

GitHub Actions type-check annotation on the new lines:
• line 934: Cannot access attribute "update_learning_rate" for class "LRScheduler" (reportAttributeAccessIssue)
51 changes: 51 additions & 0 deletions luxonis_train/optimizers/custom_optimizers.py
@@ -0,0 +1,51 @@
import torch


class TripleLRSGD:
    def __init__(self, model: torch.nn.Module, params: dict) -> None:
        """TripleLRSGD is a custom optimizer that separates a model's
        parameters into batch norm weights, regular weights, and biases.

        @type model: torch.nn.Module
        @param model: The model to be used
        @type params: dict
        @param params: The parameters to be used for the optimizer
        """
        self.model = model
        self.params = params

    def create_optimizer(self):
        batch_norm_weights, regular_weights, biases = [], [], []

        for module in self.model.modules():
            if hasattr(module, "bias") and isinstance(
                module.bias, torch.nn.Parameter
            ):
                biases.append(module.bias)
            if isinstance(module, torch.nn.BatchNorm2d):
                batch_norm_weights.append(module.weight)
            elif hasattr(module, "weight") and isinstance(
                module.weight, torch.nn.Parameter
            ):
                regular_weights.append(module.weight)

        optimizer = torch.optim.SGD(
            [
                {
                    "params": batch_norm_weights,
                    "lr": self.params["lr"],
                    "momentum": self.params["momentum"],
                    "nesterov": self.params["nesterov"],
                },
                {
                    "params": regular_weights,
                    "weight_decay": self.params["weight_decay"],
                },
                {"params": biases},
            ],
            lr=self.params["lr"],
            momentum=self.params["momentum"],
            nesterov=self.params["nesterov"],
        )

        return optimizer
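
A short usage sketch (not part of the commit) showing how TripleLRSGD splits a model's parameters into the three groups above; the toy model and the parameter values are illustrative assumptions.

import torch

from luxonis_train.optimizers.custom_optimizers import TripleLRSGD

# Toy model containing regular weights, BatchNorm weights, and biases.
model = torch.nn.Sequential(
    torch.nn.Conv2d(3, 8, 3),
    torch.nn.BatchNorm2d(8),
    torch.nn.ReLU(),
)

optimizer = TripleLRSGD(
    model,
    {"lr": 0.02, "momentum": 0.937, "nesterov": True, "weight_decay": 0.0005},
).create_optimizer()

# Group 0: BatchNorm weights (no weight decay), group 1: regular weights
# (decayed), group 2: biases.
print([len(group["params"]) for group in optimizer.param_groups])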
3 changes: 3 additions & 0 deletions luxonis_train/optimizers/optimizers.py
@@ -2,6 +2,8 @@

 from luxonis_train.utils.registry import OPTIMIZERS

+from .custom_optimizers import TripleLRSGD
+
 for optimizer in [
     optim.Adadelta,
     optim.Adagrad,
@@ -15,5 +17,6 @@
     optim.RAdam,
     optim.RMSprop,
     optim.SGD,
+    TripleLRSGD,
 ]:
     OPTIMIZERS.register_module(module=optimizer)
79 changes: 79 additions & 0 deletions luxonis_train/schedulers/custom_schedulers.py
@@ -0,0 +1,79 @@
import math

import numpy as np
import torch


class TripleLRScheduler:
    def __init__(
        self,
        optimizer: torch.optim.Optimizer,
        params: dict,
        epochs: int,
        max_stepnum: int,
    ) -> None:
        """TripleLRScheduler is a custom learning rate scheduler that
        combines a linear warm-up with cosine annealing.

        @type optimizer: torch.optim.Optimizer
        @param optimizer: The optimizer to be used
        @type params: dict
        @param params: The parameters to be used for the scheduler
        @type epochs: int
        @param epochs: The number of epochs to train for
        @type max_stepnum: int
        @param max_stepnum: The number of training steps per epoch
        """
        self.optimizer = optimizer
        self.params = params
        self.max_stepnum = max_stepnum
        self.warmup_stepnum = max(
            round(self.params["warmup_epochs"] * self.max_stepnum), 1000
        )
        self.step = 0
        self.lrf = self.params["lre"] / self.optimizer.defaults["lr"]
        self.lf = (
            lambda x: ((1 - math.cos(x * math.pi / epochs)) / 2)
            * (self.lrf - 1)
            + 1
        )

    def create_scheduler(self):
        scheduler = torch.optim.lr_scheduler.LambdaLR(
            self.optimizer, lr_lambda=self.lf
        )
        return scheduler

    def update_learning_rate(self, current_epoch: int) -> None:
        """Update the learning rate based on the current epoch.

        @type current_epoch: int
        @param current_epoch: The current epoch
        """
        self.step = self.step % self.max_stepnum
        curr_step = self.step + self.max_stepnum * current_epoch

        if curr_step <= self.warmup_stepnum:
            for k, param in enumerate(self.optimizer.param_groups):
                warmup_bias_lr = (
                    self.params["warmup_bias_lr"] if k == 2 else 0.0
                )
                param["lr"] = np.interp(
                    curr_step,
                    [0, self.warmup_stepnum],
                    [
                        warmup_bias_lr,
                        self.optimizer.defaults["lr"] * self.lf(current_epoch),
                    ],
                )
                if "momentum" in param:
                    param["momentum"] = np.interp(
                        curr_step,
                        [0, self.warmup_stepnum],
                        [
                            self.params["warmup_momentum"],
                            self.optimizer.defaults["momentum"],
                        ],
                    )

        self.step += 1
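
And a sketch (again, not part of the commit) of how the scheduler is meant to be driven: create_scheduler() returns a LambdaLR that applies the per-epoch cosine decay, while update_learning_rate() is called once per training step for the warm-up, mirroring on_after_backward in luxonis_lightning.py above. The loop, epoch count, steps per epoch, and parameter values are illustrative assumptions.

import torch

from luxonis_train.optimizers.custom_optimizers import TripleLRSGD
from luxonis_train.schedulers.custom_schedulers import TripleLRScheduler

model = torch.nn.Sequential(torch.nn.Conv2d(3, 8, 3), torch.nn.BatchNorm2d(8))
optimizer = TripleLRSGD(
    model,
    {"lr": 0.02, "momentum": 0.937, "nesterov": True, "weight_decay": 0.0005},
).create_optimizer()

epochs, max_stepnum = 100, 500  # steps per epoch, as computed in configure_optimizers
custom_scheduler = TripleLRScheduler(
    optimizer,
    {"warmup_epochs": 3, "warmup_bias_lr": 0.1, "warmup_momentum": 0.8, "lre": 0.0002},
    epochs,
    max_stepnum,
)
lr_scheduler = custom_scheduler.create_scheduler()

for epoch in range(epochs):
    for _ in range(max_stepnum):
        # forward pass, loss.backward(), and optimizer.step() would go here
        custom_scheduler.update_learning_rate(epoch)  # per-step warm-up
    lr_scheduler.step()  # per-epoch cosine decay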
3 changes: 3 additions & 0 deletions luxonis_train/schedulers/schedulers.py
@@ -2,6 +2,8 @@

 from luxonis_train.utils.registry import SCHEDULERS

+from .custom_schedulers import TripleLRScheduler
+
 for scheduler in [
     lr_scheduler.LambdaLR,
     lr_scheduler.MultiplicativeLR,
@@ -18,5 +20,6 @@
     lr_scheduler.CyclicLR,
     lr_scheduler.OneCycleLR,
     lr_scheduler.CosineAnnealingWarmRestarts,
+    TripleLRScheduler,
 ]:
     SCHEDULERS.register_module(module=scheduler)
