Custom scheduler and optimizer classes
JSabadin committed Nov 14, 2024
1 parent 5c3a3a7 commit cefe3a6
Showing 6 changed files with 159 additions and 104 deletions.
2 changes: 1 addition & 1 deletion luxonis_train/config/config.py
@@ -328,7 +328,6 @@ class CallbackConfig(BaseModelExtraForbid):

 class OptimizerConfig(BaseModelExtraForbid):
     name: str = "Adam"
-    apply_custom_lr: bool = False
     params: Params = {}


@@ -356,6 +355,7 @@ class TrainerConfig(BaseModelExtraForbid):
     profiler: Literal["simple", "advanced"] | None = None
     matmul_precision: Literal["medium", "high", "highest"] | None = None
     verbose: bool = True
+    apply_custom_lr: bool = False

     seed: int | None = None
     n_validation_batches: PositiveInt | None = None
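
For orientation, a minimal sketch (not part of the diff) of the trainer section this change implies, written as the plain dict the Pydantic models above would validate. The nested optimizer/scheduler blocks follow self.cfg.trainer.optimizer and self.cfg.trainer.scheduler as used in luxonis_lightning.py below; the parameter values are illustrative placeholders.

# Hypothetical config fragment: `apply_custom_lr` now lives on the trainer,
# not on the optimizer, and selects the custom classes registered below.
trainer = {
    "apply_custom_lr": True,  # moved here from OptimizerConfig
    "optimizer": {
        "name": "TripleLRSGD",
        "params": {
            "lr": 0.02,
            "momentum": 0.937,
            "nesterov": True,
            "weight_decay": 0.0005,
        },
    },
    "scheduler": {
        "name": "TripleLRScheduler",
        "params": {
            "warmup_epochs": 3,
            "warmup_bias_lr": 0.1,
            "warmup_momentum": 0.8,
            "lre": 0.0002,
        },
    },
}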
125 changes: 22 additions & 103 deletions luxonis_train/models/luxonis_lightning.py
@@ -6,7 +6,6 @@
 from typing import Literal, cast

 import lightning.pytorch as pl
-import numpy as np
 import torch
 from lightning.pytorch.callbacks import ModelCheckpoint, RichModelSummary
 from lightning.pytorch.utilities import rank_zero_only  # type: ignore
@@ -865,70 +864,32 @@ def configure_optimizers(
         cfg_optimizer = self.cfg.trainer.optimizer
         cfg_scheduler = self.cfg.trainer.scheduler

-        apply_custom_lr = cfg_optimizer.apply_custom_lr
+        if self.cfg.trainer.apply_custom_lr:
+            assert (
+                cfg_optimizer.name == "TripleLRSGD"
+            ), "Custom learning rate is only supported for TripleLRSGD optimizer."
+            assert (
+                cfg_scheduler.name == "TripleLRScheduler"
+            ), "Custom learning rate is only supported for TripleLRScheduler scheduler."

-        if apply_custom_lr:
-            assert cfg_optimizer.name == "SGD", (
-                "Custom learning rates are supported only for SGD optimizer. "
-                f"Got {cfg_optimizer.name}."
-            )
-            self.max_stepnum = math.ceil(
+            max_stepnum = math.ceil(
                 len(self._core.loaders["train"]) / self.cfg.trainer.batch_size
             )
-            self.warmup_stepnum = max(
-                round(
-                    self.cfg.trainer.optimizer.params["warmup_epochs"]
-                    * self.max_stepnum
-                ),
-                1000,
-            )
-            self.step = 0
-            batch_norm_weights, regular_weights, biases = [], [], []
-            for module in self.modules():
-                if hasattr(module, "bias") and isinstance(
-                    module.bias, torch.nn.Parameter
-                ):
-                    biases.append(module.bias)
-                if isinstance(module, torch.nn.BatchNorm2d):
-                    batch_norm_weights.append(module.weight)
-                elif hasattr(module, "weight") and isinstance(
-                    module.weight, torch.nn.Parameter
-                ):
-                    regular_weights.append(module.weight)

-            optimizer = torch.optim.SGD(
-                [
-                    {
-                        "params": batch_norm_weights,
-                        "lr": cfg_optimizer.params["lr"],
-                        "momentum": cfg_optimizer.params["momentum"],
-                        "nesterov": True,
-                    },
-                    {
-                        "params": regular_weights,
-                        "weight_decay": cfg_optimizer.params["weight_decay"],
-                    },
-                    {"params": biases},
-                ],
-                lr=cfg_optimizer.params["lr"],
-                momentum=cfg_optimizer.params["momentum"],
-                nesterov=cfg_optimizer.params["nesterov"],
+            custom_optimizer = OPTIMIZERS.get(cfg_optimizer.name)(
+                self, cfg_optimizer.params
             )
+            optimizer = custom_optimizer.create_optimizer()

-            lrf = (
-                self.cfg.trainer.optimizer.params["lre"]
-                / self.cfg.trainer.optimizer.params["lr"]
-            )
-            self.lf = (
-                lambda x: (
-                    (1 - math.cos(x * math.pi / self.cfg.trainer.epochs)) / 2
-                )
-                * (lrf - 1)
-                + 1
-            )
-            scheduler = torch.optim.lr_scheduler.LambdaLR(
-                optimizer, lr_lambda=self.lf
+            custom_scheduler = SCHEDULERS.get(cfg_scheduler.name)(
+                optimizer,
+                cfg_scheduler.params,
+                self.cfg.trainer.epochs,
+                max_stepnum,
             )
+            scheduler = custom_scheduler.create_scheduler()

+            self.custom_scheduler = custom_scheduler

             return [optimizer], [scheduler]

         else:

GitHub Actions type-check annotations on the new lines:
• line 876: "loaders" is not a known attribute of "None" (reportOptionalMemberAccess)
• line 879: Argument of type "Self@LuxonisLightningModule" cannot be assigned to parameter "params" of type "ParamsT" in function "__init__" (reportArgumentType)
• line 881: Cannot access attribute "create_optimizer" for class "Optimizer" (reportAttributeAccessIssue)
• line 887: Expected 3 positional arguments (reportCallIssue)
• line 889: Cannot access attribute "create_scheduler" for class "LRScheduler" (reportAttributeAccessIssue)
@@ -969,50 +930,8 @@ def get_scheduler(scheduler_cfg, optimizer):
     def on_after_backward(self):
         """Custom logic to adjust learning rates and momentum after
         loss.backward."""
-        if self.cfg.trainer.optimizer.apply_custom_lr:
-            self.custom_logic()

-    def custom_logic(self):
-        """Custom logic to adjust learning rates and momentum after
-        loss.backward."""

-        # Increment step counter
-        self.step = (
-            self.step % self.max_stepnum
-        )  # Reset step counter after each epoch
-        curr_step = self.step + self.max_stepnum * self.current_epoch

-        # Warm-up phase adjustments
-        if curr_step <= self.warmup_stepnum:
-            optimizer = self.optimizers()
-            for k, param in enumerate(optimizer.param_groups):
-                warmup_bias_lr = (
-                    self.cfg.trainer.optimizer.params["warmup_bias_lr"]
-                    if k == 2
-                    else 0.0
-                )
-                param["lr"] = np.interp(
-                    curr_step,
-                    [0, self.warmup_stepnum],
-                    [
-                        warmup_bias_lr,
-                        self.cfg.trainer.optimizer.params["lr"]
-                        * self.lf(self.current_epoch),
-                    ],
-                )
-                if "momentum" in param:
-                    param["momentum"] = np.interp(
-                        curr_step,
-                        [0, self.warmup_stepnum],
-                        [
-                            self.cfg.trainer.optimizer.params[
-                                "warmup_momentum"
-                            ],
-                            self.cfg.trainer.optimizer.params["momentum"],
-                        ],
-                    )

-        self.step += 1
+        if self.cfg.trainer.apply_custom_lr:
+            self.custom_scheduler.update_learning_rate(self.current_epoch)

     def load_checkpoint(self, path: str | Path | None) -> None:
         """Loads checkpoint weights from provided path.

GitHub Actions type-check annotation on the new lines:
• line 934: Cannot access attribute "update_learning_rate" for class "LRScheduler" (reportAttributeAccessIssue)
51 changes: 51 additions & 0 deletions luxonis_train/optimizers/custom_optimizers.py
@@ -0,0 +1,51 @@
import torch


class TripleLRSGD:
    def __init__(self, model: torch.nn.Module, params: dict) -> None:
        """TripleLRSGD is a custom optimizer that separates a model's
        parameters into batch norm weights, regular weights, and biases.

        @type model: torch.nn.Module
        @param model: The model to be used
        @type params: dict
        @param params: The parameters to be used for the optimizer
        """
        self.model = model
        self.params = params

    def create_optimizer(self):
        batch_norm_weights, regular_weights, biases = [], [], []

        for module in self.model.modules():
            if hasattr(module, "bias") and isinstance(
                module.bias, torch.nn.Parameter
            ):
                biases.append(module.bias)
            if isinstance(module, torch.nn.BatchNorm2d):
                batch_norm_weights.append(module.weight)
            elif hasattr(module, "weight") and isinstance(
                module.weight, torch.nn.Parameter
            ):
                regular_weights.append(module.weight)

        optimizer = torch.optim.SGD(
            [
                {
                    "params": batch_norm_weights,
                    "lr": self.params["lr"],
                    "momentum": self.params["momentum"],
                    "nesterov": self.params["nesterov"],
                },
                {
                    "params": regular_weights,
                    "weight_decay": self.params["weight_decay"],
                },
                {"params": biases},
            ],
            lr=self.params["lr"],
            momentum=self.params["momentum"],
            nesterov=self.params["nesterov"],
        )

        return optimizer
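
A short usage sketch (not part of the commit) showing how TripleLRSGD splits a model's parameters into the three groups above; the toy model and the parameter values are illustrative assumptions.

import torch

from luxonis_train.optimizers.custom_optimizers import TripleLRSGD

# Toy model containing regular weights, BatchNorm weights, and biases.
model = torch.nn.Sequential(
    torch.nn.Conv2d(3, 8, 3),
    torch.nn.BatchNorm2d(8),
    torch.nn.ReLU(),
)

optimizer = TripleLRSGD(
    model,
    {"lr": 0.02, "momentum": 0.937, "nesterov": True, "weight_decay": 0.0005},
).create_optimizer()

# Group 0: BatchNorm weights (no weight decay), group 1: regular weights
# (decayed), group 2: biases.
print([len(group["params"]) for group in optimizer.param_groups])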
3 changes: 3 additions & 0 deletions luxonis_train/optimizers/optimizers.py
@@ -2,6 +2,8 @@

 from luxonis_train.utils.registry import OPTIMIZERS

+from .custom_optimizers import TripleLRSGD
+
 for optimizer in [
     optim.Adadelta,
     optim.Adagrad,
@@ -15,5 +17,6 @@
     optim.RAdam,
     optim.RMSprop,
     optim.SGD,
+    TripleLRSGD,
 ]:
     OPTIMIZERS.register_module(module=optimizer)
79 changes: 79 additions & 0 deletions luxonis_train/schedulers/custom_schedulers.py
@@ -0,0 +1,79 @@
import math

import numpy as np
import torch


class TripleLRScheduler:
    def __init__(
        self,
        optimizer: torch.optim.Optimizer,
        params: dict,
        epochs: int,
        max_stepnum: int,
    ) -> None:
        """TripleLRScheduler is a custom learning rate scheduler that
        combines a linear warm-up with cosine annealing.

        @type optimizer: torch.optim.Optimizer
        @param optimizer: The optimizer to be used
        @type params: dict
        @param params: The parameters to be used for the scheduler
        @type epochs: int
        @param epochs: The number of epochs to train for
        @type max_stepnum: int
        @param max_stepnum: The number of training steps per epoch
        """
        self.optimizer = optimizer
        self.params = params
        self.max_stepnum = max_stepnum
        self.warmup_stepnum = max(
            round(self.params["warmup_epochs"] * self.max_stepnum), 1000
        )
        self.step = 0
        self.lrf = self.params["lre"] / self.optimizer.defaults["lr"]
        self.lf = (
            lambda x: ((1 - math.cos(x * math.pi / epochs)) / 2)
            * (self.lrf - 1)
            + 1
        )

    def create_scheduler(self):
        scheduler = torch.optim.lr_scheduler.LambdaLR(
            self.optimizer, lr_lambda=self.lf
        )
        return scheduler

    def update_learning_rate(self, current_epoch: int) -> None:
        """Update the learning rate based on the current epoch.

        @type current_epoch: int
        @param current_epoch: The current epoch
        """
        self.step = self.step % self.max_stepnum
        curr_step = self.step + self.max_stepnum * current_epoch

        if curr_step <= self.warmup_stepnum:
            for k, param in enumerate(self.optimizer.param_groups):
                warmup_bias_lr = (
                    self.params["warmup_bias_lr"] if k == 2 else 0.0
                )
                param["lr"] = np.interp(
                    curr_step,
                    [0, self.warmup_stepnum],
                    [
                        warmup_bias_lr,
                        self.optimizer.defaults["lr"] * self.lf(current_epoch),
                    ],
                )
                if "momentum" in param:
                    param["momentum"] = np.interp(
                        curr_step,
                        [0, self.warmup_stepnum],
                        [
                            self.params["warmup_momentum"],
                            self.optimizer.defaults["momentum"],
                        ],
                    )

        self.step += 1
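
And a sketch (again, not part of the commit) of how the scheduler is meant to be driven: create_scheduler() returns a LambdaLR that applies the per-epoch cosine decay, while update_learning_rate() is called once per training step for the warm-up, mirroring on_after_backward in luxonis_lightning.py above. The loop, epoch count, steps per epoch, and parameter values are illustrative assumptions.

import torch

from luxonis_train.optimizers.custom_optimizers import TripleLRSGD
from luxonis_train.schedulers.custom_schedulers import TripleLRScheduler

model = torch.nn.Sequential(torch.nn.Conv2d(3, 8, 3), torch.nn.BatchNorm2d(8))
optimizer = TripleLRSGD(
    model,
    {"lr": 0.02, "momentum": 0.937, "nesterov": True, "weight_decay": 0.0005},
).create_optimizer()

epochs, max_stepnum = 100, 500  # steps per epoch, as computed in configure_optimizers
custom_scheduler = TripleLRScheduler(
    optimizer,
    {"warmup_epochs": 3, "warmup_bias_lr": 0.1, "warmup_momentum": 0.8, "lre": 0.0002},
    epochs,
    max_stepnum,
)
lr_scheduler = custom_scheduler.create_scheduler()

for epoch in range(epochs):
    for _ in range(max_stepnum):
        # forward pass, loss.backward(), and optimizer.step() would go here
        custom_scheduler.update_learning_rate(epoch)  # per-step warm-up
    lr_scheduler.step()  # per-epoch cosine decay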
3 changes: 3 additions & 0 deletions luxonis_train/schedulers/schedulers.py
@@ -2,6 +2,8 @@

 from luxonis_train.utils.registry import SCHEDULERS

+from .custom_schedulers import TripleLRScheduler
+
 for scheduler in [
     lr_scheduler.LambdaLR,
     lr_scheduler.MultiplicativeLR,
@@ -18,5 +20,6 @@
     lr_scheduler.CyclicLR,
     lr_scheduler.OneCycleLR,
     lr_scheduler.CosineAnnealingWarmRestarts,
+    TripleLRScheduler,
 ]:
     SCHEDULERS.register_module(module=scheduler)
