-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* fixed incorrect class property call * fixed exporter uploading * uploadCheckpoint uploads on every checkpoint epoch * fix temp files names * updated callback readme * pre-commit run
- Loading branch information
1 parent
afade1f
commit 2c654a5
Showing
6 changed files
with
78 additions
and
47 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,61 @@ | ||
import logging | ||
import os | ||
from typing import Any | ||
|
||
import lightning.pytorch as pl | ||
import torch | ||
from luxonis_ml.utils.filesystem import LuxonisFileSystem | ||
|
||
from luxonis_train.utils.registry import CALLBACKS | ||
|
||
|
||
@CALLBACKS.register_module()
class UploadCheckpoint(pl.Callback):
    """Callback that uploads the checkpoint being saved to a remote location.

    An upload happens at most once per epoch, and only when the
    `best_model_path` of the first `ModelCheckpoint` callback registered on the
    trainer differs from the one seen at the previous upload.
    """

    def __init__(self, upload_directory: str):
        """Constructs `UploadCheckpoint`.

        @type upload_directory: str
        @param upload_directory: Path used as upload directory
        """
        super().__init__()
        self.fs = LuxonisFileSystem(
            upload_directory, allow_active_mlflow_run=True, allow_local=False
        )
        self.logger = logging.getLogger(__name__)
        # Epoch in which this callback last handled a checkpoint save.
        self.last_logged_epoch = None
        # `best_model_path` observed when the last upload was performed.
        self.last_best_checkpoint = None

    def on_save_checkpoint(
        self,
        trainer: pl.Trainer,
        pl_module: pl.LightningModule,
        checkpoint: dict[str, Any],
    ) -> None:
        """Uploads `checkpoint` if the tracked best model path has changed.

        @type trainer: pl.Trainer
        @param trainer: Trainer whose callbacks, current epoch and logger are
            inspected.
        @type pl_module: pl.LightningModule
        @param pl_module: Module being trained (unused).
        @type checkpoint: dict[str, Any]
        @param checkpoint: Checkpoint dictionary that is serialized and
            uploaded.
        """
        # Imported locally so the block does not depend on the module header.
        import tempfile

        # Handle only once per epoch in case there are multiple
        # ModelCheckpoint callbacks triggering this hook.
        if self.last_logged_epoch == trainer.current_epoch:
            return

        model_checkpoint_callbacks = [
            c
            for c in trainer.callbacks  # type: ignore
            if isinstance(c, pl.callbacks.ModelCheckpoint)  # type: ignore
        ]
        if not model_checkpoint_callbacks:
            # Nothing to compare against; skip instead of raising IndexError.
            self.logger.warning(
                "No ModelCheckpoint callback found, skipping checkpoint upload"
            )
            self.last_logged_epoch = trainer.current_epoch
            return

        # NOTE: assume that first checkpoint callback is based on val loss
        curr_best_checkpoint = model_checkpoint_callbacks[0].best_model_path

        if self.last_best_checkpoint != curr_best_checkpoint:
            self.logger.info(f"Started checkpoint upload to {self.fs.full_path}...")
            remote_filename = "curr_best_val_loss.ckpt"
            # Serialize into a private temporary directory so the working
            # directory is never polluted and the file is removed even when
            # saving or uploading fails. The basename is kept identical to the
            # remote name.
            with tempfile.TemporaryDirectory() as tmp_dir:
                local_filename = os.path.join(tmp_dir, remote_filename)
                torch.save(checkpoint, local_filename)
                self.fs.put_file(
                    local_path=local_filename,
                    remote_path=remote_filename,
                    mlflow_instance=trainer.logger.experiment.get(  # type: ignore
                        "mlflow", None
                    ),
                )
            self.logger.info("Checkpoint upload finished")
            self.last_best_checkpoint = curr_best_checkpoint

        self.last_logged_epoch = trainer.current_epoch
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters