From 28b071f72bd4c39c630fe60cc176474f65102401 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Kozlovsk=C3=BD?= Date: Mon, 7 Oct 2024 10:09:09 +0200 Subject: [PATCH 1/6] `pre-commit<4.0.0` (#99) --- requirements-dev.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements-dev.txt b/requirements-dev.txt index e4dbd194..b8b82a23 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -1,6 +1,6 @@ coverage-badge>=1.1.0 gdown>=4.2.0 -pre-commit>=3.2.1 +pre-commit>=3.2.1,<4.0.0 opencv-stubs>=0.0.8 pytest-cov>=4.1.0 pytest-subtests>=0.12.1 From d5edde0a8dba056ce5bdb2edc176a40613e75355 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Kozlovsk=C3=BD?= Date: Mon, 7 Oct 2024 10:27:20 +0200 Subject: [PATCH 2/6] Archive without Export (#96) --- luxonis_train/__main__.py | 4 ++-- luxonis_train/callbacks/archive_on_train_end.py | 17 ++++++++++------- luxonis_train/callbacks/export_on_train_end.py | 6 +++--- luxonis_train/config/config.py | 2 +- luxonis_train/core/core.py | 1 + luxonis_train/core/utils/archive_utils.py | 3 ++- .../nodes/heads/ddrnet_segmentation_head.py | 11 ++++++++++- 7 files changed, 29 insertions(+), 15 deletions(-) diff --git a/luxonis_train/__main__.py b/luxonis_train/__main__.py index 28274786..7b8ffbbb 100644 --- a/luxonis_train/__main__.py +++ b/luxonis_train/__main__.py @@ -179,11 +179,11 @@ def inspect( @app.command() def archive( executable: Annotated[ - str, + str | None, typer.Option( help="Path to the model file.", show_default=False, metavar="FILE" ), - ], + ] = None, config: ConfigType = None, opts: OptsType = None, ): diff --git a/luxonis_train/callbacks/archive_on_train_end.py b/luxonis_train/callbacks/archive_on_train_end.py index 30949e4e..0ed69bb5 100644 --- a/luxonis_train/callbacks/archive_on_train_end.py +++ b/luxonis_train/callbacks/archive_on_train_end.py @@ -25,16 +25,19 @@ def on_train_end( @param pl_module: Pytorch Lightning module. """ - path = self.get_checkpoint(pl_module) - if path is None: # pragma: no cover - logger.warning("Skipping model archiving.") - return - onnx_path = pl_module.core._exported_models.get("onnx") + if onnx_path is None: # pragma: no cover + checkpoint = self.get_checkpoint(pl_module) + if checkpoint is None: + logger.warning("Skipping model archiving.") + return + logger.info("Exported model not found. Exporting to ONNX...") + pl_module.core.export(weights=checkpoint) + onnx_path = pl_module.core._exported_models.get("onnx") + if onnx_path is None: # pragma: no cover logger.error( - "Model executable not found. " - "Make sure to run exporter callback before archiver callback. " + "Model executable not found and couldn't be created. " "Skipping model archiving." ) return diff --git a/luxonis_train/callbacks/export_on_train_end.py b/luxonis_train/callbacks/export_on_train_end.py index e727e81f..80d2a648 100644 --- a/luxonis_train/callbacks/export_on_train_end.py +++ b/luxonis_train/callbacks/export_on_train_end.py @@ -24,9 +24,9 @@ def on_train_end( @type pl_module: L{pl.LightningModule} @param pl_module: Pytorch Lightning module. 
""" - path = self.get_checkpoint(pl_module) - if path is None: # pragma: no cover + checkpoint = self.get_checkpoint(pl_module) + if checkpoint is None: # pragma: no cover logger.warning("Skipping model export.") return - pl_module.core.export(weights=self.get_checkpoint(pl_module)) + pl_module.core.export(weights=checkpoint) diff --git a/luxonis_train/config/config.py b/luxonis_train/config/config.py index a37f1da9..218abee3 100644 --- a/luxonis_train/config/config.py +++ b/luxonis_train/config/config.py @@ -498,7 +498,7 @@ def get_config( cls, cfg: str | dict[str, Any] | None = None, overrides: dict[str, Any] | list[str] | tuple[str, ...] | None = None, - ): + ) -> "Config": instance = super().get_config(cfg, overrides) if not isinstance(cfg, str): return instance diff --git a/luxonis_train/core/core.py b/luxonis_train/core/core.py index fea53acc..bc3d3673 100644 --- a/luxonis_train/core/core.py +++ b/luxonis_train/core/core.py @@ -640,6 +640,7 @@ def archive(self, path: str | Path | None = None) -> Path: outputs = [] if path is None: + logger.warning("No model executable specified for archiving.") if "onnx" not in self._exported_models: logger.info("Exporting model to ONNX...") self.export() diff --git a/luxonis_train/core/utils/archive_utils.py b/luxonis_train/core/utils/archive_utils.py index 2a9e5dae..c8904f08 100644 --- a/luxonis_train/core/utils/archive_utils.py +++ b/luxonis_train/core/utils/archive_utils.py @@ -200,7 +200,8 @@ def _get_head_outputs( # TODO: Fix this, will require refactoring custom ONNX output names logger.error( "ONNX model uses custom output names, trying to determine outputs based on the head type. " - "This will likely result in incorrect archive for multi-head models." + "This will likely result in incorrect archive for multi-head models. " + "You can ignore this error if your model has only one head." ) if head_type == "ClassificationHead": diff --git a/luxonis_train/nodes/heads/ddrnet_segmentation_head.py b/luxonis_train/nodes/heads/ddrnet_segmentation_head.py index 39293fed..b850910e 100644 --- a/luxonis_train/nodes/heads/ddrnet_segmentation_head.py +++ b/luxonis_train/nodes/heads/ddrnet_segmentation_head.py @@ -1,4 +1,5 @@ import logging +from typing import Literal import torch import torch.nn as nn @@ -22,7 +23,15 @@ class DDRNetSegmentationHead(BaseNode[Tensor, Tensor]): def __init__( self, inter_channels: int = 64, - inter_mode: str = "bilinear", + inter_mode: Literal[ + "nearest", + "linear", + "bilinear", + "bicubic", + "trilinear", + "area", + "pixel_shuffle", + ] = "bilinear", **kwargs, ): """DDRNet segmentation head. 
From 4c90b8bd8a64f2564fae0030b1a6c0e0fd425107 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Kozlovsk=C3=BD?= Date: Mon, 7 Oct 2024 10:27:36 +0200 Subject: [PATCH 3/6] Generalized `luxonis_train inspect` (#95) --- luxonis_train/__main__.py | 97 ++++++++++++++++++++++++++++----------- 1 file changed, 71 insertions(+), 26 deletions(-) diff --git a/luxonis_train/__main__.py b/luxonis_train/__main__.py index 7b8ffbbb..798a9baa 100644 --- a/luxonis_train/__main__.py +++ b/luxonis_train/__main__.py @@ -1,13 +1,13 @@ -import tempfile from enum import Enum from importlib.metadata import version from pathlib import Path from typing import Annotated import typer -import yaml from luxonis_ml.utils import setup_logging +from luxonis_train.config import Config + setup_logging(use_rich=True) @@ -121,7 +121,7 @@ def infer( def inspect( config: ConfigType = None, view: Annotated[ - str, + _ViewType, typer.Option( ..., "--view", @@ -145,35 +145,80 @@ def inspect( ] = 1.0, opts: OptsType = None, ): - """Inspect dataset.""" - from lightning.pytorch import seed_everything - from luxonis_ml.data.__main__ import inspect as lxml_inspect + """Inspect the dataset. + + To close the window press 'q' or 'Esc'. + """ + import cv2 + from luxonis_ml.data import Augmentations, LabelType + from luxonis_ml.data.utils.visualizations import visualize - from luxonis_train.config import Config + from luxonis_train.utils.registry import LOADERS cfg = Config.get_config(config, opts) - if cfg.trainer.seed is not None: - seed_everything(cfg.trainer.seed, workers=True) - - with tempfile.NamedTemporaryFile(mode="w", suffix=".yaml") as f: - yaml.dump( - [ - a.model_dump() - for a in cfg.trainer.preprocessing.get_active_augmentations() - if a.name != "Normalize" - ], - f, - ) + train_augmentations = Augmentations( + image_size=cfg.trainer.preprocessing.train_image_size, + augmentations=[ + i.model_dump() + for i in cfg.trainer.preprocessing.get_active_augmentations() + if i.name != "Normalize" + ], + train_rgb=cfg.trainer.preprocessing.train_rgb, + keep_aspect_ratio=cfg.trainer.preprocessing.keep_aspect_ratio, + ) + val_augmentations = Augmentations( + image_size=cfg.trainer.preprocessing.train_image_size, + augmentations=[ + i.model_dump() + for i in cfg.trainer.preprocessing.get_active_augmentations() + ], + train_rgb=cfg.trainer.preprocessing.train_rgb, + keep_aspect_ratio=cfg.trainer.preprocessing.keep_aspect_ratio, + only_normalize=True, + ) - if "dataset_name" not in cfg.loader.params: - raise ValueError("dataset_name is not set in the config") + Loader = LOADERS.get(cfg.loader.name) + loader = Loader( + augmentations=( + train_augmentations if view == "train" else val_augmentations + ), + view={ + "train": cfg.loader.train_view, + "val": cfg.loader.val_view, + "test": cfg.loader.test_view, + }[view], + image_source=cfg.loader.image_source, + **cfg.loader.params, + ) - lxml_inspect( - name=cfg.loader.params["dataset_name"], - view=[view], - aug_config=f.name, - size_multiplier=size_multiplier, + for images, labels in loader: + for img in images.values(): + if len(img.shape) != 3: + raise ValueError( + "Only 3D images are supported for visualization." 
+ ) + np_images = { + k: v.numpy().transpose(1, 2, 0) for k, v in images.items() + } + main_image = np_images[loader.image_source] + main_image = cv2.cvtColor(main_image, cv2.COLOR_RGB2BGR) + np_labels = { + task: (label.numpy(), LabelType(task_type)) + for task, (label, task_type) in labels.items() + } + + h, w, _ = main_image.shape + new_h, new_w = int(h * size_multiplier), int(w * size_multiplier) + main_image = cv2.resize(main_image, (new_w, new_h)) + viz = visualize( + main_image, + np_labels, + loader.get_classes(), ) + cv2.imshow("Visualization", viz) + if cv2.waitKey(0) in [ord("q"), 27]: + break + cv2.destroyAllWindows() @app.command() From 157b44de2a2a3ac81f67e4868ffc54bd0504e6a1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Kozlovsk=C3=BD?= Date: Mon, 7 Oct 2024 16:47:56 +0200 Subject: [PATCH 4/6] Remove `ImplicitKeypointBBoxHead` (#98) --- .../attached_modules/losses/__init__.py | 4 - .../losses/implicit_keypoint_bbox_loss.py | 349 ------------------ .../attached_modules/losses/keypoint_loss.py | 112 ------ luxonis_train/core/utils/archive_utils.py | 12 +- luxonis_train/nodes/blocks/__init__.py | 8 - luxonis_train/nodes/blocks/blocks.py | 80 ---- .../nodes/enums/head_categorization.py | 2 - luxonis_train/nodes/heads/__init__.py | 2 - .../heads/implicit_keypoint_bbox_head.py | 282 -------------- luxonis_train/utils/__init__.py | 9 +- luxonis_train/utils/boundingbox.py | 224 ----------- luxonis_train/utils/dataset_metadata.py | 26 -- luxonis_train/utils/keypoints.py | 18 - tests/configs/archive_config.yaml | 4 - tests/configs/parking_lot_config.yaml | 22 -- tests/integration/parking_lot.json | 29 -- tests/integration/test_detection.py | 17 - tests/integration/test_simple.py | 1 - tests/unittests/test_utils/test_boxutils.py | 16 - .../test_utils/test_dataset_metadata.py | 6 - tests/unittests/test_utils/test_keypoints.py | 14 +- 21 files changed, 3 insertions(+), 1234 deletions(-) delete mode 100644 luxonis_train/attached_modules/losses/implicit_keypoint_bbox_loss.py delete mode 100644 luxonis_train/attached_modules/losses/keypoint_loss.py delete mode 100644 luxonis_train/nodes/heads/implicit_keypoint_bbox_head.py diff --git a/luxonis_train/attached_modules/losses/__init__.py b/luxonis_train/attached_modules/losses/__init__.py index 28585504..5b3f7a63 100644 --- a/luxonis_train/attached_modules/losses/__init__.py +++ b/luxonis_train/attached_modules/losses/__init__.py @@ -3,8 +3,6 @@ from .bce_with_logits import BCEWithLogitsLoss from .cross_entropy import CrossEntropyLoss from .efficient_keypoint_bbox_loss import EfficientKeypointBBoxLoss -from .implicit_keypoint_bbox_loss import ImplicitKeypointBBoxLoss -from .keypoint_loss import KeypointLoss from .sigmoid_focal_loss import SigmoidFocalLoss from .smooth_bce_with_logits import SmoothBCEWithLogitsLoss from .softmax_focal_loss import SoftmaxFocalLoss @@ -14,8 +12,6 @@ "BCEWithLogitsLoss", "CrossEntropyLoss", "EfficientKeypointBBoxLoss", - "ImplicitKeypointBBoxLoss", - "KeypointLoss", "BaseLoss", "SigmoidFocalLoss", "SmoothBCEWithLogitsLoss", diff --git a/luxonis_train/attached_modules/losses/implicit_keypoint_bbox_loss.py b/luxonis_train/attached_modules/losses/implicit_keypoint_bbox_loss.py deleted file mode 100644 index 99eea6f3..00000000 --- a/luxonis_train/attached_modules/losses/implicit_keypoint_bbox_loss.py +++ /dev/null @@ -1,349 +0,0 @@ -import logging -from typing import Any, cast - -import torch -from torch import Tensor -from torchvision.ops import box_convert - -from 
luxonis_train.attached_modules.losses.keypoint_loss import KeypointLoss -from luxonis_train.enums import TaskType -from luxonis_train.nodes import ImplicitKeypointBBoxHead -from luxonis_train.utils import ( - Labels, - Packet, - compute_iou_loss, - match_to_anchor, - process_bbox_predictions, -) - -from .base_loss import BaseLoss -from .bce_with_logits import BCEWithLogitsLoss -from .smooth_bce_with_logits import SmoothBCEWithLogitsLoss - -KeypointTargetType = tuple[ - list[Tensor], - list[Tensor], - list[Tensor], - list[tuple[Tensor, Tensor, Tensor, Tensor]], - list[Tensor], -] - -logger = logging.getLogger(__name__) - - -# TODO: BROKEN! -class ImplicitKeypointBBoxLoss(BaseLoss[list[Tensor], KeypointTargetType]): - node: ImplicitKeypointBBoxHead - supported_tasks: list[tuple[TaskType, ...]] = [ - (TaskType.BOUNDINGBOX, TaskType.KEYPOINTS) - ] - - def __init__( - self, - cls_pw: float = 1.0, - viz_pw: float = 1.0, - obj_pw: float = 1.0, - label_smoothing: float = 0.0, - min_objectness_iou: float = 0.0, - bbox_loss_weight: float = 0.05, - keypoint_visibility_loss_weight: float = 0.6, - keypoint_regression_loss_weight: float = 0.5, - sigmas: list[float] | None = None, - area_factor: float | None = None, - class_loss_weight: float = 0.6, - objectness_loss_weight: float = 0.7, - anchor_threshold: float = 4.0, - bias: float = 0.5, - balance: list[float] | None = None, - **kwargs: Any, - ): - """Joint loss for keypoint and box predictions for cases where - the keypoints and boxes are inherently linked. - - Based on U{YOLO-Pose: Enhancing YOLO for Multi Person Pose Estimation Using Object - Keypoint Similarity Loss}. - - @type cls_pw: float - @param cls_pw: Power for the BCE loss for classes. Defaults to C{1.0}. - @type viz_pw: float - @param viz_pw: Power for the BCE loss for keypoints. - @type obj_pw: float - @param obj_pw: Power for the BCE loss for objectness. Defaults to C{1.0}. - @type label_smoothing: float - @param label_smoothing: Label smoothing factor. Defaults to C{0.0}. - @type min_objectness_iou: float - @param min_objectness_iou: Minimum objectness iou. Defaults to C{0.0}. - @type bbox_loss_weight: float - @param bbox_loss_weight: Weight for the bounding box loss. - @type keypoint_visibility_loss_weight: float - @param keypoint_visibility_loss_weight: Weight for the keypoint visibility loss. Defaults to C{0.6}. - @type keypoint_regression_loss_weight: float - @param keypoint_regression_loss_weight: Weight for the keypoint regression loss. Defaults to C{0.5}. - @type sigmas: list[float] | None - @param sigmas: Sigmas used in KeypointLoss for OKS metric. If None then use COCO ones if possible or default ones. Defaults to C{None}. - @type area_factor: float | None - @param area_factor: Factor by which we multiply bbox area which is used in KeypointLoss. If None then use default one. Defaults to C{None}. - @type class_loss_weight: float - @param class_loss_weight: Weight for the class loss. Defaults to C{0.6}. - @type objectness_loss_weight: float - @param objectness_loss_weight: Weight for the objectness loss. Defaults to C{0.7}. - @type anchor_threshold: float - @param anchor_threshold: Threshold for matching anchors to targets. Defaults to C{4.0}. - @type bias: float - @param bias: Bias for matching anchors to targets. Defaults to C{0.5}. - @type balance: list[float] | None - @param balance: Balance for the different heads. Defaults to C{None}. 
- """ - - super().__init__(**kwargs) - - self.n_anchors = self.node.n_anchors - self.n_heads = self.node.n_heads - self.box_offset = self.node.box_offset - self.anchors = self.node.anchors - self.balance = balance or [4.0, 1.0, 0.4] - if len(self.balance) < self.n_heads: - logger.warning( - f"Balance list must have at least {self.n_heads} elements." - "Filling the rest with 1.0." - ) - self.balance += [1.0] * (self.n_heads - len(self.balance)) - - self.min_objectness_iou = min_objectness_iou - self.bbox_weight = bbox_loss_weight - self.class_weight = class_loss_weight - self.objectness_weight = objectness_loss_weight - self.anchor_threshold = anchor_threshold - - self.bias = bias - - self.b_cross_entropy = BCEWithLogitsLoss( - pos_weight=torch.tensor([obj_pw]) - ) - self.class_loss = SmoothBCEWithLogitsLoss( - label_smoothing=label_smoothing, - bce_pow=cls_pw, - ) - self.keypoint_loss = KeypointLoss( - n_keypoints=self.n_keypoints, - bce_power=viz_pw, - sigmas=sigmas, - area_factor=area_factor, - regression_loss_weight=keypoint_regression_loss_weight, - visibility_loss_weight=keypoint_visibility_loss_weight, - ) - - self.positive_smooth_const = 1 - 0.5 * label_smoothing - self.negative_smooth_const = 0.5 * label_smoothing - - def prepare( - self, outputs: Packet[Tensor], labels: Labels - ) -> tuple[list[Tensor], KeypointTargetType]: - """Prepares the labels to be in the correct format for loss - calculation. - - @type outputs: Packet[Tensor] - @param outputs: Output from the forward pass. - @type labels: L{Labels} - @param labels: Dictionary containing the labels. - @rtype: tuple[list[Tensor], tuple[list[Tensor], list[Tensor], - list[Tensor], list[tuple[Tensor, Tensor, Tensor, Tensor]], - list[Tensor]]] - @return: Tuple containing the original output and the - postprocessed labels. The processed labels are a tuple - containing the class targets, box targets, keypoint targets, - indices and anchors. Indicies are a tuple containing vectors - of indices for batch, anchor, feature y and feature x - dimensions, respectively. They are all of shape - (n_targets,). The indices are used to index the output - tensors of shape (batch_size, n_anchors, feature_height, - feature_width, n_classes + box_offset + n_keypoints * 3) to - get a tensor of shape (n_targets, n_classes + box_offset + - n_keypoints * 3). 
- """ - predictions = self.get_input_tensors(outputs, "features") - - kpt_label = self.get_label(labels, TaskType.KEYPOINTS) - bbox_label = self.get_label(labels, TaskType.BOUNDINGBOX) - - targets = torch.zeros( - (kpt_label.shape[0], self.n_keypoints * 3 + self.box_offset + 1) - ) - targets[:, :2] = kpt_label[:, :2] - targets[:, 2 : self.box_offset + 1] = box_convert( - bbox_label[:, 2:], "xywh", "cxcywh" - ) - - # insert keypoints - for i in range(1, 4): - targets[:, self.box_offset + i :: 3] = kpt_label[:, i + 1 :: 3] - - n_targets = targets.shape[0] - - class_targets: list[Tensor] = [] - box_targets: list[Tensor] = [] - keypoint_targets: list[Tensor] = [] - indices: list[tuple[Tensor, Tensor, Tensor, Tensor]] = [] - anchors: list[Tensor] = [] - - anchor_indices = ( - torch.arange( - self.n_anchors, device=targets.device, dtype=torch.float32 - ) - .reshape(self.n_anchors, 1) - .repeat(1, n_targets) - .unsqueeze(-1) - ) - targets = torch.cat( - (targets.repeat(self.n_anchors, 1, 1), anchor_indices), 2 - ) - - xy_deltas = ( - torch.tensor( - [[0, 0], [1, 0], [0, 1], [-1, 0], [0, -1]], - device=targets.device, - ).float() - * self.bias - ) - - for i in range(self.n_heads): - anchor = self.anchors[i] - feature_height, feature_width = predictions[i].shape[2:4] - scaled_targets, xy_shifts = match_to_anchor( - targets, - anchor, - xy_deltas, - feature_width, - feature_height, - self.n_keypoints, - self.anchor_threshold, - self.bias, - self.box_offset, - ) - - batch_index, cls = scaled_targets[:, :2].long().T - box_xy = scaled_targets[:, 2:4] - box_wh = scaled_targets[:, 4:6] - box_xy_deltas = (box_xy - xy_shifts).long() - feature_x_index = box_xy_deltas[:, 0].clamp_(0, feature_width - 1) - feature_y_index = box_xy_deltas[:, 1].clamp_(0, feature_height - 1) - - anchor_indices = scaled_targets[:, -1].long() - indices.append( - ( - batch_index, - anchor_indices, - feature_y_index, - feature_x_index, - ) - ) - class_targets.append(cls) - box_targets.append(torch.cat((box_xy - box_xy_deltas, box_wh), 1)) - anchors.append(anchor[anchor_indices]) - - keypoint_targets.append( - self._create_keypoint_target(scaled_targets, box_xy_deltas) - ) - - return predictions, ( - class_targets, - box_targets, - keypoint_targets, - indices, - anchors, - ) - - def forward( - self, - predictions: list[Tensor], - targets: KeypointTargetType, - ) -> tuple[Tensor, dict[str, Tensor]]: - device = predictions[0].device - sub_losses = { - "bboxes": torch.tensor(0.0, device=device), - "objectness": torch.tensor(0.0, device=device), - "class": torch.tensor(0.0, device=device), - "kpt_visibility": torch.tensor(0.0, device=device), - "kpt_regression": torch.tensor(0.0, device=device), - } - - for ( - pred, - class_target, - box_target, - kpt_target, - index, - anchor, - balance, - ) in zip(predictions, *targets, self.balance): - obj_targets = torch.zeros_like(pred[..., 0], device=device) - n_targets = len(class_target) - - if n_targets > 0: - pred_subset = pred[index] - - bbox_cx_cy, bbox_w_h, _ = process_bbox_predictions( - pred_subset, anchor.to(device) - ) - bbox_loss, bbox_iou = compute_iou_loss( - torch.cat((bbox_cx_cy, bbox_w_h), dim=1), - box_target, - iou_type="ciou", - bbox_format="cxcywh", - reduction="mean", - ) - - sub_losses["bboxes"] += bbox_loss * self.bbox_weight - - area = box_target[:, 2] * box_target[:, 3] - - _, kpt_sublosses = self.keypoint_loss.forward( - pred_subset[:, self.box_offset + self.n_classes :], - kpt_target.to(device), - area.to(device), - ) - for name, kpt_subloss in kpt_sublosses.items(): - 
sub_losses[name] += kpt_subloss - - obj_targets[index] = (self.min_objectness_iou) + ( - 1 - self.min_objectness_iou - ) * bbox_iou.squeeze(-1).to(obj_targets.dtype) - - if self.n_classes > 1: - sub_losses["class"] += ( - self.class_loss.forward( - pred_subset[ - :, - self.box_offset : self.box_offset - + self.n_classes, - ], - class_target, - ) - * self.class_weight - ) - - sub_losses["objectness"] += ( - self.b_cross_entropy.forward(pred[..., 4], obj_targets) - * balance - * self.objectness_weight - ) - - loss = cast(Tensor, sum(sub_losses.values())).reshape([]) - return loss, {name: loss.detach() for name, loss in sub_losses.items()} - - def _create_keypoint_target( - self, scaled_targets: Tensor, box_xy_deltas: Tensor - ): - keypoint_target = scaled_targets[:, self.box_offset + 1 : -1] - for j in range(self.n_keypoints): - idx = 3 * j - keypoint_coords = keypoint_target[:, idx : idx + 2] - visibility = keypoint_target[:, idx + 2] - - keypoint_mask = visibility != 0 - keypoint_coords[keypoint_mask] -= box_xy_deltas[keypoint_mask] - - keypoint_target[:, idx : idx + 2] = keypoint_coords - keypoint_target[:, idx + 2] = visibility - - return keypoint_target diff --git a/luxonis_train/attached_modules/losses/keypoint_loss.py b/luxonis_train/attached_modules/losses/keypoint_loss.py deleted file mode 100644 index 1327d460..00000000 --- a/luxonis_train/attached_modules/losses/keypoint_loss.py +++ /dev/null @@ -1,112 +0,0 @@ -from typing import Any - -import torch -from torch import Tensor - -from luxonis_train.enums import TaskType -from luxonis_train.utils import ( - get_sigmas, - get_with_default, - process_keypoints_predictions, -) - -from .base_loss import BaseLoss -from .bce_with_logits import BCEWithLogitsLoss - - -# TODO: Make it work on its own -class KeypointLoss(BaseLoss[Tensor, Tensor]): - supported_tasks: list[TaskType] = [TaskType.KEYPOINTS] - - def __init__( - self, - n_keypoints: int, - bce_power: float = 1.0, - sigmas: list[float] | None = None, - area_factor: float | None = None, - regression_loss_weight: float = 1.0, - visibility_loss_weight: float = 1.0, - **kwargs: Any, - ): - """Keypoint based loss that is computed from OKS-based - regression and visibility loss. - - @type n_keypoints: int - @param n_keypoints: Number of keypoints. - @type bce_power: float - @param bce_power: Power used for BCE visibility loss. Defaults - to C{1.0}. - @param sigmas: Sigmas used for OKS. If None then use COCO ones - if possible or default ones. Defaults to C{None}. - @type area_factor: float | None - @param area_factor: Factor by which we multiply bbox area. If - None then use default one. Defaults to C{None}. - @type regression_loss_weight: float - @param regression_loss_weight: Weight of regression loss. - Defaults to C{1.0}. - @type visibility_loss_weight: float - @param visibility_loss_weight: Weight of visibility loss. - Defaults to C{1.0}. - """ - - super().__init__(**kwargs) - self.b_cross_entropy = BCEWithLogitsLoss( - pos_weight=torch.tensor([bce_power]), **kwargs - ) - self.sigmas = get_sigmas(sigmas, n_keypoints, caller_name=self.name) - self.area_factor = get_with_default( - area_factor, "bbox area scaling", self.name, default=0.53 - ) - self.regression_loss_weight = regression_loss_weight - self.visibility_loss_weight = visibility_loss_weight - - def forward( - self, prediction: Tensor, target: Tensor, area: Tensor - ) -> tuple[Tensor, dict[str, Tensor]]: - """Computes the keypoint loss and visibility loss for a given - prediction and target. 
- - @type prediction: Tensor - @param prediction: Predicted tensor of shape C{[n_detections, - n_keypoints * 3]}. - @type target: Tensor - @param target: Target tensor of shape C{[n_detections, - n_keypoints * 3]}. - @type area: Tensor - @param area: Area tensor of shape C{[n_detections]}. - @rtype: tuple[Tensor, dict[str, Tensor]] - @return: A tuple containing the total loss tensor of shape - C{[1,]} and a dictionary with the regression loss and - visibility loss tensors. - """ - sigmas = self.sigmas.to(prediction.device) - - pred_x, pred_y, pred_v = process_keypoints_predictions(prediction) - target_x = target[:, 0::3] - target_y = target[:, 1::3] - target_visibility = (target[:, 2::3] > 0).float() - - visibility_loss = ( - self.b_cross_entropy.forward(pred_v, target_visibility) - * self.visibility_loss_weight - ) - scales = area * self.area_factor - - distance = (target_x - pred_x) ** 2 + (target_y - pred_y) ** 2 - normalized_distance = ( - distance / (2 * sigmas**2) / (scales.view(-1, 1) + 1e-9) / 2 - ) - - regression_loss = 1 - torch.exp(-normalized_distance) - regression_loss = (regression_loss * target_visibility).sum(dim=1) / ( - target_visibility.sum(dim=1) + 1e-9 - ) - regression_loss = regression_loss.mean() - regression_loss *= self.regression_loss_weight - - total_loss = regression_loss + visibility_loss - - return total_loss, { - "kpt_regression": regression_loss, - "kpt_visibility": visibility_loss, - } diff --git a/luxonis_train/core/utils/archive_utils.py b/luxonis_train/core/utils/archive_utils.py index c8904f08..dbcc214a 100644 --- a/luxonis_train/core/utils/archive_utils.py +++ b/luxonis_train/core/utils/archive_utils.py @@ -116,7 +116,7 @@ def _get_classes( node_task = "boundingbox" case "SegmentationHead" | "BiSeNetHead": node_task = "segmentation" - case "ImplicitKeypointBBoxHead" | "EfficientKeypointBBoxHead": + case "EfficientKeypointBBoxHead": node_task = "keypoints" case _: # pragma: no cover raise ValueError("Node does not map to a default task.") @@ -152,14 +152,6 @@ def _get_head_specific_parameters( parameters["is_softmax"] = getattr( ImplementedHeadsIsSoxtmaxed, head_name ).value - elif head_name == "ImplicitKeypointBBoxHead": - parameters["subtype"] = ObjectDetectionSubtypeYOLO.YOLOv7.value - head_node = nodes[head_alias] - parameters["iou_threshold"] = head_node.iou_thres - parameters["conf_threshold"] = head_node.conf_thres - parameters["max_det"] = head_node.max_det - parameters["n_keypoints"] = head_node.n_keypoints - parameters["anchors"] = head_node.anchors.tolist() elif head_name == "EfficientKeypointBBoxHead": # or appropriate subtype head_node = nodes[head_alias] @@ -210,8 +202,6 @@ def _get_head_outputs( return [output["name"] for output in outputs] elif head_type in ["SegmentationHead", "BiSeNetHead"]: return [outputs[0]["name"]] - elif head_type == "ImplicitKeypointBBoxHead": - return [outputs[0]["name"]] elif head_type == "EfficientKeypointBBoxHead": return [outputs[0]["name"]] else: diff --git a/luxonis_train/nodes/blocks/__init__.py b/luxonis_train/nodes/blocks/__init__.py index c35186e1..ce0181c9 100644 --- a/luxonis_train/nodes/blocks/__init__.py +++ b/luxonis_train/nodes/blocks/__init__.py @@ -8,10 +8,6 @@ DropPath, EfficientDecoupledBlock, FeatureFusionBlock, - KeypointBlock, - LearnableAdd, - LearnableMulAddConv, - LearnableMultiply, RepVGGBlock, SpatialPyramidPoolingBlock, SqueezeExciteBlock, @@ -32,10 +28,6 @@ "AttentionRefinmentBlock", "SpatialPyramidPoolingBlock", "FeatureFusionBlock", - "LearnableAdd", - "LearnableMultiply", - 
"LearnableMulAddConv", - "KeypointBlock", "BasicResNetBlock", "Bottleneck", "UpscaleOnline", diff --git a/luxonis_train/nodes/blocks/blocks.py b/luxonis_train/nodes/blocks/blocks.py index 79a8f738..5059c651 100644 --- a/luxonis_train/nodes/blocks/blocks.py +++ b/luxonis_train/nodes/blocks/blocks.py @@ -654,86 +654,6 @@ def forward(self, x1: Tensor, x2: Tensor) -> Tensor: return out -class LearnableAdd(nn.Module): - """Implicit add block.""" - - def __init__(self, channel: int): - super().__init__() - self.channel = channel - self.implicit = nn.Parameter(torch.zeros(1, channel, 1, 1)) - nn.init.normal_(self.implicit, std=0.02) - - def forward(self, x: Tensor) -> Tensor: - return self.implicit.expand_as(x) + x - - -class LearnableMultiply(nn.Module): - """Implicit multiply block.""" - - def __init__(self, channel: int): - super().__init__() - self.channel = channel - self.implicit = nn.Parameter(torch.ones(1, channel, 1, 1)) - nn.init.normal_(self.implicit, mean=1.0, std=0.02) - - def forward(self, x: Tensor) -> Tensor: - return self.implicit.expand_as(x) * x - - -class LearnableMulAddConv(nn.Module): - def __init__( - self, - add_channel: int, - mul_channel: int, - conv_in_channel: int, - conv_out_channel: int, - ): - super().__init__() - self.add = LearnableAdd(add_channel) - self.mul = LearnableMultiply(mul_channel) - self.conv = nn.Conv2d(conv_in_channel, conv_out_channel, 1) - - def forward(self, x: Tensor) -> Tensor: - return self.mul(self.conv(self.add(x))) - - -class KeypointBlock(nn.Module): - """Keypoint head block for keypoint predictions.""" - - def __init__(self, in_channels: int, out_channels: int): - super().__init__() - layers: list[nn.Module] = [] - for i in range(6): - depth_wise_conv = ConvModule( - in_channels, - in_channels, - kernel_size=3, - padding=autopad(3), - groups=math.gcd(in_channels, in_channels), - activation=nn.SiLU(), - ) - conv = ( - ConvModule( - in_channels, - in_channels, - kernel_size=1, - padding=autopad(1), - activation=nn.SiLU(), - ) - if i < 5 - else nn.Conv2d(in_channels, out_channels, 1) - ) - - layers.append(depth_wise_conv) - layers.append(conv) - - self.block = nn.Sequential(*layers) - - def forward(self, x: Tensor) -> Tensor: - out = self.block(x) - return out - - T = TypeVar("T", int, tuple[int, ...]) diff --git a/luxonis_train/nodes/enums/head_categorization.py b/luxonis_train/nodes/enums/head_categorization.py index 90f75725..606ed872 100644 --- a/luxonis_train/nodes/enums/head_categorization.py +++ b/luxonis_train/nodes/enums/head_categorization.py @@ -6,7 +6,6 @@ class ImplementedHeads(Enum): ClassificationHead = "ClassificationParser" EfficientBBoxHead = "YOLO" - ImplicitKeypointBBoxHead = "YoloDetectionNetwork" EfficientKeypointBBoxHead = "YoloDetectionNetwork" SegmentationHead = "SegmentationParser" BiSeNetHead = "SegmentationParser" @@ -17,7 +16,6 @@ class ImplementedHeadsIsSoxtmaxed(Enum): ClassificationHead = False EfficientBBoxHead = None - ImplicitKeypointBBoxHead = None EfficientKeypointBBoxHead = None SegmentationHead = False BiSeNetHead = False diff --git a/luxonis_train/nodes/heads/__init__.py b/luxonis_train/nodes/heads/__init__.py index e188f188..9d04c310 100644 --- a/luxonis_train/nodes/heads/__init__.py +++ b/luxonis_train/nodes/heads/__init__.py @@ -3,7 +3,6 @@ from .ddrnet_segmentation_head import DDRNetSegmentationHead from .efficient_bbox_head import EfficientBBoxHead from .efficient_keypoint_bbox_head import EfficientKeypointBBoxHead -from .implicit_keypoint_bbox_head import ImplicitKeypointBBoxHead from 
.segmentation_head import SegmentationHead __all__ = [ @@ -11,7 +10,6 @@ "ClassificationHead", "EfficientBBoxHead", "EfficientKeypointBBoxHead", - "ImplicitKeypointBBoxHead", "SegmentationHead", "DDRNetSegmentationHead", ] diff --git a/luxonis_train/nodes/heads/implicit_keypoint_bbox_head.py b/luxonis_train/nodes/heads/implicit_keypoint_bbox_head.py deleted file mode 100644 index 114c6f82..00000000 --- a/luxonis_train/nodes/heads/implicit_keypoint_bbox_head.py +++ /dev/null @@ -1,282 +0,0 @@ -import logging -import math -from typing import Any, cast - -import torch -from torch import Tensor, nn - -from luxonis_train.enums import TaskType -from luxonis_train.nodes.base_node import BaseNode -from luxonis_train.nodes.blocks import KeypointBlock, LearnableMulAddConv -from luxonis_train.utils import ( - Packet, - non_max_suppression, - process_bbox_predictions, - process_keypoints_predictions, -) - -logger = logging.getLogger(__name__) - - -class ImplicitKeypointBBoxHead( - BaseNode[list[Tensor], tuple[list[Tensor], Tensor]] -): - tasks = [TaskType.KEYPOINTS, TaskType.BOUNDINGBOX] - in_channels: list[int] - - def __init__( - self, - n_heads: int = 3, - anchors: list[list[float]] | None = None, - init_coco_biases: bool = True, - conf_thres: float = 0.25, - iou_thres: float = 0.45, - max_det: int = 300, - **kwargs: Any, - ): - """Head for object and keypoint detection. - - Adapted from U{YOLOv7: Trainable bag-of-freebies sets new state-of-the-art for real-time - object detectors}. - - TODO: more technical documentation - - @type n_heads: int - @param n_heads: Number of output heads. Defaults to C{3}. - B{Note:} Should be same also on neck in most cases. - @type anchors: list[list[float]] | None - @param anchors: Anchors used for object detection. - @type init_coco_biases: bool - @param init_coco_biases: Whether to use COCO bias and weight - @type conf_thres: float - @param conf_thres: Threshold for confidence. Defaults to C{0.25}. - @type iou_thres: float - @param iou_thres: Threshold for IoU. Defaults to C{0.45}. - @type max_det: int - @param max_det: Maximum number of detections retained after NMS. Defaults to C{300}. - """ - super().__init__(**kwargs) - - self.conf_thres = conf_thres - self.iou_thres = iou_thres - self.max_det = max_det - - self.n_heads = n_heads - if len(self.in_channels) < self.n_heads: - logger.warning( - f"Head '{self.name}' was set to use {self.n_heads} heads, " - f"but received only {len(self.in_channels)} inputs. " - f"Changing number of heads to {len(self.in_channels)}." - ) - self.n_heads = len(self.in_channels) - - if anchors is None: - logger.info("No anchors provided, generating them automatically.") - anchors, recall = self.dataset_metadata.autogenerate_anchors( - self.n_heads - ) - logger.info( - f"Anchors generated. 
Best possible recall: {recall:.2f}" - ) - - self.box_offset = 5 - self.n_det_out = self.n_classes + self.box_offset - self.n_kpt_out = 3 * self.n_keypoints - self.n_out = self.n_det_out + self.n_kpt_out - self.n_anchors = len(anchors[0]) // 2 - self.grid: list[Tensor] = [] - - self.anchors = torch.tensor(anchors).float().view(self.n_heads, -1, 2) - self.anchor_grid = self.anchors.clone().view( - self.n_heads, 1, -1, 1, 1, 2 - ) - - self.channel_list, self.stride = self._fit_to_n_heads(self.in_channels) - - self.learnable_mul_add_conv = nn.ModuleList( - LearnableMulAddConv( - add_channel=in_channels, - mul_channel=self.n_det_out * self.n_anchors, - conv_in_channel=in_channels, - conv_out_channel=self.n_det_out * self.n_anchors, - ) - for in_channels in self.channel_list - ) - - self.kpt_heads = nn.ModuleList( - KeypointBlock( - in_channels=in_channels, - out_channels=self.n_kpt_out * self.n_anchors, - ) - for in_channels in self.channel_list - ) - - self.anchors /= self.stride.view(-1, 1, 1) - self._check_anchor_order() - - if init_coco_biases: - self._initialize_weights_and_biases() - - def forward(self, inputs: list[Tensor]) -> tuple[list[Tensor], Tensor]: - predictions: list[Tensor] = [] - features: list[Tensor] = [] - - self.anchor_grid = self.anchor_grid.to(inputs[0].device) - - for i in range(self.n_heads): - feat = cast( - Tensor, - torch.cat( - ( - self.learnable_mul_add_conv[i](inputs[i]), - self.kpt_heads[i](inputs[i]), - ), - axis=1, - ), # type: ignore - ) - - batch_size, _, feature_height, feature_width = feat.shape - if i >= len(self.grid): - self.grid.append( - self._construct_grid(feature_width, feature_height).to( - feat.device - ) - ) - - feat = feat.reshape( - batch_size, - self.n_anchors, - self.n_out, - feature_height, - feature_width, - ).permute(0, 1, 3, 4, 2) - - features.append(feat) - predictions.append( - self._build_predictions( - feat, self.anchor_grid[i], self.grid[i], self.stride[i] - ) - ) - - return features, torch.cat(predictions, dim=1) - - def wrap(self, output: tuple[list[Tensor], Tensor]) -> Packet[Tensor]: - features, predictions = output - - if self.export: - return {"boxes_and_keypoints": [predictions]} - - if self.training: - return {"features": features} - - nms = non_max_suppression( - predictions, - n_classes=self.n_classes, - conf_thres=self.conf_thres, - iou_thres=self.iou_thres, - bbox_format="cxcywh", - max_det=self.max_det, - ) - - return { - "boundingbox": [detection[:, :6] for detection in nms], - "keypoints": [ - detection[:, 6:].reshape(-1, self.n_keypoints, 3) - for detection in nms - ], - "features": features, - } - - def _build_predictions( - self, feat: Tensor, anchor_grid: Tensor, grid: Tensor, stride: Tensor - ) -> Tensor: - batch_size = feat.shape[0] - bbox = feat[..., : self.box_offset + self.n_classes] - keypoints = feat[..., self.box_offset + self.n_classes :] - - box_cxcy, box_wh, box_tail = process_bbox_predictions( - bbox, anchor_grid - ) - grid = grid.to(box_cxcy.device) - stride = stride.to(box_cxcy.device) - box_cxcy = (box_cxcy + grid) * stride - out_bbox = torch.cat((box_cxcy, box_wh, box_tail), dim=-1) - - grid_x = grid[..., 0:1] - grid_y = grid[..., 1:2] - kpt_x, kpt_y, kpt_vis = process_keypoints_predictions(keypoints) - kpt_x = (kpt_x + grid_x) * stride - kpt_y = (kpt_y + grid_y) * stride - kpt_vis_sig = kpt_vis.sigmoid() - out_kpt = torch.cat((kpt_x, kpt_y, kpt_vis_sig), dim=-1) - out_kpt = out_kpt.reshape(*kpt_x.shape[:-1], -1) - out = torch.cat((out_bbox, out_kpt), dim=-1) - - return out.reshape(batch_size, -1, 
self.n_out) - - def _infer_bbox( - self, bbox: Tensor, stride: Tensor, grid: Tensor, anchor_grid: Tensor - ) -> Tensor: - out_bbox = bbox.sigmoid() - out_bbox_xy = (out_bbox[..., 0:2] * 2.0 - 0.5 + grid) * stride - out_bbox_wh = (out_bbox[..., 2:4] * 2) ** 2 * anchor_grid.view( - 1, self.n_anchors, 1, 1, 2 - ) - return torch.cat((out_bbox_xy, out_bbox_wh, out_bbox[..., 4:]), dim=-1) - - def _fit_to_n_heads( - self, channel_list: list[int] - ) -> tuple[list[int], Tensor]: - out_channel_list = channel_list[: self.n_heads] - stride = torch.tensor( - [ - self.original_in_shape[1] / h - for h in cast(list[int], self.in_height)[: self.n_heads] - ], - dtype=torch.int, - ) - return out_channel_list, stride - - def _initialize_weights_and_biases(self, class_freq: Tensor | None = None): - for m in self.modules(): - if isinstance(m, nn.Conv2d): - nn.init.kaiming_normal_( - m.weight, mode="fan_out", nonlinearity="relu" - ) - elif isinstance(m, nn.BatchNorm2d): - m.eps = 1e-3 - m.momentum = 0.03 - elif isinstance( - m, (nn.Hardswish, nn.LeakyReLU, nn.ReLU, nn.ReLU6) - ): - m.inplace = True - - for mi, s in zip(self.learnable_mul_add_conv, self.stride): - b = mi.conv.bias.view(self.n_anchors, -1) - b.data[:, 4] += math.log(8 / (640 / s) ** 2) - b.data[:, 5:] += ( - math.log(0.6 / (self.n_classes - 0.99)) - if class_freq is None - else torch.log(class_freq / class_freq.sum()) - ) - mi.conv.bias = torch.nn.Parameter(b.view(-1), requires_grad=True) - - def _construct_grid(self, feature_width: int, feature_height: int): - grid_y, grid_x = torch.meshgrid( - [torch.arange(feature_height), torch.arange(feature_width)], - indexing="ij", - ) - return ( - torch.stack((grid_x, grid_y), 2) - .view((1, 1, feature_height, feature_width, 2)) - .float() - ) - - def _check_anchor_order(self): - a = self.anchor_grid.prod(-1).view(-1) - delta_a = a[-1] - a[0] - delta_s = self.stride[-1] - self.stride[0] - if delta_a.sign() != delta_s.sign(): - logger.warning("Reversing anchor order") - self.anchors[:] = self.anchors.flip(0) - self.anchor_grid[:] = self.anchor_grid.flip(0) diff --git a/luxonis_train/utils/__init__.py b/luxonis_train/utils/__init__.py index 164f7da5..1231bc10 100644 --- a/luxonis_train/utils/__init__.py +++ b/luxonis_train/utils/__init__.py @@ -1,13 +1,10 @@ from .boundingbox import ( anchors_for_fpn_features, - anchors_from_dataset, bbox2dist, bbox_iou, compute_iou_loss, dist2bbox, - match_to_anchor, non_max_suppression, - process_bbox_predictions, ) from .dataset_metadata import DatasetMetadata from .exceptions import IncompatibleException @@ -18,7 +15,7 @@ to_shape_packet, ) from .graph import traverse_graph -from .keypoints import get_sigmas, process_keypoints_predictions +from .keypoints import get_sigmas from .tracker import LuxonisTrackerPL from .types import AttachIndexType, Kwargs, Labels, Packet @@ -34,16 +31,12 @@ "to_shape_packet", "get_with_default", "LuxonisTrackerPL", - "match_to_anchor", "dist2bbox", "bbox2dist", "bbox_iou", "non_max_suppression", - "anchors_from_dataset", "anchors_for_fpn_features", - "process_bbox_predictions", "compute_iou_loss", - "process_keypoints_predictions", "get_sigmas", "traverse_graph", ] diff --git a/luxonis_train/utils/boundingbox.py b/luxonis_train/utils/boundingbox.py index 8c5a9d40..c8f69802 100644 --- a/luxonis_train/utils/boundingbox.py +++ b/luxonis_train/utils/boundingbox.py @@ -2,7 +2,6 @@ from typing import Literal, TypeAlias import torch -from scipy.cluster.vq import kmeans from torch import Tensor from torchvision.ops import ( batched_nms, @@ -12,96 
+11,10 @@ generalized_box_iou, ) -from luxonis_train.enums import TaskType -from luxonis_train.loaders import BaseLoaderTorch - IoUType: TypeAlias = Literal["none", "giou", "diou", "ciou", "siou"] BBoxFormatType: TypeAlias = Literal["xyxy", "xywh", "cxcywh"] -def match_to_anchor( - targets: Tensor, - anchor: Tensor, - xy_shifts: Tensor, - scale_width: int, - scale_height: int, - n_keypoints: int, - anchor_threshold: float, - bias: float, - box_offset: int = 5, -) -> tuple[Tensor, Tensor]: - """Matches targets to anchors. - - 1. Scales the targets to the size of the feature map - 2. Matches the targets to the anchor, filtering out targets whose aspect - ratio is too far from the anchor's aspect ratio. - - @type targets: Tensor - @param targets: Targets in xyxy format - @type anchor: Tensor - @param anchor: Anchor boxes - @type xy_shifts: Tensor - @param xy_shifts: Shifts in x and y direction - @type scale_width: int - @param scale_width: Width of the feature map - @type scale_height: int - @param scale_height: Height of the feature map - @type n_keypoints: int - @param n_keypoints: Number of keypoints - @type anchor_threshold: float - @param anchor_threshold: Threshold for anchor filtering - @type bias: float - @param bias: Bias for anchor filtering - @type box_offset: int - @param box_offset: Offset for box. Defaults to 5. - - @rtype: tuple[Tensor, Tensor] - @return: Scaled targets and shifts. - """ - - # The boxes and keypoints need to be scaled to the size of the features - # First two indices are batch index and class label, - # last index is anchor index. Those are not scaled. - scale_length = 3 * n_keypoints + box_offset + 2 - scales = torch.ones(scale_length, device=targets.device) - - # Scale box and keypoint coordinates, but not visibility - for i in range(n_keypoints): - scales[box_offset + 1 + 3 * i] = scale_width - scales[box_offset + 2 + 3 * i] = scale_height - - scales[2 : box_offset + 1] = torch.tensor( - [scale_width, scale_height, scale_width, scale_height] - ) - - scaled_targets = targets * scales - - if targets.size(1) == 0: - return targets[0], torch.zeros(1, device=targets.device) - - wh_to_anchor_ratio = scaled_targets[:, :, 4:6] / anchor.unsqueeze(1) - ratio_mask = ( - torch.max(wh_to_anchor_ratio, 1.0 / wh_to_anchor_ratio).max(2)[0] - < anchor_threshold - ) - - filtered_targets = scaled_targets[ratio_mask] - - box_xy = filtered_targets[:, 2:4] - box_wh = torch.tensor([scale_width, scale_height]) - box_xy - - def decimal_part(x: Tensor) -> Tensor: - return x % 1.0 - - x, y = ((decimal_part(box_xy) < bias) & (box_xy > 1.0)).T - w, h = ((decimal_part(box_wh) < bias) & (box_wh > 1.0)).T - mask = torch.stack((torch.ones_like(x), x, y, w, h)) - final_targets = filtered_targets.repeat((len(xy_shifts), 1, 1))[mask] - - shifts = xy_shifts.unsqueeze(1).repeat((1, len(box_xy), 1))[mask] - return final_targets, shifts - - def dist2bbox( distance: Tensor, anchor_points: Tensor, @@ -411,123 +324,6 @@ def non_max_suppression( return output -def anchors_from_dataset( - loader: BaseLoaderTorch, - n_anchors: int = 9, - n_generations: int = 1000, - ratio_threshold: float = 4.0, -) -> tuple[Tensor, float]: - """Generates anchors based on bounding box annotations present in - provided data loader. It uses K-Means for initial proposals which - are then refined with genetic algorithm. - - @type loader: L{torch.utils.data.DataLoader} - @param loader: Data loader. - @type n_anchors: int - @param n_anchors: Number of anchors, this is normally n_heads * 3 - which generates 3 anchors per layer. 
Defaults to 9. - @type n_generations: int - @param n_generations: Number of iterations for anchor improvement - with genetic algorithm. Defaults to 1000. - @type ratio_threshold: float - @param ratio_threshold: Minimum threshold for ratio. Defaults to - 4.0. - @rtype: tuple[Tensor, float] - @return: Proposed anchors and the best possible recall. - """ - - widths: list[Tensor] = [] - for _, labels in loader: - for tensor, task_type in labels.values(): - if task_type == TaskType.BOUNDINGBOX: - curr_wh = tensor[:, 4:] - widths.append(curr_wh) - _, h, w = loader.input_shape - img_size = torch.tensor([w, h]) - wh = torch.vstack(widths) * img_size - - # filter out small objects (w or h < 2 pixels) - wh = wh[(wh >= 2).any(1)] - - try: - assert n_anchors <= len( - wh - ), "More requested anchors than number of bounding boxes." - std = wh.std(0) - proposed_anchors = kmeans(wh / std, n_anchors, iter=30) - proposed_anchors = torch.tensor(proposed_anchors[0]) * std - assert n_anchors == len( - proposed_anchors - ), "KMeans returned insufficient number of points" - except Exception: - print("Fallback to random anchor init") - proposed_anchors = ( - torch.sort(torch.rand(n_anchors * 2))[0].reshape(n_anchors, 2) - * img_size - ) - - proposed_anchors = proposed_anchors[ - torch.argsort(proposed_anchors.prod(1)) - ] # sort small to large - - def calc_best_anchor_ratio(anchors: Tensor, wh: Tensor) -> Tensor: - """Calculate how well most suitable anchor box matches each - target bbox.""" - symmetric_size_ratios = torch.min( - wh[:, None] / anchors[None], anchors[None] / wh[:, None] - ) - worst_side_size_ratio = symmetric_size_ratios.min(-1).values - best_anchor_ratio = worst_side_size_ratio.max(-1).values - return best_anchor_ratio - - def calc_best_possible_recall(anchors: Tensor, wh: Tensor) -> Tensor: - """Calculate best possible recall if every bbox is matched to an - appropriate anchor.""" - best_anchor_ratio = calc_best_anchor_ratio(anchors, wh) - best_possible_recall = ( - (best_anchor_ratio > 1 / ratio_threshold).float().mean() - ) - return best_possible_recall - - def anchor_fitness(anchors: Tensor, wh: Tensor) -> Tensor: - """Fitness function used for anchor evolve.""" - best_anchor_ratio = calc_best_anchor_ratio(anchors, wh) - return ( - best_anchor_ratio - * (best_anchor_ratio > 1 / ratio_threshold).float() - ).mean() - - # Genetic algorithm - best_fitness = anchor_fitness(proposed_anchors, wh) - anchor_shape = proposed_anchors.shape - mutation_probability = 0.9 - mutation_noise_mean = 1 - mutation_noise_std = 0.1 - for _ in range(n_generations): - anchor_mutation = torch.ones(anchor_shape) - anchor_mutation = ( - (torch.rand(anchor_shape) < mutation_probability) - * torch.randn(anchor_shape) - * mutation_noise_std - + mutation_noise_mean - ).clip(0.3, 3.0) - - mutated_anchors = (proposed_anchors.clone() * anchor_mutation).clip( - min=2.0 - ) - mutated_fitness = anchor_fitness(mutated_anchors, wh) - if mutated_fitness > best_fitness: - best_fitness = mutated_fitness - proposed_anchors = mutated_anchors.clone() - - proposed_anchors = proposed_anchors[ - torch.argsort(proposed_anchors.prod(1)) - ] # sort small to large - recall = calc_best_possible_recall(proposed_anchors, wh) - - return proposed_anchors, recall.item() - - def anchors_for_fpn_features( features: list[Tensor], strides: Tensor, @@ -605,26 +401,6 @@ def anchors_for_fpn_features( ) -def process_bbox_predictions( - bbox: Tensor, anchor: Tensor -) -> tuple[Tensor, Tensor, Tensor]: - """Transforms bbox predictions to correct format. 
- - @type bbox: Tensor - @param bbox: Bbox predictions - @type anchor: Tensor - @param anchor: Anchor boxes - @rtype: tuple[Tensor, Tensor, Tensor] - @return: xy and wh predictions and tail. The tail is anything after - xywh. - """ - out_bbox = bbox.sigmoid() - out_bbox_xy = out_bbox[..., 0:2] * 2.0 - 0.5 - out_bbox_wh = (out_bbox[..., 2:4] * 2) ** 2 * anchor - out_bbox_tail = out_bbox[..., 4:] - return out_bbox_xy, out_bbox_wh, out_bbox_tail - - def compute_iou_loss( pred_bboxes: Tensor, target_bboxes: Tensor, diff --git a/luxonis_train/utils/dataset_metadata.py b/luxonis_train/utils/dataset_metadata.py index 22b81618..3a9cecdf 100644 --- a/luxonis_train/utils/dataset_metadata.py +++ b/luxonis_train/utils/dataset_metadata.py @@ -1,5 +1,4 @@ from luxonis_train.loaders import BaseLoaderTorch -from luxonis_train.utils import anchors_from_dataset class DatasetMetadata: @@ -112,31 +111,6 @@ def classes(self, task: str | None = None) -> list[str]: ) return class_names - def autogenerate_anchors( - self, n_heads: int - ) -> tuple[list[list[float]], float]: - """Automatically generates anchors for the provided dataset. - - @type n_heads: int - @param n_heads: Number of heads to generate anchors for. - @rtype: tuple[list[list[float]], float] - @return: List of anchors in [-1,6] format and recall of the - anchors. - @raises RuntimeError: If the dataset loader was not provided - during initialization. - """ - if self._loader is None: - raise RuntimeError( - "Cannot generate anchors without a dataset loader. " - "Please provide a dataset loader to the constructor " - "or call `set_loader` method." - ) - - proposed_anchors, recall = anchors_from_dataset( - self._loader, n_anchors=n_heads * 3 - ) - return proposed_anchors.reshape(-1, 6).tolist(), recall - @classmethod def from_loader(cls, loader: BaseLoaderTorch) -> "DatasetMetadata": """Creates a L{DatasetMetadata} object from a L{LuxonisDataset}. diff --git a/luxonis_train/utils/keypoints.py b/luxonis_train/utils/keypoints.py index 9fbc741d..8073c399 100644 --- a/luxonis_train/utils/keypoints.py +++ b/luxonis_train/utils/keypoints.py @@ -6,24 +6,6 @@ logger = logging.getLogger(__name__) -def process_keypoints_predictions( - keypoints: Tensor, -) -> tuple[Tensor, Tensor, Tensor]: - """Extracts x, y and visibility from keypoints predictions. - - @type keypoints: Tensor - @param keypoints: Keypoints predictions. The last dimension must be divisible by 3 - and is expected to be in format [x1, y1, v1, x2, y2, v2, ...]. - - @rtype: tuple[Tensor, Tensor, Tensor] - @return: x, y and visibility tensors. 
- """ - x = keypoints[..., ::3] - y = keypoints[..., 1::3] - visibility = keypoints[..., 2::3] - return x, y, visibility - - def get_sigmas( sigmas: list[float] | None, n_keypoints: int, diff --git a/tests/configs/archive_config.yaml b/tests/configs/archive_config.yaml index 71589f4d..73766823 100644 --- a/tests/configs/archive_config.yaml +++ b/tests/configs/archive_config.yaml @@ -12,10 +12,6 @@ model: inputs: - EfficientRep - - name: ImplicitKeypointBBoxHead - inputs: - - EfficientRep - - name: SegmentationHead inputs: - EfficientRep diff --git a/tests/configs/parking_lot_config.yaml b/tests/configs/parking_lot_config.yaml index bf0b9da3..78711178 100644 --- a/tests/configs/parking_lot_config.yaml +++ b/tests/configs/parking_lot_config.yaml @@ -16,14 +16,6 @@ model: inputs: - neck - - name: ImplicitKeypointBBoxHead - alias: car-detection-head - inputs: - - neck - task: - keypoints: car-keypoints - boundingbox: car-boundingbox - - name: EfficientKeypointBBoxHead alias: motorbike-detection-head task: @@ -65,14 +57,10 @@ model: attached_to: vehicle-type-segmentation-head - name: CrossEntropyLoss attached_to: color-segmentation-head - - name: ImplicitKeypointBBoxLoss - attached_to: car-detection-head - name: EfficientKeypointBBoxLoss attached_to: motorbike-detection-head metrics: - - name: ObjectKeypointSimilarity - attached_to: car-detection-head - name: MeanAveragePrecisionKeypoints attached_to: motorbike-detection-head - name: MeanAveragePrecision @@ -88,16 +76,6 @@ model: attached_to: brand-segmentation-head visualizers: - - name: MultiVisualizer - alias: multi-visualizer-car - attached_to: car-detection-head - params: - visualizers: - - name: KeypointVisualizer - params: - nonvisible_color: blue - - name: BBoxVisualizer - - name: MultiVisualizer alias: multi-visualizer-motorbike attached_to: motorbike-detection-head diff --git a/tests/integration/parking_lot.json b/tests/integration/parking_lot.json index 28ca0a61..c8842c1f 100644 --- a/tests/integration/parking_lot.json +++ b/tests/integration/parking_lot.json @@ -90,16 +90,6 @@ ], "layout": "NCHW" }, - { - "name": "car-detection-head/boxes_and_keypoints/0", - "dtype": "float32", - "shape": [ - 1, - 5040, - 24 - ], - "layout": "NCD" - }, { "name": "color-segmentation-head/color-segmentation/0", "dtype": "float32", @@ -179,25 +169,6 @@ "bbox-head/boundingbox/2" ] }, - { - "name": "car-detection-head", - "parser": "YoloDetectionNetwork", - "metadata": { - "postprocessor_path": null, - "classes": [ - "car" - ], - "n_classes": 1, - "iou_threshold": 0.45, - "conf_threshold": 0.25, - "max_det": 300, - "subtype": "yolov7", - "n_keypoints": 6 - }, - "outputs": [ - "car-detection-head/boxes_and_keypoints/0" - ] - }, { "name": "motorbike-detection-head", "parser": "YoloDetectionNetwork", diff --git a/tests/integration/test_detection.py b/tests/integration/test_detection.py index c88851ba..45e83f0a 100644 --- a/tests/integration/test_detection.py +++ b/tests/integration/test_detection.py @@ -26,14 +26,6 @@ def get_opts_backbone(backbone: str) -> dict[str, Any]: }, "inputs": [backbone], }, - { - "name": "ImplicitKeypointBBoxHead", - "task": { - "keypoints": "car-keypoints", - "boundingbox": "car-boundingbox", - }, - "inputs": [backbone], - }, ], "losses": [ { @@ -45,10 +37,6 @@ def get_opts_backbone(backbone: str) -> dict[str, Any]: "attached_to": "EfficientKeypointBBoxHead", "params": {"area_factor": 0.5}, }, - { - "name": "ImplicitKeypointBBoxLoss", - "attached_to": "ImplicitKeypointBBoxHead", - }, ], "metrics": [ { @@ -60,11 +48,6 @@ def 
get_opts_backbone(backbone: str) -> dict[str, Any]: "alias": "EfficientKeypointBBoxHead-MaP", "attached_to": "EfficientKeypointBBoxHead", }, - { - "name": "MeanAveragePrecisionKeypoints", - "alias": "ImplicitKeypointBBoxHead-MaP", - "attached_to": "ImplicitKeypointBBoxHead", - }, ], } } diff --git a/tests/integration/test_simple.py b/tests/integration/test_simple.py index 7b2f0f91..3d489c4a 100644 --- a/tests/integration/test_simple.py +++ b/tests/integration/test_simple.py @@ -115,7 +115,6 @@ def test_custom_tasks( ), "Config JSON not found in the archive." generated_config = json.loads(extracted_cfg.read().decode()) - del generated_config["model"]["heads"][1]["metadata"]["anchors"] assert generated_config == correct_archive_config diff --git a/tests/unittests/test_utils/test_boxutils.py b/tests/unittests/test_utils/test_boxutils.py index 8df1ab74..6056cf8e 100644 --- a/tests/unittests/test_utils/test_boxutils.py +++ b/tests/unittests/test_utils/test_boxutils.py @@ -10,7 +10,6 @@ bbox_iou, compute_iou_loss, dist2bbox, - process_bbox_predictions, ) @@ -105,21 +104,6 @@ def test_compute_iou_loss(): assert 0 <= iou.min() and iou.max() <= 1 -def test_process_bbox_predictions(): - bbox = generate_random_bboxes(10, 64, 64, "xywh") - data = torch.rand(10, 4) - prediction = torch.concat([bbox, data], dim=-1) - anchor = torch.rand(10, 2) - - out_bbox_xy, out_bbox_wh, out_bbox_tail = process_bbox_predictions( - prediction, anchor - ) - - assert out_bbox_xy.shape == (10, 2) - assert out_bbox_wh.shape == (10, 2) - assert out_bbox_tail.shape == (10, 4) - - def test_anchors_for_fpn_features(): features = [torch.rand(1, 256, 14, 14), torch.rand(1, 256, 28, 28)] strides = torch.tensor([8, 16]) diff --git a/tests/unittests/test_utils/test_dataset_metadata.py b/tests/unittests/test_utils/test_dataset_metadata.py index 8dba11a8..daf01725 100644 --- a/tests/unittests/test_utils/test_dataset_metadata.py +++ b/tests/unittests/test_utils/test_dataset_metadata.py @@ -45,9 +45,3 @@ def test_class_names(metadata): metadata._classes["segmentation"] = ["car", "person", "tree"] with pytest.raises(RuntimeError): metadata.classes() - - -def test_no_loader(): - metadata = DatasetMetadata() - with pytest.raises(RuntimeError): - metadata.autogenerate_anchors(3) diff --git a/tests/unittests/test_utils/test_keypoints.py b/tests/unittests/test_utils/test_keypoints.py index 3d20dae6..5763386f 100644 --- a/tests/unittests/test_utils/test_keypoints.py +++ b/tests/unittests/test_utils/test_keypoints.py @@ -1,10 +1,6 @@ import pytest -import torch -from luxonis_train.utils.keypoints import ( - get_sigmas, - process_keypoints_predictions, -) +from luxonis_train.utils.keypoints import get_sigmas def test_get_sigmas(): @@ -14,11 +10,3 @@ def test_get_sigmas(): get_sigmas(sigmas, 2) assert len(get_sigmas(None, 17)) == 17 assert len(get_sigmas(None, 5)) == 5 - - -def test_process_keypoints_predictions(): - keypoints = torch.tensor([[0.1, 0.2, 1.0, 0.4, 0.5, 0.0]]) - x, y, visibility = process_keypoints_predictions(keypoints) - pytest.approx(x[0].tolist(), [0.1, 0.4]) - pytest.approx(y[0].tolist(), [0.2, 0.5]) - pytest.approx(visibility[0].tolist(), [1.0, 0.0]) From a6c99f970ffa86808a6f8649bcfbbc18967eb9e8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Kozlovsk=C3=BD?= Date: Tue, 8 Oct 2024 22:44:48 +0200 Subject: [PATCH 5/6] Updated README and Code Consistency (#101) --- README.md | 718 +++++++++++++++--- configs/README.md | 557 +++++++++----- configs/classification_heavy_model.yaml | 4 +- configs/classification_light_model.yaml | 
4 +- configs/complex_model.yaml | 57 +- configs/detection_heavy_model.yaml | 4 +- configs/detection_light_model.yaml | 4 +- configs/example_export.yaml | 6 +- configs/example_tuning.yaml | 10 +- configs/keypoint_bbox_heavy_model.yaml | 4 +- configs/keypoint_bbox_light_model.yaml | 4 +- configs/segmentation_heavy_model.yaml | 4 +- configs/segmentation_light_model.yaml | 4 +- luxonis_train/__main__.py | 40 +- .../attached_modules/losses/README.md | 126 +-- .../attached_modules/metrics/README.md | 19 +- .../attached_modules/visualizers/README.md | 103 +-- luxonis_train/callbacks/README.md | 66 +- .../config/predefined_models/README.md | 288 +++---- luxonis_train/core/core.py | 120 ++- luxonis_train/loaders/README.md | 34 + luxonis_train/loaders/base_loader.py | 2 +- luxonis_train/loaders/luxonis_loader_torch.py | 5 +- luxonis_train/models/luxonis_lightning.py | 2 +- luxonis_train/nodes/README.md | 266 +++---- luxonis_train/utils/registry.py | 4 +- media/coverage_badge.svg | 21 - media/pybadge.svg | 1 - pyproject.toml | 3 + tests/configs/ddrnet.yaml | 10 +- tests/configs/parking_lot_config.yaml | 14 +- tests/configs/segmentation_parse_loader.yaml | 4 +- 32 files changed, 1639 insertions(+), 869 deletions(-) create mode 100644 luxonis_train/loaders/README.md delete mode 100644 media/coverage_badge.svg delete mode 100644 media/pybadge.svg diff --git a/README.md b/README.md index 2aab7d86..33908c08 100644 --- a/README.md +++ b/README.md @@ -5,130 +5,597 @@ ![MacOS](https://img.shields.io/badge/mac%20os-000000?style=for-the-badge&logo=apple&logoColor=white) [![License](https://img.shields.io/badge/License-Apache_2.0-blue.svg)](https://opensource.org/licenses/Apache-2.0) -![PyBadge](https://github.com/luxonis/luxonis-train/blob/main/media/pybadge.svg) +![PyBadge](https://img.shields.io/pypi/pyversions/luxonis-train?logo=data:image/svg+xml%3Bbase64,PHN2ZyB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHZpZXdCb3g9IjAgMCAxMDAgMTAwIj4KICA8ZGVmcz4KICAgIDxsaW5lYXJHcmFkaWVudCBpZD0icHlZZWxsb3ciIGdyYWRpZW50VHJhbnNmb3JtPSJyb3RhdGUoNDUpIj4KICAgICAgPHN0b3Agc3RvcC1jb2xvcj0iI2ZlNSIgb2Zmc2V0PSIwLjYiLz4KICAgICAgPHN0b3Agc3RvcC1jb2xvcj0iI2RhMSIgb2Zmc2V0PSIxIi8+CiAgICA8L2xpbmVhckdyYWRpZW50PgogICAgPGxpbmVhckdyYWRpZW50IGlkPSJweUJsdWUiIGdyYWRpZW50VHJhbnNmb3JtPSJyb3RhdGUoNDUpIj4KICAgICAgPHN0b3Agc3RvcC1jb2xvcj0iIzY5ZiIgb2Zmc2V0PSIwLjQiLz4KICAgICAgPHN0b3Agc3RvcC1jb2xvcj0iIzQ2OCIgb2Zmc2V0PSIxIi8+CiAgICA8L2xpbmVhckdyYWRpZW50PgogIDwvZGVmcz4KCiAgPHBhdGggZD0iTTI3LDE2YzAtNyw5LTEzLDI0LTEzYzE1LDAsMjMsNiwyMywxM2wwLDIyYzAsNy01LDEyLTExLDEybC0yNCwwYy04LDAtMTQsNi0xNCwxNWwwLDEwbC05LDBjLTgsMC0xMy05LTEzLTI0YzAtMTQsNS0yMywxMy0yM2wzNSwwbDAtM2wtMjQsMGwwLTlsMCwweiBNODgsNTB2MSIgZmlsbD0idXJsKCNweUJsdWUpIi8+CiAgPHBhdGggZD0iTTc0LDg3YzAsNy04LDEzLTIzLDEzYy0xNSwwLTI0LTYtMjQtMTNsMC0yMmMwLTcsNi0xMiwxMi0xMmwyNCwwYzgsMCwxNC03LDE0LTE1bDAtMTBsOSwwYzcsMCwxMyw5LDEzLDIzYzAsMTUtNiwyNC0xMywyNGwtMzUsMGwwLDNsMjMsMGwwLDlsMCwweiBNMTQwLDUwdjEiIGZpbGw9InVybCgjcHlZZWxsb3cpIi8+CgogIDxjaXJjbGUgcj0iNCIgY3g9IjY0IiBjeT0iODgiIGZpbGw9IiNGRkYiLz4KICA8Y2lyY2xlIHI9IjQiIGN4PSIzNyIgY3k9IjE1IiBmaWxsPSIjRkZGIi8+Cjwvc3ZnPgo=) [![Ruff](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/astral-sh/ruff/main/assets/badge/v2.json)](https://github.com/astral-sh/ruff) ![CI](https://github.com/luxonis/luxonis-train/actions/workflows/ci.yaml/badge.svg) ![Docs](https://github.com/luxonis/luxonis-train/actions/workflows/docs.yaml/badge.svg) [![codecov](https://codecov.io/gh/luxonis/luxonis-train/graph/badge.svg?token=647MTHBYD5)](https://codecov.io/gh/luxonis/luxonis-train) 
-Luxonis training framework (`luxonis-train`) is intended for training deep learning models that can run fast on OAK products. + -**The project is in a beta state and might be unstable or contain bugs - please report any feedback.** +## 🌟 Overview -## Table Of Contents +`LuxonisTrain` is a user-friendly tool designed to streamline the training of deep learning models, especially for edge devices. Built on top of `PyTorch Lightning`, it simplifies the process of training, testing, and exporting models with minimal coding required. -- [Installation](#installation) -- [Training](#training) -- [Customizations](#customizations) -- [Tuning](#tuning) -- [Exporting](#exporting) -- [Credentials](#credentials) -- [Contributing](#contributing) +### ✨ Key Features -## Installation +- **No Coding Required**: Define your training pipeline entirely through a single `YAML` configuration file. +- **Predefined Configurations**: Utilize ready-made configs for common computer vision tasks to start quickly. +- **Customizable**: Extend functionality with custom components using an intuitive Python API. +- **Edge Optimized**: Focus on models optimized for deployment on edge devices with limited compute resources. -`luxonis-train` is hosted on PyPi and can be installed with `pip` as: +> \[!WARNING\] +> **The project is in a beta state and might be unstable or contain bugs - please report any feedback.** + + + +## 🚀 Quick Start + +Get started with `LuxonisTrain` in just a few steps: + +1. **Install `LuxonisTrain`** + + ```bash + pip install luxonis-train + ``` + + This will create the `luxonis_train` executable in your `PATH`. + +1. **Use the provided `configs/detection_light_model.yaml` configuration file** + + You can download the file by executing the following command: + + ```bash + wget https://raw.githubusercontent.com/luxonis/luxonis-train/main/configs/detection_light_model.yaml + ``` + +1. **Find a suitable dataset for your task** + + We will use a sample COCO dataset from `RoboFlow` in this example. + +1. **Start training** + + ```bash + luxonis_train train \ + --config detection_light_model.yaml \ + loader.params.dataset_dir "roboflow://team-roboflow/coco-128/2/coco" + ``` + +1. **Monitor progress with `TensorBoard`** + + ```bash + tensorboard --logdir output/tensorboard_logs + ``` + + Open the provided URL in your browser to visualize the training progress + +## 📜 Table Of Contents + +- [🌟 Overview](#overview) + - [✨ Key Features](#key-features) +- [🚀 Quick Start](#quick-start) +- [🛠️ Installation](#installation) +- [📝 Usage](#usage) + - [💻 CLI](#cli) +- [⚙️ Configuration](#configuration) +- [🗃️ Data Preparation](#data-preparation) + - [📂 Data Directory](#data-directory) + - [💾 `LuxonisDataset`](#luxonis-dataset) +- [🏋️‍♂️Training](#training) +- [✍ Testing](#testing) +- [🧠 Inference](#inference) +- [🤖 Exporting](#exporting) +- [🗂️ NN Archive](#nn-archive) +- [🔬 Tuning](#tuning) +- [🎨 Customizations](#customizations) +- [📚 Tutorials and Examples](#tutorials-and-examples) +- [🔑 Credentials](#credentials) +- [🤝 Contributing](#contributing) + + + +## 🛠️ Installation + +`LuxonisTrain` requires **Python 3.10** or higher. We recommend using a virtual environment to manage dependencies. + +**Install via `pip`**: ```bash pip install luxonis-train ``` -This command will also create a `luxonis_train` executable in your `PATH`. -See `luxonis_train --help` for more information. +This will also install the `luxonis_train` CLI. For more information on how to use it, see [CLI Usage](#cli). 
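+
+If you want the isolated setup recommended above, you can, for instance, install into a virtual environment first. This is standard Python tooling rather than anything specific to `LuxonisTrain`:
+
+```bash
+# create and activate an isolated environment
+python3 -m venv .venv
+source .venv/bin/activate
+
+# install the package inside it
+pip install luxonis-train
+```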
+ + + +## 📝 Usage -## Usage +You can use `LuxonisTrain` either from the **command line** or via the **Python API**. +We will demonstrate both ways in the following sections. -The entire configuration is specified in a `yaml` file. This includes the model -structure, used losses, metrics, optimizers etc. For specific instructions and example -configuration files, see [Configuration](https://github.com/luxonis/luxonis-train/blob/main/configs/README.md). + -### Data Preparation +### 💻 CLI -This library requires data to be in the Luxonis Dataset Format. +The CLI is the most straightforward way how to use `LuxonisTrain`. The CLI provides several commands for training, testing, tuning, exporting and more. -For instructions on how to create a dataset in the LDF, follow the -[examples](https://github.com/luxonis/luxonis-ml/tree/main/examples) in -the [luxonis-ml](https://github.com/luxonis/luxonis-ml) repository. +**Available commands:** -To inspect dataset images by split (train, val, test), use the command: +- `train` - Start the training process +- `test` - Test the model on a specific dataset view +- `infer` - Run inference on a dataset, image directory, or a video file. +- `export` - Export the model to either `ONNX` or `BLOB` format that can be run on edge devices +- `archive` - Create an `NN Archive` file that can be used with our `DepthAI` API (coming soon) +- `tune` - Tune the hyperparameters of the model for better performance +- `inspect` - Inspect the dataset you are using and visualize the annotations + +**To get help on any command:** ```bash -luxonis_train data inspect --config --view +luxonis_train --help ``` -## Training +Specific usage examples can be found in the respective sections below. -Once you've created your `config.yaml` file you can train the model using this command: + -```bash -luxonis_train train --config config.yaml +## ⚙️ Configuration + +`LuxonisTrain` uses `YAML` configuration files to define the training pipeline. Here's a breakdown of the key sections: + +```yaml +model: + name: model_name + + # Use a predefined detection model instead of defining + # the model architecture manually + predefined_model: + name: DetectionModel + params: + variant: light + +# Download and parse the coco dataset from RoboFlow. +# Save it internally as `coco_test` dataset for future reference. +loader: + params: + dataset_name: coco_test + dataset_dir: "roboflow://team-roboflow/coco-128/2/coco" + +trainer: + batch_size: 8 + epochs: 200 + n_workers: 8 + validation_interval: 10 + + preprocessing: + train_image_size: [384, 384] + + # Uses the imagenet normalization by default + normalize: + active: true + + # Augmentations are powered by Albumentations + augmentations: + - name: Defocus + - name: Sharpen + - name: Flip + + callbacks: + - name: ExportOnTrainEnd + - name: ArchiveOnTrainEnd + - name: TestOnTrainEnd + + optimizer: + name: SGD + params: + lr: 0.02 + + scheduler: + name: ConstantLR ``` -If you wish to manually override some config parameters you can do this by providing the key-value pairs. Example of this is: +For an extensive list of all the available options, see [Configuration](https://github.com/luxonis/luxonis-train/blob/main/configs/README.md). + +We provide a set of predefined configuration files for the most common computer vision tasks. +You can find them in the `configs` directory. 
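+
+For example, to start from one of these presets, you can fetch it the same way as in the Quick Start and point it at your own data. The file name below is one of the predefined configuration files; the dataset path is a placeholder you would replace:
+
+```bash
+# download the light segmentation preset
+wget https://raw.githubusercontent.com/luxonis/luxonis-train/main/configs/segmentation_light_model.yaml
+
+# train it, overriding the dataset location from the command line
+luxonis_train train \
+    --config segmentation_light_model.yaml \
+    loader.params.dataset_dir "path/to/your/dataset"
+```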
+
+
+
+## 🗃️ Data Preparation
+
+`LuxonisTrain` supports several ways of loading data:
+
+- using a data directory in one of the supported formats
+- using an already existing dataset in our custom `LuxonisDataset` format
+- using a custom loader
+  - to learn how to implement and use custom loaders, see [Customizations](#customizations)
+
+
+
+### 📂 Data Directory
+
+The easiest way to load data is to use a directory with the dataset in one of the supported formats.
+
+**Supported formats:**
+
+- `COCO` - We support COCO JSON format in two variants:
+  - [`RoboFlow`](https://roboflow.com/formats/coco-json)
+  - [`FiftyOne`](https://docs.voxel51.com/user_guide/export_datasets.html#cocodetectiondataset-export)
+- [`Pascal VOC XML`](https://roboflow.com/formats/pascal-voc-xml)
+- [`YOLO Darknet TXT`](https://roboflow.com/formats/yolo-darknet-txt)
+- [`YOLOv4 PyTorch TXT`](https://roboflow.com/formats/yolov4-pytorch-txt)
+- [`MT YOLOv6`](https://roboflow.com/formats/mt-yolov6)
+- [`CreateML JSON`](https://roboflow.com/formats/createml-json)
+- [`TensorFlow Object Detection CSV`](https://roboflow.com/formats/tensorflow-object-detection-csv)
+- `Classification Directory` - A directory with subdirectories for each class
+  ```plaintext
+  dataset_dir/
+  ├── train/
+  │   ├── class1/
+  │   │   ├── img1.jpg
+  │   │   ├── img2.jpg
+  │   │   └── ...
+  │   ├── class2/
+  │   └── ...
+  ├── valid/
+  └── test/
+  ```
+- `Segmentation Mask Directory` - A directory with images and corresponding masks.
+  ```plaintext
+  dataset_dir/
+  ├── train/
+  │   ├── img1.jpg
+  │   ├── img1_mask.png
+  │   ├── ...
+  │   └── _classes.csv
+  ├── valid/
+  └── test/
+  ```
+  The masks are stored as grayscale `PNG` images where each pixel value corresponds to a class.
+  The mapping from pixel values to classes is defined in the `_classes.csv` file.
+  ```csv
+  Pixel Value, Class
+  0, background
+  1, class1
+  2, class2
+  3, class3
+  ```
+
+#### Preparing your Data
+
+1. Organize your dataset into one of the supported formats.
+1. Place your dataset in a directory accessible by the training script.
+1. Update the `dataset_dir` parameter in the configuration file to point to the dataset directory.
+
+**The `dataset_dir` can be one of the following:**
+
+- Local path to the dataset directory
+- URL to a remote dataset
+  - The dataset will be downloaded to a `"data"` directory in the current working directory
+  - **Supported URL protocols:**
+    - `s3://bucket/path/to/directory` for **AWS S3**
+    - `gs://bucket/path/to/directory` for **Google Cloud Storage**
+    - `roboflow://workspace/project/version/format` for **RoboFlow**
+      - `workspace` - name of the workspace the dataset belongs to
+      - `project` - name of the project the dataset belongs to
+      - `version` - version of the dataset
+      - `format` - one of `coco`, `darknet`, `voc`, `yolov4pytorch`, `mt-yolov6`, `createml`, `tensorflow`, `folder`, `png-mask-semantic`
+    - **example:** `roboflow://team-roboflow/coco-128/2/coco`
+
+**Example:**
+
+```yaml
+loader:
+  params:
+    dataset_name: "coco_test"
+    dataset_dir: "roboflow://team-roboflow/coco-128/2/coco"
+```
+
+
+
+### 💾 `LuxonisDataset`
+
+`LuxonisDataset` is our custom dataset format designed for easy and efficient dataset management.
+To learn more about how to create a dataset in this format from scratch, see the [Luxonis ML](https://github.com/luxonis/luxonis-ml) repository.
+ +To use the `LuxonisDataset` as a source of the data, specify the following in the config file: + +```yaml +loader: + params: + # name of the dataset + dataset_name: "dataset_name" + + # one of local (default), s3, gcs + bucket_storage: "local" +``` + +> \[!TIP\] +> To inspect the loader output, use the `luxonis_train inspect` command: +> +> ```bash +> luxonis_train inspect --config configs/detection_light_model.yaml +> ``` +> +> **The `inspect` command is currently only available in the CLI** + + + +## 🏋️‍♂️ Training + +Once your configuration file and dataset are ready, start the training process. + +**CLI:** ```bash -luxonis_train train --config config.yaml trainer.batch_size 8 trainer.epochs 10 +luxonis_train train --config configs/detection_light_model.yaml ``` -where key and value are space separated and sub-keys are dot (`.`) separated. If the configuration field is a list, then key/sub-key should be a number (e.g. `trainer.preprocessing.augmentations.0.name RotateCustom`). +> \[!TIP\] +> To change a configuration parameter from the command line, use the following syntax: +> +> ```bash +> luxonis_train train \ +> --config configs/detection_light_model.yaml \ +> loader.params.dataset_dir "roboflow://team-roboflow/coco-128/2/coco" +> ``` -## Evaluating +**Python API:** -To evaluate the model on a specific dataset split (train, test, or val), use the following command: +```python +from luxonis_train import LuxonisModel + +model = LuxonisModel( + "configs/detection_light_model.yaml", + {"loader.params.dataset_dir": "roboflow://team-roboflow/coco-128/2/coco"} +) +model.train() +``` + +**Expected Output:** + +```log +INFO Using predefined model: `DetectionModel` +INFO Main metric: `MeanAveragePrecision` +INFO GPU available: True (cuda), used: True +INFO TPU available: False, using: 0 TPU cores +INFO HPU available: False, using: 0 HPUs +... +INFO Training finished +INFO Checkpoints saved in: output/1-coral-wren +``` + +**Monitoring with `TensorBoard`:** + +If not explicitly disabled, the training process will be monitored by `TensorBoard`. To start the `TensorBoard` server, run: ```bash -luxonis_train eval --config --view +tensorboard --logdir output/tensorboard_logs ``` -## Tuning +Open the provided URL to visualize training metrics. + + -To improve training performance you can use `Tuner` for hyperparameter optimization. -To use tuning, you have to specify [tuner](https://github.com/luxonis/luxonis-train/blob/main/configs/README.md#tuner) section in the config file. +## ✍ Testing -To start the tuning, run +Evaluate your trained model on a specific dataset view (`train`, `val`, or `test`). + +**CLI:** ```bash -luxonis_train tune --config config.yaml +luxonis_train test --config configs/detection_light_model.yaml \ + --view val \ + --weights path/to/checkpoint.ckpt ``` -You can see an example tuning configuration [here](https://github.com/luxonis/luxonis-train/blob/main/configs/example_tuning.yaml). +**Python API:** + +```python +from luxonis_train import LuxonisModel -## Exporting +model = LuxonisModel("configs/detection_light_model.yaml") +model.test(weights="path/to/checkpoint.ckpt") +``` -We support export to `ONNX`, and `DepthAI .blob format` which is used for OAK cameras. By default, we export to `ONNX` format. +The testing process can be started automatically at the end of the training by using the `TestOnTrainEnd` callback. +To learn more about callbacks, see [Callbacks](https://github.com/luxonis/luxonis-train/blob/main/luxonis_train/callbacks/README.md). 
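+
+As a minimal sketch, enabling this in the configuration only requires listing the callback by name under `trainer.callbacks`:
+
+```yaml
+trainer:
+  callbacks:
+    - name: "TestOnTrainEnd"
+```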
-To use the exporter, you have to specify the [exporter](https://github.com/luxonis/luxonis-train/blob/main/configs/README.md#exporter) section in the config file. + -Once you have the config file ready you can export the model using +## 🧠 Inference + +Run inference on images, datasets, or videos. + +**CLI:** + +- **Inference on a Dataset View:** + +```bash +luxonis_train infer --config configs/detection_light_model.yaml \ + --view val \ + --weights path/to/checkpoint.ckpt +``` + +- **Inference on a Video File:** + +```bash +luxonis_train infer --config configs/detection_light_model.yaml \ + --weights path/to/checkpoint.ckpt \ + --source-path path/to/video.mp4 +``` + +- **Inference on an Image Directory:** ```bash -luxonis_train export --config config.yaml +luxonis_train infer --config configs/detection_light_model.yaml \ + --weights path/to/checkpoint.ckpt \ + --source-path path/to/images \ + --save-dir path/to/save_directory +``` + +**Python API:** + +```python +from luxonis_train import LuxonisModel + +model = LuxonisModel("configs/detection_light_model.yaml") + +# infer on a dataset view +model.infer(weights="path/to/checkpoint.ckpt", view="val") + +# infer on a video file +model.infer(weights="path/to/checkpoint.ckpt", source_path="path/to/video.mp4") + +# infer on an image directory and save the results +model.infer( + weights="path/to/checkpoint.ckpt", + source_path="path/to/images", + save_dir="path/to/save_directory", +) ``` + + +## 🤖 Exporting + +Export your trained models to formats suitable for deployment on edge devices. + +Supported formats: + +- **ONNX**: Open Neural Network Exchange format. +- **BLOB**: Format compatible with OAK-D cameras. + +To configure the exporter, you can specify the [exporter](https://github.com/luxonis/luxonis-train/blob/main/configs/README.md#exporter) section in the config file. + You can see an example export configuration [here](https://github.com/luxonis/luxonis-train/blob/main/configs/example_export.yaml). -## Customizations +**CLI:** + +```bash +luxonis_train export --config configs/example_export.yaml --weights path/to/weights.ckpt +``` + +**Python API:** + +```python +from luxonis_train import LuxonisModel + +model = LuxonisModel("configs/example_export.yaml") +model.export(weights="path/to/weights.ckpt") +``` + +Model export can be run automatically at the end of the training by using the `ExportOnTrainEnd` callback. + +The exported models are saved in the export directory within your `output` folder. + + + +## 🗂️ NN Archive + +Create an `NN Archive` file for easy deployment with the `DepthAI` API. + +The archive contains the exported model together with all the metadata needed for running the model. + +**CLI:** + +```bash +luxonis_train archive \ + --config configs/detection_light_model.yaml \ + --weights path/to/checkpoint.ckpt +``` + +**Python API:** + +```python +from luxonis_train import LuxonisModel + +model = LuxonisModel("configs/detection_light_model.yaml") +model.archive(weights="path/to/checkpoint.ckpt") +``` + +The archive can be created automatically at the end of the training by using the `ArchiveOnTrainEnd` callback. + + + +## 🔬 Tuning + +Optimize your model's performance using hyperparameter tuning powered by [`Optuna`](https://optuna.org/). + +**Configuration:** + +Include a [`tuner`](https://github.com/luxonis/luxonis-train/blob/main/configs/README.md#tuner) section in your configuration file. 
+ +```yaml + +tuner: + study_name: det_study + n_trials: 10 + storage: + storage_type: local + params: + trainer.optimizer.name_categorical: ["Adam", "SGD"] + trainer.optimizer.params.lr_float: [0.0001, 0.001] + trainer.batch_size_int: [4, 16, 4] +``` + +**CLI:** + +```bash +luxonis_train tune --config configs/example_tuning.yaml +``` + +**Python API:** + +```python +from luxonis_train import LuxonisModel + +model = LuxonisModel("configs/example_tuning.yaml") +model.tune() +``` + + + +## 🎨 Customizations + +`LuxonisTrain` is highly modular, allowing you to customize various components: + +- **Loaders**: Handle data loading and preprocessing. +- **Nodes**: Represent computational units in the model architecture. +- **Losses**: Define the loss functions used to train the model. +- **Metrics**: Measure the model's performance during training. +- **Visualizers**: Visualize the model's predictions during training. +- **Callbacks**: Allow custom code to be executed at different stages of training. +- **Optimizers/Schedulers**: Control how the model's weights are updated. + +Understanding these components helps in tailoring the framework to your specific needs. We provide a registry interface through which you can create new -[nodes](https://github.com/luxonis/luxonis-train/blob/main/luxonis_train/nodes/README.md), -[losses](https://github.com/luxonis/luxonis-train/blob/main/luxonis_train/attached_modules/losses/README.md), -[metrics](https://github.com/luxonis/luxonis-train/blob/main/luxonis_train/attached_modules/metrics/README.md), -[visualizers](https://github.com/luxonis/luxonis-train/blob/main/luxonis_train/attached_modules/visualizers/README.md), -[callbacks](https://github.com/luxonis/luxonis-train/blob/main/luxonis_train/callbacks/README.md), -[optimizers](https://github.com/luxonis/luxonis-train/blob/main/configs/README.md#optimizer), -and [schedulers](https://github.com/luxonis/luxonis-train/blob/main/configs/README.md#scheduler). - -Registered components can be then referenced in the config file. Custom components need to inherit from their respective base classes: - -- Node - [BaseNode](https://github.com/luxonis/luxonis-train/blob/main/luxonis_train/models/nodes/base_node.py) -- Loss - [BaseLoss](https://github.com/luxonis/luxonis-train/blob/main/luxonis_train/attached_modules/losses/base_loss.py) -- Metric - [BaseMetric](https://github.com/luxonis/luxonis-train/blob/main/luxonis_train/attached_modules/metrics/base_metric.py) -- Visualizer - [BaseVisualizer](https://github.com/luxonis/luxonis-train/blob/main/luxonis_train/attached_modules/visualizers/base_visualizer.py) -- Callback - [Callback from lightning.pytorch.callbacks](lightning.pytorch.callbacks) -- Optimizer - [Optimizer from torch.optim](https://pytorch.org/docs/stable/optim.html#torch.optim.Optimizer) -- Scheduler - [LRScheduler from torch.optim.lr_scheduler](https://pytorch.org/docs/stable/optim.html#how-to-adjust-learning-rate) - -Here is an example of how to create custom components: + +- [**Loaders**](https://github.com/luxonis/luxonis-train/blob/main/luxonis_train/loaders/README.md): Handles data loading and preprocessing. +- [**Nodes**](https://github.com/luxonis/luxonis-train/blob/main/luxonis_train/nodes/README.md): Represents computational units in the model architecture. +- [**Losses**](https://github.com/luxonis/luxonis-train/blob/main/luxonis_train/attached_modules/losses/README.md): Define the loss functions used to train the model. 
+- [**Metrics**](https://github.com/luxonis/luxonis-train/blob/main/luxonis_train/attached_modules/metrics/README.md): Measure the model's performance during training. +- [**Visualizers**](https://github.com/luxonis/luxonis-train/blob/main/luxonis_train/attached_modules/visualizers/README.md): Visualize the model's predictions during training. +- [**Callbacks**](https://github.com/luxonis/luxonis-train/blob/main/luxonis_train/callbacks/README.md): Allow custom code to be executed at different stages of training. +- [**Optimizers**](https://github.com/luxonis/luxonis-train/blob/main/configs/README.md#optimizer): Control how the model's weights are updated. +- [**Schedulers**](https://github.com/luxonis/luxonis-train/blob/main/configs/README.md#scheduler): Adjust the learning rate during training. + +**Creating Custom Components:** + +Implement custom components by subclassing the respective base classes and/or registering them. +Registered components can be referenced in the config file. Custom components need to inherit from their respective base classes: + +- **Loaders** - [`BaseLoader`](https://github.com/luxonis/luxonis-train/blob/main/luxonis_train/loaders/base_loader.py) +- **Nodes** - [`BaseNode`](https://github.com/luxonis/luxonis-train/blob/main/luxonis_train/models/nodes/base_node.py) +- **Losses** - [`BaseLoss`](https://github.com/luxonis/luxonis-train/blob/main/luxonis_train/attached_modules/losses/base_loss.py) +- **Metrics** - [`BaseMetric`](https://github.com/luxonis/luxonis-train/blob/main/luxonis_train/attached_modules/metrics/base_metric.py) +- **Visualizers** - [`BaseVisualizer`](https://github.com/luxonis/luxonis-train/blob/main/luxonis_train/attached_modules/visualizers/base_visualizer.py) +- **Callbacks** - [`lightning.pytorch.callbacks.Callback`](https://lightning.ai/docs/pytorch/stable/extensions/callbacks.html), requires manual registration to the `CALLBACKS` registry +- **Optimizers** - [`torch.optim.Optimizer`](https://pytorch.org/docs/stable/optim.html#torch.optim.Optimizer), requires manual registration to the `OPTIMIZERS` registry +- **Schedulers** - [`torch.optim.lr_scheduler.LRScheduler`](https://pytorch.org/docs/stable/optim.html#how-to-adjust-learning-rate), requires manual registration to the `SCHEDULERS` registry + +**Example:** ```python from torch.optim import Optimizer @@ -137,82 +604,103 @@ from luxonis_train.attached_modules.losses import BaseLoss @OPTIMIZERS.register_module() class CustomOptimizer(Optimizer): - ... + def __init__(self, params, lr=0.001): + super().__init__(params, defaults={'lr': lr}) + # Implement optimizer logic -# Subclasses of BaseNode, LuxonisLoss, LuxonisMetric +# Subclasses of BaseNode, BaseLoss, BaseMetric # and BaseVisualizer are registered automatically. - class CustomLoss(BaseLoss): - # This class is automatically registered under `CustomLoss` name. + # This class is automatically registered under the name `CustomLoss`. def __init__(self, k_steps: int, **kwargs): super().__init__(**kwargs) ... ``` -And then in the config you reference this `CustomOptimizer` and `CustomLoss` by their names: +**Using custom components in config:** ```yaml -losses: - - name: CustomLoss - params: # additional parameters - k_steps: 12 - +model: + nodes: + - name: SegmentationHead + losses: + - name: CustomLoss + params: + k_steps: 12 + +optimizer: + name: CustomOptimizer + params: + lr: 0.01 ``` -For more information on how to define custom components, consult the respective in-source documentation. 
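+
+Because callbacks, optimizers, and schedulers subclass third-party base classes, they need the manual registration mentioned above. A minimal sketch, assuming the registries are importable from `luxonis_train.utils.registry` (adjust the import if your version exposes them elsewhere); the callback itself is hypothetical:
+
+```python
+import lightning.pytorch as pl
+
+from luxonis_train.utils.registry import CALLBACKS
+
+
+@CALLBACKS.register_module()
+class EpochPrinter(pl.Callback):
+    """Hypothetical callback that prints the index of each starting epoch."""
+
+    def on_train_epoch_start(
+        self, trainer: pl.Trainer, pl_module: pl.LightningModule
+    ) -> None:
+        print(f"Starting epoch {trainer.current_epoch}")
+```
+
+Once registered, the callback can be referenced by its class name (`EpochPrinter`) in the `trainer.callbacks` section of the config, just like the built-in ones.
+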
+> \[!NOTE\] +> Files containing the custom components must be sourced before the training script is run. +> To do that in CLI, you can use the `--source` argument: +> +> ```bash +> luxonis_train --source custom_components.py train --config config.yaml +> ``` -## Credentials +**Python API:** -Local use is supported by default. In addition, we also integrate some cloud services which can be primarily used for logging and storing. When these are used, you need to load environment variables to set up the correct credentials. +You have to import the custom components before creating the `LuxonisModel` instance. -You have these options how to set up the environment variables: +```python +from custom_components import * +from luxonis_train import LuxonisModel -- Using standard environment variables -- Specifying the variables in a `.env` file. If a variable is both in the environment and present in `.env` file, the exported variable takes precedence. -- Specifying the variables in the [ENVIRON](https://github.com/luxonis/luxonis-train/blob/main/configs/README.md#environ) section of the config file. Note that this is not a recommended way. Variables defined in config take precedence over environment and `.env` variables. +model = LuxonisModel("config.yaml") +model.train() +``` -### S3 +For more information on how to define custom components, consult the respective in-source documentation. -If you are working with LuxonisDataset that is hosted on S3, you need to specify these env variables: + -```bash -AWS_ACCESS_KEY_ID=********** -AWS_SECRET_ACCESS_KEY=********** -AWS_S3_ENDPOINT_URL=********** -``` +## 📚 Tutorials and Examples -### MLFlow +We are actively working on providing examples and tutorials for different parts of the library which will help you to start more easily. The tutorials can be found [here](https://github.com/luxonis/depthai-ml-training/tree/master) and will be updated regularly. -If you want to use MLFlow for logging and storing artifacts you also need to specify MLFlow-related env variables like this: + -```bash -MLFLOW_S3_BUCKET=********** -MLFLOW_S3_ENDPOINT_URL=********** -MLFLOW_TRACKING_URI=********** -``` +## 🔑 Credentials -### WandB +When using cloud services, avoid hard-coding credentials or placing them directly in your configuration files. +Instead: -If you are using WandB for logging, you have to sign in first in your environment. +- Use environment variables to store sensitive information. +- Use a `.env` file and load it securely, ensuring it's excluded from version control. -### POSTGRESS +**Supported Cloud Services:** -There is an option for remote storage for [Tuning](#tuning). 
We use POSTGRES and to connect to the database you need to specify the following env variables: +- **AWS S3**, requires: + - `AWS_ACCESS_KEY_ID` + - `AWS_SECRET_ACCESS_KEY` + - `AWS_S3_ENDPOINT_URL` +- **Google Cloud Storage**, requires: + - `GOOGLE_APPLICATION_CREDENTIALS` +- **RoboFlow**, requires: + - `ROBOFLOW_API_KEY` -```bash -POSTGRES_USER=********** -POSTGRES_PASSWORD=********** -POSTGRES_HOST=********** -POSTGRES_PORT=********** -POSTGRES_DB=********** -``` +**For logging and tracking, we support:** -## Contributing +- **MLFlow**, requires: + - `MLFLOW_S3_BUCKET` + - `MLFLOW_S3_ENDPOINT_URL` + - `MLFLOW_TRACKING_URI` +- **WandB**, requires: + - `WANDB_API_KEY` -If you want to contribute to the development, install the dev version of the package: +**For remote database storage, we support:** -```bash -pip install luxonis-train[dev] -``` +- `POSTGRES_PASSWORD` +- `POSTGRES_HOST` +- `POSTGRES_PORT` +- `POSTGRES_DB` + + + +## 🤝 Contributing -Consult the [Contribution guide](https://github.com/luxonis/luxonis-train/blob/main/CONTRIBUTING.md) for further instructions. +We welcome contributions! Please read our [Contribution Guide](https://github.com/luxonis/luxonis-train/blob/main/CONTRIBUTING.md) to get started. Whether it's reporting bugs, improving documentation, or adding new features, your help is appreciated. diff --git a/configs/README.md b/configs/README.md index 384f6220..b06c9495 100644 --- a/configs/README.md +++ b/configs/README.md @@ -1,6 +1,6 @@ # Configuration -The configuration is defined in a yaml file, which you must provide. +The configuration is defined in a `YAML` file, which you must provide. The configuration file consists of a few major blocks that are described below. You can create your own config or use/edit one of the examples. @@ -9,19 +9,20 @@ You can create your own config or use/edit one of the examples. - [Top-level Options](#top-level-options) - [Model](#model) - [Nodes](#nodes) - - [Attached Modules](#attached-modules) - [Losses](#losses) - [Metrics](#metrics) - [Visualizers](#visualizers) - [Tracker](#tracker) - [Loader](#loader) -- [Trainer](#train) + - [`LuxonisLoaderTorch`](#luxonisloadertorch) +- [Trainer](#trainer) - [Preprocessing](#preprocessing) + - [Augmentations](#augmentations) + - [Callbacks](#callbacks) - [Optimizer](#optimizer) - [Scheduler](#scheduler) - - [Callbacks](#callbacks) - [Exporter](#exporter) - - [ONNX](#onnx) + - [`ONNX`](#onnx) - [Blob](#blob) - [Tuner](#tuner) - [Storage](#storage) @@ -29,288 +30,462 @@ You can create your own config or use/edit one of the examples. 
## Top-level Options -| Key | Type | Default value | Description | -| -------- | --------------------- | ------------- | ---------------- | -| model | [Model](#model) | | Model section | -| loader | [loader](#loader) | | Loader section | -| train | [train](#train) | | Train section | -| tracker | [tracker](#tracker) | | Tracker section | -| trainer | [trainer](#trainer) | | Trainer section | -| exporter | [exporter](#exporter) | | Exporter section | -| tuner | [tuner](#tuner) | | Tuner section | +| Key | Type | Description | +| ---------- | ----------------------- | ---------------- | +| `model` | [`model`](#model) | Model section | +| `loader` | [`loader`](#loader) | Loader section | +| `train` | [`train`](#train) | Train section | +| `tracker` | [`tracker`](#tracker) | Tracker section | +| `trainer` | [`trainer`](#trainer) | Trainer section | +| `exporter` | [`exporter`](#exporter) | Exporter section | +| `tuner` | [`tuner`](#tuner) | Tuner section | ## Model This is the most important block, that **must be always defined by the user**. There are two different ways you can create the model. -| Key | Type | Default value | Description | -| ---------------- | ---- | ------------- | ---------------------------------------------------------- | -| name | str | "model" | Name of the model | -| weights | path | None | Path to weights to load | -| predefined_model | str | None | Name of a predefined model to use | -| params | dict | {} | Parameters for the predefined model | -| nodes | list | \[\] | List of nodes (see [nodes](#nodes) | -| losses | list | \[\] | lList of losses (see [losses](#losses) | -| metrics | list | \[\] | List of metrics (see [metrics](#metrics) | -| visualziers | list | \[\] | List of visualizers (see [visualizers](#visualizers) | -| outputs | list | \[\] | List of outputs nodes, inferred from nodes if not provided | +| Key | Type | Default value | Description | +| ------------------ | ------ | ------------- | ---------------------------------------------------------- | +| `name` | `str` | `"model"` | Name of the model | +| `weights` | `path` | `None` | Path to weights to load | +| `predefined_model` | `str` | `None` | Name of a predefined model to use | +| `params` | `dict` | `{}` | Parameters for the predefined model | +| `nodes` | `list` | `[]` | List of nodes (see [nodes](#nodes)) | +| `losses` | `list` | `[]` | List of losses (see [losses](#losses)) | +| `metrics` | `list` | `[]` | List of metrics (see [metrics](#metrics)) | +| `visualziers` | `list` | `[]` | List of visualizers (see [visualizers](#visualizers)) | +| `outputs` | `list` | `[]` | List of outputs nodes, inferred from nodes if not provided | ### Nodes For list of all nodes, see [nodes](../luxonis_train/nodes/README.md). 
-| Key | Type | Default value | Description | -| ----------------------- | -------------------- | ------------- | ------------------------------------------------------------------------------------------------------------------------------------------- | -| name | str | | Name of the node | -| alias | str | None | Custom name for the node | -| params | dict | {} | Parameters for the node | -| inputs | list | \[\] | List of input nodes for this node, if empty, the node is understood to be an input node of the model | -| freezing.active | bool | False | whether to freeze the modules so the weights are not updated | -| freezing.unfreeze_after | int \| float \| None | None | After how many epochs should the modules be unfrozen, can be `int` for a specific number of epochs or `float` for a portion of the training | -| remove_on_export | bool | False | Whether the node should be removed when exporting | - -### Attached Modules - -Modules that are attached to a node. This include losses, metrics and visualziers. - -| Key | Type | Default value | Description | -| ----------- | ---- | ------------- | ------------------------------------------- | -| name | str | | Name of the module | -| attached_to | str | | Name of the node the module is attached to. | -| alias | str | None | Custom name for the module | -| params | dict | {} | Parameters of the module | +| Key | Type | Default value | Description | +| ------------------------- | ---------------------- | ------------- | ------------------------------------------------------------------------------------------------------------------------------------------- | +| `name` | `str` | - | Name of the node | +| `alias` | `str` | `None` | Custom name for the node | +| `params` | `dict` | `{}` | Parameters for the node | +| `inputs` | `list` | `[]` | List of input nodes for this node, if empty, the node is understood to be an input node of the model | +| `freezing.active` | `bool` | `False` | whether to freeze the modules so the weights are not updated | +| `freezing.unfreeze_after` | `int \| float \| None` | `None` | After how many epochs should the modules be unfrozen, can be `int` for a specific number of epochs or `float` for a portion of the training | +| `remove_on_export` | `bool` | `False` | Whether the node should be removed when exporting | +| `losses` | `list` | `[]` | List of losses attached to this node | +| `metrics` | `list` | `[]` | List of metrics attached to this node | +| `visualizers` | `list` | `[]` | List of visualizers attached to this node | #### Losses At least one node must have a loss attached to it. You can see the list of all currently supported loss functions and their parameters [here](../luxonis_train/attached_modules/losses/README.md). -| Key | Type | Default value | Description | -| ------ | ----- | ------------- | ---------------------------------------- | -| weight | float | 1.0 | Weight of the loss used in the final sum | +| Key | Type | Default value | Description | +| -------- | ------- | ------------- | ---------------------------------------- | +| `weight` | `float` | `1.0` | Weight of the loss used in the final sum | +| `alias` | `str` | `None` | Custom name for the loss | +| `params` | `dict` | `{}` | Additional parameters for the loss | #### Metrics In this section, you configure which metrics should be used for which node. You can see the list of all currently supported metrics and their parameters [here](../luxonis_train/attached_modules/metrics/README.md). 
-| Key | Type | Default value | Description | -| -------------- | ---- | ------------- | --------------------------------------------------------------------------------------- | -| is_main_metric | bool | False | Marks this specific metric as the main one. Main metric is used for saving checkpoints. | +| Key | Type | Default value | Description | +| ---------------- | ------ | ------------- | -------------------------------------------------------------------------------------- | +| `is_main_metric` | `bool` | `False` | Marks this specific metric as the main one. Main metric is used for saving checkpoints | +| `alias` | `str` | `None` | Custom name for the metric | +| `params` | `dict` | `{}` | Additional parameters for the metric | #### Visualizers In this section, you configure which visualizers should be used for which node. Visualizers are responsible for creating images during training. You can see the list of all currently supported visualizers and their parameters [here](../luxonis_train/attached_modules/visualizers/README.md). -Visualizers have no specific configuration. +| Key | Type | Default value | Description | +| -------- | ------ | ------------- | ---------------------------------------- | +| `alias` | `str` | `None` | Custom name for the visualizer | +| `params` | `dict` | `{}` | Additional parameters for the visualizer | + +**Example:** + +```yaml +name: "SegmentationHead" +inputs: + - "RepPANNeck" +losses: + - name: "BCEWithLogitsLoss" +metrics: + - name: "F1Score" + params: + task: "binary" + - name: "JaccardIndex" + params: + task: "binary" +visualizers: + - name: "SegmentationVisualizer" + params: + colors: "#FF5055" +``` ## Tracker -This library uses [LuxonisTrackerPL](https://github.com/luxonis/luxonis-ml/blob/b2399335efa914ef142b1b1a5db52ad90985c539/src/luxonis_ml/ops/tracker.py#L152). +This library uses [`LuxonisTrackerPL`](https://github.com/luxonis/luxonis-ml/blob/b2399335efa914ef142b1b1a5db52ad90985c539/src/luxonis_ml/ops/tracker.py#L152). You can configure it like this: -| Key | Type | Default value | Description | -| -------------- | ----------- | ------------- | ---------------------------------------------------------- | -| project_name | str \| None | None | Name of the project used for logging. | -| project_id | str \| None | None | Id of the project used for logging (relevant for MLFlow). | -| run_name | str \| None | None | Name of the run. If empty, then it will be auto-generated. | -| run_id | str \| None | None | Id of an already created run (relevant for MLFLow.) | -| save_directory | str | "output" | Path to the save directory. | -| is_tensorboard | bool | True | Whether to use tensorboard. | -| is_wandb | bool | False | Whether to use WandB. | -| wandb_entity | str \| None | None | Name of WandB entity. | -| is_mlflow | bool | False | Whether to use MLFlow. | +| Key | Type | Default value | Description | +| ---------------- | ------------- | ------------- | ---------------------------------------------------------- | +| `project_name` | `str \| None` | `None` | Name of the project used for logging | +| `project_id` | `str \| None` | `None` | ID of the project used for logging (relevant for `MLFlow`) | +| `run_name` | `str \| None` | `None` | Name of the run. 
If empty, then it will be auto-generated | +| `run_id` | `str \| None` | `None` | ID of an already created run (relevant for `MLFLow`) | +| `save_directory` | `str` | `"output"` | Path to the save directory | +| `is_tensorboard` | `bool` | `True` | Whether to use `Tensorboard` | +| `is_wandb` | `bool` | `False` | Whether to use `WandB` | +| `wandb_entity` | `str \| None` | `None` | Name of `WandB` entity | +| `is_mlflow` | `bool` | `False` | Whether to use `MLFlow` | + +**Example:** + +```yaml +tracker: + project_name: "project_name" + save_directory: "output" + is_tensorboard: true + is_wandb: false + is_mlflow: false +``` ## Loader This section controls the data loading process and parameters regarding the dataset. -To store and load the data we use LuxonisDataset and LuxonisLoader. For specific config parameters refer to [LuxonisML](https://github.com/luxonis/luxonis-ml). +To store and load the data we use `LuxonisDataset` and `LuxonisLoader.` For specific config parameters refer to [`LuxonisML`](https://github.com/luxonis/luxonis-ml). -| Key | Type | Default value | Description | -| ------------ | ------------------ | ------------------ | -------------------------------- | -| name | str | LuxonisLoaderTorch | Name of the Loader | -| image_source | str | image | Name of the input image group | -| train_view | str \| list\[str\] | train | splits to use for training | -| val_view | str \| list\[str\] | val | splits to use for validation | -| test_view | str \| list\[str\] | test | splits to use for testing | -| params | Dict\[str, Any\] | {} | Additional parameters for loader | +| Key | Type | Default value | Description | +| -------------- | ------------------ | ---------------------- | ------------------------------------ | +| `name` | `str` | `"LuxonisLoaderTorch"` | Name of the Loader | +| `image_source` | `str` | `"image"` | Name of the input image group | +| `train_view` | `str \| list[str]` | `"train"` | splits to use for training | +| `val_view` | `str \| list[str]` | `"val"` | splits to use for validation | +| `test_view` | `str \| list[str]` | `"test"` | splits to use for testing | +| `params` | `dict[str, Any]` | `{}` | Additional parameters for the loader | -### LuxonisLoaderTorch +### `LuxonisLoaderTorch` -By default LuxonisLoaderTorch which can either use an existing LuxonisDataset or create a new one if it can be parsed automatically by LuxonisParser (check [LuxonisML](https://github.com/luxonis/luxonis-ml) `data` subpackage for more info). +By default, `LuxonisLoaderTorch` can either use an existing `LuxonisDataset` or create a new one if it can be parsed automatically by `LuxonisParser` (check [`LuxonisML`](https://github.com/luxonis/luxonis-ml) `data` sub-package for more info). -In most cases you want to change one of the parameters below. You can check all the parameters in `LuxonisLoaderTorch` class itself. +In most cases you want to set one of the parameters below. You can check all the parameters in the `LuxonisLoaderTorch` class itself. -| dataset_name | str | None | None | Name of an existing LuxonisDataset. | -| dataset_dir | str | None | None | Location of the data from which new LuxonisDataset will be created | -| dataset_type | DatasetType | None | None | Can specify exact format of the data. If None and new dataset needs to be created then it will be infered automatically. 
| +| Key | Type | Default value | Description | +| -------------- | ----- | ------------- | -------------------------------------------------------------------- | +| `dataset_name` | `str` | `None` | Name of an existing `LuxonisDataset` | +| `dataset_dir` | `str` | `None` | Location of the data from which new `LuxonisDataset` will be created | + +**Example:** + +```yaml +loader: + # using default loader with an existing dataset + params: + dataset_name: "dataset_name" +``` + +```yaml +loader: + # using default loader with a directory + params: + dataset_name: "dataset_name" + dataset_dir: "path/to/dataset" +``` ## Trainer Here you can change everything related to actual training of the model. -| Key | Type | Default value | Description | -| ----------------------- | ---------------------------------------------- | ------------- | ------------------------------------------------------------------------------------------------------------------------------------------------ | -| seed | int | None | Seed for reproducibility | -| deterministic | bool \| "warn" \| None | None | Whether pytorch should use deterministic backend | -| batch_size | int | 32 | Batch size used for training | -| accumulate_grad_batches | int | 1 | Number of batches for gradient accumulation | -| use_weighted_sampler | bool | False | Bool if use WeightedRandomSampler for training, only works with classification tasks | -| epochs | int | 100 | Number of training epochs | -| n_workers | int | 4 | Number of workers for data loading | -| validation_interval | int | 5 | Frequency of computing metrics on validation data | -| n_log_images | int | 4 | Maximum number of images to visualize and log | -| skip_last_batch | bool | True | Whether to skip last batch while training | -| accelerator | Literal\["auto", "cpu", "gpu"\] | "auto" | What accelerator to use for training. | -| devices | int \| list\[int\] \| str | "auto" | Either specify how many devices to use (int), list specific devices, or use "auto" for automatic configuration based on the selected accelerator | -| matmul_precision | Literal\["medium", "high", "highest"\] \| None | None | Sets the internal precision of float32 matrix multiplications. | -| strategy | Literal\["auto", "ddp"\] | "auto" | What strategy to use for training. | -| n_sanity_val_steps | int | 2 | Number of sanity validation steps performed before training. | -| profiler | Literal\["simple", "advanced"\] \| None | None | PL profiler for GPU/CPU/RAM utilization analysis | -| verbose | bool | True | Print all intermediate results to console. | -| pin_memory | bool | True | Whether to pin memory in the DataLoader | -| save_top_k | -1 \| NonNegativeInt | 3 | Save top K checkpoints based on validation loss when training. 
| +| Key | Type | Default value | Description | +| ------------------------- | ---------------------------------------------- | ------------- | ------------------------------------------------------------------------------------------------------------------------------------------------ | +| `seed` | `int` | `None` | Seed for reproducibility | +| `deterministic` | `bool \| "warn" \| None` | `None` | Whether PyTorch should use deterministic backend | +| `batch_size` | `int` | `32` | Batch size used for training | +| `accumulate_grad_batches` | `int` | `1` | Number of batches for gradient accumulation | +| `use_weighted_sampler` | `bool` | `False` | Whether to use `WeightedRandomSampler` for training, only works with classification tasks | +| `epochs` | `int` | `100` | Number of training epochs | +| `n_workers` | `int` | `4` | Number of workers for data loading | +| `validation_interval` | `int` | `5` | Frequency of computing metrics on validation data | +| `n_log_images` | `int` | `4` | Maximum number of images to visualize and log | +| `skip_last_batch` | `bool` | `True` | Whether to skip last batch while training | +| `accelerator` | `Literal["auto", "cpu", "gpu"]` | `"auto"` | What accelerator to use for training | +| `devices` | `int \| list[int] \| str` | `"auto"` | Either specify how many devices to use (int), list specific devices, or use "auto" for automatic configuration based on the selected accelerator | +| `matmul_precision` | `Literal["medium", "high", "highest"] \| None` | `None` | Sets the internal precision of float32 matrix multiplications | +| `strategy` | `Literal["auto", "ddp"]` | `"auto"` | What strategy to use for training | +| `n_sanity_val_steps` | `int` | `2` | Number of sanity validation steps performed before training | +| `profiler` | `Literal["simple", "advanced"] \| None` | `None` | PL profiler for GPU/CPU/RAM utilization analysis | +| `verbose` | `bool` | `True` | Print all intermediate results to console | +| `pin_memory` | `bool` | `True` | Whether to pin memory in the `DataLoader` | +| `save_top_k` | `-1 \| NonNegativeInt` | `3` | Save top K checkpoints based on validation loss when training | + +**Example:** + +```yaml + +trainer: + accelerator: "auto" + devices: "auto" + strategy: "auto" + + n_sanity_val_steps: 1 + profiler: null + verbose: true + batch_size: 8 + accumulate_grad_batches: 1 + epochs: 200 + n_workers: 8 + validation_interval: 10 + n_log_images: 8 + skip_last_batch: true + log_sub_losses: true + save_top_k: 3 +``` ### Preprocessing -We use [Albumentations](https://albumentations.ai/docs/) library for `augmentations`. [Here](https://albumentations.ai/docs/api_reference/full_reference/#pixel-level-transforms) you can see a list of all pixel level augmentations supported, and [here](https://albumentations.ai/docs/api_reference/full_reference/#spatial-level-transforms) you see all spatial level transformations. In config you can specify any augmentation from this lists and their params. +We use [`Albumentations`](https://albumentations.ai/docs/) library for `augmentations`. [Here](https://albumentations.ai/docs/api_reference/full_reference/#pixel-level-transforms) you can see a list of all pixel level augmentations supported, and [here](https://albumentations.ai/docs/api_reference/full_reference/#spatial-level-transforms) you see all spatial level transformations. In the configuration you can specify any augmentation from these lists and their parameters. 
+ +Additionally, we support `Mosaic4` and `MixUp` batch augmentations and letterbox resizing if `keep_aspect_ratio: true`. + +| Key | Type | Default value | Description | +| ------------------- | ------------ | ------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `train_image_size` | `list[int]` | `[256, 256]` | Image size used for training as `[height, width]` | +| `keep_aspect_ratio` | `bool` | `True` | Whether to keep the aspect ratio while resizing | +| `train_rgb` | `bool` | `True` | Whether to train on RGB or BGR images | +| `normalize.active` | `bool` | `True` | Whether to use normalization | +| `normalize.params` | `dict` | `{}` | Parameters for normalization, see [Normalize](https://albumentations.ai/docs/api_reference/augmentations/transforms/#albumentations.augmentations.transforms.Normalize) | +| `augmentations` | `list[dict]` | `[]` | List of `Albumentations` augmentations | + +#### Augmentations + +| Key | Type | Default value | Description | +| -------- | ------ | ------------- | ---------------------------------- | +| `name` | `str` | - | Name of the augmentation | +| `active` | `bool` | `True` | Whether the augmentation is active | +| `params` | `dict` | `{}` | Parameters of the augmentation | + +**Example:** + +```yaml + +trainer: + preprocessing: + # using YAML capture to reuse the image size + train_image_size: [&height 384, &width 384] + keep_aspect_ratio: true + train_rgb: true + normalize: + active: true + augmentations: + - name: "Defocus" + params: + p: 0.1 + - name: "Sharpen" + params: + p: 0.1 + - name: "Flip" + - name: "RandomRotate90" + - name: "Mosaic4" + params: + out_width: *width + out_height: *height -Additionaly we support `Mosaic4` and `MixUp` batch augmentations and letterbox resizing if `keep_aspect_ratio: True`. +``` -| Key | Type | Default value | Description | -| ----------------- | --------------------------------------------------------------------------------------------------------------------------------------------- | ------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| train_image_size | list\[int\] | \[256, 256\] | Image size used for training \[height, width\] | -| keep_aspect_ratio | bool | True | Bool if keep aspect ration while resizing | -| train_rgb | bool | True | Bool if train on rgb or bgr | -| normalize.active | bool | True | Bool if use normalization | -| normalize.params | dict | {} | Params for normalization, see [documentation](https://albumentations.ai/docs/api_reference/augmentations/transforms/#albumentations.augmentations.transforms.Normalize) | -| augmentations | list\[{"name": Name of the augmentation, "active": Bool if aug is active, by default set to True, "params": Parameters of the augmentation}\] | \[\] | List of Albumentations augmentations | +### Callbacks + +Callbacks sections contain a list of callbacks. 
+More information on callbacks and a list of available ones can be found [here](../luxonis_train/callbacks/README.md).
+Each callback is a dictionary with the following fields:
+
+| Key | Type | Default value | Description |
+| -------- | ------ | ------------- | -------------------------- |
+| `name` | `str` | - | Name of the callback |
+| `active` | `bool` | `True` | Whether callback is active |
+| `params` | `dict` | `{}` | Parameters of the callback |
+
+**Example:**
+
+```yaml
+
+trainer:
+  callbacks:
+    - name: "LearningRateMonitor"
+      params:
+        logging_interval: "step"
+    - name: MetadataLogger
+      params:
+        hyperparams: ["trainer.epochs", "trainer.batch_size"]
+    - name: "EarlyStopping"
+      params:
+        patience: 3
+        monitor: "val/loss"
+        mode: "min"
+        verbose: true
+    - name: "ExportOnTrainEnd"
+    - name: "TestOnTrainEnd"
+```

### Optimizer

What optimizer to use for training. List of all optimizers can be found [here](https://pytorch.org/docs/stable/optim.html).

-| Key | Type | Default value | Description |
-| ------ | ---- | ------------- | ---------------------------- |
-| name | str | "Adam" | Name of the optimizer. |
-| params | dict | {} | Parameters of the optimizer. |
+| Key | Type | Default value | Description |
+| -------- | ------ | ------------- | --------------------------- |
+| `name` | `str` | `"Adam"` | Name of the optimizer |
+| `params` | `dict` | `{}` | Parameters of the optimizer |
+
+**Example:**
+
+```yaml
+optimizer:
+  name: "SGD"
+  params:
+    lr: 0.02
+    momentum: 0.937
+    nesterov: true
+    weight_decay: 0.0005
+```

### Scheduler

What scheduler to use for training. List of all schedulers can be found [here](https://pytorch.org/docs/stable/optim.html#how-to-adjust-learning-rate).

-| Key | Type | Default value | Description |
-| ------ | ---- | ------------- | ---------------------------- |
-| name | str | "ConstantLR" | Name of the scheduler. |
-| params | dict | {} | Parameters of the scheduler. |
+| Key | Type | Default value | Description |
+| -------- | ------ | -------------- | --------------------------- |
+| `name` | `str` | `"ConstantLR"` | Name of the scheduler |
+| `params` | `dict` | `{}` | Parameters of the scheduler |

-### Callbacks
-
-Callbacks sections contains a list of callbacks.
-More information on callbacks and a list of available ones can be found [here](../luxonis_train/callbacks/README.md)
-Each callback is a dictionary with the following fields:
+**Example:**

-| Key | Type | Default value | Description |
-| ------ | ---- | ------------- | --------------------------- |
-| name | str | | Name of the callback. |
-| active | bool | True | Whether calback is active. |
-| params | dict | {} | Parameters of the callback. |
+```yaml
+trainer:
+  scheduler:
+    name: "CosineAnnealingLR"
+    params:
+      T_max: *epochs
+      eta_min: 0
+```

## Exporter

Here you can define configuration for exporting.

-| Key | Type | Default value | Description |
-| ---------------------- | --------------------------------- | ------------- | ------------------------------------------------------------------------------------------------- |
-| name | str \| None | None | Name of the exported model. |
-| input_shape | list\[int\] \| None | None | Input shape of the model. If not provided, inferred from the dataset. |
-| data_type | Literal\["INT8", "FP16", "FP32"\] | "FP16" | Data type of the exported model. Only used for conversion to BLOB. |
-| reverse_input_channels | bool | True | Whether to reverse the image channels in the exported model.
Relevant for `.blob` export | -| scale_values | list\[float\] \| None | None | What scale values to use for input normalization. If not provided, inferred from augmentations. | -| mean_values | list\[float\] \| None | None | What mean values to use for input normalizations. If not provided, inferred from augmentations. | -| upload_to_run | bool | True | Whether to upload the exported files to tracked run as artifact. | -| upload_url | str \| None | None | Exported model will be uploaded to this url if specified. | +| Key | Type | Default value | Description | +| ------------------------ | --------------------------------- | ------------- | ---------------------------------------------------------------------------------------------- | +| `name` | `str \| None` | `None` | Name of the exported model | +| `input_shape` | `list\[int\] \| None` | `None` | Input shape of the model. If not provided, inferred from the dataset | +| `data_type` | `Literal["INT8", "FP16", "FP32"]` | `"FP16"` | Data type of the exported model. Only used for conversion to BLOB | +| `reverse_input_channels` | `bool` | `True` | Whether to reverse the image channels in the exported model. Relevant for `BLOB` export | +| `scale_values` | `list[float] \| None` | `None` | What scale values to use for input normalization. If not provided, inferred from augmentations | +| `mean_values` | `list[float] \| None` | `None` | What mean values to use for input normalization. If not provided, inferred from augmentations | +| `upload_to_run` | `bool` | `True` | Whether to upload the exported files to tracked run as artifact | +| `upload_url` | `str \| None` | `None` | Exported model will be uploaded to this URL if specified | +| `output_names` | `list[str] \| None` | `None` | Optional list of output names to override the default ones | -### ONNX +### `ONNX` -Option specific for ONNX export. +Option specific for `ONNX` export. -| Key | Type | Default value | Description | -| ------------- | ------------------------ | ------------- | -------------------------------- | -| opset_version | int | 12 | Which opset version to use. | -| dynamic_axes | dict\[str, Any\] \| None | None | Whether to specify dinamic axes. | +| Key | Type | Default value | Description | +| --------------- | ------------------------ | ------------- | --------------------------------- | +| `opset_version` | `int` | `12` | Which `ONNX` opset version to use | +| `dynamic_axes` | `dict[str, Any] \| None` | `None` | Whether to specify dynamic axes | ### Blob -| Key | Type | Default value | Description | -| ------- | ---------------------------------------------------------------- | ------------- | --------------------------------------- | -| active | bool | False | Whether to export to `.blob` format. | -| shaves | int | 6 | How many shaves. | -| version | Literal\["2021.2", "2021.3", "2021.4", "2022.1", "2022.3_RVC3"\] | "2022.1" | OpenVINO version to use for conversion. 
| +| Key | Type | Default value | Description | +| --------- | ---------------------------------------------------------------- | ------------- | ---------------------------------------- | +| `active` | `bool` | `False` | Whether to export to `BLOB` format | +| `shaves` | `int` | `6` | How many shaves | +| `version` | `Literal["2021.2", "2021.3", "2021.4", "2022.1", "2022.3_RVC3"]` | `"2022.1"` | `OpenVINO` version to use for conversion | + +**Example:** + +```yaml +exporter: + output_names: ["output1", "output2"] + onnx: + opset_version: 11 + blobconverter: + active: true + shaves: 8 +``` ## Tuner Here you can specify options for tuning. -| Key | Type | Default value | Description | -| ---------------------- | ----------------- | ------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| study_name | str | "test-study" | Name of the study. | -| continue_exising_study | bool | True | Whether to continue an existing study with this name. | -| use_pruner | bool | True | Whether to use the MedianPruner. | -| n_trials | int \| None | 15 | Number of trials for each process. `None` represents no limit in terms of numbner of trials. | -| timeout | int \| None | None | Stop study after the given number of seconds. | -| params | dict\[str, list\] | {} | Which parameters to tune. The keys should be in the format `key1.key2.key3_`. Type can be one of `[categorical, float, int, longuniform, uniform, subset]`. For more information about the types, visit [Optuna documentation](https://optuna.readthedocs.io/en/stable/reference/generated/optuna.trial.Trial.html). | +| Key | Type | Default value | Description | +| ------------------------ | ----------------- | -------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `study_name` | `str` | `"test-study"` | Name of the study | +| `continue_exising_study` | `bool` | `True` | Whether to continue an existing study with this name | +| `use_pruner` | `bool` | `True` | Whether to use the `MedianPruner` | +| `n_trials` | `int \| None` | `15` | Number of trials for each process. `None` represents no limit in terms of number of trials | +| `timeout` | `int \| None` | `None` | Stop study after the given number of seconds | +| `params` | `dict[str, list]` | `{}` | Which parameters to tune. The keys should be in the format `key1.key2.key3_`. Type can be one of `[categorical, float, int, longuniform, uniform, subset]`. For more information about the types, visit [`Optuna` documentation](https://optuna.readthedocs.io/en/stable/reference/generated/optuna.trial.Trial.html) | + +> \[!NOTE\] +> `"subset"` sampling is currently only supported for augmentations. +> You can specify a set of augmentations defined in `trainer` to choose from. +> Every run, only a subset of random $N$ augmentations will be active (`is_active` parameter will be `True` for chosen ones and `False` for the rest in the set). + +### Storage -**Note**: "subset" sampling is currently only supported for augmentations. 
You can specify a set of augmentations defined in `trainer` to choose from and every run subset of random N augmentations will be active (`is_active` parameter will be True for chosen ones and False for the rest in the set). 
+| Key            | Type                         | Default value | Description                                          |
+| -------------- | ---------------------------- | ------------- | ---------------------------------------------------- |
+| `active`       | `bool`                       | `True`        | Whether to use storage to make the study persistent  |
+| `storage_type` | `Literal["local", "remote"]` | `"local"`     | Type of the storage                                  |
 
-Example of params for tuner block:
+**Example:**
 
 ```yaml
 tuner:
+  study_name: "seg_study"
+  n_trials: 10
+  storage:
+    storage_type: "local"
   params:
     trainer.optimizer.name_categorical: ["Adam", "SGD"]
     trainer.optimizer.params.lr_float: [0.0001, 0.001]
     trainer.batch_size_int: [4, 16, 4]
+    # each run will have 2 of the following augmentations active
    trainer.preprocessing.augmentations_subset: [["Defocus", "Sharpen", "Flip"], 2]
 ```
 
-### Storage
-
-| Key          | Type                         | Default value | Description                                           |
-| ------------ | ---------------------------- | ------------- | ----------------------------------------------------- |
-| active       | bool                         | True          | Whether to use storage to make the study persistent. |
-| storage_type | Literal\["local", "remote"\] | "local"       | Type of the storage.                                  |
-
 ## ENVIRON
 
 A special section of the config file where you can specify environment variables.
 For more info on the variables, see [Credentials](../README.md#credentials).
 
-**NOTE**
-
-This is not a recommended way due to possible leakage of secrets. This section is intended for testing purposes only.
-
-| Key                      | Type                                                       | Default value  | Description |
-| ------------------------ | ---------------------------------------------------------- | -------------- | ----------- |
-| AWS_ACCESS_KEY_ID        | str \| None                                                | None           |             |
-| AWS_SECRET_ACCESS_KEY    | str \| None                                                | None           |             |
-| AWS_S3_ENDPOINT_URL      | str \| None                                                | None           |             |
-| MLFLOW_CLOUDFLARE_ID     | str \| None                                                | None           |             |
-| MLFLOW_CLOUDFLARE_SECRET | str \| None                                                | None           |             |
-| MLFLOW_S3_BUCKET         | str \| None                                                | None           |             |
-| MLFLOW_S3_ENDPOINT_URL   | str \| None                                                | None           |             |
-| MLFLOW_TRACKING_URI      | str \| None                                                | None           |             |
-| POSTGRES_USER            | str \| None                                                | None           |             |
-| POSTGRES_PASSWORD        | str \| None                                                | None           |             |
-| POSTGRES_HOST            | str \| None                                                | None           |             |
-| POSTGRES_PORT            | str \| None                                                | None           |             |
-| POSTGRES_DB              | str \| None                                                | None           |             |
-| LUXONISML_BUCKET         | str \| None                                                | None           |             |
-| LUXONISML_BASE_PATH      | str                                                        | "~/luxonis_ml" |             |
-| LUXONISML_TEAM_ID        | str                                                        | "offline"      |             |
-| LUXONISML_TEAM_NAME      | str                                                        | "offline"      |             |
-| LOG_LEVEL                | Literal\["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"\] | "INFO"         |             |
+> \[!WARNING\]
+> This is not a recommended way due to possible leakage of secrets!
+> This section is intended for testing purposes only!
+> Use environment variables or `.env` files instead. 
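+
+**Example:**
+
+A minimal sketch of the `ENVIRON` section (the values shown here are purely illustrative; the full list of supported variables is in the table below):
+
+```yaml
+ENVIRON:
+  LOG_LEVEL: "DEBUG"
+  LUXONISML_BASE_PATH: "~/luxonis_ml"
+  LUXONISML_TEAM_ID: "offline"
+```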
+ +| Key | Type | Default value | +| -------------------------- | ---------------------------------------------------------- | ---------------- | +| `AWS_ACCESS_KEY_ID` | `str \| None` | `None` | +| `AWS_SECRET_ACCESS_KEY` | `str \| None` | `None` | +| `AWS_S3_ENDPOINT_URL` | `str \| None` | `None` | +| `MLFLOW_CLOUDFLARE_ID` | `str \| None` | `None` | +| `MLFLOW_CLOUDFLARE_SECRET` | `str \| None` | `None` | +| `MLFLOW_S3_BUCKET` | `str \| None` | `None` | +| `MLFLOW_S3_ENDPOINT_URL` | `str \| None` | `None` | +| `MLFLOW_TRACKING_URI` | `str \| None` | `None` | +| `POSTGRES_USER` | `str \| None` | `None` | +| `POSTGRES_PASSWORD` | `str \| None` | `None` | +| `POSTGRES_HOST` | `str \| None` | `None` | +| `POSTGRES_PORT` | `str \| None` | `None` | +| `POSTGRES_DB` | `str \| None` | `None` | +| `LUXONISML_BUCKET` | `str \| None` | `None` | +| `LUXONISML_BASE_PATH` | `str` | `"~/luxonis_ml"` | +| `LUXONISML_TEAM_ID` | `str` | `"offline"` | +| `LOG_LEVEL` | `Literal["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"]` | `"INFO"` | diff --git a/configs/classification_heavy_model.yaml b/configs/classification_heavy_model.yaml index 22b590e6..6ef1f443 100644 --- a/configs/classification_heavy_model.yaml +++ b/configs/classification_heavy_model.yaml @@ -14,9 +14,9 @@ loader: trainer: preprocessing: train_image_size: [384, 512] - keep_aspect_ratio: True + keep_aspect_ratio: true normalize: - active: True + active: true batch_size: 8 epochs: 200 diff --git a/configs/classification_light_model.yaml b/configs/classification_light_model.yaml index 32f7d96b..6eeba5fd 100644 --- a/configs/classification_light_model.yaml +++ b/configs/classification_light_model.yaml @@ -14,9 +14,9 @@ loader: trainer: preprocessing: train_image_size: [384, 512] - keep_aspect_ratio: True + keep_aspect_ratio: true normalize: - active: True + active: true batch_size: 8 epochs: 200 diff --git a/configs/complex_model.yaml b/configs/complex_model.yaml index 1ba11dd1..149530ad 100644 --- a/configs/complex_model.yaml +++ b/configs/complex_model.yaml @@ -15,7 +15,7 @@ model: - RepPANNeck losses: - name: EfficientKeypointBboxLoss + - name: EfficientKeypointBBoxLoss metrics: - name: ObjectKeypointSimilarity @@ -23,22 +23,22 @@ model: - name: MeanAveragePrecisionKeypoints visualizers: - name: MultiVisualizer - params: - visualizers: - - name: KeypointVisualizer - params: - nonvisible_color: blue - - name: BBoxVisualizer - params: - colors: - person: "#FF5055" + - name: MultiVisualizer + params: + visualizers: + - name: KeypointVisualizer + params: + nonvisible_color: blue + - name: BBoxVisualizer + params: + colors: + person: "#FF5055" - name: SegmentationHead inputs: - RepPANNeck losses: - name: BCEWithLogitsLoss + - name: BCEWithLogitsLoss metrics: - name: F1Score params: @@ -47,9 +47,9 @@ model: params: task: binary visualizers: - name: SegmentationVisualizer - params: - colors: "#FF5055" + - name: SegmentationVisualizer + params: + colors: "#FF5055" - name: EfficientBBoxHead inputs: @@ -58,18 +58,18 @@ model: conf_thres: 0.75 iou_thres: 0.45 losses: - name: AdaptiveDetectionLoss + - name: AdaptiveDetectionLoss metrics: - name: MeanAveragePrecision + - name: MeanAveragePrecision visualizers: - name: BBoxVisualizer + - name: BBoxVisualizer tracker: project_name: coco_test save_directory: output - is_tensorboard: True - is_wandb: False - is_mlflow: False + is_tensorboard: true + is_wandb: false + is_mlflow: false loader: train_view: train @@ -86,23 +86,23 @@ trainer: n_sanity_val_steps: 1 profiler: null - verbose: True + verbose: true 
batch_size: 8 accumulate_grad_batches: 1 epochs: &epochs 200 n_workers: 8 validation_interval: 10 n_log_images: 8 - skip_last_batch: True - log_sub_losses: True + skip_last_batch: true + log_sub_losses: true save_top_k: 3 preprocessing: train_image_size: [&height 384, &width 384] - keep_aspect_ratio: True - train_rgb: True + keep_aspect_ratio: true + train_rgb: true normalize: - active: True + active: true augmentations: - name: Defocus params: @@ -131,6 +131,7 @@ trainer: mode: min verbose: true - name: ExportOnTrainEnd + - name: ArchiveOnTrainEnd - name: TestOnTrainEnd optimizer: @@ -138,7 +139,7 @@ trainer: params: lr: 0.02 momentum: 0.937 - nesterov: True + nesterov: true weight_decay: 0.0005 scheduler: diff --git a/configs/detection_heavy_model.yaml b/configs/detection_heavy_model.yaml index f35c1ed3..294034c2 100644 --- a/configs/detection_heavy_model.yaml +++ b/configs/detection_heavy_model.yaml @@ -14,9 +14,9 @@ loader: trainer: preprocessing: train_image_size: [384, 512] - keep_aspect_ratio: True + keep_aspect_ratio: true normalize: - active: True + active: true batch_size: 8 epochs: &epochs 200 diff --git a/configs/detection_light_model.yaml b/configs/detection_light_model.yaml index 1f982d92..aca202bd 100644 --- a/configs/detection_light_model.yaml +++ b/configs/detection_light_model.yaml @@ -14,9 +14,9 @@ loader: trainer: preprocessing: train_image_size: [384, 512] - keep_aspect_ratio: True + keep_aspect_ratio: true normalize: - active: True + active: true batch_size: 8 epochs: &epochs 200 diff --git a/configs/example_export.yaml b/configs/example_export.yaml index 78f1c650..ff9b1f3d 100644 --- a/configs/example_export.yaml +++ b/configs/example_export.yaml @@ -15,9 +15,9 @@ loader: trainer: preprocessing: train_image_size: [384, 512] - keep_aspect_ratio: True + keep_aspect_ratio: true normalize: - active: True + active: true batch_size: 8 epochs: &epochs 200 @@ -46,5 +46,5 @@ exporter: onnx: opset_version: 11 blobconverter: - active: True + active: true shaves: 8 diff --git a/configs/example_tuning.yaml b/configs/example_tuning.yaml index 9e63c877..8e7a6215 100755 --- a/configs/example_tuning.yaml +++ b/configs/example_tuning.yaml @@ -2,9 +2,9 @@ model: - name: segmentation_light + name: detection_light predefined_model: - name: SegmentationModel + name: DetectionModel params: variant: light @@ -15,9 +15,9 @@ loader: trainer: preprocessing: train_image_size: [384, 512] - keep_aspect_ratio: True + keep_aspect_ratio: true normalize: - active: True + active: true augmentations: - name: Defocus params: @@ -40,7 +40,7 @@ trainer: tuner: - study_name: seg_study + study_name: det_study n_trials: 10 storage: storage_type: local diff --git a/configs/keypoint_bbox_heavy_model.yaml b/configs/keypoint_bbox_heavy_model.yaml index c6b22f35..10527921 100644 --- a/configs/keypoint_bbox_heavy_model.yaml +++ b/configs/keypoint_bbox_heavy_model.yaml @@ -14,9 +14,9 @@ loader: trainer: preprocessing: train_image_size: [384, 512] - keep_aspect_ratio: True + keep_aspect_ratio: true normalize: - active: True + active: true batch_size: 8 epochs: &epochs 200 diff --git a/configs/keypoint_bbox_light_model.yaml b/configs/keypoint_bbox_light_model.yaml index a095a551..57042b04 100644 --- a/configs/keypoint_bbox_light_model.yaml +++ b/configs/keypoint_bbox_light_model.yaml @@ -14,9 +14,9 @@ loader: trainer: preprocessing: train_image_size: [384, 512] - keep_aspect_ratio: True + keep_aspect_ratio: true normalize: - active: True + active: true batch_size: 8 epochs: &epochs 200 diff --git 
a/configs/segmentation_heavy_model.yaml b/configs/segmentation_heavy_model.yaml index e9bc16d6..8da7eba8 100644 --- a/configs/segmentation_heavy_model.yaml +++ b/configs/segmentation_heavy_model.yaml @@ -14,9 +14,9 @@ loader: trainer: preprocessing: train_image_size: [384, 512] - keep_aspect_ratio: True + keep_aspect_ratio: true normalize: - active: True + active: true batch_size: 8 epochs: &epochs 200 diff --git a/configs/segmentation_light_model.yaml b/configs/segmentation_light_model.yaml index c03703f4..40d38595 100644 --- a/configs/segmentation_light_model.yaml +++ b/configs/segmentation_light_model.yaml @@ -14,9 +14,9 @@ loader: trainer: preprocessing: train_image_size: [384, 512] - keep_aspect_ratio: True + keep_aspect_ratio: true normalize: - active: True + active: true batch_size: 8 epochs: &epochs 200 diff --git a/luxonis_train/__main__.py b/luxonis_train/__main__.py index 798a9baa..c0aae2dc 100644 --- a/luxonis_train/__main__.py +++ b/luxonis_train/__main__.py @@ -41,6 +41,15 @@ class _ViewType(str, Enum): ), ] +WeightsType = Annotated[ + Path | None, + typer.Option( + help="Path to the model weights.", + show_default=False, + metavar="FILE", + ), +] + ViewType = Annotated[ _ViewType, typer.Option(help="Which dataset view to use.") ] @@ -77,12 +86,13 @@ def train( def test( config: ConfigType = None, view: ViewType = _ViewType.VAL, + weights: WeightsType = None, opts: OptsType = None, ): """Evaluate model.""" from luxonis_train.core import LuxonisModel - LuxonisModel(config, opts).test(view=view.value) + LuxonisModel(config, opts).test(view=view.value, weights=weights) @app.command() @@ -94,11 +104,21 @@ def tune(config: ConfigType = None, opts: OptsType = None): @app.command() -def export(config: ConfigType = None, opts: OptsType = None): +def export( + config: ConfigType = None, + save_path: Annotated[ + Path | None, + typer.Option(help="Path where to save the exported model."), + ] = None, + weights: WeightsType = None, + opts: OptsType = None, +): """Export model.""" from luxonis_train.core import LuxonisModel - LuxonisModel(config, opts).export() + LuxonisModel(config, opts).export( + onnx_save_path=save_path, weights=weights + ) @app.command() @@ -107,13 +127,17 @@ def infer( view: ViewType = _ViewType.VAL, save_dir: SaveDirType = None, source_path: SourcePathType = None, + weights: WeightsType = None, opts: OptsType = None, ): """Run inference.""" from luxonis_train.core import LuxonisModel LuxonisModel(config, opts).infer( - view=view.value, save_dir=save_dir, source_path=source_path + view=view.value, + save_dir=save_dir, + source_path=source_path, + weights=weights, ) @@ -138,7 +162,8 @@ def inspect( "-s", help=( "Multiplier for the image size. " - "By default the images are shown in their original size." + "By default the images are shown in their original size. " + "Use this option to scale them." 
), show_default=False, ), @@ -223,19 +248,20 @@ def inspect( @app.command() def archive( + config: ConfigType = None, executable: Annotated[ str | None, typer.Option( help="Path to the model file.", show_default=False, metavar="FILE" ), ] = None, - config: ConfigType = None, + weights: WeightsType = None, opts: OptsType = None, ): """Generate NN archive.""" from luxonis_train.core import LuxonisModel - LuxonisModel(str(config), opts).archive(executable) + LuxonisModel(str(config), opts).archive(path=executable, weights=weights) def version_callback(value: bool): diff --git a/luxonis_train/attached_modules/losses/README.md b/luxonis_train/attached_modules/losses/README.md index a8a982ba..724174c7 100644 --- a/luxonis_train/attached_modules/losses/README.md +++ b/luxonis_train/attached_modules/losses/README.md @@ -4,97 +4,97 @@ List of all the available loss functions. ## Table Of Contents -- [CrossEntropyLoss](#crossentropyloss) -- [BCEWithLogitsLoss](#bcewithlogitsloss) -- [SmoothBCEWithLogitsLoss](#smoothbcewithlogitsloss) -- [SigmoidFocalLoss](#sigmoidfocalloss) -- [SoftmaxFocalLoss](#softmaxfocalloss) -- [AdaptiveDetectionLoss](#adaptivedetectionloss) -- [EfficientKeypointBBoxLoss](#efficientkeypointbboxloss) +- [`CrossEntropyLoss`](#crossentropyloss) +- [`BCEWithLogitsLoss`](#bcewithlogitsloss) +- [`SmoothBCEWithLogitsLoss`](#smoothbcewithlogitsloss) +- [`SigmoidFocalLoss`](#sigmoidfocalloss) +- [`SoftmaxFocalLoss`](#softmaxfocalloss) +- [`AdaptiveDetectionLoss`](#adaptivedetectionloss) +- [`EfficientKeypointBBoxLoss`](#efficientkeypointbboxloss) -## CrossEntropyLoss +## `CrossEntropyLoss` Adapted from [here](https://pytorch.org/docs/stable/generated/torch.nn.CrossEntropyLoss.html). -**Params** +**Parameters:** -| Key | Type | Default value | Description | -| --------------- | -------------------------------- | ------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| weight | list\[float\] \| None | None | A manual rescaling weight given to each class. If given, it has to be a list of the same length as there are classes. | -| reduction | Literal\["none", "mean", "sum"\] | "mean" | Specifies the reduction to apply to the output. | -| label_smoothing | float\[0.0, 1.0\] | 0.0 | Specifies the amount of smoothing when computing the loss, where 0.0 means no smoothing. The targets become a mixture of the original ground truth and a uniform distribution as described in [Rethinking the Inception Architecture for Computer Vision](https://arxiv.org/abs/1512.00567). | +| Key | Type | Default value | Description | +| ----------------- | -------------------------------- | ------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `weight` | `list[float] \| None` | `None` | A manual rescaling weight given to each class. 
If given, it has to be a list of the same length as there are classes | +| `reduction` | `Literal["none", "mean", "sum"]` | `"mean"` | Specifies the reduction to apply to the output | +| `label_smoothing` | `float` $\\in \[0.0, 1.0\]$ | `0.0` | Specifies the amount of smoothing when computing the loss, where 0.0 means no smoothing. The targets become a mixture of the original ground truth and a uniform distribution as described in [Rethinking the Inception Architecture for Computer Vision](https://arxiv.org/abs/1512.00567) | -## BCEWithLogitsLoss +## `BCEWithLogitsLoss` Adapted from [here](https://pytorch.org/docs/stable/generated/torch.nn.BCEWithLogitsLoss.html). -**Params** +**Parameters:** -| Key | Type | Default value | Description | -| ---------- | -------------------------------- | ------------- | ------------------------------------------------------------------------------------------------------------------ | -| weight | list\[float\] \| None | None | A manual rescaling weight given to each class. If given, has to be a list of the same length as there are classes. | -| reduction | Literal\["none", "mean", "sum"\] | "mean" | Specifies the reduction to apply to the output. | -| pos_weight | Tensor \| None | None | A weight of positive examples to be broadcasted with target. | +| Key | Type | Default value | Description | +| ------------ | -------------------------------- | ------------- | ----------------------------------------------------------------------------------------------------------------- | +| `weight` | `list[float] \| None` | `None` | A manual rescaling weight given to each class. If given, has to be a list of the same length as there are classes | +| `reduction` | `Literal["none", "mean", "sum"]` | `"mean"` | Specifies the reduction to apply to the output | +| `pos_weight` | `Tensor \| None` | `None` | A weight of positive examples to be broadcasted with target | -## SmoothBCEWithLogitsLoss +## `SmoothBCEWithLogitsLoss` -**Params** +**Parameters:** -| Key | Type | Default value | Description | -| --------------- | -------------------------------- | ------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| weight | list\[float\] \| None | None | A manual rescaling weight given to each class. If given, has to be a list of the same length as there are classes. | -| reduction | Literal\["none", "mean", "sum"\] | "mean" | Specifies the reduction to apply to the output. | -| label_smoothing | float\[0.0, 1.0\] | 0.0 | Specifies the amount of smoothing when computing the loss, where 0.0 means no smoothing. The targets become a mixture of the original ground truth and a uniform distribution as described in [Rethinking the Inception Architecture for Computer Vision](https://arxiv.org/abs/1512.00567). | -| bce_pow | float | 1.0 | Weight for the positive samples. 
| +| Key | Type | Default value | Description | +| ----------------- | -------------------------------- | ------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `weight` | `list[float] \| None` | `None` | A manual rescaling weight given to each class. If given, has to be a list of the same length as there are classes | +| `reduction` | `Literal["none", "mean", "sum"]` | `"mean"` | Specifies the reduction to apply to the output | +| `label_smoothing` | `float` $\\in \[0.0, 1.0\]$ | `0.0` | Specifies the amount of smoothing when computing the loss, where 0.0 means no smoothing. The targets become a mixture of the original ground truth and a uniform distribution as described in [Rethinking the Inception Architecture for Computer Vision](https://arxiv.org/abs/1512.00567) | +| `bce_pow` | `float` | `1.0` | Weight for the positive samples | -## SigmoidFocalLoss +## `SigmoidFocalLoss` Adapted from [here](https://pytorch.org/vision/stable/generated/torchvision.ops.sigmoid_focal_loss.html#torchvision.ops.sigmoid_focal_loss). -**Params** +**Parameters:** -| Key | Type | Default value | Description | -| --------- | -------------------------------- | ------------- | ------------------------------------------------------------------------------------------ | -| alpha | float | 0.25 | Weighting factor in range (0,1) to balance positive vs negative examples or -1 for ignore. | -| gamma | float | 2.0 | Exponent of the modulating factor $(1 - p_t)$ to balance easy vs hard examples | -| reduction | Literal\["none", "mean", "sum"\] | "mean" | Specifies the reduction to apply to the output. | +| Key | Type | Default value | Description | +| ----------- | -------------------------------- | ------------- | ------------------------------------------------------------------------------------------- | +| `alpha` | `float` | `0.25` | Weighting factor in range $(0,1)$ to balance positive vs negative examples or -1 for ignore | +| `gamma` | `float` | `2.0` | Exponent of the modulating factor $(1 - p_t)$ to balance easy vs hard examples | +| `reduction` | `Literal["none", "mean", "sum"]` | `"mean"` | Specifies the reduction to apply to the output | -## SoftmaxFocalLoss +## `SoftmaxFocalLoss` -**Params** +**Parameters:** -| Key | Type | Default value | Description | -| --------- | -------------------------------- | ------------- | ----------------------------------------------------------------------------- | -| alpha | float \| list | 0.25 | Either a float for all channels or list of alphas for each channel. | -| gamma | float | 2.0 | Exponent of the modulating factor (1 - p_t) to balance easy vs hard examples. | -| reduction | Literal\["none", "mean", "sum"\] | "mean" | Specifies the reduction to apply to the output. 
| +| Key | Type | Default value | Description | +| ----------- | -------------------------------- | ------------- | ------------------------------------------------------------------------------ | +| `alpha` | `float \| list` | `0.25` | Either a float for all channels or list of alphas for each channel | +| `gamma` | `float` | `2.0` | Exponent of the modulating factor $(1 - p_t)$ to balance easy vs hard examples | +| `reduction` | `Literal["none", "mean", "sum"]` | `"mean"` | Specifies the reduction to apply to the output | -## AdaptiveDetectionLoss +## `AdaptiveDetectionLoss` Adapted from [here](https://arxiv.org/pdf/2209.02976.pdf). -**Params** +**Parameters:** -| Key | Type | Default value | Description | -| ----------------- | ------------------------------------------------- | ------------- | ----------------------------------------------------------------------------------- | -| n_warmup_epochs | int | 4 | Number of epochs where ATSS assigner is used, after that we switch to TAL assigner. | -| iou_type | Literal\["none", "giou", "diou", "ciou", "siou"\] | "giou" | IoU type used for bbox regression loss. | -| class_loss_weight | float | 1.0 | Weight used for the classification part of the loss. | -| iou_loss_weight | float | 2.5 | Weight used for the IoU part of the loss. | +| Key | Type | Default value | Description | +| ------------------- | ------------------------------------------------- | ------------- | -------------------------------------------------------------------------------------- | +| `n_warmup_epochs` | `int` | `4` | Number of epochs where `ATSS` assigner is used, after that we switch to `TAL` assigner | +| `iou_type` | `Literal["none", "giou", "diou", "ciou", "siou"]` | `"giou"` | `IoU` type used for bounding box regression loss | +| `class_loss_weight` | `float` | `1.0` | Weight used for the classification part of the loss | +| `iou_loss_weight` | `float` | `2.5` | Weight used for the `IoU` part of the loss | -## EfficientKeypointBBoxLoss +## `EfficientKeypointBBoxLoss` Adapted from [YOLO-Pose: Enhancing YOLO for Multi Person Pose Estimation Using Object Keypoint Similarity Loss](https://arxiv.org/ftp/arxiv/papers/2204/2204.06806.pdf). -| Key | Type | Default value | Description | -| --------------------- | ------------------------------------------------- | ------------- | --------------------------------------------------------------------------------------------------- | -| n_warmup_epochs | int | 4 | Number of epochs where ATSS assigner is used, after that we switch to TAL assigner. | -| iou_type | Literal\["none", "giou", "diou", "ciou", "siou"\] | "giou" | IoU type used for bbox regression sub-loss | -| reduction | Literal\["mean", "sum"\] | "mean" | Specifies the reduction to apply to the output. | -| class_loss_weight | float | 1.0 | Weight used for the classification sub-loss. | -| iou_loss_weight | float | 2.5 | Weight used for the IoU sub-loss. | -| regr_kpts_loss_weight | float | 1.5 | Weight used for the OKS sub-loss. | -| vis_kpts_loss_weight | float | 1.0 | Weight used for the keypoint visibility sub-loss. | -| sigmas | list\[float\] \\ None | None | Sigmas used in KeypointLoss for OKS metric. If None then use COCO ones if possible or default ones. | -| area_factor | float \| None | None | Factor by which we multiply bbox area which is used in KeypointLoss. If None then use default one. 
|
+| Key                     | Type                                               | Default value | Description                                                                                                    |
+| ----------------------- | -------------------------------------------------- | ------------- | --------------------------------------------------------------------------------------------------------------- |
+| `n_warmup_epochs`       | `int`                                              | `4`           | Number of epochs where `ATSS` assigner is used, after that we switch to `TAL` assigner                          |
+| `iou_type`              | `Literal["none", "giou", "diou", "ciou", "siou"]`  | `"giou"`      | `IoU` type used for bounding box regression sub-loss                                                             |
+| `reduction`             | `Literal["mean", "sum"]`                           | `"mean"`      | Specifies the reduction to apply to the output                                                                   |
+| `class_loss_weight`     | `float`                                            | `1.0`         | Weight used for the classification sub-loss                                                                      |
+| `iou_loss_weight`       | `float`                                            | `2.5`         | Weight used for the `IoU` sub-loss                                                                               |
+| `regr_kpts_loss_weight` | `float`                                            | `1.5`         | Weight used for the `OKS` sub-loss                                                                               |
+| `vis_kpts_loss_weight`  | `float`                                            | `1.0`         | Weight used for the keypoint visibility sub-loss                                                                 |
+| `sigmas`                | `list[float] \| None`                              | `None`        | Sigmas used in `KeypointLoss` for `OKS` metric. If `None` then use COCO ones if possible or default ones         |
+| `area_factor`           | `float \| None`                                    | `None`        | Factor by which we multiply bounding box area which is used in `KeypointLoss`. If `None` then use default one    |
diff --git a/luxonis_train/attached_modules/metrics/README.md b/luxonis_train/attached_modules/metrics/README.md
index 17735540..b61f4843 100644
--- a/luxonis_train/attached_modules/metrics/README.md
+++ b/luxonis_train/attached_modules/metrics/README.md
@@ -23,6 +23,14 @@ Metrics from the [`torchmetrics`](https://lightning.ai/docs/torchmetrics/stable/
 
 For more information, see [object-keypoint-similarity](https://learnopencv.com/object-keypoint-similarity/).
 
+**Params**
+
+| Key                | Type                  | Default value | Description                                                            |
+| ------------------ | --------------------- | ------------- | ---------------------------------------------------------------------- |
+| `sigmas`           | `list[float] \| None` | `None`        | List of sigmas for each keypoint. If `None`, the COCO sigmas are used   |
+| `area_factor`      | `float`               | `0.53`        | Factor by which to multiply the bounding box area                       |
+| `use_cocoeval_oks` | `bool`                | `True`        | Whether to use the same OKS formula as in COCO evaluation               |
+
 ## MeanAveragePrecision
 
 Compute the `Mean-Average-Precision (mAP) and Mean-Average-Recall (mAR)` for object detection predictions.
@@ -43,4 +51,13 @@ boxes.
 Similar to [MeanAveragePrecision](#meanaverageprecision), but uses [OKS](#objectkeypointsimilarity) as `IoU` measure.
 For a deeper understanding of how OKS works, please refer to the detailed explanation provided [here](https://learnopencv.com/object-keypoint-similarity/).
 
-Evaluation leverages COCO evaluation framework (COCOeval) to assess mAP performance.
+Evaluation leverages COCO evaluation framework (COCOeval) to assess mAP performance.
+
+**Params**
+
+| Key           | Type                                | Default value | Description                                                            |
+| ------------- | ----------------------------------- | ------------- | ----------------------------------------------------------------------- |
+| `sigmas`      | `list[float] \| None`               | `None`        | List of sigmas for each keypoint. 
If `None`, the COCO sigmas are used |
+| `area_factor` | `float`                             | `0.53`        | Factor by which to multiply the bounding box area                       |
+| `max_dets`    | `int`                               | `20`          | Maximum number of detections per image                                  |
+| `box_format`  | `Literal["xyxy", "xywh", "cxcywh"]` | `"xyxy"`      | Format of the bounding boxes                                            |
diff --git a/luxonis_train/attached_modules/visualizers/README.md b/luxonis_train/attached_modules/visualizers/README.md
index 8bedaed9..1fca42e2 100644
--- a/luxonis_train/attached_modules/visualizers/README.md
+++ b/luxonis_train/attached_modules/visualizers/README.md
@@ -1,87 +1,88 @@
 # Visualizers
 
+Visualizers are used to render the output of a node. They are used in the `visualizers` field of the `Node` configuration.
+
 ## Table Of Contents
 
-- [BBoxVisualizer](#bboxvisualizer)
-- [ClassificationVisualizer](#classificationvisualizer)
-- [KeypointVisualizer](#keypointvisualizer)
-- [SegmentationVisualizer](#segmentationvisualizer)
-- [MultiVisualizer](#multivisualizer)
+- [`BBoxVisualizer`](#bboxvisualizer)
+- [`ClassificationVisualizer`](#classificationvisualizer)
+- [`KeypointVisualizer`](#keypointvisualizer)
+- [`SegmentationVisualizer`](#segmentationvisualizer)
+- [`MultiVisualizer`](#multivisualizer)
 
-## BBoxVisualizer
+## `BBoxVisualizer`
 
 Visualizer for bounding boxes.
 
-**Params**
+**Parameters:**
 
-| Key       | Type                                                                                          | Default value | Description                                                                                                                                                        |
-| --------- | --------------------------------------------------------------------------------------------- | ------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
-| labels    | dict\[int, str\] \| list\[str\] \| None                                                       | None          | Either a dictionary mapping class indices to names, or a list of names. If list is provided, the label mapping is done by index. By default, no labels are drawn.  |
-| colors    | dict\[int, tuple\[int, int, int\] \| str\] \| list\[tuple\[int, int, int\] \| str\] \| None   | None          | Colors to use for the boundig boxes. Either a dictionary mapping class names to colors, or a list of colors.                                                       |
-| fill      | bool                                                                                          | False         | Whether or not to fill the bounding boxes.                                                                                                                          |
-| width     | int                                                                                           | 1             | The width of the bounding box lines.                                                                                                                                |
-| font      | str \| None                                                                                   | None          | A filename containing a TrueType font.                                                                                                                              |
-| font_size | int \| None                                                                                   | None          | Font size used for the labels.                                                                                                                                      |
+| Key         | Type                                                                                    | Default value | Description                                                                                                                                                               |
+| ----------- | --------------------------------------------------------------------------------------- | ------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| `labels`    | `dict[int, str] \| list[str] \| None`                                                   | `None`        | Either a dictionary mapping class indices to names, or a list of names. If list is provided, the label mapping is done by index. By default, no labels are drawn          |
+| `colors`    | `dict[int, tuple[int, int, int] \| str] \| list[tuple[int, int, int] \| str] \| None`   | `None`        | Colors to use for the bounding boxes. Either a dictionary mapping class names to colors, or a list of colors. 
Color can be either a tuple of RGB values or a hex string | +| `fill` | `bool` | `False` | Whether to fill the bounding boxes | +| `width` | `int` | `1` | The width of the bounding box lines | +| `font` | `str \| None` | `None` | A filename containing a `TrueType` font | +| `font_size` | `int \| None` | `None` | Font size used for the labels | -**Example** +**Example:** -![bbox_viz_example](https://github.com/luxonis/luxonis-train/blob/main/media/example_viz/bbox.png) +![bounding_box_viz_example](https://github.com/luxonis/luxonis-train/blob/main/media/example_viz/bbox.png) -## KeypointVisualizer +## `KeypointVisualizer` -**Params** +**Parameters:** -| Key | Type | Default value | Description | -| -------------------- | -------------------------------------- | ------------- | -------------------------------------------------------------------------------------------------------------------------------- | -| visibility_threshold | float | 0.5 | Threshold for visibility of keypoints. If the visibility of a keypoint is below this threshold, it is considered as not visible. | -| connectivity | list\[tuple\[int, int\]\] \| None | None | List of tuples of keypoint indices that define the connections in the skeleton. | -| visible_color | str \| tuple\[int, int, int\] | "red" | Color of visible keypoints. | -| nonvisible_color | str \| tuple\[int, int, int \] \| None | None | Color of nonvisible keypoints. If None, nonvisible keypoints are not drawn. | +| Key | Type | Default value | Description | +| ---------------------- | -------------------------------------- | ------------- | ------------------------------------------------------------------------------------------------------------------------------- | +| `visibility_threshold` | `float` | `0.5` | Threshold for visibility of keypoints. If the visibility of a keypoint is below this threshold, it is considered as not visible | +| `connectivity` | `list[tuple[int, int]] \| None` | `None` | List of tuples of keypoint indices that define the connections in the skeleton | +| `visible_color` | `str \| tuple[int, int, int]` | `"red"` | Color of visible keypoints | +| `nonvisible_color` | `str \| tuple[int, int, int ] \| None` | `None` | Color of non-visible keypoints. If `None`, non-visible keypoints are not drawn | -**Example** +**Example:** -![kpt_viz_example](https://github.com/luxonis/luxonis-train/blob/main/media/example_viz/kpts.png) +![keypoints_viz_example](https://github.com/luxonis/luxonis-train/blob/main/media/example_viz/kpts.png) -## SegmentationVisualizer +## `SegmentationVisualizer` -**Params** +**Parameters:** -| Key | Type | Default value | Description | -| ----- | ----------------------------- | ------------- | -------------------------------------- | -| color | str \| tuple\[int, int, int\] | #5050FF | Color of the segmentation masks. | -| alpha | float | 0.6 | Alpha value of the segmentation masks. 
| +| Key | Type | Default value | Description | +| ------- | ----------------------------- | ------------- | ------------------------------------- | +| `color` | `str \| tuple[int, int, int]` | `"#5050FF"` | Color of the segmentation masks | +| `alpha` | `float` | `0.6` | Alpha value of the segmentation masks | -**Example** +**Example:** -![seg_viz_example](https://github.com/luxonis/luxonis-train/blob/main/media/example_viz/segmentation.png) +![segmentation_viz_example](https://github.com/luxonis/luxonis-train/blob/main/media/example_viz/segmentation.png) -## ClassificationVisualizer +## `ClassificationVisualizer` -**Params** +**Parameters:** -| Key | Type | Default value | Description | -| ------------ | ---------------------- | ------------- | -------------------------------------------------------------------------- | -| include_plot | bool | True | Whether to include a plot of the class probabilities in the visualization. | -| color | tuple\[int, int, int\] | (255, 0, 0) | Color of the text. | -| font_scale | float | 1.0 | Scale of the font. | -| thickness | int | 1 | Line thickness of the font. | +| Key | Type | Default value | Description | +| -------------- | ---------------------- | ------------- | ------------------------------------------------------------------------- | +| `include_plot` | `bool` | `True` | Whether to include a plot of the class probabilities in the visualization | +| `color` | `tuple[int, int, int]` | `(255, 0, 0)` | Color of the text | +| `font_scale` | `float` | `1.0` | Scale of the font | +| `thickness` | `int` | `1` | Line thickness of the font | -**Example** +**Example:** ![class_viz_example](https://github.com/luxonis/luxonis-train/blob/main/media/example_viz/class.png) -## MultiVisualizer +## `MultiVisualizer` Special type of meta-visualizer that combines several visualizers into one. The combined visualizers share canvas. -**Params** +**Parameters:** -| Key | Type | Default value | Description | -| ----------- | ------------ | ------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | -| visualizers | list\[dict\] | \[ \] | List of visualizers to combine. Each item in the list is a dictionary with the following keys:
- name (str): Name of the visualizer. Must be a key in the VISUALIZERS registry.
- params (dict): Parameters to pass to the visualizer. | +| Key | Type | Default value | Description | +| ------------- | ------------ | ------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | +| `visualizers` | `list[dict]` | `[]` | List of visualizers to combine. Each item in the list is a dictionary with the following keys:
- `"name"` (`str`): Name of the visualizer. Must be a key in the `VISUALIZERS` registry.
- `"params"` (`dict`): Parameters to pass to the visualizer | -**Example** +**Example:** -Example of combining [KeypointVisualizer](#keypointvisualizer) and [BBoxVisualizer](#bboxvisualizer). +Example of combining [`KeypointVisualizer`](#keypointvisualizer) and [`BBoxVisualizer`](#bboxvisualizer). ![multi_viz_example](https://github.com/luxonis/luxonis-train/blob/main/media/example_viz/multi.png) diff --git a/luxonis_train/callbacks/README.md b/luxonis_train/callbacks/README.md index dc015ccd..64fbdf4f 100644 --- a/luxonis_train/callbacks/README.md +++ b/luxonis_train/callbacks/README.md @@ -4,54 +4,64 @@ List of all supported callbacks. ## Table Of Contents -- [PytorchLightning Callbacks](#pytorchlightning-callbacks) -- [ExportOnTrainEnd](#exportontrainend) -- [LuxonisProgressBar](#luxonisprogressbar) -- [MetadataLogger](#metadatalogger) -- [TestOnTrainEnd](#testontrainend) -- [UploadCheckpoint](#uploadcheckpoint) +- [`PytorchLightning` Callbacks](#pytorchlightning-callbacks) +- [`ExportOnTrainEnd`](#exportontrainend) +- [`ArchiveOnTrainEnd`](#archiveontrainend) +- [`MetadataLogger`](#metadatalogger) +- [`TestOnTrainEnd`](#testontrainend) +- [`UploadCheckpoint`](#uploadcheckpoint) -## PytorchLightning Callbacks +## `PytorchLightning` Callbacks List of supported callbacks from `lightning.pytorch`. -- [GPUStatsMonitor](https://pytorch-lightning.readthedocs.io/en/1.5.10/api/pytorch_lightning.callbacks.gpu_stats_monitor.html) -- [DeviceStatsMonitor](https://lightning.ai/docs/pytorch/stable/api/lightning.pytorch.callbacks.DeviceStatsMonitor.html#lightning.pytorch.callbacks.DeviceStatsMonitor) -- [EarlyStopping](https://lightning.ai/docs/pytorch/stable/api/lightning.pytorch.callbacks.EarlyStopping.html#lightning.pytorch.callbacks.EarlyStopping) -- [LearningRateMonitor](https://lightning.ai/docs/pytorch/stable/api/lightning.pytorch.callbacks.LearningRateMonitor.html#lightning.pytorch.callbacks.LearningRateMonitor) -- [ModelCheckpoint](https://lightning.ai/docs/pytorch/stable/api/lightning.pytorch.callbacks.ModelCheckpoint.html#lightning.pytorch.callbacks.ModelCheckpoint) -- [RichModelSummary](https://lightning.ai/docs/pytorch/stable/api/lightning.pytorch.callbacks.RichModelSummary.html#lightning.pytorch.callbacks.RichModelSummary) +- [`GPUStatsMonitor`](https://pytorch-lightning.readthedocs.io/en/1.5.10/api/pytorch_lightning.callbacks.gpu_stats_monitor.html) +- [`DeviceStatsMonitor`](https://lightning.ai/docs/pytorch/stable/api/lightning.pytorch.callbacks.DeviceStatsMonitor.html#lightning.pytorch.callbacks.DeviceStatsMonitor) +- [`EarlyStopping`](https://lightning.ai/docs/pytorch/stable/api/lightning.pytorch.callbacks.EarlyStopping.html#lightning.pytorch.callbacks.EarlyStopping) +- [`LearningRateMonitor`](https://lightning.ai/docs/pytorch/stable/api/lightning.pytorch.callbacks.LearningRateMonitor.html#lightning.pytorch.callbacks.LearningRateMonitor) +- [`ModelCheckpoint`](https://lightning.ai/docs/pytorch/stable/api/lightning.pytorch.callbacks.ModelCheckpoint.html#lightning.pytorch.callbacks.ModelCheckpoint) +- [`RichModelSummary`](https://lightning.ai/docs/pytorch/stable/api/lightning.pytorch.callbacks.RichModelSummary.html#lightning.pytorch.callbacks.RichModelSummary) +- [`GradientAccumulationScheduler`](https://lightning.ai/docs/pytorch/stable/api/lightning.pytorch.callbacks.GradientAccumulationScheduler.html#lightning.pytorch.callbacks.GradientAccumulationScheduler) +- 
[`StochasticWeightAveraging`](https://lightning.ai/docs/pytorch/stable/api/lightning.pytorch.callbacks.StochasticWeightAveraging.html#lightning.pytorch.callbacks.StochasticWeightAveraging) +- [`Timer`](https://lightning.ai/docs/pytorch/stable/api/lightning.pytorch.callbacks.Timer.html#lightning.pytorch.callbacks.Timer) +- [`ModelPruning`](https://lightning.ai/docs/pytorch/stable/api/lightning.pytorch.callbacks.ModelPruning.html#lightning.pytorch.callbacks.ModelPruning) -## ExportOnTrainEnd +## `ExportOnTrainEnd` Performs export on train end with best weights. -**Params** +**Parameters:** -| Key | Type | Default value | Description | -| -------------------- | --------------------------- | ------------- | ----------------------------------------------------------------------------------------------------------- | -| preferred_checkpoint | Literal\["metric", "loss"\] | metric | Which checkpoint should we use. If preferred is not available then try to use the other one if its present. | +| Key | Type | Default value | Description | +| ---------------------- | --------------------------- | ------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `preferred_checkpoint` | `Literal["metric", "loss"]` | `"metric"` | Which checkpoint should the callback use. If the preferred checkpoint is not available, the other option is used. If none is available, the callback is skipped | -## LuxonisProgressBar +## `ArchiveOnTrainEnd` -Custom rich text progress bar based on RichProgressBar from Pytorch Lightning. +Callback to create an `NN Archive` at the end of the training. -## MetadataLogger +**Parameters:** + +| Key | Type | Default value | Description | +| ---------------------- | --------------------------- | ------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `preferred_checkpoint` | `Literal["metric", "loss"]` | `"metric"` | Which checkpoint should the callback use. If the preferred checkpoint is not available, the other option is used. If none is available, the callback is skipped | + +## `MetadataLogger` Callback that logs training metadata. Metadata include all defined hyperparameters together with git hashes of `luxonis-ml` and `luxonis-train` packages. Also stores this information locally. -**Params** +**Parameters:** -| Key | Type | Default value | Description | -| ----------- | ----------- | ------------- | ----------------------------------------------------------------------------------------------------------------------- | -| hyperparams | list\[str\] | \[\] | List of hyperparameters to log. The hyperparameters are provided as config keys in dot notation. E.g. "trainer.epochs". | +| Key | Type | Default value | Description | +| ------------- | ----------- | ------------- | -------------------------------------------------------------------------------------------------------------------------- | +| `hyperparams` | `list[str]` | `[]` | List of hyperparameters to log. The hyperparameters are provided as config keys in dot notation. _E.g._ `"trainer.epochs"` | -## TestOnTrainEnd +## `TestOnTrainEnd` Callback to perform a test run at the end of the training. -## UploadCheckpoint +## `UploadCheckpoint` -Callback that uploads currently best checkpoint (based on validation loss) to the tracker location - where all other logs are stored. 
+Callback that uploads currently the best checkpoint (based on validation loss) to the tracker location - where all other logs are stored. diff --git a/luxonis_train/config/predefined_models/README.md b/luxonis_train/config/predefined_models/README.md index 3733534d..27976b3a 100644 --- a/luxonis_train/config/predefined_models/README.md +++ b/luxonis_train/config/predefined_models/README.md @@ -1,150 +1,150 @@ # Predefined models -In addition to definig the model by hand, we offer a list of simple predefined +In addition to defining the model by hand, we offer a list of simple predefined models which can be used instead. ## Table Of Contents -- [SegmentationModel](#segmentationmodel) -- [DetectionModel](#detectionmodel) -- [KeypointDetectionModel](#keypointdetectionmodel) -- [ClassificationModel](#classificationmodel) - -**Params** - -| Key | Type | Default value | Description | -| ------------------- | ---------------- | ------------- | --------------------------------------------------------------------- | -| name | str | | Name of the predefined architecture. See below the available options. | -| params | dict\[str, Any\] | {} | Additional parameters of the predefined model. | -| include_nodes | bool | True | Whether to include nodes of the model. | -| include_losses | bool | True | Whether to include loss functions. | -| include_metrics | bool | True | Whether to include metrics. | -| include_visualizers | bool | True | Whether to include visualizers. | - -## SegmentationModel - -The `SegmentationModel` allows for both "light" and "heavy" variants, where the "heavy" variant is more accurate, and the "light" variant is faster. - -See an example configuration file using this predefined model [here](../../../configs/segmentation_light_model.yaml) for the "light" variant, and [here](../../../configs/segmentation_heavy_model.yaml) for the "heavy" variant. - -**Components** - -| Name | Alias | Function | -| --------------------------------------------------------------------------------------------- | -------------------------- | -------------------------------------------------------------------------------------------- | -| [DDRNet](../../nodes/README.md#ddrnet) | segmentation_backbone | Backbone of the model. Available variants: "light" (DDRNet-23-slim) and "heavy" (DDRNet-23). | -| [SegmentationHead](../../nodes/README.md#segmentationhead) | segmentation_head | Head of the model. | -| [BCEWithLogitsLoss](../../attached_modules/losses/README.md#bcewithlogitsloss) | segmentation_loss | Loss of the model when the task is set to "binary". | -| [CrossEntropyLoss](../../attached_modules/losses/README.md#crossentropyloss) | segmentation_loss | Loss of the model when the task is set to "multiclass" or "multilabel". | -| [JaccardIndex](../../attached_modules/metrics/README.md#torchmetrics) | segmentation_jaccard_index | Main metric of the model. | -| [F1Score](../../attached_modules/metrics/README.md#torchmetrics) | segmentation_f1_score | Secondary metric of the model. | -| [SegmentationVisualizer](../../attached_modules/visualizers/README.md#segmentationvisualizer) | segmentation_visualizer | Visualizer of the `SegmentationHead`. | - -**Params** - -| Key | Type | Default value | Description | -| ----------------- | --------------------------------- | ------------- | ------------------------------------------------------------------------------------------------ | -| variant | Literal\["light", "heavy"\] | "light" | Defines the variant of the model. 
"light" uses DDRNet-23-slim, "heavy" uses DDRNet-23. | -| backbone | str | "DDRNet" | Name of the node to be used as a backbone. | -| backbone_params | dict | {} | Additional parameters for the backbone. If not provided, variant-specific defaults will be used. | -| head_params | dict | {} | Additional parameters for the head. | -| aux_head_params | dict | {} | Additional parameters for auxiliary heads. | -| loss_params | dict | {} | Additional parameters for the loss. | -| visualizer_params | dict | {} | Additional parameters for the visualizer. | -| task | Literal\["binary", "multiclass"\] | "binary" | Type of the task of the model. | -| task_name | str \| None | None | Custom task name for the head. | - -## DetectionModel - -The `DetectionModel` allows for both "light" and "heavy" variants, where the "heavy" variant is more accurate, and the "light" variant is faster. - -See an example configuration file using this predefined model [here](../../../configs/detection_light_model.yaml) for the "light" variant, and [here](../../../configs/detection_heavy_model.yaml) for the "heavy" variant. - -**Components** - -| Name | Alias | Function | -| -------------------------------------------------------------------------------------- | -------------------- | ------------------------------------------------------------------------------------------------- | -| [EfficientRep](../../nodes/README.md#efficientrep) | detection_backbone | Backbone of the model. Available variants: "light" (EfficientRep-N) and "heavy" (EfficientRep-L). | -| [RepPANNeck](../../nodes/README.md#reppanneck) | detection_neck | Neck of the model. | -| [EfficientBBoxHead](../../nodes/README.md#efficientbboxhead) | detection_head | Head of the model. | -| [AdaptiveDetectionLoss](../../attached_modules/losses/README.md#adaptivedetectionloss) | detection_loss | Loss of the model. | -| [MeanAveragePrecision](../../attached_modules/metrics/README.md#meanaverageprecision) | detection_map | Main metric of the model. | -| [BBoxVisualizer](../../attached_modules/visualizers/README.md#bboxvisualizer) | detection_visualizer | Visualizer of the `detection_head`. | - -**Params** - -| Key | Type | Default value | Description | -| ----------------- | --------------------------- | -------------- | ------------------------------------------------------------------------------------------- | -| variant | Literal\["light", "heavy"\] | "light" | Defines the variant of the model. "light" uses EfficientRep-N, "heavy" uses EfficientRep-L. | -| use_neck | bool | True | Whether to include the neck in the model. | -| backbone | str | "EfficientRep" | Name of the node to be used as a backbone. | -| backbone_params | dict | {} | Additional parameters to the backbone. | -| neck_params | dict | {} | Additional parameters to the neck. | -| head_params | dict | {} | Additional parameters to the head. | -| loss_params | dict | {} | Additional parameters to the loss. | -| visualizer_params | dict | {} | Additional parameters to the visualizer. | -| task_name | str \| None | None | Custom task name for the head. | - -## KeypointDetectionModel - -The `KeypointDetectionModel` allows for both "light" and "heavy" variants, where the "heavy" variant is more accurate, and the "light" variant is faster. - -See an example configuration file using this predefined model [here](../../../configs/keypoint_bbox_light_model.yaml) for the "light" variant, and [here](../../../configs/keypoint_bbox_heavy_model.yaml) for the "heavy" variant. 
- -**Components** - -| Name | Alias | Function | -| ------------------------------------------------------------------------------------------------------- | ---------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------- | -| [EfficientRep](../../nodes/README.md#efficientrep) | kpt_detection_backbone | Backbone of the model.. Available variants: "light" (EfficientRep-N) and "heavy" (EfficientRep-L). | -| [RepPANNeck](../../nodes/README.md#reppanneck) | kpt_detection_neck | Neck of the model. | -| [EfficientKeypointBBoxHead](../../nodes/README.md#efficientkeypointbboxhead) | kpt_detection_head | Head of the model. | -| [EfficientKeypointBBoxLoss](../../attached_modules/losses/README.md#efficientkeypointbboxloss) | kpt_detection_loss | Loss of the model. | -| [ObjectKeypointSimilarity](../../attached_modules/metrics/README.md#objectkeypointsimilarity) | kpt_detection_oks | Main metric of the model. | -| [MeanAveragePrecisionKeypoints](../../attached_modules/metrics/README.md#meanaverageprecisionkeypoints) | kpt_detection_map | Secondary metric of the model. | -| [BBoxVisualizer](../../attached_modules/visualizers/README.md#bboxvisualizer) | | Visualizer for bounding boxes. Combined with keypoint visualizer in [MultiVisualizer](../../attached_modules/visualizers/README.md#multivisualizer). | -| [KeypointVisualizer](../../attached_modules/visualizers/README.md#keypointvisualizer) | | Visualizer for keypoints. Combined with keypoint visualizer in [MultiVisualizer](../../attached_modules/visualizers/README.md#multivisualizer) | - -**Params** - -| Key | Type | Default value | Description | -| ---------------------- | --------------------------- | -------------- | ------------------------------------------------------------------------------------------- | -| variant | Literal\["light", "heavy"\] | "light" | Defines the variant of the model. "light" uses EfficientRep-N, "heavy" uses EfficientRep-L. | -| use_neck | bool | True | Whether to include the neck in the model. | -| backbone | str | "EfficientRep" | Name of the node to be used as a backbone. | -| backbone_params | dict | {} | Additional parameters to the backbone. | -| neck_params | dict | {} | Additional parameters to the neck. | -| head_params | dict | {} | Additional parameters to the head. | -| loss_params | dict | {} | Additional parameters to the loss. | -| kpt_visualizer_params | dict | {} | Additional parameters to the keypoint visualizer. | -| bbox_visualizer_params | dict | {} | Additional parameters to the bbox visualizer. | -| bbox_task_name | str \| None | None | Custom task name for the detection head. | -| kpt_task_name | str \| None | None | Custom task name for the keypoint head. | - -## ClassificationModel - -The `ClassificationModel` allows for both "light" and "heavy" variants, where the "heavy" variant is more accurate, and the "light" variant is faster. Can be used for multiclass and multilabel tasks. - -See an example configuration file using this predefined model [here](../../../configs/classification_light_model.yaml) for the "light" variant, and [here](../../../configs/classification_heavy_model.yaml) for the "heavy" variant. 
- -**Components** - -| Name | Alias | Function | -| ---------------------------------------------------------------------------- | ----------------------- | ----------------------------------------------------------------------------------------------------- | -| [ResNet](../../nodes/README.md#resnet) | classification_backbone | Backbone of the model. The "light" variant uses ResNet-18, while the "heavy" variant uses ResNet-101. | -| [ClassificationHead](../../nodes/README.md#classificationhead) | classification_head | Head of the model. | -| [CrossEntropyLoss](../../attached_modules/losses/README.md#crossentropyloss) | classification_loss | Loss of the model. | -| [F1Score](../../attached_modules/metrics/README.md#torchmetrics) | classification_f1_score | Main metric of the model. | -| [Accuracy](../../attached_modules/metrics/README.md#torchmetrics) | classification_accuracy | Secondary metric of the model. | -| [Recall](../../attached_modules/metrics/README.md#torchmetrics) | classification_recall | Secondary metric of the model. | - -**Params** - -| Key | Type | Default value | Description | -| ----------------- | ------------------------------------- | ------------- | ----------------------------------------------------------------------------------- | -| variant | Literal\["light", "heavy"\] | "light" | Defines the variant of the model. "light" uses ResNet-18, "heavy" uses ResNet-101. | -| backbone | str | "ResNet" | Name of the node to be used as a backbone. | -| backbone_params | dict | {} | Additional parameters to the backbone. | -| head_params | dict | {} | Additional parameters to the head. | -| loss_params | dict | {} | Additional parameters to the loss. | -| visualizer_params | dict | {} | Additional parameters to the visualizer. | -| task | Literal\["multiclass", "multilabel"\] | "multiclass" | Type of the task of the model. | -| task_name | str \| None | None | Custom task name for the head. | +- [`SegmentationModel`](#segmentationmodel) +- [`DetectionModel`](#detectionmodel) +- [`KeypointDetectionModel`](#keypointdetectionmodel) +- [`ClassificationModel`](#classificationmodel) + +**Parameters:** + +| Key | Type | Default value | Description | +| --------------------- | ---------------- | ------------- | -------------------------------------------------------------------- | +| `name` | `str` | - | Name of the predefined architecture. See below the available options | +| `params` | `dict[str, Any]` | `{}` | Additional parameters of the predefined model | +| `include_nodes` | `bool` | `True` | Whether to include nodes of the model | +| `include_losses` | `bool` | `True` | Whether to include loss functions | +| `include_metrics` | `bool` | `True` | Whether to include metrics | +| `include_visualizers` | `bool` | `True` | Whether to include visualizers | + +## `SegmentationModel` + +The `SegmentationModel` allows for both `"light"` and `"heavy"` variants, where the `"heavy"` variant is more accurate, and the `"light"` variant is faster. + +See an example configuration file using this predefined model [here](../../../configs/segmentation_light_model.yaml) for the `"light"` variant, and [here](../../../configs/segmentation_heavy_model.yaml) for the `"heavy"` variant. 
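As an illustration of how a predefined model is referenced from a training configuration, here is a minimal sketch. It assumes the `model.predefined_model` key used by the example configuration files linked above; consult those files for the authoritative layout, as the surrounding `trainer` values here are only placeholders.

```yaml
# Minimal sketch of a config using the predefined SegmentationModel.
# The `predefined_model` key and parameter names are assumed from the
# example configs referenced above; adjust values to your dataset.
model:
  name: segmentation_example
  predefined_model:
    name: SegmentationModel
    params:
      variant: light   # "light" -> DDRNet-23-slim, "heavy" -> DDRNet-23
      task: binary     # or "multiclass"

trainer:
  preprocessing:
    train_image_size: [256, 256]
    normalize:
      active: true
  batch_size: 4
  epochs: 10
```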
+ +**Components:** + +| Name | Alias | Function | +| ----------------------------------------------------------------------------------------------- | ------------------------------ | --------------------------------------------------------------------------------------------------- | +| [`DDRNet`](../../nodes/README.md#ddrnet) | `"segmentation_backbone"` | Backbone of the model. Available variants: `"light"` (`DDRNet-23-slim`) and `"heavy"` (`DDRNet-23`) | +| [`SegmentationHead`](../../nodes/README.md#segmentationhead) | `"segmentation_head"` | Head of the model | +| [`BCEWithLogitsLoss`](../../attached_modules/losses/README.md#bcewithlogitsloss) | `"segmentation_loss"` | Loss of the model when the task is set to `"binary"` | +| [`CrossEntropyLoss`](../../attached_modules/losses/README.md#crossentropyloss) | `"segmentation_loss"` | Loss of the model when the task is set to `"multiclass"` or `"multilabel"` | +| [`JaccardIndex`](../../attached_modules/metrics/README.md#torchmetrics) | `"segmentation_jaccard_index"` | Main metric of the model | +| [`F1Score`](../../attached_modules/metrics/README.md#torchmetrics) | `"segmentation_f1_score"` | Secondary metric of the model | +| [`SegmentationVisualizer`](../../attached_modules/visualizers/README.md#segmentationvisualizer) | `"segmentation_visualizer"` | Visualizer of the `SegmentationHead` | + +**Parameters:** + +| Key | Type | Default value | Description | +| ------------------- | --------------------------------- | ------------- | ----------------------------------------------------------------------------------------------- | +| `variant` | `Literal["light", "heavy"]` | `"light"` | Defines the variant of the model. `"light"` uses `DDRNet-23-slim`, `"heavy"` uses `DDRNet-23` | +| `backbone` | `str` | `"DDRNet"` | Name of the node to be used as a backbone | +| `backbone_params` | `dict` | `{}` | Additional parameters for the backbone. If not provided, variant-specific defaults will be used | +| `head_params` | `dict` | `{}` | Additional parameters for the head | +| `aux_head_params` | `dict` | `{}` | Additional parameters for auxiliary heads | +| `loss_params` | `dict` | `{}` | Additional parameters for the loss | +| `visualizer_params` | `dict` | `{}` | Additional parameters for the visualizer | +| `task` | `Literal["binary", "multiclass"]` | `"binary"` | Type of the task of the model | +| `task_name` | `str \| None` | `None` | Custom task name for the head | + +## `DetectionModel` + +The `DetectionModel` allows for both `"light"` and `"heavy"` variants, where the `"heavy"` variant is more accurate, and the `"light"` variant is faster. + +See an example configuration file using this predefined model [here](../../../configs/detection_light_model.yaml) for the `"light"` variant, and [here](../../../configs/detection_heavy_model.yaml) for the `"heavy"` variant. + +**Components:** + +| Name | Alias | Function | +| ---------------------------------------------------------------------------------------- | ------------------------ | -------------------------------------------------------------------------------------------------------- | +| [`EfficientRep`](../../nodes/README.md#efficientrep) | `"detection_backbone"` | Backbone of the model. 
Available variants: `"light"` (`EfficientRep-N`) and `"heavy"` (`EfficientRep-L`) | +| [`RepPANNeck`](../../nodes/README.md#reppanneck) | `"detection_neck"` | Neck of the model | +| [`EfficientBBoxHead`](../../nodes/README.md#efficientbboxhead) | `"detection_head"` | Head of the model | +| [`AdaptiveDetectionLoss`](../../attached_modules/losses/README.md#adaptivedetectionloss) | `"detection_loss"` | Loss of the model | +| [`MeanAveragePrecision`](../../attached_modules/metrics/README.md#meanaverageprecision) | `"detection_map"` | Main metric of the model | +| [`BBoxVisualizer`](../../attached_modules/visualizers/README.md#bboxvisualizer) | `"detection_visualizer"` | Visualizer of the `detection_head` | + +**Parameters:** + +| Key | Type | Default value | Description | +| ------------------- | --------------------------- | ---------------- | -------------------------------------------------------------------------------------------------- | +| `variant` | `Literal["light", "heavy"]` | `"light"` | Defines the variant of the model. `"light"` uses `EfficientRep-N`, `"heavy"` uses `EfficientRep-L` | +| `use_neck` | `bool` | `True` | Whether to include the neck in the model | +| `backbone` | `str` | `"EfficientRep"` | Name of the node to be used as a backbone | +| `backbone_params` | `dict` | `{}` | Additional parameters to the backbone | +| `neck_params` | `dict` | `{}` | Additional parameters to the neck | +| `head_params` | `dict` | `{}` | Additional parameters to the head | +| `loss_params` | `dict` | `{}` | Additional parameters to the loss | +| `visualizer_params` | `dict` | `{}` | Additional parameters to the visualizer | +| `task_name` | `str \| None` | `None` | Custom task name for the head | + +## `KeypointDetectionModel` + +The `KeypointDetectionModel` allows for both `"light"` and `"heavy"` variants, where the `"heavy"` variant is more accurate, and the `"light"` variant is faster. + +See an example configuration file using this predefined model [here](../../../configs/keypoint_bbox_light_model.yaml) for the `"light"` variant, and [here](../../../configs/keypoint_bbox_heavy_model.yaml) for the `"heavy"` variant. + +**Components:** + +| Name | Alias | Function | +| --------------------------------------------------------------------------------------------------------- | ---------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------- | +| [`EfficientRep`](../../nodes/README.md#efficientrep) | `"kpt_detection_backbone"` | Backbone of the model. Available variants: `"light"` (`EfficientRep-N`) and `"heavy"` (`EfficientRep-L`) | +| [`RepPANNeck`](../../nodes/README.md#reppanneck) | `"kpt_detection_neck"` | Neck of the model | +| [`EfficientKeypointBBoxHead`](../../nodes/README.md#efficientkeypointbboxhead) | `"kpt_detection_head"` | Head of the model | +| [`EfficientKeypointBBoxLoss`](../../attached_modules/losses/README.md#efficientkeypointbboxloss) | `"kpt_detection_loss"` | Loss of the model | +| [`ObjectKeypointSimilarity`](../../attached_modules/metrics/README.md#objectkeypointsimilarity) | `"kpt_detection_oks"` | Main metric of the model | +| [`MeanAveragePrecisionKeypoints`](../../attached_modules/metrics/README.md#meanaverageprecisionkeypoints) | `"kpt_detection_map"` | Secondary metric of the model | +| [`BBoxVisualizer`](../../attached_modules/visualizers/README.md#bboxvisualizer) | `"kpt_detection_visualizer"` | Visualizer for bounding boxes. 
Combined with keypoint visualizer using [`MultiVisualizer`](../../attached_modules/visualizers/README.md#multivisualizer) | +| [`KeypointVisualizer`](../../attached_modules/visualizers/README.md#keypointvisualizer) | `"kpt_detection_visualizer"` | Visualizer for keypoints. Combined with keypoint visualizer using [`MultiVisualizer`](../../attached_modules/visualizers/README.md#multivisualizer) | + +**Parameters:** + +| Key | Type | Default value | Description | +| ------------------------ | --------------------------- | ---------------- | -------------------------------------------------------------------------------------------------- | +| `variant` | `Literal["light", "heavy"]` | `"light"` | Defines the variant of the model. `"light"` uses `EfficientRep-N`, `"heavy"` uses `EfficientRep-L` | +| `use_neck` | `bool` | `True` | Whether to include the neck in the model | +| `backbone` | `str` | `"EfficientRep"` | Name of the node to be used as a backbone | +| `backbone_params` | `dict` | `{}` | Additional parameters to the backbone | +| `neck_params` | `dict` | `{}` | Additional parameters to the neck | +| `head_params` | `dict` | `{}` | Additional parameters to the head | +| `loss_params` | `dict` | `{}` | Additional parameters to the loss | +| `kpt_visualizer_params` | `dict` | `{}` | Additional parameters to the keypoint visualizer | +| `bbox_visualizer_params` | `dict` | `{}` | Additional parameters to the bounding box visualizer | +| `bbox_task_name` | `str \| None` | `None` | Custom task name for the detection head | +| `kpt_task_name` | `str \| None` | `None` | Custom task name for the keypoint head | + +## `ClassificationModel` + +The `ClassificationModel` allows for both `"light"` and `"heavy"` variants, where the `"heavy"` variant is more accurate, and the `"light"` variant is faster. Can be used for multi-class and multi-label tasks. + +See an example configuration file using this predefined model [here](../../../configs/classification_light_model.yaml) for the `"light"` variant, and [here](../../../configs/classification_heavy_model.yaml) for the `"heavy"` variant. + +**Components:** + +| Name | Alias | Function | +| ------------------------------------------------------------------------------ | --------------------------- | ------------------------------------------------------------------------------------------------------------ | +| [`ResNet`](../../nodes/README.md#resnet) | `"classification_backbone"` | Backbone of the model. The `"light"` variant uses `ResNet-18`, while the `"heavy"` variant uses `ResNet-101` | +| [`ClassificationHead`](../../nodes/README.md#classificationhead) | `"classification_head"` | Head of the model | +| [`CrossEntropyLoss`](../../attached_modules/losses/README.md#crossentropyloss) | `"classification_loss"` | Loss of the model | +| [F1Score](../../attached_modules/metrics/README.md#torchmetrics) | `"classification_f1_score"` | Main metric of the model | +| [Accuracy](../../attached_modules/metrics/README.md#torchmetrics) | `"classification_accuracy"` | Secondary metric of the model | +| [Recall](../../attached_modules/metrics/README.md#torchmetrics) | `"classification_recall"` | Secondary metric of the model | + +**Parameters:** + +| Key | Type | Default value | Description | +| ------------------- | ------------------------------------- | -------------- | ----------------------------------------------------------------------------------------- | +| `variant` | `Literal["light", "heavy"]` | `"light"` | Defines the variant of the model. 
`"light"` uses `ResNet-18`, `"heavy"` uses `ResNet-101` | +| `backbone` | `str` | `"ResNet"` | Name of the node to be used as a backbone | +| `backbone_params` | `dict` | `{}` | Additional parameters to the backbone | +| `head_params` | `dict` | `{}` | Additional parameters to the head | +| `loss_params` | `dict` | `{}` | Additional parameters to the loss | +| `visualizer_params` | `dict` | `{}` | Additional parameters to the visualizer | +| `task` | `Literal["multiclass", "multilabel"]` | `"multiclass"` | Type of the task of the model | +| `task_name` | `str \| None` | `None` | Custom task name for the head | diff --git a/luxonis_train/core/core.py b/luxonis_train/core/core.py index bc3d3673..46405b0c 100644 --- a/luxonis_train/core/core.py +++ b/luxonis_train/core/core.py @@ -285,17 +285,22 @@ def thread_exception_hook(args): def export( self, - onnx_save_path: str | None = None, - *, + onnx_save_path: str | Path | None = None, weights: str | Path | None = None, ) -> None: """Runs export. - @type onnx_path: str | None - @param onnx_path: Path to .onnx model. If not specified, model will be saved - to export directory with name specified in config file. - - @raises RuntimeError: If `onnxsim` fails to simplify the model. + @type onnx_save_path: str | Path | None + @param onnx_save_path: Path to where the exported ONNX model will be saved. + If not specified, model will be saved to the export directory + with the name specified in config file. + @type weights: str | Path | None + @param weights: Path to the checkpoint from which to load weights. + If not specified, the value of `model.weights` from the + configuration file will be used. The current weights of the + model will be temporarily replaced with the weights from the + specified checkpoint. + @raises RuntimeError: If C{onnxsim} fails to simplify the model. """ weights = weights or self.cfg.model.weights @@ -311,8 +316,8 @@ def export( export_path = export_save_dir / ( self.cfg.exporter.name or self.cfg.model.name ) - onnx_save_path = onnx_save_path or str( - export_path.with_suffix(".onnx") + onnx_save_path = str( + onnx_save_path or export_path.with_suffix(".onnx") ) with replace_weights(self.lightning_module, weights): @@ -381,6 +386,7 @@ def test( self, new_thread: Literal[False] = ..., view: Literal["train", "test", "val"] = "val", + weights: str | Path | None = ..., ) -> Mapping[str, float]: ... @overload @@ -388,6 +394,7 @@ def test( self, new_thread: Literal[True] = ..., view: Literal["train", "test", "val"] = "val", + weights: str | Path | None = ..., ) -> None: ... @typechecked @@ -395,6 +402,7 @@ def test( self, new_thread: bool = False, view: Literal["train", "val", "test"] = "val", + weights: str | Path | None = None, ) -> Mapping[str, float] | None: """Runs testing. @@ -405,61 +413,78 @@ def test( @rtype: Mapping[str, float] | None @return: If new_thread is False, returns a dictionary test results. + @type weights: str | Path | None + @param weights: Path to the checkpoint from which to load weights. + If not specified, the value of `model.weights` from the + configuration file will be used. The current weights of the + model will be temporarily replaced with the weights from the + specified checkpoint. 
""" + weights = weights or self.cfg.model.weights loader = self.pytorch_loaders[view] - if not new_thread: - return self.pl_trainer.test(self.lightning_module, loader)[0] - else: # pragma: no cover - self.thread = threading.Thread( - target=self.pl_trainer.test, - args=(self.lightning_module, loader), - daemon=True, - ) - self.thread.start() + with replace_weights(self.lightning_module, weights): + if not new_thread: + return self.pl_trainer.test(self.lightning_module, loader)[0] + else: # pragma: no cover + self.thread = threading.Thread( + target=self.pl_trainer.test, + args=(self.lightning_module, loader), + daemon=True, + ) + self.thread.start() @typechecked def infer( self, view: Literal["train", "val", "test"] = "val", save_dir: str | Path | None = None, - source_path: str | None = None, + source_path: str | Path | None = None, + weights: str | Path | None = None, ) -> None: """Runs inference. @type view: str @param view: Which split to run the inference on. Valid values - are: 'train', 'val', 'test'. Defaults to "val". + are: C{"train"}, C{"val"}, C{"test"}. Defaults to C{"val"}. @type save_dir: str | Path | None @param save_dir: Directory where to save the visualizations. If not specified, visualizations will be rendered on the screen. - @type source_path: str | None + @type source_path: str | Path | None @param source_path: Path to the image file, video file or directory. If None, defaults to using dataset images. + @type weights: str | Path | None + @param weights: Path to the checkpoint from which to load weights. + If not specified, the value of `model.weights` from the + configuration file will be used. The current weights of the + model will be temporarily replaced with the weights from the + specified checkpoint. """ self.lightning_module.eval() + weights = weights or self.cfg.model.weights - if source_path: - source_path_obj = Path(source_path) - if source_path_obj.suffix.lower() in VIDEO_FORMATS: - process_video(self, source_path_obj, view, save_dir) - elif source_path_obj.is_file(): - process_images(self, [source_path_obj], view, save_dir) - elif source_path_obj.is_dir(): - image_files = [ - f - for f in source_path_obj.iterdir() - if f.suffix.lower() in IMAGE_FORMATS - ] - process_images(self, image_files, view, save_dir) + with replace_weights(self.lightning_module, weights): + if source_path: + source_path_obj = Path(source_path) + if source_path_obj.suffix.lower() in VIDEO_FORMATS: + process_video(self, source_path_obj, view, save_dir) + elif source_path_obj.is_file(): + process_images(self, [source_path_obj], view, save_dir) + elif source_path_obj.is_dir(): + image_files = [ + f + for f in source_path_obj.iterdir() + if f.suffix.lower() in IMAGE_FORMATS + ] + process_images(self, image_files, view, save_dir) + else: + raise ValueError( + f"Source path {source_path} is not a valid file or directory." + ) else: - raise ValueError( - f"Source path {source_path} is not a valid file or directory." - ) - else: - process_dataset_images(self, view, save_dir) + process_dataset_images(self, view, save_dir) def tune(self) -> None: """Runs Optuna tunning of hyperparameters.""" @@ -622,15 +647,30 @@ def _objective(trial: optuna.trial.Trial) -> float: ) wandb_parent_tracker.log_hyperparams(study.best_params) - def archive(self, path: str | Path | None = None) -> Path: + def archive( + self, + path: str | Path | None = None, + weights: str | Path | None = None, + ) -> Path: """Generates an NN Archive out of a model executable. 
@type path: str | Path | None @param path: Path to the model executable. If not specified, the model will be exported first. + @type weights: str | Path | None + @param weights: Path to the checkpoint from which to load weights. + If not specified, the value of `model.weights` from the + configuration file will be used. The current weights of the + model will be temporarily replaced with the weights from the + specified checkpoint. @rtype: Path @return: Path to the generated NN Archive. """ + weights = weights or self.cfg.model.weights + with replace_weights(self.lightning_module, weights): + return self._archive(path) + + def _archive(self, path: str | Path | None = None) -> Path: from .utils.archive_utils import get_heads, get_inputs, get_outputs archive_name = self.cfg.archiver.name or self.cfg.model.name diff --git a/luxonis_train/loaders/README.md b/luxonis_train/loaders/README.md new file mode 100644 index 00000000..0a1a5bca --- /dev/null +++ b/luxonis_train/loaders/README.md @@ -0,0 +1,34 @@ +# Loaders + +## Table Of Contents + +- [`LuxonisLoaderTorch`](#luxonisloadertorch) + - [Implementing a custom loader](#implementing-a-custom-loader) + +## `LuxonisLoaderTorch` + +The default loader used with `LuxonisTrain`. It can either load data from an already created dataset in the `LuxonisDataFormat` or create a new dataset automatically from a set of supported formats. + +**Parameters:** + +| Key | Type | Default value | Description | +| ----------------- | --------------------------------------------------------------------------------------------------------- | ------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `dataset_name` | `str` | `None` | Name of the dataset to load. If not provided, the `dataset_dir` must be provided instead | +| `dataset_dir` | `str` | `None` | Path to the directory containing the dataset. If not provided, the `dataset_name` must be provided instead. Can be a path to a local directory or a URL. The data can be in a zip archive. New `LuxonisDataset` will be created using data from this directory and saved under the provided `dataset_name` | +| `dataset_type` | `Literal["coco", "voc", "darknet", "yolov6", "yolov4", "createml", "tfcsv", "clsdir", "segmask"] \| None` | `None` | Type of the dataset. If not provided, the type will be inferred from the directory structure | +| `team_id` | `str \| None` | `None` | Optional unique team identifier for the cloud | +| `bucket_storage` | `Literal["local", "s3", "gcs"]` | `"local"` | Type of the bucket storage | +| `delete_existing` | `bool` | `False` | Whether to delete the existing dataset with the same name. Only relevant if `dataset_dir` is provided. Use if you want to reparse the directory in case the data changed | + +### Implementing a custom loader + +To implement a loader, you need to create a class that inherits from `BaseLoaderTorch` and implement the following methods: + +- `input_shapes(self) -> dict[str, torch.Size]`: Returns a dictionary with input shapes for each input image. +- `__len__(self) -> int`: Returns the number of samples in the dataset. +- `__getitem__(self, idx: int) -> tuple[dict[str, torch.Tensor], dict[str, tuple[torch.Tensor, luxonis_train.enums.TaskType]]`: Returns a dictionary with input tensors for each input image. 
+- `get_classes(self) -> dict[str, list[str]]`: Returns a dictionary with class names for each task in the dataset. + +For loaders yielding keypoint tasks, you also have to implement `get_n_keypoints(self) -> dict[str, int]` method. + +For more information, consult the in-source [documentation](https://github.com/luxonis/luxonis-train/blob/main/luxonis_train/loaders/base_loader.py). diff --git a/luxonis_train/loaders/base_loader.py b/luxonis_train/loaders/base_loader.py index 25ffc922..0c056d98 100644 --- a/luxonis_train/loaders/base_loader.py +++ b/luxonis_train/loaders/base_loader.py @@ -34,7 +34,7 @@ def __init__( def image_source(self) -> str: """Name of the input image group. - Example: 'image' + Example: C{"image"} @type: str """ diff --git a/luxonis_train/loaders/luxonis_loader_torch.py b/luxonis_train/loaders/luxonis_loader_torch.py index b0e83a94..230128b5 100644 --- a/luxonis_train/loaders/luxonis_loader_torch.py +++ b/luxonis_train/loaders/luxonis_loader_torch.py @@ -1,5 +1,4 @@ import logging -from pathlib import Path from typing import Literal import numpy as np @@ -136,9 +135,7 @@ def _parse_dataset( delete_existing: bool, ) -> LuxonisDataset: if dataset_name is None: - dataset_name = Path(dataset_dir).stem - if dataset_type is not None: - dataset_name += f"_{dataset_type.value}" + dataset_name = dataset_dir.split("/")[-1] if LuxonisDataset.exists(dataset_name): if not delete_existing: diff --git a/luxonis_train/models/luxonis_lightning.py b/luxonis_train/models/luxonis_lightning.py index 03d633c9..459b20d1 100644 --- a/luxonis_train/models/luxonis_lightning.py +++ b/luxonis_train/models/luxonis_lightning.py @@ -803,7 +803,7 @@ def configure_optimizers( self, ) -> tuple[ list[torch.optim.Optimizer], - list[torch.optim.lr_scheduler._LRScheduler], + list[torch.optim.lr_scheduler.LRScheduler], ]: """Configures model optimizers and schedulers.""" cfg_optimizer = self.cfg.trainer.optimizer diff --git a/luxonis_train/nodes/README.md b/luxonis_train/nodes/README.md index dad43921..9e561cc8 100644 --- a/luxonis_train/nodes/README.md +++ b/luxonis_train/nodes/README.md @@ -1,235 +1,235 @@ # Nodes Nodes are the basic building structures of the model. They can be connected together -arbitrarily as long as the two nodes are compatible with each other. We've grouped together nodes that are similar so it's easier to build an architecture that makes sense. +arbitrarily as long as the two nodes are compatible with each other. We've grouped together nodes that are similar, so it's easier to build an architecture that makes sense. 
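To make "connecting compatible nodes" concrete, the sketch below wires a backbone, a neck, and a head together by name. The `nodes`/`inputs` layout is assumed from this repository's example configuration files; it is only an illustration, not an authoritative or complete config.

```yaml
# Illustrative sketch only: an EfficientRep backbone feeding a RepPANNeck,
# which feeds an EfficientBBoxHead. Node and key names are assumptions
# taken from the repository's example configs.
model:
  name: detection_example
  nodes:
    - name: EfficientRep
      params:
        variant: nano

    - name: RepPANNeck
      inputs:
        - EfficientRep

    - name: EfficientBBoxHead
      inputs:
        - RepPANNeck
```

Each entry in `inputs` refers to the name (or alias) of a previously defined node, which is how the arbitrary connections described above are expressed.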
## Table Of Contents - [Backbones](#backbones) - - [ResNet](#resnet) - - [MicroNet](#micronet) - - [RepVGG](#repvgg) - - [EfficientRep](#efficientrep) - - [RexNetV1_lite](#rexnetv1_lite) - - [MobileOne](#mobileone) - - [MobileNetV2](#mobilenetv2) - - [EfficientNet](#efficientnet) - - [ContextSpatial](#contextspatial) - - [DDRNet](#ddrnet) + - [`ResNet`](#resnet) + - [`MicroNet`](#micronet) + - [`RepVGG`](#repvgg) + - [`EfficientRep`](#efficientrep) + - [`RexNetV1_lite`](#rexnetv1_lite) + - [`MobileOne`](#mobileone) + - [`MobileNetV2`](#mobilenetv2) + - [`EfficientNet`](#efficientnet) + - [`ContextSpatial`](#contextspatial) + - [`DDRNet`](#ddrnet) - [Necks](#necks) - - [RepPANNeck](#reppanneck) + - [`RepPANNeck`](#reppanneck) - [Heads](#heads) - - [ClassificationHead](#classificationhead) - - [SegmentationHead](#segmentationhead) - - [BiSeNetHead](#bisenethead) - - [EfficientBBoxHead](#efficientbboxhead) - - [EfficientKeypointBBoxHead](#efficientkeypointbboxhead) - - [DDRNetSegmentationHead](#ddrnetsegmentationhead) + - [`ClassificationHead`](#classificationhead) + - [`SegmentationHead`](#segmentationhead) + - [`BiSeNetHead`](#bisenethead) + - [`EfficientBBoxHead`](#efficientbboxhead) + - [`EfficientKeypointBBoxHead`](#efficientkeypointbboxhead) + - [`DDRNetSegmentationHead`](#ddrnetsegmentationhead) Every node takes these parameters: -| Key | Type | Default value | Description | -| ---------------- | ----------- | ------------- | ---------------------------------------------------------------------------- | -| n_classes | int \| None | None | Number of classes in the dataset. Inferred from the dataset if not provided. | -| remove_on_export | bool | False | Whether node should be removed when exporting the whole model. | +| Key | Type | Default value | Description | +| ------------------ | ------------- | ------------- | --------------------------------------------------------------------------- | +| `n_classes` | `int \| None` | `None` | Number of classes in the dataset. Inferred from the dataset if not provided | +| `remove_on_export` | `bool` | `False` | Whether node should be removed when exporting the whole model | -In addition, the following class attributes can be overriden: +In addition, the following class attributes can be overridden: -| Key | Type | Default value | Description | -| ------------ | ------------------------------------------------------------------- | ------------- | --------------------------------------------------------------------------------------------------------------------------------------------- | -| attach_index | int \| "all" \| Tuple\[int, int\] \| Tuple\[int, int, int\] \| None | None | Index of previous output that the head attaches to. Each node has a sensible default. Usually should not be manually set in most cases. | -| tasks | List\[TaskType\] \| Dict\[TaskType, str\] \| None | None | Tasks supported by the node. Should be overriden for head nodes. Either a list of tasks or a dictionary mapping tasks to their default names. | +| Key | Type | Default value | Description | +| -------------- | ----------------------------------------------------------------- | ------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `attach_index` | `int \| "all" \| tuple[int, int] \| tuple[int, int, int] \| None` | `None` | Index of previous output that the head attaches to. 
Each node has a sensible default. Usually should not be manually set in most cases. Can be either a single index, a slice (negative indexing is also supported), or `"all"` | +| `tasks` | `list[TaskType] \| Dict[TaskType, str] \| None` | `None` | Tasks supported by the node. Should be overridden for head nodes. Either a list of tasks or a dictionary mapping tasks to their default names | Additional parameters for specific nodes are listed below. ## Backbones -### ResNet +### `ResNet` Adapted from [here](https://pytorch.org/vision/main/models/resnet.html). -**Params** +**Parameters:** -| Key | Type | Default value | Description | -| ---------------- | ----------------------------------------- | ------------- | -------------------------------------- | -| variant | Literal\["18", "34", "50", "101", "152"\] | "18" | Variant of the network. | -| download_weights | bool | False | If True download weights from imagenet | +| Key | Type | Default value | Description | +| ------------------ | ----------------------------------------- | ------------- | -------------------------------------- | +| `variant` | `Literal["18", "34", "50", "101", "152"]` | `"18"` | Variant of the network | +| `download_weights` | `bool` | `False` | If True download weights from ImageNet | -### MicroNet +### `MicroNet` Adapted from [here](https://github.com/liyunsheng13/micronet). -**Params** +**Parameters:** -| Key | Type | Default value | Description | -| ------- | --------------------------- | ------------- | ----------------------- | -| variant | Literal\["M1", "M2", "M3"\] | "M1" | Variant of the network. | +| Key | Type | Default value | Description | +| --------- | --------------------------- | ------------- | ---------------------- | +| `variant` | `Literal["M1", "M2", "M3"]` | `"M1"` | Variant of the network | -### RepVGG +### `RepVGG` Adapted from [here](https://github.com/DingXiaoH/RepVGG). -**Params** +**Parameters:** -| Key | Type | Default value | Description | -| ------- | --------------------------- | ------------- | ----------------------- | -| variant | Literal\["A0", "A1", "A2"\] | "A0" | Variant of the network. | +| Key | Type | Default value | Description | +| --------- | --------------------------- | ------------- | ---------------------- | +| `variant` | `Literal["A0", "A1", "A2"]` | `"A0"` | Variant of the network | -### EfficientRep +### `EfficientRep` Adapted from [here](https://arxiv.org/pdf/2209.02976.pdf). 
-**Params**
+**Parameters:**

-| Key           | Type                                                               | Default value               | Description                                                      |
-| ------------- | ------------------------------------------------------------------ | --------------------------- | ---------------------------------------------------------------- |
-| variant       | Literal\["n", "nano", "s", "small", "m", "medium", "l", "large"\] | "nano"                      | Variant of the network                                           |
-| channels_list | List\[int\]                                                        | \[64, 128, 256, 512, 1024\] | List of number of channels for each block                        |
-| n_repeats     | List\[int\]                                                        | \[1, 6, 12, 18, 6\]         | List of number of repeats of RepVGGBlock                         |
-| depth_mul     | float                                                              | 0.33                        | Depth multiplier                                                  |
-| width_mul     | float                                                              | 0.25                        | Width multiplier                                                  |
-| block         | Literal\["RepBlock", "CSPStackRepBlock"\]                          | "RepBlock"                  | Base block used                                                   |
-| csp_e         | float                                                              | 0.5                         | Factor for intermediate channels when block=="CSPStackRepBlock"  |
+| Key             | Type                                                               | Default value               | Description                                                                  |
+| --------------- | ------------------------------------------------------------------ | --------------------------- | ---------------------------------------------------------------------------- |
+| `variant`       | `Literal["n", "nano", "s", "small", "m", "medium", "l", "large"]` | `"nano"`                    | Variant of the network                                                       |
+| `channels_list` | `list[int]`                                                        | \[64, 128, 256, 512, 1024\] | List of number of channels for each block                                    |
+| `n_repeats`     | `list[int]`                                                        | \[1, 6, 12, 18, 6\]         | List of number of repeats of `RepVGGBlock`                                   |
+| `depth_mul`     | `float`                                                            | `0.33`                      | Depth multiplier                                                              |
+| `width_mul`     | `float`                                                            | `0.25`                      | Width multiplier                                                              |
+| `block`         | `Literal["RepBlock", "CSPStackRepBlock"]`                          | `"RepBlock"`                | Base block used                                                               |
+| `csp_e`         | `float`                                                            | `0.5`                       | Factor for intermediate channels when block is set to `"CSPStackRepBlock"`   |

 ### RexNetV1_lite

-Adapted from ([here](https://github.com/clovaai/rexnet).
+Adapted from [here](https://github.com/clovaai/rexnet).

-**Params**
+**Parameters:**

-| Key             | Type               | Default value | Description                   |
-| --------------- | ------------------ | ------------- | ----------------------------- |
-| fix_head_stem   | bool               | False         | Whether to multiply head stem |
-| divisible_value | int                | 8             | Divisor used                  |
-| input_ch        | int                | 16            | tarting channel dimension     |
-| final_ch        | int                | 164           | Final channel dimension       |
-| multiplier      | float              | 1.0           | Channel dimension multiplier  |
-| kernel_sizes    | int \| list\[int\] | 3             | Kernel sizes                  |
+| Key               | Type               | Default value | Description                    |
+| ----------------- | ------------------ | ------------- | ------------------------------ |
+| `fix_head_stem`   | `bool`             | `False`       | Whether to multiply head stem  |
+| `divisible_value` | `int`              | `8`           | Divisor used                   |
+| `input_ch`        | `int`              | `16`          | Starting channel dimension     |
+| `final_ch`        | `int`              | `164`         | Final channel dimension        |
+| `multiplier`      | `float`            | `1.0`         | Channel dimension multiplier   |
+| `kernel_sizes`    | `int \| list[int]` | `3`           | Kernel sizes                   |

-### MobileOne
+### `MobileOne`

 Adapted from [here](https://github.com/apple/ml-mobileone).

-**Params**
+**Parameters:**

-| Key     | Type                                     | Default value | Description             |
-| ------- | ---------------------------------------- | ------------- | ----------------------- |
-| variant | Literal\["s0", "s1", "s2", "s3", "s4"\] | "s0"          | Variant of the network.
| +| Key | Type | Default value | Description | +| --------- | --------------------------------------- | ------------- | ---------------------- | +| `variant` | `Literal["s0", "s1", "s2", "s3", "s4"]` | `"s0"` | Variant of the network | -### MobileNetV2 +### `MobileNetV2` Adapted from [here](https://pytorch.org/vision/main/models/generated/torchvision.models.mobilenet_v2.html). -**Params** +**Parameters:** -| Key | Type | Default value | Description | -| ---------------- | ---- | ------------- | -------------------------------------- | -| download_weights | bool | False | If True download weights from imagenet | +| Key | Type | Default value | Description | +| ------------------ | ------ | ------------- | -------------------------------------- | +| `download_weights` | `bool` | `False` | If True download weights from ImageNet | -### EfficientNet +### `EfficientNet` Adapted from [here](https://github.com/rwightman/gen-efficientnet-pytorch). -**Params** +**Parameters:** -| Key | Type | Default value | Description | -| ---------------- | ---- | ------------- | --------------------------------------- | -| download_weights | bool | False | If True download weights from imagenet. | +| Key | Type | Default value | Description | +| ------------------ | ------ | ------------- | -------------------------------------- | +| `download_weights` | `bool` | `False` | If True download weights from ImageNet | -### ContextSpatial +### `ContextSpatial` Adapted from [here](https://github.com/taveraantonio/BiseNetv1). -**Params** +**Parameters:** -| Key | Type | Default value | Description | -| ---------------- | ---- | ------------- | ------------- | -| context_backbone | str | "MobileNetV2" | Backbone used | +| Key | Type | Default value | Description | +| ------------------ | ----- | --------------- | ---------------------------------------------------------------------------------------------------- | +| `context_backbone` | `str` | `"MobileNetV2"` | Backbone used for the context path. Must be a reference to a node registered in the `NODES` registry | -### DDRNet +### `DDRNet` Adapted from [here](https://github.com/ydhongHIT/DDRNet) -**Params** +**Parameters:** -| Key | Type | Default value | Description | -| ------- | -------------------------- | ------------- | ----------------------- | -| variant | Literal\["23-slim", "23"\] | "23-slim" | Variant of the network. | +| Key | Type | Default value | Description | +| --------- | -------------------------- | ------------- | ---------------------- | +| `variant` | `Literal["23-slim", "23"]` | `"23-slim"` | Variant of the network | ## Neck -### RepPANNeck +### `RepPANNeck` Adapted from [here](https://arxiv.org/pdf/2209.02976.pdf). 
-**Params** +**Parameters:** -| Key | Type | Default value | Description | -| ------------- | ----------------------------------------------------------------- | ------------------------------------------------------- | --------------------------------------------------------------- | -| variant | Literal\["n", "nano", "s", "small", "m", "medium", "l", "large"\] | "nano" | Variant of the network | -| n_heads | Literal\[2,3,4\] | 3 ***Note:** Should be same also on head in most cases* | Number of output heads | -| channels_list | List\[int\] | \[256, 128, 128, 256, 256, 512\] | List of number of channels for each block | -| n_repeats | List\[int\] | \[12, 12, 12, 12\] | List of number of repeats of RepVGGBlock | -| depth_mul | float | 0.33 | Depth multiplier | -| width_mul | float | 0.25 | Width multiplier | -| block | Literal\["RepBlock", "CSPStackRepBlock"\] | "RepBlock" | Base block used | -| csp_e | float | 0.5 | Factor for intermediate channels when block=="CSPStackRepBlock" | +| Key | Type | Default value | Description | +| --------------- | ----------------------------------------------------------------- | -------------------------------- | ------------------------------------------------------------------------------- | +| `variant` | `Literal["n", "nano", "s", "small", "m", "medium", "l", "large"]` | `"nano"` | Variant of the network | +| `n_heads` | `Literal[2,3,4]` | `3` | Number of output heads. Should be same also on the connected head in most cases | +| `channels_list` | `list[int]` | `[256, 128, 128, 256, 256, 512]` | List of number of channels for each block | +| `n_repeats` | `list[int]` | `[12, 12, 12, 12]` | List of number of repeats of `RepVGGBlock` | +| `depth_mul` | `float` | `0.33` | Depth multiplier | +| `width_mul` | `float` | `0.25` | Width multiplier | +| `block` | `Literal["RepBlock", "CSPStackRepBlock"]` | `"RepBlock"` | Base block used | +| `csp_e` | `float` | `0.5` | Factor for intermediate channels when block is set to `"CSPStackRepBlock"` | ## Heads -### ClassificationHead +### `ClassificationHead` -**Params** +**Parameters:** -| Key | Type | Default value | Description | -| ---------- | ----- | ------------- | --------------------------------------------- | -| fc_dropout | float | 0.2 | Dropout rate before last layer, range \[0,1\] | +| Key | Type | Default value | Description | +| ------------ | ------- | ------------- | ------------------------------------------------ | +| `fc_dropout` | `float` | `0.2` | Dropout rate before last layer, range $\[0, 1\]$ | -### SegmentationHead +### `SegmentationHead` Adapted from [here](https://github.com/pytorch/vision/blob/main/torchvision/models/segmentation/fcn.py). -### BiSeNetHead +### `BiSeNetHead` Adapted from [here](https://github.com/taveraantonio/BiseNetv1). -**Params** +**Parameters:** -| Key | Type | Default value | Description | -| --------------------- | ---- | ------------- | -------------------------------------- | -| intermediate_channels | int | 64 | How many intermediate channels to use. | +| Key | Type | Default value | Description | +| ----------------------- | ----- | ------------- | ------------------------------------- | +| `intermediate_channels` | `int` | `64` | How many intermediate channels to use | -### EfficientBBoxHead +### `EfficientBBoxHead` Adapted from [here](https://arxiv.org/pdf/2209.02976.pdf). 
-**Params**
+**Parameters:**

-| Key        | Type  | Default value | Description                                         |
-| ---------- | ----- | ------------- | --------------------------------------------------- |
-| n_heads    | bool  | 3             | Number of output heads                              |
-| conf_thres | float | 0.25          | Confidence threshold for nms (used for evaluation)  |
-| iou_thres  | float | 0.45          | Iou threshold for nms (used for evaluation)         |
+| Key          | Type    | Default value | Description                                                             |
+| ------------ | ------- | ------------- | ------------------------------------------------------------------------ |
+| `n_heads`    | `int`   | `3`           | Number of output heads                                                    |
+| `conf_thres` | `float` | `0.25`        | Confidence threshold for non-maximum suppression (used for evaluation)   |
+| `iou_thres`  | `float` | `0.45`        | `IoU` threshold for non-maximum suppression (used for evaluation)        |

-### EfficientKeypointBBoxHead
+### `EfficientKeypointBBoxHead`

 Adapted from [here](https://arxiv.org/pdf/2207.02696.pdf).

-**Params**
+**Parameters:**

-| Key         | Type        | Default value | Description                                         |
-| ----------- | ----------- | ------------- | --------------------------------------------------- |
-| n_keypoints | int \| None | None          | Number of keypoints.                                |
-| n_heads     | int         | 3             | Number of output heads                              |
-| conf_thres  | float       | 0.25          | Confidence threshold for nms (used for evaluation)  |
-| iou_thres   | float       | 0.45          | Iou threshold for nms (used for evaluation)         |
+| Key           | Type          | Default value | Description                                                              |
+| ------------- | ------------- | ------------- | ------------------------------------------------------------------------- |
+| `n_keypoints` | `int \| None` | `None`        | Number of keypoints                                                        |
+| `n_heads`     | `int`         | `3`           | Number of output heads                                                     |
+| `conf_thres`  | `float`       | `0.25`        | Confidence threshold for non-maximum suppression (used for evaluation)    |
+| `iou_thres`   | `float`       | `0.45`        | `IoU` threshold for non-maximum suppression (used for evaluation)         |

-### DDRNetSegmentationHead
+### `DDRNetSegmentationHead`

 Adapted from [here](https://github.com/ydhongHIT/DDRNet).

-**Params**
+**Parameters:**

-| Key            | Type | Default value | Description                                                                                      |
-| -------------- | ---- | ------------- | ------------------------------------------------------------------------------------------------ |
-| inter_channels | int  | 64            | Width of internal conv. Must be a multiple of scale_factor^2 when inter_mode is pixel_shuffle.   |
-| inter_mode     | str  | "bilinear     | Upsampling method.                                                                                |
+| Key              | Type  | Default value | Description                                                                                                      |
+| ---------------- | ----- | ------------- | ------------------------------------------------------------------------------------------------------------------ |
+| `inter_channels` | `int` | `64`          | Width of internal convolutions. Must be a multiple of `scale_factor^2` when `inter_mode` is `"pixel_shuffle"`       |
+| `inter_mode`     | `str` | `"bilinear"`  | Up-sampling method.
One of `"nearest"`, `"linear"`, `"bilinear"`, `"bicubic"`, `"trilinear"`, `"area"`, `"pixel_shuffle"` | diff --git a/luxonis_train/utils/registry.py b/luxonis_train/utils/registry.py index 57ca0066..8044f13c 100644 --- a/luxonis_train/utils/registry.py +++ b/luxonis_train/utils/registry.py @@ -3,7 +3,7 @@ import lightning.pytorch as pl from luxonis_ml.utils.registry import Registry -from torch.optim.lr_scheduler import _LRScheduler +from torch.optim.lr_scheduler import LRScheduler from torch.optim.optimizer import Optimizer import luxonis_train as lt @@ -32,7 +32,7 @@ OPTIMIZERS: Registry[type[Optimizer]] = Registry(name="optimizers") """Registry for all optimizers.""" -SCHEDULERS: Registry[type[_LRScheduler]] = Registry(name="schedulers") +SCHEDULERS: Registry[type[LRScheduler]] = Registry(name="schedulers") """Registry for all schedulers.""" VISUALIZERS: Registry[type["lt.visualizers.BaseVisualizer"]] = Registry( diff --git a/media/coverage_badge.svg b/media/coverage_badge.svg deleted file mode 100644 index ee07d4c2..00000000 --- a/media/coverage_badge.svg +++ /dev/null @@ -1,21 +0,0 @@ - - - - - - - - - - - - - - - - coverage - coverage - 96% - 96% - - diff --git a/media/pybadge.svg b/media/pybadge.svg deleted file mode 100644 index 983d6f42..00000000 --- a/media/pybadge.svg +++ /dev/null @@ -1 +0,0 @@ -pythonpython3.10 | 3.113.10 | 3.11 \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index 67dc3d16..39c11a92 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -12,6 +12,8 @@ classifiers = [ "License :: OSI Approved :: Apache Software License", "Development Status :: 4 - Beta", "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", "Topic :: Scientific/Engineering :: Artificial Intelligence", "Topic :: Scientific/Engineering :: Image Processing", "Topic :: Scientific/Engineering :: Image Recognition", @@ -47,6 +49,7 @@ select = ["E4", "E7", "E9", "F", "W", "B", "I"] [tool.docformatter] black = true +style = "epytext" wrap-summaries = 72 wrap-descriptions = 72 diff --git a/tests/configs/ddrnet.yaml b/tests/configs/ddrnet.yaml index e5c7ea9f..542fc0f6 100644 --- a/tests/configs/ddrnet.yaml +++ b/tests/configs/ddrnet.yaml @@ -21,12 +21,12 @@ model: name: CrossEntropyLoss trainer: preprocessing: - train_image_size: - - &height 128 - - &width 128 - keep_aspect_ratio: False + train_image_size: + - 128 + - 128 + keep_aspect_ratio: false normalize: - active: True + active: true batch_size: 2 epochs: &epochs 1 diff --git a/tests/configs/parking_lot_config.yaml b/tests/configs/parking_lot_config.yaml index 78711178..5cda65c1 100644 --- a/tests/configs/parking_lot_config.yaml +++ b/tests/configs/parking_lot_config.yaml @@ -104,7 +104,7 @@ model: tracker: project_name: Parking_Lot - is_tensorboard: True + is_tensorboard: true loader: train_view: val @@ -118,23 +118,23 @@ trainer: n_sanity_val_steps: 1 profiler: null - verbose: True + verbose: true batch_size: 2 accumulate_grad_batches: 1 epochs: 200 n_workers: 8 validation_interval: 10 n_log_images: 8 - skip_last_batch: True - log_sub_losses: True + skip_last_batch: true + log_sub_losses: true save_top_k: 3 preprocessing: train_image_size: [256, 320] - keep_aspect_ratio: False - train_rgb: True + keep_aspect_ratio: false + train_rgb: true normalize: - active: True + active: true augmentations: - name: Defocus params: diff --git a/tests/configs/segmentation_parse_loader.yaml b/tests/configs/segmentation_parse_loader.yaml index 14814571..178a89cb 100644 
--- a/tests/configs/segmentation_parse_loader.yaml +++ b/tests/configs/segmentation_parse_loader.yaml @@ -16,9 +16,9 @@ loader: trainer: preprocessing: train_image_size: [&height 128, &width 128] - keep_aspect_ratio: False + keep_aspect_ratio: false normalize: - active: True + active: true batch_size: 4 epochs: &epochs 1 From e5250549ec9709b5a67f339104df8a96923a2873 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Kozlovsk=C3=BD?= Date: Wed, 9 Oct 2024 01:40:03 +0200 Subject: [PATCH 6/6] Inference Fix (#100) --- .github/workflows/ci.yaml | 3 + .gitignore | 1 + .../attached_modules/base_attached_module.py | 9 +- .../metrics/object_keypoint_similarity.py | 1 - .../visualizers/base_visualizer.py | 2 +- .../visualizers/bbox_visualizer.py | 25 +- .../visualizers/classification_visualizer.py | 32 ++- .../visualizers/keypoint_visualizer.py | 21 +- .../visualizers/multi_visualizer.py | 6 +- .../visualizers/segmentation_visualizer.py | 28 +- .../attached_modules/visualizers/utils.py | 6 +- luxonis_train/core/core.py | 35 +-- luxonis_train/core/utils/infer_utils.py | 254 ++++++++++-------- luxonis_train/core/utils/tune_utils.py | 4 +- luxonis_train/models/luxonis_lightning.py | 1 - .../nodes/backbones/mobileone/mobileone.py | 2 +- .../nodes/backbones/repvgg/repvgg.py | 8 +- luxonis_train/nodes/blocks/blocks.py | 2 +- tests/integration/test_simple.py | 65 ++++- 19 files changed, 306 insertions(+), 199 deletions(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 847601c3..0d7dbdc7 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -43,6 +43,9 @@ jobs: with: ref: ${{ github.head_ref }} + - name: Install pre-commit + run: python -m pip install 'pre-commit<4.0.0' + - name: Run pre-commit uses: pre-commit/action@v3.0.1 diff --git a/.gitignore b/.gitignore index 03ba884c..36d3b3e3 100644 --- a/.gitignore +++ b/.gitignore @@ -153,4 +153,5 @@ mlruns wandb tests/_data tests/integration/save-directory +tests/integration/infer-save-directory data diff --git a/luxonis_train/attached_modules/base_attached_module.py b/luxonis_train/attached_modules/base_attached_module.py index a4ac8e8f..c65a4b7d 100644 --- a/luxonis_train/attached_modules/base_attached_module.py +++ b/luxonis_train/attached_modules/base_attached_module.py @@ -275,7 +275,7 @@ def get_input_tensors( return inputs[self.node_tasks[self.required_labels[0]]] def prepare( - self, inputs: Packet[Tensor], labels: Labels + self, inputs: Packet[Tensor], labels: Labels | None ) -> tuple[Unpack[Ts]]: """Prepares node outputs for the forward pass of the module. @@ -287,8 +287,9 @@ def prepare( @type inputs: L{Packet}[Tensor] @param inputs: Output from the node, inputs to the attached module. - @type labels: L{Labels} - @param labels: Labels from the dataset. + @type labels: L{Labels} | None + @param labels: Labels from the dataset. If not provided, empty labels are used. + This is useful in visualizers for working with standalone images. @rtype: tuple[Unpack[Ts]] @return: Prepared inputs. 
Should allow the following usage with the @@ -325,6 +326,8 @@ def prepare( set(self.supported_tasks) & set(self.node_tasks) ) x = self.get_input_tensors(inputs) + if labels is None: + return x, None # type: ignore label, task_type = self._get_label(labels) if task_type in [TaskType.CLASSIFICATION, TaskType.SEGMENTATION]: if len(x) == 1: diff --git a/luxonis_train/attached_modules/metrics/object_keypoint_similarity.py b/luxonis_train/attached_modules/metrics/object_keypoint_similarity.py index 248ebe10..d291e7e0 100644 --- a/luxonis_train/attached_modules/metrics/object_keypoint_similarity.py +++ b/luxonis_train/attached_modules/metrics/object_keypoint_similarity.py @@ -36,7 +36,6 @@ class ObjectKeypointSimilarity( def __init__( self, - n_keypoints: int | None = None, sigmas: list[float] | None = None, area_factor: float | None = None, use_cocoeval_oks: bool = True, diff --git a/luxonis_train/attached_modules/visualizers/base_visualizer.py b/luxonis_train/attached_modules/visualizers/base_visualizer.py index 817a09d5..a02aa933 100644 --- a/luxonis_train/attached_modules/visualizers/base_visualizer.py +++ b/luxonis_train/attached_modules/visualizers/base_visualizer.py @@ -66,7 +66,7 @@ def run( label_canvas: Tensor, prediction_canvas: Tensor, inputs: Packet[Tensor], - labels: Labels, + labels: Labels | None, ) -> Tensor | tuple[Tensor, Tensor] | tuple[Tensor, list[Tensor]]: return self( label_canvas, prediction_canvas, *self.prepare(inputs, labels) diff --git a/luxonis_train/attached_modules/visualizers/bbox_visualizer.py b/luxonis_train/attached_modules/visualizers/bbox_visualizer.py index 8dded134..87a3fd2d 100644 --- a/luxonis_train/attached_modules/visualizers/bbox_visualizer.py +++ b/luxonis_train/attached_modules/visualizers/bbox_visualizer.py @@ -173,8 +173,8 @@ def forward( label_canvas: Tensor, prediction_canvas: Tensor, predictions: list[Tensor], - targets: Tensor, - ) -> tuple[Tensor, Tensor]: + targets: Tensor | None, + ) -> tuple[Tensor, Tensor] | Tensor: """Creates a visualization of the bounding box predictions and labels. @@ -189,26 +189,29 @@ def forward( @type targets: Tensor @param targets: The target bounding boxes. 
""" - targets_viz = self.draw_targets( - label_canvas, - targets, - color_dict=self.colors, + predictions_viz = self.draw_predictions( + prediction_canvas, + predictions, label_dict=self.bbox_labels, + color_dict=self.colors, draw_labels=self.draw_labels, fill=self.fill, font=self.font, font_size=self.font_size, width=self.width, ) - predictions_viz = self.draw_predictions( - prediction_canvas, - predictions, - label_dict=self.bbox_labels, + if targets is None: + return predictions_viz + + targets_viz = self.draw_targets( + label_canvas, + targets, color_dict=self.colors, + label_dict=self.bbox_labels, draw_labels=self.draw_labels, fill=self.fill, font=self.font, font_size=self.font_size, width=self.width, ) - return targets_viz, predictions_viz.to(targets_viz.device) + return targets_viz, predictions_viz diff --git a/luxonis_train/attached_modules/visualizers/classification_visualizer.py b/luxonis_train/attached_modules/visualizers/classification_visualizer.py index 91096f54..3ba5ce8c 100644 --- a/luxonis_train/attached_modules/visualizers/classification_visualizer.py +++ b/luxonis_train/attached_modules/visualizers/classification_visualizer.py @@ -5,6 +5,7 @@ from torch import Tensor from luxonis_train.enums import TaskType +from luxonis_train.utils import Labels, Packet from .base_visualizer import BaseVisualizer from .utils import figure_to_torch, numpy_to_torch_img, torch_img_to_numpy @@ -56,29 +57,38 @@ def _generate_plot( ax.grid(True) return figure_to_torch(fig, width, height) + def prepare( + self, inputs: Packet[Tensor], labels: Labels | None + ) -> tuple[Tensor, Tensor]: + predictions, targets = super().prepare(inputs, labels) + if isinstance(predictions, list): + predictions = predictions[0] + return predictions, targets + def forward( self, label_canvas: Tensor, prediction_canvas: Tensor, predictions: Tensor, - labels: Tensor, + targets: Tensor | None, ) -> Tensor | tuple[Tensor, Tensor]: overlay = torch.zeros_like(label_canvas) plots = torch.zeros_like(prediction_canvas) for i in range(len(overlay)): prediction = predictions[i] - gt = self._get_class_name(labels[i]) arr = torch_img_to_numpy(label_canvas[i].clone()) curr_class = self._get_class_name(prediction) - arr = cv2.putText( - arr, - f"GT: {gt}", - (5, 10), - cv2.FONT_HERSHEY_SIMPLEX, - self.font_scale, - self.color, - self.thickness, - ) + if targets is not None: + gt = self._get_class_name(targets[i]) + arr = cv2.putText( + arr, + f"GT: {gt}", + (5, 10), + cv2.FONT_HERSHEY_SIMPLEX, + self.font_scale, + self.color, + self.thickness, + ) arr = cv2.putText( arr, f"Pred: {curr_class}", diff --git a/luxonis_train/attached_modules/visualizers/keypoint_visualizer.py b/luxonis_train/attached_modules/visualizers/keypoint_visualizer.py index da4ddc7c..8c7252ee 100644 --- a/luxonis_train/attached_modules/visualizers/keypoint_visualizer.py +++ b/luxonis_train/attached_modules/visualizers/keypoint_visualizer.py @@ -94,16 +94,9 @@ def forward( label_canvas: Tensor, prediction_canvas: Tensor, predictions: list[Tensor], - targets: Tensor, + targets: Tensor | None, **kwargs, - ) -> tuple[Tensor, Tensor]: - target_viz = self.draw_targets( - label_canvas, - targets, - colors=self.visible_color, - connectivity=self.connectivity, - **kwargs, - ) + ) -> tuple[Tensor, Tensor] | Tensor: pred_viz = self.draw_predictions( prediction_canvas, predictions, @@ -113,4 +106,14 @@ def forward( visibility_threshold=self.visibility_threshold, **kwargs, ) + if targets is None: + return pred_viz + + target_viz = self.draw_targets( + label_canvas, + 
targets, + colors=self.visible_color, + connectivity=self.connectivity, + **kwargs, + ) return target_viz, pred_viz diff --git a/luxonis_train/attached_modules/visualizers/multi_visualizer.py b/luxonis_train/attached_modules/visualizers/multi_visualizer.py index b7ecbfbb..dd884c8d 100644 --- a/luxonis_train/attached_modules/visualizers/multi_visualizer.py +++ b/luxonis_train/attached_modules/visualizers/multi_visualizer.py @@ -42,8 +42,8 @@ def forward( label_canvas: Tensor, prediction_canvas: Tensor, outputs: Packet[Tensor], - labels: Labels, - ) -> tuple[Tensor, Tensor]: + labels: Labels | None, + ) -> tuple[Tensor, Tensor] | Tensor: for visualizer in self.visualizers: match visualizer.run( label_canvas, prediction_canvas, outputs, labels @@ -57,4 +57,6 @@ def forward( raise NotImplementedError( "Unexpected return type from visualizer." ) + if labels is None: + return prediction_canvas return label_canvas, prediction_canvas diff --git a/luxonis_train/attached_modules/visualizers/segmentation_visualizer.py b/luxonis_train/attached_modules/visualizers/segmentation_visualizer.py index e1b22e0d..7194a506 100644 --- a/luxonis_train/attached_modules/visualizers/segmentation_visualizer.py +++ b/luxonis_train/attached_modules/visualizers/segmentation_visualizer.py @@ -4,6 +4,7 @@ from torch import Tensor from luxonis_train.enums import TaskType +from luxonis_train.utils import Labels, Packet from .base_visualizer import BaseVisualizer from .utils import ( @@ -95,14 +96,22 @@ def draw_targets( return viz + def prepare( + self, inputs: Packet[Tensor], labels: Labels | None + ) -> tuple[Tensor, Tensor]: + predictions, targets = super().prepare(inputs, labels) + if isinstance(predictions, list): + predictions = predictions[0] + return predictions, targets + def forward( self, label_canvas: Tensor, prediction_canvas: Tensor, predictions: Tensor, - targets: Tensor, + targets: Tensor | None, **kwargs, - ) -> tuple[Tensor, Tensor]: + ) -> tuple[Tensor, Tensor] | Tensor: """Creates a visualization of the segmentation predictions and labels. @@ -118,18 +127,21 @@ def forward( @return: A tuple of the label and prediction visualizations. 
""" - targets_vis = self.draw_targets( - label_canvas, - targets, + predictions_vis = self.draw_predictions( + prediction_canvas, + predictions, colors=self.colors, alpha=self.alpha, background_class=self.background_class, background_color=self.background_color, **kwargs, ) - predictions_vis = self.draw_predictions( - prediction_canvas, - predictions, + if targets is None: + return predictions_vis + + targets_vis = self.draw_targets( + label_canvas, + targets, colors=self.colors, alpha=self.alpha, background_class=self.background_class, diff --git a/luxonis_train/attached_modules/visualizers/utils.py b/luxonis_train/attached_modules/visualizers/utils.py index 76478421..a8965020 100644 --- a/luxonis_train/attached_modules/visualizers/utils.py +++ b/luxonis_train/attached_modules/visualizers/utils.py @@ -232,11 +232,7 @@ def get_unnormalized_images(cfg: Config, inputs: dict[str, Tensor]) -> Tensor: if cfg.trainer.preprocessing.normalize.active: mean = normalize_params.get("mean", [0.485, 0.456, 0.406]) std = normalize_params.get("std", [0.229, 0.224, 0.225]) - return preprocess_images( - images, - mean=mean, - std=std, - ) + return preprocess_images(images, mean=mean, std=std) def number_to_hsl(seed: int) -> tuple[float, float, float]: diff --git a/luxonis_train/core/core.py b/luxonis_train/core/core.py index 46405b0c..04f006d5 100644 --- a/luxonis_train/core/core.py +++ b/luxonis_train/core/core.py @@ -37,9 +37,9 @@ from .utils.infer_utils import ( IMAGE_FORMATS, VIDEO_FORMATS, - process_dataset_images, - process_images, - process_video, + infer_from_dataset, + infer_from_directory, + infer_from_video, ) from .utils.train_utils import create_trainer @@ -466,25 +466,30 @@ def infer( weights = weights or self.cfg.model.weights with replace_weights(self.lightning_module, weights): - if source_path: - source_path_obj = Path(source_path) - if source_path_obj.suffix.lower() in VIDEO_FORMATS: - process_video(self, source_path_obj, view, save_dir) - elif source_path_obj.is_file(): - process_images(self, [source_path_obj], view, save_dir) - elif source_path_obj.is_dir(): - image_files = [ + if save_dir is not None: + save_dir = Path(save_dir) + save_dir.mkdir(parents=True, exist_ok=True) + if source_path is not None: + source_path = Path(source_path) + if source_path.suffix.lower() in VIDEO_FORMATS: + infer_from_video( + self, video_path=source_path, save_dir=save_dir + ) + elif source_path.is_file(): + infer_from_directory(self, [source_path], save_dir) + elif source_path.is_dir(): + image_files = ( f - for f in source_path_obj.iterdir() + for f in source_path.iterdir() if f.suffix.lower() in IMAGE_FORMATS - ] - process_images(self, image_files, view, save_dir) + ) + infer_from_directory(self, image_files, save_dir) else: raise ValueError( f"Source path {source_path} is not a valid file or directory." 
) else: - process_dataset_images(self, view, save_dir) + infer_from_dataset(self, view, save_dir) def tune(self) -> None: """Runs Optuna tunning of hyperparameters.""" diff --git a/luxonis_train/core/utils/infer_utils.py b/luxonis_train/core/utils/infer_utils.py index 0240e5fc..ffeaa1cb 100644 --- a/luxonis_train/core/utils/infer_utils.py +++ b/luxonis_train/core/utils/infer_utils.py @@ -1,14 +1,15 @@ from collections import defaultdict +from collections.abc import Iterable from pathlib import Path +from typing import Literal import cv2 import numpy as np import torch -import tqdm from torch import Tensor +import luxonis_train from luxonis_train.attached_modules.visualizers import get_unnormalized_images -from luxonis_train.enums import TaskType IMAGE_FORMATS = { ".bmp", @@ -22,51 +23,33 @@ ".mpo", ".pfm", } -VIDEO_FORMATS = {".mp4", ".mov", ".avi", ".mkv"} +VIDEO_FORMATS = {".mp4", ".mov", ".avi", ".mkv", ".webm"} -def render_visualizations( - visualizations: dict[str, dict[str, Tensor]], - save_dir: str | Path | None, - show: bool = True, -) -> dict[str, list[np.ndarray]]: +def process_visualizations( + visualizations: dict[str, dict[str, Tensor]], batch_size: int +) -> dict[tuple[str, str], list[np.ndarray]]: """Render or save visualizations.""" - save_dir = Path(save_dir) if save_dir is not None else None - if save_dir is not None: - save_dir.mkdir(exist_ok=True, parents=True) - - rendered_visualizations = defaultdict(list) - i = 0 - for node_name, vzs in visualizations.items(): - for viz_name, viz_batch in vzs.items(): - for i, viz in enumerate(viz_batch): + renders = defaultdict(list) + + for i in range(batch_size): + for node_name, vzs in visualizations.items(): + for viz_name, viz_batch in vzs.items(): + viz = viz_batch[i] viz_arr = viz.detach().cpu().numpy().transpose(1, 2, 0) viz_arr = cv2.cvtColor(viz_arr, cv2.COLOR_RGB2BGR) - name = f"{node_name}/{viz_name}/{i}" - if save_dir is not None: - name = name.replace("/", "_") - cv2.imwrite(str(save_dir / f"{name}_{i}.png"), viz_arr) - i += 1 - elif show: - cv2.imshow(name, viz_arr) - else: - rendered_visualizations[name].append(viz_arr) - - if save_dir is None and show: - if cv2.waitKey(0) == ord("q"): - exit() + renders[(node_name, viz_name)].append(viz_arr) - return rendered_visualizations + return renders -def prepare_and_infer_image(model, img: np.ndarray, labels: dict, view: str): +def prepare_and_infer_image( + model: "luxonis_train.core.LuxonisModel", + img: np.ndarray, +): """Prepares the image for inference and runs the model.""" img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) - img, _ = ( - model.train_augmentations([(img, {})]) - if view == "train" - else model.val_augmentations([(img, {})]) - ) + img, _ = model.val_augmentations([(img, {})]) inputs = { "image": torch.tensor(img).unsqueeze(0).permute(0, 3, 1, 2).float() @@ -74,112 +57,149 @@ def prepare_and_infer_image(model, img: np.ndarray, labels: dict, view: str): images = get_unnormalized_images(model.cfg, inputs) outputs = model.lightning_module.forward( - inputs, labels, images=images, compute_visualizations=True + inputs, images=images, compute_visualizations=True ) return outputs -def process_video( - model, +def window_closed() -> bool: # pragma: no cover + return cv2.waitKey(0) in {27, ord("q")} + + +def infer_from_video( + model: "luxonis_train.core.LuxonisModel", video_path: str | Path, - view: str, - save_dir: str | Path | None, - show: bool = False, + save_dir: Path | None, ) -> None: - """Handles inference on a video.""" - cap = 
cv2.VideoCapture(filename=str(video_path))  # type: ignore
-    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
-    progress_bar = tqdm.tqdm(
-        total=total_frames, position=0, leave=True, desc="Processing video"
-    )
+    """Runs inference on individual frames from a video.
+
+    @type model: L{LuxonisModel}
+    @param model: The model to use for inference.
+    @type video_path: str | Path
+    @param video_path: The path to the video.
+    @type save_dir: Path | None
+    @param save_dir: The directory to save the visualizations to.
+        If C{None}, the visualizations are displayed
+        in a window instead of being saved.
+    """

-    if save_dir is not None:
-        out_writers = {}
-        save_dir = Path(save_dir)
-        save_dir.mkdir(exist_ok=True, parents=True)
+    cap = cv2.VideoCapture(filename=str(video_path))  # type: ignore

-    labels = create_dummy_labels(
-        model, view, (int(cap.get(4)), int(cap.get(3)), 3)
-    )
+    writers: dict[str, cv2.VideoWriter] = {}

     while cap.isOpened():
         ret, frame = cap.read()
-        if not ret:
+        if not ret:  # pragma: no cover
             break

-        outputs = prepare_and_infer_image(model, frame, labels, view)
-        rendered_visualizations = render_visualizations(
-            outputs.visualizations, None, show
-        )
-        if save_dir is not None:
-            for name, viz_arrs in rendered_visualizations.items():
-                if name not in out_writers:
-                    out_writers[name] = cv2.VideoWriter(
-                        filename=str(  # type: ignore
-                            save_dir / f"{name.replace('/', '-')}.mp4"
-                        ),
+        # TODO: batched inference
+        outputs = prepare_and_infer_image(model, frame)
+        renders = process_visualizations(outputs.visualizations, batch_size=1)
+
+        for (node_name, viz_name), [viz] in renders.items():
+            if save_dir is not None:
+                name = f"{node_name}_{viz_name}"
+                if name not in writers:
+                    w, h = viz.shape[1], viz.shape[0]
+                    writers[name] = cv2.VideoWriter(
+                        filename=str(save_dir / f"{name}.mp4"),  # type: ignore
                         fourcc=cv2.VideoWriter_fourcc(*"mp4v"),  # type: ignore
                         fps=cap.get(cv2.CAP_PROP_FPS),  # type: ignore
-                        frameSize=(viz_arrs[0].shape[1], viz_arrs[0].shape[0]),  # type: ignore
-                    )  # type: ignore
-                for viz_arr in viz_arrs:
-                    out_writers[name].write(viz_arr)
-
-        progress_bar.update(1)
-
-    if save_dir is not None:
-        for writer in out_writers.values():
-            writer.release()
+                        frameSize=(w, h),  # type: ignore
+                    )
+                if name in writers:
+                    writers[name].write(viz)
+            else:  # pragma: no cover
+                cv2.imshow(f"{node_name}/{viz_name}", viz)
+
+        if not save_dir and window_closed():  # pragma: no cover
+            break

     cap.release()
-    progress_bar.close()
+    cv2.destroyAllWindows()
+    for writer in writers.values():
+        writer.release()


-def process_images(
-    model, img_paths: list[Path], view: str, save_dir: str | Path | None
+def infer_from_directory(
+    model: "luxonis_train.core.LuxonisModel",
+    img_paths: Iterable[Path],
+    save_dir: Path | None,
 ) -> None:
-    """Handles inference on one or more images."""
-    first_image = cv2.cvtColor(
-        cv2.imread(str(img_paths[0])), cv2.COLOR_BGR2RGB
-    )
-    labels = create_dummy_labels(model, view, first_image.shape)
+    """Runs inference on individual images from a directory.
+
+    @type model: L{LuxonisModel}
+    @param model: The model to use for inference.
+    @type img_paths: Iterable[Path]
+    @param img_paths: Iterable of paths to the images.
+    @type save_dir: Path | None
+    @param save_dir: The directory to save the visualizations to.
+ """ for img_path in img_paths: img = cv2.imread(str(img_path)) - outputs = prepare_and_infer_image(model, img, labels, view) - render_visualizations(outputs.visualizations, save_dir) + outputs = prepare_and_infer_image(model, img) + renders = process_visualizations(outputs.visualizations, batch_size=1) + + for (node_name, viz_name), [viz] in renders.items(): + if save_dir is not None: + cv2.imwrite( + str( + save_dir + / f"{img_path.stem}_{node_name}_{viz_name}.png" + ), + viz, + ) + else: # pragma: no cover + cv2.imshow(f"{node_name}/{viz_name}", viz) + + if not save_dir and window_closed(): # pragma: no cover + break + + cv2.destroyAllWindows() -def process_dataset_images( - model, view: str, save_dir: str | Path | None +def infer_from_dataset( + model: "luxonis_train.core.LuxonisModel", + view: Literal["train", "val", "test"], + save_dir: Path | None, ) -> None: - """Handles the inference on dataset images.""" - for inputs, labels in model.pytorch_loaders[view]: + """Runs inference on images from the dataset. + + @type model: L{LuxonisModel} + @param model: The model to use for inference. + @type view: Literal["train", "val", "test"] + @param view: The view of the dataset to use. + @type save_dir: str | Path | None + @param save_dir: The directory to save the visualizations to. + """ + broken = False + for i, (inputs, labels) in enumerate(model.pytorch_loaders[view]): + if broken: # pragma: no cover + break + images = get_unnormalized_images(model.cfg, inputs) + batch_size = images.shape[0] outputs = model.lightning_module.forward( inputs, labels, images=images, compute_visualizations=True ) - render_visualizations(outputs.visualizations, save_dir) - - -def create_dummy_labels(model, view: str, img_shape: tuple) -> dict: - """Prepares the labels for different tasks (classification, - keypoints, etc.).""" - tasks = list(model.loaders["train"].get_classes().keys()) - h, w, _ = img_shape - labels = {} - - for task in tasks: - if task == "classification": - labels[task] = [-1, TaskType.CLASSIFICATION] - elif task == "keypoints": - nk = model.loaders[view].get_n_keypoints()["keypoints"] - labels[task] = [torch.zeros((1, nk * 3 + 2)), TaskType.KEYPOINTS] - elif task == "segmentation": - labels[task] = [torch.zeros((1, h, w)), TaskType.SEGMENTATION] - elif task == "boundingbox": - labels[task] = [ - torch.tensor([[-1, 0, 0, 0, 0, 0]]), - TaskType.BOUNDINGBOX, - ] - - return labels + renders = process_visualizations( + outputs.visualizations, + batch_size=batch_size, + ) + for j in range(batch_size): + for (node_name, viz_name), visualizations in renders.items(): + viz = visualizations[j] + if save_dir is not None: + name = f"{node_name}_{viz_name}" + cv2.imwrite( + str(save_dir / f"{name}_{i * batch_size + j}.png"), viz + ) + else: + cv2.imshow(f"{node_name}/{viz_name}", viz) + + if not save_dir and window_closed(): # pragma: no cover + broken = True + break + + cv2.destroyAllWindows() diff --git a/luxonis_train/core/utils/tune_utils.py b/luxonis_train/core/utils/tune_utils.py index d9d6c4c0..ead8a3dd 100644 --- a/luxonis_train/core/utils/tune_utils.py +++ b/luxonis_train/core/utils/tune_utils.py @@ -12,7 +12,7 @@ def _augs_to_indices(all_augs: list[str], aug_names: list[str]) -> list[int]: aug_indices = [] for aug_name in aug_names: if aug_name == "Normalize": - logger.warn( + logger.warning( f"'{aug_name}' should be tuned directly by adding '...normalize.active_categorical' to the tuner params, skipping." 
) continue @@ -20,7 +20,7 @@ def _augs_to_indices(all_augs: list[str], aug_names: list[str]) -> list[int]: index = all_augs.index(aug_name) aug_indices.append(index) except ValueError: - logger.warn( + logger.warning( f"Augmentation '{aug_name}' not found under trainer augemntations, skipping." ) continue diff --git a/luxonis_train/models/luxonis_lightning.py b/luxonis_train/models/luxonis_lightning.py index 459b20d1..2ca59117 100644 --- a/luxonis_train/models/luxonis_lightning.py +++ b/luxonis_train/models/luxonis_lightning.py @@ -415,7 +415,6 @@ def forward( compute_visualizations and node_name in self.visualizers and images is not None - and labels is not None ): for viz_name, visualizer in self.visualizers[ node_name diff --git a/luxonis_train/nodes/backbones/mobileone/mobileone.py b/luxonis_train/nodes/backbones/mobileone/mobileone.py index 8180f960..2047e474 100644 --- a/luxonis_train/nodes/backbones/mobileone/mobileone.py +++ b/luxonis_train/nodes/backbones/mobileone/mobileone.py @@ -67,7 +67,7 @@ def __init__( @type n_conv_branches: int | None @param n_conv_branches: Number of linear convolution branches in MobileOne block. If provided, overrides the variant values. @type use_se: bool | None - @param use_se: Whether to use SE blocks in the network. If provided, overrides the variant value. + @param use_se: Whether to use C{Squeeze-and-Excitation} blocks in the network. If provided, overrides the variant value. """ super().__init__(**kwargs) diff --git a/luxonis_train/nodes/backbones/repvgg/repvgg.py b/luxonis_train/nodes/backbones/repvgg/repvgg.py index fd8a5e67..ac1407a2 100644 --- a/luxonis_train/nodes/backbones/repvgg/repvgg.py +++ b/luxonis_train/nodes/backbones/repvgg/repvgg.py @@ -46,16 +46,16 @@ def __init__( @type variant: Literal["A0", "A1", "A2"] @param variant: RepVGG model variant. Defaults to "A0". + @type n_blocks: tuple[int, int, int, int] | None + @param n_blocks: Number of blocks in each stage. + @type width_multiplier: tuple[float, float, float, float] | None + @param width_multiplier: Width multiplier for each stage. @type override_groups_map: dict[int, int] | None @param override_groups_map: Dictionary mapping layer index to number of groups. The layers are indexed starting from 0. @type use_se: bool @param use_se: Whether to use Squeeze-and-Excitation blocks. @type use_checkpoint: bool @param use_checkpoint: Whether to use checkpointing. - @type n_blocks: tuple[int, int, int, int] | None - @param n_blocks: Number of blocks in each stage. - @type width_multiplier: tuple[float, float, float, float] | None - @param width_multiplier: Width multiplier for each stage. """ super().__init__(**kwargs) var = get_variant(variant) diff --git a/luxonis_train/nodes/blocks/blocks.py b/luxonis_train/nodes/blocks/blocks.py index 5059c651..25bea7c5 100644 --- a/luxonis_train/nodes/blocks/blocks.py +++ b/luxonis_train/nodes/blocks/blocks.py @@ -862,7 +862,7 @@ class DropPath(nn.Module): ... def forward(self, x): ... 
return x + self.drop_path(self.conv_bn_act(x)) - @see U{Original code (TIMM) } + @see: U{Original code (TIMM) } @license: U{Apache License 2.0 } """ diff --git a/tests/integration/test_simple.py b/tests/integration/test_simple.py index 3d489c4a..d4c5b46f 100644 --- a/tests/integration/test_simple.py +++ b/tests/integration/test_simple.py @@ -6,8 +6,9 @@ from pathlib import Path from typing import Any +import cv2 import pytest -from luxonis_ml.data import LuxonisDataset +from luxonis_ml.data import LuxonisDataset, LuxonisLoader from luxonis_ml.utils import environ from luxonis_train.core import LuxonisModel @@ -19,6 +20,14 @@ STUDY_PATH = Path("study_local.db") +@pytest.fixture +def infer_path() -> Path: + if INFER_PATH.exists(): + shutil.rmtree(INFER_PATH) + INFER_PATH.mkdir() + return INFER_PATH + + @pytest.fixture def opts(test_output_dir: Path) -> dict[str, Any]: return { @@ -33,11 +42,9 @@ def opts(test_output_dir: Path) -> dict[str, Any]: @pytest.fixture(scope="function", autouse=True) def clear_files(): - # todo yield STUDY_PATH.unlink(missing_ok=True) ONNX_PATH.unlink(missing_ok=True) - shutil.rmtree(INFER_PATH, ignore_errors=True) @pytest.mark.parametrize( @@ -72,7 +79,7 @@ def test_predefined_models( model.test() -def test_multi_input(opts: dict[str, Any]): +def test_multi_input(opts: dict[str, Any], infer_path: Path): config_file = "tests/configs/multi_input.yaml" model = LuxonisModel(config_file, opts) model.train() @@ -82,9 +89,9 @@ def test_multi_input(opts: dict[str, Any]): model.export(str(ONNX_PATH)) assert ONNX_PATH.exists() - assert not INFER_PATH.exists() - model.infer(view="val", save_dir=INFER_PATH) - assert INFER_PATH.exists() + assert len(list(infer_path.iterdir())) == 0 + model.infer(view="val", save_dir=infer_path) + assert infer_path.exists() def test_custom_tasks( @@ -149,6 +156,46 @@ def test_tune(opts: dict[str, Any], coco_dataset: LuxonisDataset): assert STUDY_PATH.exists() +def test_infer(coco_dataset: LuxonisDataset, infer_path: Path): + loader = LuxonisLoader(coco_dataset) + img_dir = Path("tests/data/img_dir") + video_writer = cv2.VideoWriter( + "tests/data/video.avi", # type: ignore + cv2.VideoWriter_fourcc(*"XVID"), + 1, + (256, 256), + ) + if img_dir.exists(): + shutil.rmtree(img_dir) + img_dir.mkdir() + for i, (img, _) in enumerate(loader): + img = cv2.resize(img, (256, 256)) + cv2.imwrite(str(img_dir / f"{i}.jpg"), img) + video_writer.write(img) + video_writer.release() + + opts = { + "loader.params.dataset_name": coco_dataset.identifier, + "trainer.preprocessing.augmentations": [], + } + model = LuxonisModel("configs/complex_model.yaml", opts) + + model.infer(source_path=img_dir / "0.jpg", save_dir=infer_path) + assert len(list(infer_path.glob("*.png"))) == 3 + + model.infer(source_path=img_dir, save_dir=infer_path) + assert len(list(infer_path.glob("*.png"))) == len(loader) * 3 + + model.infer(source_path="tests/data/video.avi", save_dir=infer_path) + assert len(list(infer_path.glob("*.mp4"))) == 3 + + model.infer(save_dir=infer_path, view="train") + assert len(list(infer_path.glob("*.png"))) == len(loader) * 3 * 2 + + with pytest.raises(ValueError): + model.infer(source_path="tests/data/invalid.jpg", save_dir=infer_path) + + def test_archive(test_output_dir: Path, coco_dataset: LuxonisDataset): opts = { "tracker.save_directory": str(test_output_dir), @@ -187,6 +234,10 @@ def test_callbacks(opts: dict[str, Any], parking_lot_dataset: LuxonisDataset): { "name": "ExportOnTrainEnd", }, + { + "name": "ExportOnTrainEnd", + "params": 
{"preferred_checkpoint": "loss"}, + }, { "name": "ArchiveOnTrainEnd", "params": {"preferred_checkpoint": "loss"},