diff --git a/luxonis_train/attached_modules/losses/__init__.py b/luxonis_train/attached_modules/losses/__init__.py index 28585504..5b3f7a63 100644 --- a/luxonis_train/attached_modules/losses/__init__.py +++ b/luxonis_train/attached_modules/losses/__init__.py @@ -3,8 +3,6 @@ from .bce_with_logits import BCEWithLogitsLoss from .cross_entropy import CrossEntropyLoss from .efficient_keypoint_bbox_loss import EfficientKeypointBBoxLoss -from .implicit_keypoint_bbox_loss import ImplicitKeypointBBoxLoss -from .keypoint_loss import KeypointLoss from .sigmoid_focal_loss import SigmoidFocalLoss from .smooth_bce_with_logits import SmoothBCEWithLogitsLoss from .softmax_focal_loss import SoftmaxFocalLoss @@ -14,8 +12,6 @@ "BCEWithLogitsLoss", "CrossEntropyLoss", "EfficientKeypointBBoxLoss", - "ImplicitKeypointBBoxLoss", - "KeypointLoss", "BaseLoss", "SigmoidFocalLoss", "SmoothBCEWithLogitsLoss", diff --git a/luxonis_train/attached_modules/losses/implicit_keypoint_bbox_loss.py b/luxonis_train/attached_modules/losses/implicit_keypoint_bbox_loss.py deleted file mode 100644 index 99eea6f3..00000000 --- a/luxonis_train/attached_modules/losses/implicit_keypoint_bbox_loss.py +++ /dev/null @@ -1,349 +0,0 @@ -import logging -from typing import Any, cast - -import torch -from torch import Tensor -from torchvision.ops import box_convert - -from luxonis_train.attached_modules.losses.keypoint_loss import KeypointLoss -from luxonis_train.enums import TaskType -from luxonis_train.nodes import ImplicitKeypointBBoxHead -from luxonis_train.utils import ( - Labels, - Packet, - compute_iou_loss, - match_to_anchor, - process_bbox_predictions, -) - -from .base_loss import BaseLoss -from .bce_with_logits import BCEWithLogitsLoss -from .smooth_bce_with_logits import SmoothBCEWithLogitsLoss - -KeypointTargetType = tuple[ - list[Tensor], - list[Tensor], - list[Tensor], - list[tuple[Tensor, Tensor, Tensor, Tensor]], - list[Tensor], -] - -logger = logging.getLogger(__name__) - - -# TODO: BROKEN! -class ImplicitKeypointBBoxLoss(BaseLoss[list[Tensor], KeypointTargetType]): - node: ImplicitKeypointBBoxHead - supported_tasks: list[tuple[TaskType, ...]] = [ - (TaskType.BOUNDINGBOX, TaskType.KEYPOINTS) - ] - - def __init__( - self, - cls_pw: float = 1.0, - viz_pw: float = 1.0, - obj_pw: float = 1.0, - label_smoothing: float = 0.0, - min_objectness_iou: float = 0.0, - bbox_loss_weight: float = 0.05, - keypoint_visibility_loss_weight: float = 0.6, - keypoint_regression_loss_weight: float = 0.5, - sigmas: list[float] | None = None, - area_factor: float | None = None, - class_loss_weight: float = 0.6, - objectness_loss_weight: float = 0.7, - anchor_threshold: float = 4.0, - bias: float = 0.5, - balance: list[float] | None = None, - **kwargs: Any, - ): - """Joint loss for keypoint and box predictions for cases where - the keypoints and boxes are inherently linked. - - Based on U{YOLO-Pose: Enhancing YOLO for Multi Person Pose Estimation Using Object - Keypoint Similarity Loss}. - - @type cls_pw: float - @param cls_pw: Power for the BCE loss for classes. Defaults to C{1.0}. - @type viz_pw: float - @param viz_pw: Power for the BCE loss for keypoints. - @type obj_pw: float - @param obj_pw: Power for the BCE loss for objectness. Defaults to C{1.0}. - @type label_smoothing: float - @param label_smoothing: Label smoothing factor. Defaults to C{0.0}. - @type min_objectness_iou: float - @param min_objectness_iou: Minimum objectness iou. Defaults to C{0.0}. - @type bbox_loss_weight: float - @param bbox_loss_weight: Weight for the bounding box loss. 
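
(Note: despite the "power" wording in the docstring above, `cls_pw`, `viz_pw` and `obj_pw` end up as `pos_weight` arguments to BCE-with-logits further down in this file. A plain-PyTorch sketch of that effect, with toy numbers:)

import torch

# pos_weight > 1 up-weights the positive class in BCE-with-logits
logits = torch.tensor([0.5, -1.2])
targets = torch.tensor([1.0, 0.0])

plain = torch.nn.BCEWithLogitsLoss()
weighted = torch.nn.BCEWithLogitsLoss(pos_weight=torch.tensor([2.0]))

# the weighted variant penalizes errors on positive targets more heavily
print(plain(logits, targets), weighted(logits, targets))
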
- @type keypoint_visibility_loss_weight: float - @param keypoint_visibility_loss_weight: Weight for the keypoint visibility loss. Defaults to C{0.6}. - @type keypoint_regression_loss_weight: float - @param keypoint_regression_loss_weight: Weight for the keypoint regression loss. Defaults to C{0.5}. - @type sigmas: list[float] | None - @param sigmas: Sigmas used in KeypointLoss for OKS metric. If None then use COCO ones if possible or default ones. Defaults to C{None}. - @type area_factor: float | None - @param area_factor: Factor by which we multiply bbox area which is used in KeypointLoss. If None then use default one. Defaults to C{None}. - @type class_loss_weight: float - @param class_loss_weight: Weight for the class loss. Defaults to C{0.6}. - @type objectness_loss_weight: float - @param objectness_loss_weight: Weight for the objectness loss. Defaults to C{0.7}. - @type anchor_threshold: float - @param anchor_threshold: Threshold for matching anchors to targets. Defaults to C{4.0}. - @type bias: float - @param bias: Bias for matching anchors to targets. Defaults to C{0.5}. - @type balance: list[float] | None - @param balance: Balance for the different heads. Defaults to C{None}. - """ - - super().__init__(**kwargs) - - self.n_anchors = self.node.n_anchors - self.n_heads = self.node.n_heads - self.box_offset = self.node.box_offset - self.anchors = self.node.anchors - self.balance = balance or [4.0, 1.0, 0.4] - if len(self.balance) < self.n_heads: - logger.warning( - f"Balance list must have at least {self.n_heads} elements." - "Filling the rest with 1.0." - ) - self.balance += [1.0] * (self.n_heads - len(self.balance)) - - self.min_objectness_iou = min_objectness_iou - self.bbox_weight = bbox_loss_weight - self.class_weight = class_loss_weight - self.objectness_weight = objectness_loss_weight - self.anchor_threshold = anchor_threshold - - self.bias = bias - - self.b_cross_entropy = BCEWithLogitsLoss( - pos_weight=torch.tensor([obj_pw]) - ) - self.class_loss = SmoothBCEWithLogitsLoss( - label_smoothing=label_smoothing, - bce_pow=cls_pw, - ) - self.keypoint_loss = KeypointLoss( - n_keypoints=self.n_keypoints, - bce_power=viz_pw, - sigmas=sigmas, - area_factor=area_factor, - regression_loss_weight=keypoint_regression_loss_weight, - visibility_loss_weight=keypoint_visibility_loss_weight, - ) - - self.positive_smooth_const = 1 - 0.5 * label_smoothing - self.negative_smooth_const = 0.5 * label_smoothing - - def prepare( - self, outputs: Packet[Tensor], labels: Labels - ) -> tuple[list[Tensor], KeypointTargetType]: - """Prepares the labels to be in the correct format for loss - calculation. - - @type outputs: Packet[Tensor] - @param outputs: Output from the forward pass. - @type labels: L{Labels} - @param labels: Dictionary containing the labels. - @rtype: tuple[list[Tensor], tuple[list[Tensor], list[Tensor], - list[Tensor], list[tuple[Tensor, Tensor, Tensor, Tensor]], - list[Tensor]]] - @return: Tuple containing the original output and the - postprocessed labels. The processed labels are a tuple - containing the class targets, box targets, keypoint targets, - indices and anchors. Indicies are a tuple containing vectors - of indices for batch, anchor, feature y and feature x - dimensions, respectively. They are all of shape - (n_targets,). The indices are used to index the output - tensors of shape (batch_size, n_anchors, feature_height, - feature_width, n_classes + box_offset + n_keypoints * 3) to - get a tensor of shape (n_targets, n_classes + box_offset + - n_keypoints * 3). 
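
(The indexing scheme described above is plain PyTorch advanced indexing; a self-contained sketch with toy shapes, variable names mirroring the code below:)

import torch

batch_size, n_anchors, feat_h, feat_w, n_out = 2, 3, 8, 8, 10
features = torch.randn(batch_size, n_anchors, feat_h, feat_w, n_out)

# one index vector per dimension, each of shape (n_targets,)
batch_index = torch.tensor([0, 0, 1])
anchor_index = torch.tensor([1, 2, 0])
feature_y_index = torch.tensor([3, 5, 7])
feature_x_index = torch.tensor([4, 4, 0])

# advanced indexing pulls one prediction vector per matched target
pred_subset = features[
    batch_index, anchor_index, feature_y_index, feature_x_index
]
assert pred_subset.shape == (3, n_out)
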
- """ - predictions = self.get_input_tensors(outputs, "features") - - kpt_label = self.get_label(labels, TaskType.KEYPOINTS) - bbox_label = self.get_label(labels, TaskType.BOUNDINGBOX) - - targets = torch.zeros( - (kpt_label.shape[0], self.n_keypoints * 3 + self.box_offset + 1) - ) - targets[:, :2] = kpt_label[:, :2] - targets[:, 2 : self.box_offset + 1] = box_convert( - bbox_label[:, 2:], "xywh", "cxcywh" - ) - - # insert keypoints - for i in range(1, 4): - targets[:, self.box_offset + i :: 3] = kpt_label[:, i + 1 :: 3] - - n_targets = targets.shape[0] - - class_targets: list[Tensor] = [] - box_targets: list[Tensor] = [] - keypoint_targets: list[Tensor] = [] - indices: list[tuple[Tensor, Tensor, Tensor, Tensor]] = [] - anchors: list[Tensor] = [] - - anchor_indices = ( - torch.arange( - self.n_anchors, device=targets.device, dtype=torch.float32 - ) - .reshape(self.n_anchors, 1) - .repeat(1, n_targets) - .unsqueeze(-1) - ) - targets = torch.cat( - (targets.repeat(self.n_anchors, 1, 1), anchor_indices), 2 - ) - - xy_deltas = ( - torch.tensor( - [[0, 0], [1, 0], [0, 1], [-1, 0], [0, -1]], - device=targets.device, - ).float() - * self.bias - ) - - for i in range(self.n_heads): - anchor = self.anchors[i] - feature_height, feature_width = predictions[i].shape[2:4] - scaled_targets, xy_shifts = match_to_anchor( - targets, - anchor, - xy_deltas, - feature_width, - feature_height, - self.n_keypoints, - self.anchor_threshold, - self.bias, - self.box_offset, - ) - - batch_index, cls = scaled_targets[:, :2].long().T - box_xy = scaled_targets[:, 2:4] - box_wh = scaled_targets[:, 4:6] - box_xy_deltas = (box_xy - xy_shifts).long() - feature_x_index = box_xy_deltas[:, 0].clamp_(0, feature_width - 1) - feature_y_index = box_xy_deltas[:, 1].clamp_(0, feature_height - 1) - - anchor_indices = scaled_targets[:, -1].long() - indices.append( - ( - batch_index, - anchor_indices, - feature_y_index, - feature_x_index, - ) - ) - class_targets.append(cls) - box_targets.append(torch.cat((box_xy - box_xy_deltas, box_wh), 1)) - anchors.append(anchor[anchor_indices]) - - keypoint_targets.append( - self._create_keypoint_target(scaled_targets, box_xy_deltas) - ) - - return predictions, ( - class_targets, - box_targets, - keypoint_targets, - indices, - anchors, - ) - - def forward( - self, - predictions: list[Tensor], - targets: KeypointTargetType, - ) -> tuple[Tensor, dict[str, Tensor]]: - device = predictions[0].device - sub_losses = { - "bboxes": torch.tensor(0.0, device=device), - "objectness": torch.tensor(0.0, device=device), - "class": torch.tensor(0.0, device=device), - "kpt_visibility": torch.tensor(0.0, device=device), - "kpt_regression": torch.tensor(0.0, device=device), - } - - for ( - pred, - class_target, - box_target, - kpt_target, - index, - anchor, - balance, - ) in zip(predictions, *targets, self.balance): - obj_targets = torch.zeros_like(pred[..., 0], device=device) - n_targets = len(class_target) - - if n_targets > 0: - pred_subset = pred[index] - - bbox_cx_cy, bbox_w_h, _ = process_bbox_predictions( - pred_subset, anchor.to(device) - ) - bbox_loss, bbox_iou = compute_iou_loss( - torch.cat((bbox_cx_cy, bbox_w_h), dim=1), - box_target, - iou_type="ciou", - bbox_format="cxcywh", - reduction="mean", - ) - - sub_losses["bboxes"] += bbox_loss * self.bbox_weight - - area = box_target[:, 2] * box_target[:, 3] - - _, kpt_sublosses = self.keypoint_loss.forward( - pred_subset[:, self.box_offset + self.n_classes :], - kpt_target.to(device), - area.to(device), - ) - for name, kpt_subloss in kpt_sublosses.items(): - 
sub_losses[name] += kpt_subloss - - obj_targets[index] = (self.min_objectness_iou) + ( - 1 - self.min_objectness_iou - ) * bbox_iou.squeeze(-1).to(obj_targets.dtype) - - if self.n_classes > 1: - sub_losses["class"] += ( - self.class_loss.forward( - pred_subset[ - :, - self.box_offset : self.box_offset - + self.n_classes, - ], - class_target, - ) - * self.class_weight - ) - - sub_losses["objectness"] += ( - self.b_cross_entropy.forward(pred[..., 4], obj_targets) - * balance - * self.objectness_weight - ) - - loss = cast(Tensor, sum(sub_losses.values())).reshape([]) - return loss, {name: loss.detach() for name, loss in sub_losses.items()} - - def _create_keypoint_target( - self, scaled_targets: Tensor, box_xy_deltas: Tensor - ): - keypoint_target = scaled_targets[:, self.box_offset + 1 : -1] - for j in range(self.n_keypoints): - idx = 3 * j - keypoint_coords = keypoint_target[:, idx : idx + 2] - visibility = keypoint_target[:, idx + 2] - - keypoint_mask = visibility != 0 - keypoint_coords[keypoint_mask] -= box_xy_deltas[keypoint_mask] - - keypoint_target[:, idx : idx + 2] = keypoint_coords - keypoint_target[:, idx + 2] = visibility - - return keypoint_target diff --git a/luxonis_train/attached_modules/losses/keypoint_loss.py b/luxonis_train/attached_modules/losses/keypoint_loss.py deleted file mode 100644 index 1327d460..00000000 --- a/luxonis_train/attached_modules/losses/keypoint_loss.py +++ /dev/null @@ -1,112 +0,0 @@ -from typing import Any - -import torch -from torch import Tensor - -from luxonis_train.enums import TaskType -from luxonis_train.utils import ( - get_sigmas, - get_with_default, - process_keypoints_predictions, -) - -from .base_loss import BaseLoss -from .bce_with_logits import BCEWithLogitsLoss - - -# TODO: Make it work on its own -class KeypointLoss(BaseLoss[Tensor, Tensor]): - supported_tasks: list[TaskType] = [TaskType.KEYPOINTS] - - def __init__( - self, - n_keypoints: int, - bce_power: float = 1.0, - sigmas: list[float] | None = None, - area_factor: float | None = None, - regression_loss_weight: float = 1.0, - visibility_loss_weight: float = 1.0, - **kwargs: Any, - ): - """Keypoint based loss that is computed from OKS-based - regression and visibility loss. - - @type n_keypoints: int - @param n_keypoints: Number of keypoints. - @type bce_power: float - @param bce_power: Power used for BCE visibility loss. Defaults - to C{1.0}. - @param sigmas: Sigmas used for OKS. If None then use COCO ones - if possible or default ones. Defaults to C{None}. - @type area_factor: float | None - @param area_factor: Factor by which we multiply bbox area. If - None then use default one. Defaults to C{None}. - @type regression_loss_weight: float - @param regression_loss_weight: Weight of regression loss. - Defaults to C{1.0}. - @type visibility_loss_weight: float - @param visibility_loss_weight: Weight of visibility loss. - Defaults to C{1.0}. - """ - - super().__init__(**kwargs) - self.b_cross_entropy = BCEWithLogitsLoss( - pos_weight=torch.tensor([bce_power]), **kwargs - ) - self.sigmas = get_sigmas(sigmas, n_keypoints, caller_name=self.name) - self.area_factor = get_with_default( - area_factor, "bbox area scaling", self.name, default=0.53 - ) - self.regression_loss_weight = regression_loss_weight - self.visibility_loss_weight = visibility_loss_weight - - def forward( - self, prediction: Tensor, target: Tensor, area: Tensor - ) -> tuple[Tensor, dict[str, Tensor]]: - """Computes the keypoint loss and visibility loss for a given - prediction and target. 
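
(The objectness targets built in `forward` above are soft labels interpolated from the predicted-box IoU rather than hard ones. A toy sketch, assuming the IoU values are already computed:)

import torch

min_objectness_iou = 0.0
bbox_iou = torch.tensor([0.3, 0.9])      # e.g. CIoU of the matched boxes

obj_targets = torch.zeros(2, 3, 4, 4)    # (batch, anchor, grid_y, grid_x)
index = (
    torch.tensor([0, 1]),                # batch
    torch.tensor([0, 2]),                # anchor
    torch.tensor([1, 3]),                # grid y
    torch.tensor([2, 0]),                # grid x
)

# matched cells receive an IoU-scaled soft target instead of a hard 1.0
obj_targets[index] = min_objectness_iou + (1 - min_objectness_iou) * bbox_iou
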
- - @type prediction: Tensor - @param prediction: Predicted tensor of shape C{[n_detections, - n_keypoints * 3]}. - @type target: Tensor - @param target: Target tensor of shape C{[n_detections, - n_keypoints * 3]}. - @type area: Tensor - @param area: Area tensor of shape C{[n_detections]}. - @rtype: tuple[Tensor, dict[str, Tensor]] - @return: A tuple containing the total loss tensor of shape - C{[1,]} and a dictionary with the regression loss and - visibility loss tensors. - """ - sigmas = self.sigmas.to(prediction.device) - - pred_x, pred_y, pred_v = process_keypoints_predictions(prediction) - target_x = target[:, 0::3] - target_y = target[:, 1::3] - target_visibility = (target[:, 2::3] > 0).float() - - visibility_loss = ( - self.b_cross_entropy.forward(pred_v, target_visibility) - * self.visibility_loss_weight - ) - scales = area * self.area_factor - - distance = (target_x - pred_x) ** 2 + (target_y - pred_y) ** 2 - normalized_distance = ( - distance / (2 * sigmas**2) / (scales.view(-1, 1) + 1e-9) / 2 - ) - - regression_loss = 1 - torch.exp(-normalized_distance) - regression_loss = (regression_loss * target_visibility).sum(dim=1) / ( - target_visibility.sum(dim=1) + 1e-9 - ) - regression_loss = regression_loss.mean() - regression_loss *= self.regression_loss_weight - - total_loss = regression_loss + visibility_loss - - return total_loss, { - "kpt_regression": regression_loss, - "kpt_visibility": visibility_loss, - } diff --git a/luxonis_train/core/utils/archive_utils.py b/luxonis_train/core/utils/archive_utils.py index c8904f08..dbcc214a 100644 --- a/luxonis_train/core/utils/archive_utils.py +++ b/luxonis_train/core/utils/archive_utils.py @@ -116,7 +116,7 @@ def _get_classes( node_task = "boundingbox" case "SegmentationHead" | "BiSeNetHead": node_task = "segmentation" - case "ImplicitKeypointBBoxHead" | "EfficientKeypointBBoxHead": + case "EfficientKeypointBBoxHead": node_task = "keypoints" case _: # pragma: no cover raise ValueError("Node does not map to a default task.") @@ -152,14 +152,6 @@ def _get_head_specific_parameters( parameters["is_softmax"] = getattr( ImplementedHeadsIsSoxtmaxed, head_name ).value - elif head_name == "ImplicitKeypointBBoxHead": - parameters["subtype"] = ObjectDetectionSubtypeYOLO.YOLOv7.value - head_node = nodes[head_alias] - parameters["iou_threshold"] = head_node.iou_thres - parameters["conf_threshold"] = head_node.conf_thres - parameters["max_det"] = head_node.max_det - parameters["n_keypoints"] = head_node.n_keypoints - parameters["anchors"] = head_node.anchors.tolist() elif head_name == "EfficientKeypointBBoxHead": # or appropriate subtype head_node = nodes[head_alias] @@ -210,8 +202,6 @@ def _get_head_outputs( return [output["name"] for output in outputs] elif head_type in ["SegmentationHead", "BiSeNetHead"]: return [outputs[0]["name"]] - elif head_type == "ImplicitKeypointBBoxHead": - return [outputs[0]["name"]] elif head_type == "EfficientKeypointBBoxHead": return [outputs[0]["name"]] else: diff --git a/luxonis_train/nodes/blocks/__init__.py b/luxonis_train/nodes/blocks/__init__.py index c35186e1..ce0181c9 100644 --- a/luxonis_train/nodes/blocks/__init__.py +++ b/luxonis_train/nodes/blocks/__init__.py @@ -8,10 +8,6 @@ DropPath, EfficientDecoupledBlock, FeatureFusionBlock, - KeypointBlock, - LearnableAdd, - LearnableMulAddConv, - LearnableMultiply, RepVGGBlock, SpatialPyramidPoolingBlock, SqueezeExciteBlock, @@ -32,10 +28,6 @@ "AttentionRefinmentBlock", "SpatialPyramidPoolingBlock", "FeatureFusionBlock", - "LearnableAdd", - "LearnableMultiply", - 
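
(The regression term removed above is an OKS-style penalty: squared keypoint distances are normalized by per-keypoint sigmas and the scaled box area, mapped through `1 - exp(-d)`, and averaged over visible keypoints only. A standalone numeric sketch with two keypoints:)

import torch

sigmas = torch.tensor([0.05, 0.08])        # per-keypoint OKS sigmas
scales = torch.tensor([0.10])              # bbox area * area_factor
target_visibility = torch.tensor([[1.0, 0.0]])

distance = torch.tensor([[0.001, 0.004]])  # (tx - px)^2 + (ty - py)^2
normalized = distance / (2 * sigmas**2) / (scales.view(-1, 1) + 1e-9) / 2

regression = 1 - torch.exp(-normalized)
regression = (regression * target_visibility).sum(dim=1) / (
    target_visibility.sum(dim=1) + 1e-9
)
print(regression.mean())  # only the visible keypoint contributes
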
"LearnableMulAddConv", - "KeypointBlock", "BasicResNetBlock", "Bottleneck", "UpscaleOnline", diff --git a/luxonis_train/nodes/blocks/blocks.py b/luxonis_train/nodes/blocks/blocks.py index 79a8f738..5059c651 100644 --- a/luxonis_train/nodes/blocks/blocks.py +++ b/luxonis_train/nodes/blocks/blocks.py @@ -654,86 +654,6 @@ def forward(self, x1: Tensor, x2: Tensor) -> Tensor: return out -class LearnableAdd(nn.Module): - """Implicit add block.""" - - def __init__(self, channel: int): - super().__init__() - self.channel = channel - self.implicit = nn.Parameter(torch.zeros(1, channel, 1, 1)) - nn.init.normal_(self.implicit, std=0.02) - - def forward(self, x: Tensor) -> Tensor: - return self.implicit.expand_as(x) + x - - -class LearnableMultiply(nn.Module): - """Implicit multiply block.""" - - def __init__(self, channel: int): - super().__init__() - self.channel = channel - self.implicit = nn.Parameter(torch.ones(1, channel, 1, 1)) - nn.init.normal_(self.implicit, mean=1.0, std=0.02) - - def forward(self, x: Tensor) -> Tensor: - return self.implicit.expand_as(x) * x - - -class LearnableMulAddConv(nn.Module): - def __init__( - self, - add_channel: int, - mul_channel: int, - conv_in_channel: int, - conv_out_channel: int, - ): - super().__init__() - self.add = LearnableAdd(add_channel) - self.mul = LearnableMultiply(mul_channel) - self.conv = nn.Conv2d(conv_in_channel, conv_out_channel, 1) - - def forward(self, x: Tensor) -> Tensor: - return self.mul(self.conv(self.add(x))) - - -class KeypointBlock(nn.Module): - """Keypoint head block for keypoint predictions.""" - - def __init__(self, in_channels: int, out_channels: int): - super().__init__() - layers: list[nn.Module] = [] - for i in range(6): - depth_wise_conv = ConvModule( - in_channels, - in_channels, - kernel_size=3, - padding=autopad(3), - groups=math.gcd(in_channels, in_channels), - activation=nn.SiLU(), - ) - conv = ( - ConvModule( - in_channels, - in_channels, - kernel_size=1, - padding=autopad(1), - activation=nn.SiLU(), - ) - if i < 5 - else nn.Conv2d(in_channels, out_channels, 1) - ) - - layers.append(depth_wise_conv) - layers.append(conv) - - self.block = nn.Sequential(*layers) - - def forward(self, x: Tensor) -> Tensor: - out = self.block(x) - return out - - T = TypeVar("T", int, tuple[int, ...]) diff --git a/luxonis_train/nodes/enums/head_categorization.py b/luxonis_train/nodes/enums/head_categorization.py index 90f75725..606ed872 100644 --- a/luxonis_train/nodes/enums/head_categorization.py +++ b/luxonis_train/nodes/enums/head_categorization.py @@ -6,7 +6,6 @@ class ImplementedHeads(Enum): ClassificationHead = "ClassificationParser" EfficientBBoxHead = "YOLO" - ImplicitKeypointBBoxHead = "YoloDetectionNetwork" EfficientKeypointBBoxHead = "YoloDetectionNetwork" SegmentationHead = "SegmentationParser" BiSeNetHead = "SegmentationParser" @@ -17,7 +16,6 @@ class ImplementedHeadsIsSoxtmaxed(Enum): ClassificationHead = False EfficientBBoxHead = None - ImplicitKeypointBBoxHead = None EfficientKeypointBBoxHead = None SegmentationHead = False BiSeNetHead = False diff --git a/luxonis_train/nodes/heads/__init__.py b/luxonis_train/nodes/heads/__init__.py index e188f188..9d04c310 100644 --- a/luxonis_train/nodes/heads/__init__.py +++ b/luxonis_train/nodes/heads/__init__.py @@ -3,7 +3,6 @@ from .ddrnet_segmentation_head import DDRNetSegmentationHead from .efficient_bbox_head import EfficientBBoxHead from .efficient_keypoint_bbox_head import EfficientKeypointBBoxHead -from .implicit_keypoint_bbox_head import ImplicitKeypointBBoxHead from 
.segmentation_head import SegmentationHead __all__ = [ @@ -11,7 +10,6 @@ "ClassificationHead", "EfficientBBoxHead", "EfficientKeypointBBoxHead", - "ImplicitKeypointBBoxHead", "SegmentationHead", "DDRNetSegmentationHead", ] diff --git a/luxonis_train/nodes/heads/implicit_keypoint_bbox_head.py b/luxonis_train/nodes/heads/implicit_keypoint_bbox_head.py deleted file mode 100644 index 114c6f82..00000000 --- a/luxonis_train/nodes/heads/implicit_keypoint_bbox_head.py +++ /dev/null @@ -1,282 +0,0 @@ -import logging -import math -from typing import Any, cast - -import torch -from torch import Tensor, nn - -from luxonis_train.enums import TaskType -from luxonis_train.nodes.base_node import BaseNode -from luxonis_train.nodes.blocks import KeypointBlock, LearnableMulAddConv -from luxonis_train.utils import ( - Packet, - non_max_suppression, - process_bbox_predictions, - process_keypoints_predictions, -) - -logger = logging.getLogger(__name__) - - -class ImplicitKeypointBBoxHead( - BaseNode[list[Tensor], tuple[list[Tensor], Tensor]] -): - tasks = [TaskType.KEYPOINTS, TaskType.BOUNDINGBOX] - in_channels: list[int] - - def __init__( - self, - n_heads: int = 3, - anchors: list[list[float]] | None = None, - init_coco_biases: bool = True, - conf_thres: float = 0.25, - iou_thres: float = 0.45, - max_det: int = 300, - **kwargs: Any, - ): - """Head for object and keypoint detection. - - Adapted from U{YOLOv7: Trainable bag-of-freebies sets new state-of-the-art for real-time - object detectors}. - - TODO: more technical documentation - - @type n_heads: int - @param n_heads: Number of output heads. Defaults to C{3}. - B{Note:} Should be same also on neck in most cases. - @type anchors: list[list[float]] | None - @param anchors: Anchors used for object detection. - @type init_coco_biases: bool - @param init_coco_biases: Whether to use COCO bias and weight - @type conf_thres: float - @param conf_thres: Threshold for confidence. Defaults to C{0.25}. - @type iou_thres: float - @param iou_thres: Threshold for IoU. Defaults to C{0.45}. - @type max_det: int - @param max_det: Maximum number of detections retained after NMS. Defaults to C{300}. - """ - super().__init__(**kwargs) - - self.conf_thres = conf_thres - self.iou_thres = iou_thres - self.max_det = max_det - - self.n_heads = n_heads - if len(self.in_channels) < self.n_heads: - logger.warning( - f"Head '{self.name}' was set to use {self.n_heads} heads, " - f"but received only {len(self.in_channels)} inputs. " - f"Changing number of heads to {len(self.in_channels)}." - ) - self.n_heads = len(self.in_channels) - - if anchors is None: - logger.info("No anchors provided, generating them automatically.") - anchors, recall = self.dataset_metadata.autogenerate_anchors( - self.n_heads - ) - logger.info( - f"Anchors generated. 
Best possible recall: {recall:.2f}" - ) - - self.box_offset = 5 - self.n_det_out = self.n_classes + self.box_offset - self.n_kpt_out = 3 * self.n_keypoints - self.n_out = self.n_det_out + self.n_kpt_out - self.n_anchors = len(anchors[0]) // 2 - self.grid: list[Tensor] = [] - - self.anchors = torch.tensor(anchors).float().view(self.n_heads, -1, 2) - self.anchor_grid = self.anchors.clone().view( - self.n_heads, 1, -1, 1, 1, 2 - ) - - self.channel_list, self.stride = self._fit_to_n_heads(self.in_channels) - - self.learnable_mul_add_conv = nn.ModuleList( - LearnableMulAddConv( - add_channel=in_channels, - mul_channel=self.n_det_out * self.n_anchors, - conv_in_channel=in_channels, - conv_out_channel=self.n_det_out * self.n_anchors, - ) - for in_channels in self.channel_list - ) - - self.kpt_heads = nn.ModuleList( - KeypointBlock( - in_channels=in_channels, - out_channels=self.n_kpt_out * self.n_anchors, - ) - for in_channels in self.channel_list - ) - - self.anchors /= self.stride.view(-1, 1, 1) - self._check_anchor_order() - - if init_coco_biases: - self._initialize_weights_and_biases() - - def forward(self, inputs: list[Tensor]) -> tuple[list[Tensor], Tensor]: - predictions: list[Tensor] = [] - features: list[Tensor] = [] - - self.anchor_grid = self.anchor_grid.to(inputs[0].device) - - for i in range(self.n_heads): - feat = cast( - Tensor, - torch.cat( - ( - self.learnable_mul_add_conv[i](inputs[i]), - self.kpt_heads[i](inputs[i]), - ), - axis=1, - ), # type: ignore - ) - - batch_size, _, feature_height, feature_width = feat.shape - if i >= len(self.grid): - self.grid.append( - self._construct_grid(feature_width, feature_height).to( - feat.device - ) - ) - - feat = feat.reshape( - batch_size, - self.n_anchors, - self.n_out, - feature_height, - feature_width, - ).permute(0, 1, 3, 4, 2) - - features.append(feat) - predictions.append( - self._build_predictions( - feat, self.anchor_grid[i], self.grid[i], self.stride[i] - ) - ) - - return features, torch.cat(predictions, dim=1) - - def wrap(self, output: tuple[list[Tensor], Tensor]) -> Packet[Tensor]: - features, predictions = output - - if self.export: - return {"boxes_and_keypoints": [predictions]} - - if self.training: - return {"features": features} - - nms = non_max_suppression( - predictions, - n_classes=self.n_classes, - conf_thres=self.conf_thres, - iou_thres=self.iou_thres, - bbox_format="cxcywh", - max_det=self.max_det, - ) - - return { - "boundingbox": [detection[:, :6] for detection in nms], - "keypoints": [ - detection[:, 6:].reshape(-1, self.n_keypoints, 3) - for detection in nms - ], - "features": features, - } - - def _build_predictions( - self, feat: Tensor, anchor_grid: Tensor, grid: Tensor, stride: Tensor - ) -> Tensor: - batch_size = feat.shape[0] - bbox = feat[..., : self.box_offset + self.n_classes] - keypoints = feat[..., self.box_offset + self.n_classes :] - - box_cxcy, box_wh, box_tail = process_bbox_predictions( - bbox, anchor_grid - ) - grid = grid.to(box_cxcy.device) - stride = stride.to(box_cxcy.device) - box_cxcy = (box_cxcy + grid) * stride - out_bbox = torch.cat((box_cxcy, box_wh, box_tail), dim=-1) - - grid_x = grid[..., 0:1] - grid_y = grid[..., 1:2] - kpt_x, kpt_y, kpt_vis = process_keypoints_predictions(keypoints) - kpt_x = (kpt_x + grid_x) * stride - kpt_y = (kpt_y + grid_y) * stride - kpt_vis_sig = kpt_vis.sigmoid() - out_kpt = torch.cat((kpt_x, kpt_y, kpt_vis_sig), dim=-1) - out_kpt = out_kpt.reshape(*kpt_x.shape[:-1], -1) - out = torch.cat((out_bbox, out_kpt), dim=-1) - - return out.reshape(batch_size, -1, 
self.n_out) - - def _infer_bbox( - self, bbox: Tensor, stride: Tensor, grid: Tensor, anchor_grid: Tensor - ) -> Tensor: - out_bbox = bbox.sigmoid() - out_bbox_xy = (out_bbox[..., 0:2] * 2.0 - 0.5 + grid) * stride - out_bbox_wh = (out_bbox[..., 2:4] * 2) ** 2 * anchor_grid.view( - 1, self.n_anchors, 1, 1, 2 - ) - return torch.cat((out_bbox_xy, out_bbox_wh, out_bbox[..., 4:]), dim=-1) - - def _fit_to_n_heads( - self, channel_list: list[int] - ) -> tuple[list[int], Tensor]: - out_channel_list = channel_list[: self.n_heads] - stride = torch.tensor( - [ - self.original_in_shape[1] / h - for h in cast(list[int], self.in_height)[: self.n_heads] - ], - dtype=torch.int, - ) - return out_channel_list, stride - - def _initialize_weights_and_biases(self, class_freq: Tensor | None = None): - for m in self.modules(): - if isinstance(m, nn.Conv2d): - nn.init.kaiming_normal_( - m.weight, mode="fan_out", nonlinearity="relu" - ) - elif isinstance(m, nn.BatchNorm2d): - m.eps = 1e-3 - m.momentum = 0.03 - elif isinstance( - m, (nn.Hardswish, nn.LeakyReLU, nn.ReLU, nn.ReLU6) - ): - m.inplace = True - - for mi, s in zip(self.learnable_mul_add_conv, self.stride): - b = mi.conv.bias.view(self.n_anchors, -1) - b.data[:, 4] += math.log(8 / (640 / s) ** 2) - b.data[:, 5:] += ( - math.log(0.6 / (self.n_classes - 0.99)) - if class_freq is None - else torch.log(class_freq / class_freq.sum()) - ) - mi.conv.bias = torch.nn.Parameter(b.view(-1), requires_grad=True) - - def _construct_grid(self, feature_width: int, feature_height: int): - grid_y, grid_x = torch.meshgrid( - [torch.arange(feature_height), torch.arange(feature_width)], - indexing="ij", - ) - return ( - torch.stack((grid_x, grid_y), 2) - .view((1, 1, feature_height, feature_width, 2)) - .float() - ) - - def _check_anchor_order(self): - a = self.anchor_grid.prod(-1).view(-1) - delta_a = a[-1] - a[0] - delta_s = self.stride[-1] - self.stride[0] - if delta_a.sign() != delta_s.sign(): - logger.warning("Reversing anchor order") - self.anchors[:] = self.anchors.flip(0) - self.anchor_grid[:] = self.anchor_grid.flip(0) diff --git a/luxonis_train/utils/__init__.py b/luxonis_train/utils/__init__.py index 164f7da5..1231bc10 100644 --- a/luxonis_train/utils/__init__.py +++ b/luxonis_train/utils/__init__.py @@ -1,13 +1,10 @@ from .boundingbox import ( anchors_for_fpn_features, - anchors_from_dataset, bbox2dist, bbox_iou, compute_iou_loss, dist2bbox, - match_to_anchor, non_max_suppression, - process_bbox_predictions, ) from .dataset_metadata import DatasetMetadata from .exceptions import IncompatibleException @@ -18,7 +15,7 @@ to_shape_packet, ) from .graph import traverse_graph -from .keypoints import get_sigmas, process_keypoints_predictions +from .keypoints import get_sigmas from .tracker import LuxonisTrackerPL from .types import AttachIndexType, Kwargs, Labels, Packet @@ -34,16 +31,12 @@ "to_shape_packet", "get_with_default", "LuxonisTrackerPL", - "match_to_anchor", "dist2bbox", "bbox2dist", "bbox_iou", "non_max_suppression", - "anchors_from_dataset", "anchors_for_fpn_features", - "process_bbox_predictions", "compute_iou_loss", - "process_keypoints_predictions", "get_sigmas", "traverse_graph", ] diff --git a/luxonis_train/utils/boundingbox.py b/luxonis_train/utils/boundingbox.py index 8c5a9d40..c8f69802 100644 --- a/luxonis_train/utils/boundingbox.py +++ b/luxonis_train/utils/boundingbox.py @@ -2,7 +2,6 @@ from typing import Literal, TypeAlias import torch -from scipy.cluster.vq import kmeans from torch import Tensor from torchvision.ops import ( batched_nms, @@ -12,96 
+11,10 @@ generalized_box_iou, ) -from luxonis_train.enums import TaskType -from luxonis_train.loaders import BaseLoaderTorch - IoUType: TypeAlias = Literal["none", "giou", "diou", "ciou", "siou"] BBoxFormatType: TypeAlias = Literal["xyxy", "xywh", "cxcywh"] -def match_to_anchor( - targets: Tensor, - anchor: Tensor, - xy_shifts: Tensor, - scale_width: int, - scale_height: int, - n_keypoints: int, - anchor_threshold: float, - bias: float, - box_offset: int = 5, -) -> tuple[Tensor, Tensor]: - """Matches targets to anchors. - - 1. Scales the targets to the size of the feature map - 2. Matches the targets to the anchor, filtering out targets whose aspect - ratio is too far from the anchor's aspect ratio. - - @type targets: Tensor - @param targets: Targets in xyxy format - @type anchor: Tensor - @param anchor: Anchor boxes - @type xy_shifts: Tensor - @param xy_shifts: Shifts in x and y direction - @type scale_width: int - @param scale_width: Width of the feature map - @type scale_height: int - @param scale_height: Height of the feature map - @type n_keypoints: int - @param n_keypoints: Number of keypoints - @type anchor_threshold: float - @param anchor_threshold: Threshold for anchor filtering - @type bias: float - @param bias: Bias for anchor filtering - @type box_offset: int - @param box_offset: Offset for box. Defaults to 5. - - @rtype: tuple[Tensor, Tensor] - @return: Scaled targets and shifts. - """ - - # The boxes and keypoints need to be scaled to the size of the features - # First two indices are batch index and class label, - # last index is anchor index. Those are not scaled. - scale_length = 3 * n_keypoints + box_offset + 2 - scales = torch.ones(scale_length, device=targets.device) - - # Scale box and keypoint coordinates, but not visibility - for i in range(n_keypoints): - scales[box_offset + 1 + 3 * i] = scale_width - scales[box_offset + 2 + 3 * i] = scale_height - - scales[2 : box_offset + 1] = torch.tensor( - [scale_width, scale_height, scale_width, scale_height] - ) - - scaled_targets = targets * scales - - if targets.size(1) == 0: - return targets[0], torch.zeros(1, device=targets.device) - - wh_to_anchor_ratio = scaled_targets[:, :, 4:6] / anchor.unsqueeze(1) - ratio_mask = ( - torch.max(wh_to_anchor_ratio, 1.0 / wh_to_anchor_ratio).max(2)[0] - < anchor_threshold - ) - - filtered_targets = scaled_targets[ratio_mask] - - box_xy = filtered_targets[:, 2:4] - box_wh = torch.tensor([scale_width, scale_height]) - box_xy - - def decimal_part(x: Tensor) -> Tensor: - return x % 1.0 - - x, y = ((decimal_part(box_xy) < bias) & (box_xy > 1.0)).T - w, h = ((decimal_part(box_wh) < bias) & (box_wh > 1.0)).T - mask = torch.stack((torch.ones_like(x), x, y, w, h)) - final_targets = filtered_targets.repeat((len(xy_shifts), 1, 1))[mask] - - shifts = xy_shifts.unsqueeze(1).repeat((1, len(box_xy), 1))[mask] - return final_targets, shifts - - def dist2bbox( distance: Tensor, anchor_points: Tensor, @@ -411,123 +324,6 @@ def non_max_suppression( return output -def anchors_from_dataset( - loader: BaseLoaderTorch, - n_anchors: int = 9, - n_generations: int = 1000, - ratio_threshold: float = 4.0, -) -> tuple[Tensor, float]: - """Generates anchors based on bounding box annotations present in - provided data loader. It uses K-Means for initial proposals which - are then refined with genetic algorithm. - - @type loader: L{torch.utils.data.DataLoader} - @param loader: Data loader. - @type n_anchors: int - @param n_anchors: Number of anchors, this is normally n_heads * 3 - which generates 3 anchors per layer. 
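
(The core of the removed `match_to_anchor` is the aspect-ratio gate: a target survives only if neither of its sides is more than `anchor_threshold` times larger or smaller than the anchor's. A compact sketch of just that filter:)

import torch

anchor_threshold = 4.0
target_wh = torch.tensor([[10.0, 20.0], [64.0, 4.0]])
anchor_wh = torch.tensor([12.0, 16.0])

ratio = target_wh / anchor_wh
# the symmetric ratio is large when either side badly over- or under-shoots
worst = torch.max(ratio, 1.0 / ratio).max(dim=1).values
keep = worst < anchor_threshold
print(keep)  # tensor([ True, False])
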
Defaults to 9. - @type n_generations: int - @param n_generations: Number of iterations for anchor improvement - with genetic algorithm. Defaults to 1000. - @type ratio_threshold: float - @param ratio_threshold: Minimum threshold for ratio. Defaults to - 4.0. - @rtype: tuple[Tensor, float] - @return: Proposed anchors and the best possible recall. - """ - - widths: list[Tensor] = [] - for _, labels in loader: - for tensor, task_type in labels.values(): - if task_type == TaskType.BOUNDINGBOX: - curr_wh = tensor[:, 4:] - widths.append(curr_wh) - _, h, w = loader.input_shape - img_size = torch.tensor([w, h]) - wh = torch.vstack(widths) * img_size - - # filter out small objects (w or h < 2 pixels) - wh = wh[(wh >= 2).any(1)] - - try: - assert n_anchors <= len( - wh - ), "More requested anchors than number of bounding boxes." - std = wh.std(0) - proposed_anchors = kmeans(wh / std, n_anchors, iter=30) - proposed_anchors = torch.tensor(proposed_anchors[0]) * std - assert n_anchors == len( - proposed_anchors - ), "KMeans returned insufficient number of points" - except Exception: - print("Fallback to random anchor init") - proposed_anchors = ( - torch.sort(torch.rand(n_anchors * 2))[0].reshape(n_anchors, 2) - * img_size - ) - - proposed_anchors = proposed_anchors[ - torch.argsort(proposed_anchors.prod(1)) - ] # sort small to large - - def calc_best_anchor_ratio(anchors: Tensor, wh: Tensor) -> Tensor: - """Calculate how well most suitable anchor box matches each - target bbox.""" - symmetric_size_ratios = torch.min( - wh[:, None] / anchors[None], anchors[None] / wh[:, None] - ) - worst_side_size_ratio = symmetric_size_ratios.min(-1).values - best_anchor_ratio = worst_side_size_ratio.max(-1).values - return best_anchor_ratio - - def calc_best_possible_recall(anchors: Tensor, wh: Tensor) -> Tensor: - """Calculate best possible recall if every bbox is matched to an - appropriate anchor.""" - best_anchor_ratio = calc_best_anchor_ratio(anchors, wh) - best_possible_recall = ( - (best_anchor_ratio > 1 / ratio_threshold).float().mean() - ) - return best_possible_recall - - def anchor_fitness(anchors: Tensor, wh: Tensor) -> Tensor: - """Fitness function used for anchor evolve.""" - best_anchor_ratio = calc_best_anchor_ratio(anchors, wh) - return ( - best_anchor_ratio - * (best_anchor_ratio > 1 / ratio_threshold).float() - ).mean() - - # Genetic algorithm - best_fitness = anchor_fitness(proposed_anchors, wh) - anchor_shape = proposed_anchors.shape - mutation_probability = 0.9 - mutation_noise_mean = 1 - mutation_noise_std = 0.1 - for _ in range(n_generations): - anchor_mutation = torch.ones(anchor_shape) - anchor_mutation = ( - (torch.rand(anchor_shape) < mutation_probability) - * torch.randn(anchor_shape) - * mutation_noise_std - + mutation_noise_mean - ).clip(0.3, 3.0) - - mutated_anchors = (proposed_anchors.clone() * anchor_mutation).clip( - min=2.0 - ) - mutated_fitness = anchor_fitness(mutated_anchors, wh) - if mutated_fitness > best_fitness: - best_fitness = mutated_fitness - proposed_anchors = mutated_anchors.clone() - - proposed_anchors = proposed_anchors[ - torch.argsort(proposed_anchors.prod(1)) - ] # sort small to large - recall = calc_best_possible_recall(proposed_anchors, wh) - - return proposed_anchors, recall.item() - - def anchors_for_fpn_features( features: list[Tensor], strides: Tensor, @@ -605,26 +401,6 @@ def anchors_for_fpn_features( ) -def process_bbox_predictions( - bbox: Tensor, anchor: Tensor -) -> tuple[Tensor, Tensor, Tensor]: - """Transforms bbox predictions to correct format. 
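
(The removed generator whitens box sizes, clusters them with SciPy's k-means, and scores proposals by best possible recall before the genetic refinement. A trimmed sketch of those two steps on toy data, using the same `scipy.cluster.vq.kmeans` call:)

import torch
from scipy.cluster.vq import kmeans

wh = torch.rand(200, 2) * 100 + 2            # toy box sizes in pixels
std = wh.std(0)
centroids, _ = kmeans((wh / std).numpy(), 9, iter=30)
proposed_anchors = torch.tensor(centroids) * std

# best possible recall: fraction of boxes with a "close enough" anchor
ratios = torch.min(
    wh[:, None] / proposed_anchors[None], proposed_anchors[None] / wh[:, None]
)
best_ratio = ratios.min(-1).values.max(-1).values
recall = (best_ratio > 1 / 4.0).float().mean()
print(proposed_anchors.shape, recall)
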
- - @type bbox: Tensor - @param bbox: Bbox predictions - @type anchor: Tensor - @param anchor: Anchor boxes - @rtype: tuple[Tensor, Tensor, Tensor] - @return: xy and wh predictions and tail. The tail is anything after - xywh. - """ - out_bbox = bbox.sigmoid() - out_bbox_xy = out_bbox[..., 0:2] * 2.0 - 0.5 - out_bbox_wh = (out_bbox[..., 2:4] * 2) ** 2 * anchor - out_bbox_tail = out_bbox[..., 4:] - return out_bbox_xy, out_bbox_wh, out_bbox_tail - - def compute_iou_loss( pred_bboxes: Tensor, target_bboxes: Tensor, diff --git a/luxonis_train/utils/dataset_metadata.py b/luxonis_train/utils/dataset_metadata.py index 22b81618..3a9cecdf 100644 --- a/luxonis_train/utils/dataset_metadata.py +++ b/luxonis_train/utils/dataset_metadata.py @@ -1,5 +1,4 @@ from luxonis_train.loaders import BaseLoaderTorch -from luxonis_train.utils import anchors_from_dataset class DatasetMetadata: @@ -112,31 +111,6 @@ def classes(self, task: str | None = None) -> list[str]: ) return class_names - def autogenerate_anchors( - self, n_heads: int - ) -> tuple[list[list[float]], float]: - """Automatically generates anchors for the provided dataset. - - @type n_heads: int - @param n_heads: Number of heads to generate anchors for. - @rtype: tuple[list[list[float]], float] - @return: List of anchors in [-1,6] format and recall of the - anchors. - @raises RuntimeError: If the dataset loader was not provided - during initialization. - """ - if self._loader is None: - raise RuntimeError( - "Cannot generate anchors without a dataset loader. " - "Please provide a dataset loader to the constructor " - "or call `set_loader` method." - ) - - proposed_anchors, recall = anchors_from_dataset( - self._loader, n_anchors=n_heads * 3 - ) - return proposed_anchors.reshape(-1, 6).tolist(), recall - @classmethod def from_loader(cls, loader: BaseLoaderTorch) -> "DatasetMetadata": """Creates a L{DatasetMetadata} object from a L{LuxonisDataset}. diff --git a/luxonis_train/utils/keypoints.py b/luxonis_train/utils/keypoints.py index 9fbc741d..8073c399 100644 --- a/luxonis_train/utils/keypoints.py +++ b/luxonis_train/utils/keypoints.py @@ -6,24 +6,6 @@ logger = logging.getLogger(__name__) -def process_keypoints_predictions( - keypoints: Tensor, -) -> tuple[Tensor, Tensor, Tensor]: - """Extracts x, y and visibility from keypoints predictions. - - @type keypoints: Tensor - @param keypoints: Keypoints predictions. The last dimension must be divisible by 3 - and is expected to be in format [x1, y1, v1, x2, y2, v2, ...]. - - @rtype: tuple[Tensor, Tensor, Tensor] - @return: x, y and visibility tensors. 
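
(Taken together, the two removed helpers decode raw YOLOv7-style outputs: sigmoid box offsets (`* 2 - 0.5` for the center, `(* 2) ** 2 * anchor` for width and height) plus strided slicing of the keypoint triplets. A joint sketch on a dummy prediction:)

import torch

pred = torch.randn(7, 5 + 6)        # cx, cy, w, h, obj + 2 keypoints
anchor = torch.tensor([12.0, 16.0])

box = pred[..., :5].sigmoid()
box_xy = box[..., 0:2] * 2.0 - 0.5            # center offsets in (-0.5, 1.5)
box_wh = (box[..., 2:4] * 2) ** 2 * anchor    # wh in (0, 4) * anchor

kpts = pred[..., 5:]
kpt_x, kpt_y, kpt_v = kpts[..., ::3], kpts[..., 1::3], kpts[..., 2::3]
print(box_xy.shape, box_wh.shape, kpt_x.shape)  # (7, 2) three times
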
- """ - x = keypoints[..., ::3] - y = keypoints[..., 1::3] - visibility = keypoints[..., 2::3] - return x, y, visibility - - def get_sigmas( sigmas: list[float] | None, n_keypoints: int, diff --git a/tests/configs/archive_config.yaml b/tests/configs/archive_config.yaml index 71589f4d..73766823 100644 --- a/tests/configs/archive_config.yaml +++ b/tests/configs/archive_config.yaml @@ -12,10 +12,6 @@ model: inputs: - EfficientRep - - name: ImplicitKeypointBBoxHead - inputs: - - EfficientRep - - name: SegmentationHead inputs: - EfficientRep diff --git a/tests/configs/parking_lot_config.yaml b/tests/configs/parking_lot_config.yaml index bf0b9da3..78711178 100644 --- a/tests/configs/parking_lot_config.yaml +++ b/tests/configs/parking_lot_config.yaml @@ -16,14 +16,6 @@ model: inputs: - neck - - name: ImplicitKeypointBBoxHead - alias: car-detection-head - inputs: - - neck - task: - keypoints: car-keypoints - boundingbox: car-boundingbox - - name: EfficientKeypointBBoxHead alias: motorbike-detection-head task: @@ -65,14 +57,10 @@ model: attached_to: vehicle-type-segmentation-head - name: CrossEntropyLoss attached_to: color-segmentation-head - - name: ImplicitKeypointBBoxLoss - attached_to: car-detection-head - name: EfficientKeypointBBoxLoss attached_to: motorbike-detection-head metrics: - - name: ObjectKeypointSimilarity - attached_to: car-detection-head - name: MeanAveragePrecisionKeypoints attached_to: motorbike-detection-head - name: MeanAveragePrecision @@ -88,16 +76,6 @@ model: attached_to: brand-segmentation-head visualizers: - - name: MultiVisualizer - alias: multi-visualizer-car - attached_to: car-detection-head - params: - visualizers: - - name: KeypointVisualizer - params: - nonvisible_color: blue - - name: BBoxVisualizer - - name: MultiVisualizer alias: multi-visualizer-motorbike attached_to: motorbike-detection-head diff --git a/tests/integration/parking_lot.json b/tests/integration/parking_lot.json index 28ca0a61..c8842c1f 100644 --- a/tests/integration/parking_lot.json +++ b/tests/integration/parking_lot.json @@ -90,16 +90,6 @@ ], "layout": "NCHW" }, - { - "name": "car-detection-head/boxes_and_keypoints/0", - "dtype": "float32", - "shape": [ - 1, - 5040, - 24 - ], - "layout": "NCD" - }, { "name": "color-segmentation-head/color-segmentation/0", "dtype": "float32", @@ -179,25 +169,6 @@ "bbox-head/boundingbox/2" ] }, - { - "name": "car-detection-head", - "parser": "YoloDetectionNetwork", - "metadata": { - "postprocessor_path": null, - "classes": [ - "car" - ], - "n_classes": 1, - "iou_threshold": 0.45, - "conf_threshold": 0.25, - "max_det": 300, - "subtype": "yolov7", - "n_keypoints": 6 - }, - "outputs": [ - "car-detection-head/boxes_and_keypoints/0" - ] - }, { "name": "motorbike-detection-head", "parser": "YoloDetectionNetwork", diff --git a/tests/integration/test_detection.py b/tests/integration/test_detection.py index c88851ba..45e83f0a 100644 --- a/tests/integration/test_detection.py +++ b/tests/integration/test_detection.py @@ -26,14 +26,6 @@ def get_opts_backbone(backbone: str) -> dict[str, Any]: }, "inputs": [backbone], }, - { - "name": "ImplicitKeypointBBoxHead", - "task": { - "keypoints": "car-keypoints", - "boundingbox": "car-boundingbox", - }, - "inputs": [backbone], - }, ], "losses": [ { @@ -45,10 +37,6 @@ def get_opts_backbone(backbone: str) -> dict[str, Any]: "attached_to": "EfficientKeypointBBoxHead", "params": {"area_factor": 0.5}, }, - { - "name": "ImplicitKeypointBBoxLoss", - "attached_to": "ImplicitKeypointBBoxHead", - }, ], "metrics": [ { @@ -60,11 +48,6 @@ def 
get_opts_backbone(backbone: str) -> dict[str, Any]: "alias": "EfficientKeypointBBoxHead-MaP", "attached_to": "EfficientKeypointBBoxHead", }, - { - "name": "MeanAveragePrecisionKeypoints", - "alias": "ImplicitKeypointBBoxHead-MaP", - "attached_to": "ImplicitKeypointBBoxHead", - }, ], } } diff --git a/tests/integration/test_simple.py b/tests/integration/test_simple.py index 7b2f0f91..3d489c4a 100644 --- a/tests/integration/test_simple.py +++ b/tests/integration/test_simple.py @@ -115,7 +115,6 @@ def test_custom_tasks( ), "Config JSON not found in the archive." generated_config = json.loads(extracted_cfg.read().decode()) - del generated_config["model"]["heads"][1]["metadata"]["anchors"] assert generated_config == correct_archive_config diff --git a/tests/unittests/test_utils/test_boxutils.py b/tests/unittests/test_utils/test_boxutils.py index 8df1ab74..6056cf8e 100644 --- a/tests/unittests/test_utils/test_boxutils.py +++ b/tests/unittests/test_utils/test_boxutils.py @@ -10,7 +10,6 @@ bbox_iou, compute_iou_loss, dist2bbox, - process_bbox_predictions, ) @@ -105,21 +104,6 @@ def test_compute_iou_loss(): assert 0 <= iou.min() and iou.max() <= 1 -def test_process_bbox_predictions(): - bbox = generate_random_bboxes(10, 64, 64, "xywh") - data = torch.rand(10, 4) - prediction = torch.concat([bbox, data], dim=-1) - anchor = torch.rand(10, 2) - - out_bbox_xy, out_bbox_wh, out_bbox_tail = process_bbox_predictions( - prediction, anchor - ) - - assert out_bbox_xy.shape == (10, 2) - assert out_bbox_wh.shape == (10, 2) - assert out_bbox_tail.shape == (10, 4) - - def test_anchors_for_fpn_features(): features = [torch.rand(1, 256, 14, 14), torch.rand(1, 256, 28, 28)] strides = torch.tensor([8, 16]) diff --git a/tests/unittests/test_utils/test_dataset_metadata.py b/tests/unittests/test_utils/test_dataset_metadata.py index 8dba11a8..daf01725 100644 --- a/tests/unittests/test_utils/test_dataset_metadata.py +++ b/tests/unittests/test_utils/test_dataset_metadata.py @@ -45,9 +45,3 @@ def test_class_names(metadata): metadata._classes["segmentation"] = ["car", "person", "tree"] with pytest.raises(RuntimeError): metadata.classes() - - -def test_no_loader(): - metadata = DatasetMetadata() - with pytest.raises(RuntimeError): - metadata.autogenerate_anchors(3) diff --git a/tests/unittests/test_utils/test_keypoints.py b/tests/unittests/test_utils/test_keypoints.py index 3d20dae6..5763386f 100644 --- a/tests/unittests/test_utils/test_keypoints.py +++ b/tests/unittests/test_utils/test_keypoints.py @@ -1,10 +1,6 @@ import pytest -import torch -from luxonis_train.utils.keypoints import ( - get_sigmas, - process_keypoints_predictions, -) +from luxonis_train.utils.keypoints import get_sigmas def test_get_sigmas(): @@ -14,11 +10,3 @@ def test_get_sigmas(): get_sigmas(sigmas, 2) assert len(get_sigmas(None, 17)) == 17 assert len(get_sigmas(None, 5)) == 5 - - -def test_process_keypoints_predictions(): - keypoints = torch.tensor([[0.1, 0.2, 1.0, 0.4, 0.5, 0.0]]) - x, y, visibility = process_keypoints_predictions(keypoints) - pytest.approx(x[0].tolist(), [0.1, 0.4]) - pytest.approx(y[0].tolist(), [0.2, 0.5]) - pytest.approx(visibility[0].tolist(), [1.0, 0.0])
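
With the implicit head and its loss removed, the anchor-free `EfficientKeypointBBoxHead` / `EfficientKeypointBBoxLoss` pair is the remaining keypoint-detection path. A minimal override in the style of `get_opts_backbone` above (a hypothetical helper; the keys mirror the surviving entries in `tests/integration/test_detection.py`):

from typing import Any


def get_keypoint_opts(backbone: str) -> dict[str, Any]:
    return {
        "model": {
            "nodes": [
                {"name": backbone},
                {
                    "name": "EfficientKeypointBBoxHead",
                    "inputs": [backbone],
                },
            ],
            "losses": [
                {
                    "name": "EfficientKeypointBBoxLoss",
                    "attached_to": "EfficientKeypointBBoxHead",
                },
            ],
            "metrics": [
                {
                    "name": "MeanAveragePrecisionKeypoints",
                    "attached_to": "EfficientKeypointBBoxHead",
                },
            ],
        }
    }
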