diff --git a/luxonis_train/attached_modules/losses/obb_detection_loss.py b/luxonis_train/attached_modules/losses/obb_detection_loss.py
index baa80d3d..c2b08b88 100644
--- a/luxonis_train/attached_modules/losses/obb_detection_loss.py
+++ b/luxonis_train/attached_modules/losses/obb_detection_loss.py
@@ -15,12 +15,19 @@
     xywh2xyxy,
     xyxyxyxy2xywhr,
 )
-from luxonis_train.utils.types import IncompatibleException, Labels, LabelType, Packet
+from luxonis_train.utils.types import (
+    IncompatibleException,
+    Labels,
+    LabelType,
+    Packet,
+)
 
 from .base_loss import BaseLoss
 
 
-class OBBDetectionLoss(BaseLoss[Tensor, Tensor, Tensor, Tensor, Tensor, Tensor]):
+class OBBDetectionLoss(
+    BaseLoss[Tensor, Tensor, Tensor, Tensor, Tensor, Tensor]
+):
     node: EfficientOBBoxHead
     supported_labels = [LabelType.OBOUNDINGBOX]
 
@@ -127,7 +134,9 @@ def prepare(
             self.grid_cell_offset,
             multiply_with_stride=True,
         )
-        self.anchor_points_strided = self.anchor_points / self.stride_tensor
+        self.anchor_points_strided = (
+            self.anchor_points / self.stride_tensor
+        )
 
         target = self._preprocess_target(
             target, batch_size
@@ -144,7 +153,9 @@ def prepare(
         )
         pred_bboxes = torch.cat(
             (
-                dist2rbbox(pred_distri_tensor, pred_angles, self.anchor_points_strided),
+                dist2rbbox(
+                    pred_distri_tensor, pred_angles, self.anchor_points_strided
+                ),
                 pred_angles,
             ),
             dim=-1,
@@ -198,10 +209,14 @@ def forward(
         assigned_scores: Tensor,
         mask_positive: Tensor,
     ):
-        one_hot_label = F.one_hot(assigned_labels.long(), self.n_classes + 1)[..., :-1]
+        one_hot_label = F.one_hot(assigned_labels.long(), self.n_classes + 1)[
+            ..., :-1
+        ]
 
         # CLS loss
-        loss_cls = self.varifocal_loss(pred_scores, assigned_scores, one_hot_label)
+        loss_cls = self.varifocal_loss(
+            pred_scores, assigned_scores, one_hot_label
+        )
         # loss_cls = self.bce(pred_scores, assigned_scores)
         if assigned_scores.sum() > 1:
             loss_cls /= assigned_scores.sum()
@@ -234,8 +249,8 @@ def forward(
         return loss, sub_losses
 
     def _preprocess_target(self, target: Tensor, batch_size: int):
-        """Preprocess target in shape [batch_size, N, 6] where N is maximum number of
-        instances in one image."""
+        """Preprocess target in shape [batch_size, N, 6] where N is
+        maximum number of instances in one image."""
         idx_cls = target[:, :2]
         xyxyxyxy = target[:, 2:]
         cxcywhr = xyxyxyxy2xywhr(xyxyxyxy)
@@ -244,7 +259,8 @@ def _preprocess_target(self, target: Tensor, batch_size: int):
         else:
             target = torch.cat([idx_cls, torch.tensor(cxcywhr)], dim=-1)
         sample_ids, counts = cast(
-            tuple[Tensor, Tensor], torch.unique(target[:, 0].int(), return_counts=True)
+            tuple[Tensor, Tensor],
+            torch.unique(target[:, 0].int(), return_counts=True),
         )
         c_max = int(counts.max()) if counts.numel() > 0 else 0
         out_target = torch.zeros(batch_size, c_max, 6, device=target.device)
@@ -283,7 +299,8 @@ def forward(
         self, pred_score: Tensor, target_score: Tensor, label: Tensor
     ) -> Tensor:
         weight = (
-            self.alpha * pred_score.pow(self.gamma) * (1 - label) + target_score * label
+            self.alpha * pred_score.pow(self.gamma) * (1 - label)
+            + target_score * label
         )
         ce_loss = F.binary_cross_entropy(
             pred_score.float(), target_score.float(), reduction="none"
         )
@@ -296,8 +313,8 @@ class DFLoss(nn.Module):
     """Criterion class for computing DFL losses during training.
 
     @type reg_max: int
-    @param reg_max: Number of bins for predicting the distributions of bounding box
-        coordinates.
+    @param reg_max: Number of bins for predicting the distributions of
+        bounding box coordinates.
""" def __init__(self, reg_max=16) -> None: @@ -318,9 +335,13 @@ def __call__(self, pred_dist, target): wl = tr - target # weight left wr = 1 - wl # weight right return ( - F.cross_entropy(pred_dist, tl.view(-1), reduction="none").view(tl.shape) + F.cross_entropy(pred_dist, tl.view(-1), reduction="none").view( + tl.shape + ) * wl - + F.cross_entropy(pred_dist, tr.view(-1), reduction="none").view(tl.shape) + + F.cross_entropy(pred_dist, tr.view(-1), reduction="none").view( + tl.shape + ) * wr ).mean(-1, keepdim=True) @@ -329,13 +350,13 @@ class RotatedBboxLoss(nn.Module): """Criterion class for computing training losses during training. @type reg_max: int - @param reg_max: Number of bins for predicting the distributions of bounding box - coordinates. + @param reg_max: Number of bins for predicting the distributions of + bounding box coordinates. """ def __init__(self, reg_max): - """Initialize the BboxLoss module with regularization maximum and DFL - settings.""" + """Initialize the BboxLoss module with regularization maximum + and DFL settings.""" super().__init__() self.dfl_loss = DFLoss(reg_max) if reg_max > 1 else None diff --git a/luxonis_train/attached_modules/metrics/mean_average_precision_obb.py b/luxonis_train/attached_modules/metrics/mean_average_precision_obb.py index ea9dfac4..5e98bc71 100644 --- a/luxonis_train/attached_modules/metrics/mean_average_precision_obb.py +++ b/luxonis_train/attached_modules/metrics/mean_average_precision_obb.py @@ -9,8 +9,9 @@ class MeanAveragePrecisionOBB(BaseMetric): - """Compute the Mean-Average-Precision (mAP) and Mean-Average-Recall (mAR) for object - detection predictions using oriented bounding boxes. + """Compute the Mean-Average-Precision (mAP) and Mean-Average-Recall + (mAR) for object detection predictions using oriented bounding + boxes. Partially adapted from U{YOLOv8 OBBMetrics }. @@ -39,15 +40,16 @@ def update( outputs: list[Tensor], # preds labels: list[Tensor], # batch ): - """Update metrics without erasing stats from the previous batch, i.e. the - metrics are calculated cumulatively. + """Update metrics without erasing stats from the previous batch, + i.e. the metrics are calculated cumulatively. 
diff --git a/luxonis_train/attached_modules/metrics/mean_average_precision_obb.py b/luxonis_train/attached_modules/metrics/mean_average_precision_obb.py
index ea9dfac4..5e98bc71 100644
--- a/luxonis_train/attached_modules/metrics/mean_average_precision_obb.py
+++ b/luxonis_train/attached_modules/metrics/mean_average_precision_obb.py
@@ -9,8 +9,9 @@
 
 
 class MeanAveragePrecisionOBB(BaseMetric):
-    """Compute the Mean-Average-Precision (mAP) and Mean-Average-Recall (mAR) for object
-    detection predictions using oriented bounding boxes.
+    """Compute the Mean-Average-Precision (mAP) and Mean-Average-Recall
+    (mAR) for object detection predictions using oriented bounding
+    boxes.
 
     Partially adapted from U{YOLOv8 OBBMetrics }.
 
@@ -39,15 +40,16 @@ def update(
         outputs: list[Tensor],  # preds
         labels: list[Tensor],  # batch
     ):
-        """Update metrics without erasing stats from the previous batch, i.e. the
-        metrics are calculated cumulatively.
+        """Update metrics without erasing stats from the previous batch,
+        i.e. the metrics are calculated cumulatively.
 
         @type outputs: list[Tensor]
-        @param outputs: Network predictions [x1, y1, x2, y2, conf, cls_idx, r]
-            unnormalized (not in [0, 1] range) [Tensor(n_bboxes, 7)]
+        @param outputs: Network predictions [x1, y1, x2, y2, conf,
+            cls_idx, r] unnormalized (not in [0, 1] range)
+            [Tensor(n_bboxes, 7)]
         @type labels: list[Tensor]
-        @param labels: [cls_idx, x1, y1, x2, y2, r] unnormalized (not in [0, 1] range)
-            [Tensor(n_bboxes, 6)]
+        @param labels: [cls_idx, x1, y1, x2, y2, r] unnormalized (not in
+            [0, 1] range) [Tensor(n_bboxes, 6)]
         """
         for si, output in enumerate(outputs):
             self.stats["conf"].append(output[:, 4])
@@ -97,9 +99,11 @@ def prepare(
 
         return output_nms, output_labels
 
-    def _preprocess_target(self, target: Tensor, batch_size: int, img_size) -> Tensor:
-        """Preprocess target in shape [batch_size, N, 6] where N is maximum number of
-        instances in one image."""
+    def _preprocess_target(
+        self, target: Tensor, batch_size: int, img_size
+    ) -> Tensor:
+        """Preprocess target in shape [batch_size, N, 6] where N is
+        maximum number of instances in one image."""
         cls_idx = target[:, 1].unsqueeze(-1)
         xyxyxyxy = target[:, 2:]
         xyxyxyxy[:, 0::2] *= img_size[1]  # scale x
@@ -120,7 +124,8 @@ def reset(self) -> None:
     def compute(
         self,
     ) -> tuple[Tensor, dict[str, Tensor]]:
-        """Process predicted results for object detection and update metrics."""
+        """Process predicted results for object detection and update
+        metrics."""
         results = self._process(
             torch.cat(self.stats["tp"]).cpu().numpy(),
             torch.cat(self.stats["conf"]).cpu().numpy(),
@@ -143,8 +148,9 @@ def compute(
     def _process_batch(
         self, detections: Tensor, gt_bboxes: Tensor, gt_cls: Tensor
     ) -> Tensor:
-        """Perform computation of the correct prediction matrix for a batch of # "fp":
-        torch.from_numpy(results[1]), detections and ground truth bounding boxes.
+        """Perform computation of the correct prediction matrix for a
+        batch of detections and ground truth bounding boxes.
 
         @type detections: Tensor
         @param detections: A tensor of shape (N, 7) representing the detected bounding boxes and associated
@@ -182,23 +188,26 @@ def match_predictions(
         iou: Tensor,
         use_scipy: bool = False,
     ) -> Tensor:
-        """Matches predictions to ground truth objects (pred_classes, true_classes)
-        using IoU.
+        """Matches predictions to ground truth objects (pred_classes,
+        true_classes) using IoU.
 
         @type pred_classes: Tensor
         @param pred_classes: Predicted class indices of shape(N,).
         @type true_classes: Tensor
        @param true_classes: Target class indices of shape(M,).
         @type iou: Tensor
-        @param iou: An NxM tensor containing the pairwise IoU values for predictions and
-            ground of truth
+        @param iou: An NxM tensor containing the pairwise IoU values for
+            predictions and ground truth
         @type use_scipy: bool
-        @param use_scipy: Whether to use scipy for matching (more precise).
+        @param use_scipy: Whether to use scipy for matching (more
+            precise).
         @rtype: Tensor
         @return: Correct tensor of shape(N,10) for 10 IoU thresholds.
""" # Dx10 matrix, where D - detections, 10 - IoU thresholds - correct = np.zeros((pred_classes.shape[0], self.iouv.shape[0])).astype(bool) + correct = np.zeros((pred_classes.shape[0], self.iouv.shape[0])).astype( + bool + ) # LxD matrix where L - labels (rows), D - detections (columns) correct_class = true_classes[:, None] == pred_classes iou = iou * correct_class # zero out the wrong classes @@ -210,8 +219,10 @@ def match_predictions( cost_matrix = iou * (iou >= threshold) if cost_matrix.any(): - labels_idx, detections_idx = scipy.optimize.linear_sum_assignment( - cost_matrix, maximize=True + labels_idx, detections_idx = ( + scipy.optimize.linear_sum_assignment( + cost_matrix, maximize=True + ) ) valid = cost_matrix[labels_idx, detections_idx] > 0 if valid.any(): @@ -234,10 +245,13 @@ def match_predictions( np.unique(matches[:, 0], return_index=True)[1] ] correct[matches[:, 1].astype(int), i] = True - return torch.tensor(correct, dtype=torch.bool, device=pred_classes.device) + return torch.tensor( + correct, dtype=torch.bool, device=pred_classes.device + ) def _update_metrics(self, results: tuple[np.ndarray, ...]): - """Updates the evaluation metrics of the model with a new set of results. + """Updates the evaluation metrics of the model with a new set of + results. @type results: tuple[np.ndarray, ...] @param results: A tuple containing the following evaluation metrics: @@ -277,7 +291,8 @@ def _process( pred_cls: np.ndarray, target_cls: np.ndarray, ) -> tuple[np.ndarray, ...]: - """Process predicted results for object detection and update metrics.""" + """Process predicted results for object detection and update + metrics.""" results = MeanAveragePrecisionOBB.ap_per_class( tp, conf, @@ -303,7 +318,8 @@ def ap_per_class( eps: float = 1e-16, # prefix="", ) -> tuple[np.ndarray, ...]: - """Computes the average precision per class for object detection evaluation. + """Computes the average precision per class for object detection + evaluation. Args: tp (np.ndarray): Binary array indicating whether the detection is correct (True) or not (False). @@ -414,7 +430,8 @@ def ap_per_class( def compute_ap( recall: list[float], precision: list[float] ) -> tuple[float, np.ndarray, np.ndarray]: - """Compute the average precision (AP) given the recall and precision curves. + """Compute the average precision (AP) given the recall and + precision curves. Args: recall (list): The recall curve. @@ -441,14 +458,18 @@ def compute_ap( i = np.where(mrec[1:] != mrec[:-1])[ 0 ] # points where x-axis (recall) changes - ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1]) # area under curve + ap = np.sum( + (mrec[i + 1] - mrec[i]) * mpre[i + 1] + ) # area under curve return ap, mpre, mrec @staticmethod def smooth(y: np.ndarray, f: float = 0.05) -> np.ndarray: """Box filter of fraction f.""" - nf = round(len(y) * f * 2) // 2 + 1 # number of filter elements (must be odd) + nf = ( + round(len(y) * f * 2) // 2 + 1 + ) # number of filter elements (must be odd) p = np.ones(nf // 2) # ones padding yp = np.concatenate((p * y[0], y, p * y[-1]), 0) # y padded return np.convolve(yp, np.ones(nf) / nf, mode="valid") # y-smoothed diff --git a/luxonis_train/attached_modules/visualizers/obbox_visualizer.py b/luxonis_train/attached_modules/visualizers/obbox_visualizer.py index 1da141c5..94557a2a 100644 --- a/luxonis_train/attached_modules/visualizers/obbox_visualizer.py +++ b/luxonis_train/attached_modules/visualizers/obbox_visualizer.py @@ -25,26 +25,33 @@ def __init__( ): """Visualizer for oriented bounding box predictions. 
diff --git a/luxonis_train/attached_modules/visualizers/obbox_visualizer.py b/luxonis_train/attached_modules/visualizers/obbox_visualizer.py
index 1da141c5..94557a2a 100644
--- a/luxonis_train/attached_modules/visualizers/obbox_visualizer.py
+++ b/luxonis_train/attached_modules/visualizers/obbox_visualizer.py
@@ -25,26 +25,33 @@ def __init__(
     ):
         """Visualizer for oriented bounding box predictions.
 
-        Creates a visualization of the oriented bounding box predictions and labels.
+        Creates a visualization of the oriented bounding box predictions
+        and labels.
 
         @type labels: dict[int, str] | list[str] | None
-        @param labels: Either a dictionary mapping class indices to names, or a list of
-            names. If list is provided, the label mapping is done by index. By default,
-            no labels are drawn.
+        @param labels: Either a dictionary mapping class indices to
+            names, or a list of names. If list is provided, the label
+            mapping is done by index. By default, no labels are drawn.
         @type draw_labels: bool
-        @param draw_labels: Whether or not to draw labels. Defaults to C{True}.
+        @param draw_labels: Whether or not to draw labels. Defaults to
+            C{True}.
         @type colors: dict[int, Color] | list[Color] | None
-        @param colors: Either a dictionary mapping class indices to colors, or a list of
-            colors. If list is provided, the color mapping is done by index. By default,
-            random colors are used.
+        @param colors: Either a dictionary mapping class indices to
+            colors, or a list of colors. If list is provided, the color
+            mapping is done by index. By default, random colors are
+            used.
         @type fill: bool
-        @param fill: Whether or not to fill the bounding boxes. Defaults to C{False}.
+        @param fill: Whether or not to fill the bounding boxes. Defaults
+            to C{False}.
         @type width: int | None
-        @param width: The width of the bounding box lines. Defaults to C{1}.
+        @param width: The width of the bounding box lines. Defaults to
+            C{1}.
         @type font: str | None
-        @param font: A filename containing a TrueType font. Defaults to C{None}.
+        @param font: A filename containing a TrueType font. Defaults to
+            C{None}.
         @type font_size: int | None
-        @param font_size: The font size to use for the labels. Defaults to C{None}.
+        @param font_size: The font size to use for the labels. Defaults
+            to C{None}.
         """
         super().__init__(**kwargs)
         if isinstance(labels, list):
@@ -55,9 +62,13 @@ def __init__(
            }
 
         if colors is None:
-            colors = {label: get_color(i) for i, label in self.bbox_labels.items()}
+            colors = {
+                label: get_color(i) for i, label in self.bbox_labels.items()
+            }
         if isinstance(colors, list):
-            colors = {self.bbox_labels[i]: color for i, color in enumerate(colors)}
+            colors = {
+                self.bbox_labels[i]: color for i, color in enumerate(colors)
+            }
         self.colors = colors
         self.fill = fill
         self.width = width
@@ -159,16 +170,18 @@ def forward(
         predictions: list[Tensor],
         targets: Tensor,
     ) -> tuple[Tensor, Tensor]:
-        """Creates a visualization of the oriented bounding box predictions and labels.
+        """Creates a visualization of the oriented bounding box
+        predictions and labels.
 
         @type label_canvas: Tensor
         @param label_canvas: The canvas containing the labels.
         @type prediction_canvas: Tensor
         @param prediction_canvas: The canvas containing the predictions.
         @type predictions: Tensor
-        @param predictions: The predicted bounding boxes. The shape should be [N, 7],
-            where N is the number of bounding boxes and the last dimension is [xc, yc,
-            w, h, conf, class]. # NOTE: check it
+        @param predictions: The predicted bounding boxes. The shape
+            should be [N, 7], where N is the number of bounding boxes
+            and the last dimension is [xc, yc, w, h, conf, class]. #
+            NOTE: check it
         @type targets: Tensor
         @param targets: The target bounding boxes.
""" diff --git a/luxonis_train/models/predefined_models/detection_model_obb.py b/luxonis_train/models/predefined_models/detection_model_obb.py index 9ba44e02..dd02901f 100644 --- a/luxonis_train/models/predefined_models/detection_model_obb.py +++ b/luxonis_train/models/predefined_models/detection_model_obb.py @@ -47,7 +47,9 @@ def nodes(self) -> list[ModelNodeConfig]: name="EfficientOBBoxHead", alias="detection_obb_head", freezing=self.head_params.pop("freezing", {}), - inputs=["detection_neck"] if self.use_neck else ["detection_backbone"], + inputs=["detection_neck"] + if self.use_neck + else ["detection_backbone"], params=self.head_params, task=self.task_name, ) diff --git a/luxonis_train/nodes/blocks/blocks.py b/luxonis_train/nodes/blocks/blocks.py index 8e035f16..b0193830 100644 --- a/luxonis_train/nodes/blocks/blocks.py +++ b/luxonis_train/nodes/blocks/blocks.py @@ -82,16 +82,16 @@ def _initialize_weights_and_biases(self, prior_prob: float) -> None: class EfficientOBBDecoupledBlock(EfficientDecoupledBlock): def __init__(self, n_classes: int, in_channels: int, reg_max: int = 16): - """Efficient Decoupled block used for angle, class and regression predictions in - OBB (oriented bounding box) tasks. + """Efficient Decoupled block used for angle, class and + regression predictions in OBB (oriented bounding box) tasks. @type n_classes: int @param n_classes: Number of classes. @type in_channels: int @param in_channels: Number of input channels. @type reg_max: int - @param reg_max: Number of bins for predicting the distributions of bounding box - coordinates. + @param reg_max: Number of bins for predicting the distributions + of bounding box coordinates. """ super().__init__(n_classes, in_channels) @@ -104,7 +104,11 @@ def __init__(self, n_classes: int, in_channels: int, reg_max: int = 16): padding=1, activation=nn.SiLU(), ), - nn.Conv2d(in_channels=in_channels, out_channels=4 * reg_max, kernel_size=1), + nn.Conv2d( + in_channels=in_channels, + out_channels=4 * reg_max, + kernel_size=1, + ), ) self.angle_branch = nn.Sequential( diff --git a/luxonis_train/nodes/heads/efficient_obbox_head.py b/luxonis_train/nodes/heads/efficient_obbox_head.py index 436c8854..14bb587d 100644 --- a/luxonis_train/nodes/heads/efficient_obbox_head.py +++ b/luxonis_train/nodes/heads/efficient_obbox_head.py @@ -82,7 +82,8 @@ def forward( return features, cls_score_list, reg_distri_list, angles_list def wrap( - self, output: tuple[list[Tensor], list[Tensor], list[Tensor], list[Tensor]] + self, + output: tuple[list[Tensor], list[Tensor], list[Tensor], list[Tensor]], ) -> Packet[Tensor]: features, cls_score_list, reg_distri_list, angles_list = output @@ -100,10 +101,15 @@ def wrap( [angles_list[i].flatten(2) for i in range(len(angles_list))], dim=2 ).permute(0, 2, 1) cls_tensor = torch.cat( - [cls_score_list[i].flatten(2) for i in range(len(cls_score_list))], dim=2 + [cls_score_list[i].flatten(2) for i in range(len(cls_score_list))], + dim=2, ).permute(0, 2, 1) reg_tensor = torch.cat( - [reg_distri_list[i].flatten(2) for i in range(len(reg_distri_list))], dim=2 + [ + reg_distri_list[i].flatten(2) + for i in range(len(reg_distri_list)) + ], + dim=2, ).permute(0, 2, 1) if self.training: @@ -129,7 +135,8 @@ def wrap( def _process_to_bbox( self, output: tuple[list[Tensor], Tensor, Tensor, Tensor] ) -> list[Tensor]: - """Performs post-processing of the output and returns bboxs after NMS.""" + """Performs post-processing of the output and returns bboxs + after NMS.""" features, cls_score_tensor, reg_dist_tensor, angles_tensor 
         _, anchor_points, _, stride_tensor = anchors_for_fpn_features(
             features,
diff --git a/luxonis_train/utils/__init__.py b/luxonis_train/utils/__init__.py
index c47d3d33..2235e11e 100644
--- a/luxonis_train/utils/__init__.py
+++ b/luxonis_train/utils/__init__.py
@@ -1,6 +1,7 @@
 from .boundingbox import (
     anchors_for_fpn_features,
     anchors_from_dataset,
+    batch_probiou,
     bbox2dist,
     bbox_iou,
     compute_iou_loss,
@@ -8,6 +9,8 @@
     match_to_anchor,
     non_max_suppression,
     process_bbox_predictions,
+    xywhr2xyxyxyxy,
+    xyxyxyxy2xywhr,
 )
 from .config import Config
 from .dataset_metadata import DatasetMetadata
@@ -40,6 +43,9 @@
     "dist2bbox",
     "bbox2dist",
     "bbox_iou",
+    "batch_probiou",
+    "xywhr2xyxyxyxy",
+    "xyxyxyxy2xywhr",
     "non_max_suppression",
     "anchors_from_dataset",
     "anchors_for_fpn_features",
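
The re-exports above make the new OBB helpers importable directly from `luxonis_train.utils`. A hypothetical usage sketch follows; the box formats and signatures are taken from the `boundingbox.py` docstrings in the hunks below, while the tensor values and the shape comments are assumptions, not part of the patch.

```python
import math

import torch

from luxonis_train.utils import batch_probiou, xywhr2xyxyxyxy, xyxyxyxy2xywhr

# Two boxes in [cx, cy, w, h, rotation] format, rotation in radians (0 to pi/2).
obb = torch.tensor(
    [
        [50.0, 50.0, 40.0, 20.0, 0.0],
        [52.0, 48.0, 38.0, 22.0, math.pi / 8],
    ]
)

# Corner representation and back; xyxyxyxy2xywhr expects (n, 8) corner input.
corners = xywhr2xyxyxyxy(obb)
roundtrip = xyxyxyxy2xywhr(torch.as_tensor(corners).reshape(-1, 8))

# Pairwise probabilistic IoU between the two rotated boxes (assumed 2x2 output).
iou = batch_probiou(obb, obb)
print(iou)
```
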
diff --git a/luxonis_train/utils/boundingbox.py b/luxonis_train/utils/boundingbox.py
index 94f21951..a18bd823 100644
--- a/luxonis_train/utils/boundingbox.py
+++ b/luxonis_train/utils/boundingbox.py
@@ -136,7 +136,8 @@ def dist2rbbox(
     pred_angles: Tensor,
     anchor_points: Tensor,
 ) -> Tensor:
-    """Transform distance (ltrb) to a rotated bounding box in "xcycwh" format.
+    """Transform distance (ltrb) to a rotated bounding box in "xcycwh"
+    format.
 
     @type distance: Tensor
     @param distance: Distance predictions
@@ -173,8 +174,9 @@ def bbox2dist(bbox: Tensor, anchor_points: Tensor, reg_max: float) -> Tensor:
 
 
 def xyxyxyxy2xywhr(x: Tensor) -> Tensor | np.ndarray:
-    """Convert batched Oriented Bounding Boxes (OBB) from [xy1, xy2, xy3, xy4] to [xywh,
-    rotation]. Rotation values are returned in radians from 0 to pi/2.
+    """Convert batched Oriented Bounding Boxes (OBB) from [xy1, xy2,
+    xy3, xy4] to [xywh, rotation]. Rotation values are returned in
+    radians from 0 to pi/2.
 
     Args:
         x (numpy.ndarray | torch.Tensor): Input box corners [xy1, xy2, xy3, xy4] of shape (n, 8).
@@ -199,8 +201,9 @@ def xyxyxyxy2xywhr(x: Tensor) -> Tensor | np.ndarray:
 
 
 def xywhr2xyxyxyxy(x: Tensor) -> Tensor | np.ndarray:
-    """Convert batched Oriented Bounding Boxes (OBB) from [xywh, rotation] to [xy1, xy2,
-    xy3, xy4]. Rotation values should be in radians from 0 to pi/2.
+    """Convert batched Oriented Bounding Boxes (OBB) from [xywh,
+    rotation] to [xy1, xy2, xy3, xy4]. Rotation values should be in
+    radians from 0 to pi/2.
 
     Args:
         x (numpy.ndarray | torch.Tensor): Boxes in [cx, cy, w, h, rotation] format of shape (n, 5) or (b, n, 5).
@@ -229,9 +232,9 @@ def xywhr2xyxyxyxy(x: Tensor) -> Tensor | np.ndarray:
 
 
 def xyxy2xywh(x: Tensor) -> Tensor:
-    """Convert bounding box coordinates from (x1, y1, x2, y2) format to (x, y, width,
-    height) format where (x1, y1) is the top-left corner and (x2, y2) is the bottom-
-    right corner.
+    """Convert bounding box coordinates from (x1, y1, x2, y2) format to
+    (x, y, width, height) format where (x1, y1) is the top-left corner
+    and (x2, y2) is the bottom-right corner.
 
     Args:
         x (np.ndarray | torch.Tensor): The input bounding box coordinates in (x1, y1, x2, y2) format.
@@ -243,7 +246,9 @@ def xyxy2xywh(x: Tensor) -> Tensor:
         x.shape[-1] == 4
     ), f"input shape last dimension expected 4 but input shape is {x.shape}"
     y = (
-        torch.empty_like(x) if isinstance(x, torch.Tensor) else np.empty_like(x)
+        torch.empty_like(x)
+        if isinstance(x, torch.Tensor)
+        else np.empty_like(x)
     )  # faster than clone/copy
     y[..., 0] = (x[..., 0] + x[..., 2]) / 2  # x center
     y[..., 1] = (x[..., 1] + x[..., 3]) / 2  # y center
@@ -253,9 +258,10 @@ def xyxy2xywh(x: Tensor) -> Tensor:
 
 
 def xywh2xyxy(x: Tensor) -> Tensor:
-    """Convert bounding box coordinates from (x, y, width, height) format to (x1, y1,
-    x2, y2) format where (x1, y1) is the top-left corner and (x2, y2) is the bottom-
-    right corner. Note: ops per 2 channels faster than per channel.
+    """Convert bounding box coordinates from (x, y, width, height)
+    format to (x1, y1, x2, y2) format where (x1, y1) is the top-left
+    corner and (x2, y2) is the bottom-right corner. Note: ops per 2
+    channels faster than per channel.
 
     Args:
         x (np.ndarray | torch.Tensor): The input bounding box coordinates in (x, y, width, height) format.
@@ -267,7 +273,9 @@ def xywh2xyxy(x: Tensor) -> Tensor:
         x.shape[-1] == 4
     ), f"input shape last dimension expected 4 but input shape is {x.shape}"
     y = (
-        torch.empty_like(x) if isinstance(x, torch.Tensor) else np.empty_like(x)
+        torch.empty_like(x)
+        if isinstance(x, torch.Tensor)
+        else np.empty_like(x)
     )  # faster than clone/copy
     xy = x[..., :2]  # centers
     wh = x[..., 2:] / 2  # half width-height
@@ -434,7 +442,10 @@ def probiou(
         ((a1 + a2) * (b1 + b2) - (c1 + c2).pow(2))
         / (
             4
-            * ((a1 * b1 - c1.pow(2)).clamp_(0) * (a2 * b2 - c2.pow(2)).clamp_(0)).sqrt()
+            * (
+                (a1 * b1 - c1.pow(2)).clamp_(0)
+                * (a2 * b2 - c2.pow(2)).clamp_(0)
+            ).sqrt()
             + eps
         )
         + eps
@@ -484,7 +495,10 @@ def batch_probiou(obb1: Tensor, obb2: Tensor, eps: float = 1e-7) -> Tensor:
         ((a1 + a2) * (b1 + b2) - (c1 + c2).pow(2))
         / (
             4
-            * ((a1 * b1 - c1.pow(2)).clamp_(0) * (a2 * b2 - c2.pow(2)).clamp_(0)).sqrt()
+            * (
+                (a1 * b1 - c1.pow(2)).clamp_(0)
+                * (a2 * b2 - c2.pow(2)).clamp_(0)
+            ).sqrt()
             + eps
         )
         + eps
@@ -666,8 +680,8 @@ def non_max_suppression_obb(
     max_det: int = 300,
     predicts_objectness: bool = True,
 ) -> list[Tensor]:
-    """Non-maximum suppression on model's predictions to keep only best instances for
-    oriented bounding boxes (obb).
+    """Non-maximum suppression on model's predictions to keep only best
+    instances for oriented bounding boxes (obb).
 
     @type preds: Tensor
     @param preds: Model's prediction tensor of shape [bs, N, M]. Bounding boxes are in xywhr format.
@@ -744,7 +758,9 @@ def non_max_suppression_obb(
 
         if multi_label:
             box_idx, class_idx = (
-                (curr_out[:, 6 : 6 + n_classes] > conf_thres).nonzero(as_tuple=False).T
+                (curr_out[:, 6 : 6 + n_classes] > conf_thres)
+                .nonzero(as_tuple=False)
+                .T
             )
             keep_mask[box_idx] = True
             curr_out = torch.cat(
@@ -756,9 +772,13 @@ def non_max_suppression_obb(
                 1,
             )
         else:
-            conf, class_idx = curr_out[:, 6 : 6 + n_classes].max(1, keepdim=True)
+            conf, class_idx = curr_out[:, 6 : 6 + n_classes].max(
+                1, keepdim=True
+            )
             keep_mask[conf.view(-1) > conf_thres] = True
-            curr_out = torch.cat((bboxes, conf, class_idx.float()), 1)[keep_mask]
+            curr_out = torch.cat((bboxes, conf, class_idx.float()), 1)[
+                keep_mask
+            ]
 
         if keep_classes is not None:
             curr_out = curr_out[