diff --git a/luxonis_train/attached_modules/losses/__init__.py b/luxonis_train/attached_modules/losses/__init__.py index 28585504..5b3f7a63 100644 --- a/luxonis_train/attached_modules/losses/__init__.py +++ b/luxonis_train/attached_modules/losses/__init__.py @@ -3,8 +3,6 @@ from .bce_with_logits import BCEWithLogitsLoss from .cross_entropy import CrossEntropyLoss from .efficient_keypoint_bbox_loss import EfficientKeypointBBoxLoss -from .implicit_keypoint_bbox_loss import ImplicitKeypointBBoxLoss -from .keypoint_loss import KeypointLoss from .sigmoid_focal_loss import SigmoidFocalLoss from .smooth_bce_with_logits import SmoothBCEWithLogitsLoss from .softmax_focal_loss import SoftmaxFocalLoss @@ -14,8 +12,6 @@ "BCEWithLogitsLoss", "CrossEntropyLoss", "EfficientKeypointBBoxLoss", - "ImplicitKeypointBBoxLoss", - "KeypointLoss", "BaseLoss", "SigmoidFocalLoss", "SmoothBCEWithLogitsLoss", diff --git a/luxonis_train/attached_modules/losses/implicit_keypoint_bbox_loss.py b/luxonis_train/attached_modules/losses/implicit_keypoint_bbox_loss.py deleted file mode 100644 index 99eea6f3..00000000 --- a/luxonis_train/attached_modules/losses/implicit_keypoint_bbox_loss.py +++ /dev/null @@ -1,349 +0,0 @@ -import logging -from typing import Any, cast - -import torch -from torch import Tensor -from torchvision.ops import box_convert - -from luxonis_train.attached_modules.losses.keypoint_loss import KeypointLoss -from luxonis_train.enums import TaskType -from luxonis_train.nodes import ImplicitKeypointBBoxHead -from luxonis_train.utils import ( - Labels, - Packet, - compute_iou_loss, - match_to_anchor, - process_bbox_predictions, -) - -from .base_loss import BaseLoss -from .bce_with_logits import BCEWithLogitsLoss -from .smooth_bce_with_logits import SmoothBCEWithLogitsLoss - -KeypointTargetType = tuple[ - list[Tensor], - list[Tensor], - list[Tensor], - list[tuple[Tensor, Tensor, Tensor, Tensor]], - list[Tensor], -] - -logger = logging.getLogger(__name__) - - -# TODO: BROKEN! -class ImplicitKeypointBBoxLoss(BaseLoss[list[Tensor], KeypointTargetType]): - node: ImplicitKeypointBBoxHead - supported_tasks: list[tuple[TaskType, ...]] = [ - (TaskType.BOUNDINGBOX, TaskType.KEYPOINTS) - ] - - def __init__( - self, - cls_pw: float = 1.0, - viz_pw: float = 1.0, - obj_pw: float = 1.0, - label_smoothing: float = 0.0, - min_objectness_iou: float = 0.0, - bbox_loss_weight: float = 0.05, - keypoint_visibility_loss_weight: float = 0.6, - keypoint_regression_loss_weight: float = 0.5, - sigmas: list[float] | None = None, - area_factor: float | None = None, - class_loss_weight: float = 0.6, - objectness_loss_weight: float = 0.7, - anchor_threshold: float = 4.0, - bias: float = 0.5, - balance: list[float] | None = None, - **kwargs: Any, - ): - """Joint loss for keypoint and box predictions for cases where - the keypoints and boxes are inherently linked. - - Based on U{YOLO-Pose: Enhancing YOLO for Multi Person Pose Estimation Using Object - Keypoint Similarity Loss}. - - @type cls_pw: float - @param cls_pw: Power for the BCE loss for classes. Defaults to C{1.0}. - @type viz_pw: float - @param viz_pw: Power for the BCE loss for keypoints. - @type obj_pw: float - @param obj_pw: Power for the BCE loss for objectness. Defaults to C{1.0}. - @type label_smoothing: float - @param label_smoothing: Label smoothing factor. Defaults to C{0.0}. - @type min_objectness_iou: float - @param min_objectness_iou: Minimum objectness iou. Defaults to C{0.0}. - @type bbox_loss_weight: float - @param bbox_loss_weight: Weight for the bounding box loss. 
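
(Note: despite the "power" wording in the docstring above, `cls_pw`, `viz_pw` and `obj_pw` end up as `pos_weight` arguments to BCE-with-logits further down in this file. A plain-PyTorch sketch of that effect, with toy numbers:)

import torch

# pos_weight > 1 up-weights the positive class in BCE-with-logits
logits = torch.tensor([0.5, -1.2])
targets = torch.tensor([1.0, 0.0])

plain = torch.nn.BCEWithLogitsLoss()
weighted = torch.nn.BCEWithLogitsLoss(pos_weight=torch.tensor([2.0]))

# the weighted variant penalizes errors on positive targets more heavily
print(plain(logits, targets), weighted(logits, targets))
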
- @type keypoint_visibility_loss_weight: float - @param keypoint_visibility_loss_weight: Weight for the keypoint visibility loss. Defaults to C{0.6}. - @type keypoint_regression_loss_weight: float - @param keypoint_regression_loss_weight: Weight for the keypoint regression loss. Defaults to C{0.5}. - @type sigmas: list[float] | None - @param sigmas: Sigmas used in KeypointLoss for OKS metric. If None then use COCO ones if possible or default ones. Defaults to C{None}. - @type area_factor: float | None - @param area_factor: Factor by which we multiply bbox area which is used in KeypointLoss. If None then use default one. Defaults to C{None}. - @type class_loss_weight: float - @param class_loss_weight: Weight for the class loss. Defaults to C{0.6}. - @type objectness_loss_weight: float - @param objectness_loss_weight: Weight for the objectness loss. Defaults to C{0.7}. - @type anchor_threshold: float - @param anchor_threshold: Threshold for matching anchors to targets. Defaults to C{4.0}. - @type bias: float - @param bias: Bias for matching anchors to targets. Defaults to C{0.5}. - @type balance: list[float] | None - @param balance: Balance for the different heads. Defaults to C{None}. - """ - - super().__init__(**kwargs) - - self.n_anchors = self.node.n_anchors - self.n_heads = self.node.n_heads - self.box_offset = self.node.box_offset - self.anchors = self.node.anchors - self.balance = balance or [4.0, 1.0, 0.4] - if len(self.balance) < self.n_heads: - logger.warning( - f"Balance list must have at least {self.n_heads} elements." - "Filling the rest with 1.0." - ) - self.balance += [1.0] * (self.n_heads - len(self.balance)) - - self.min_objectness_iou = min_objectness_iou - self.bbox_weight = bbox_loss_weight - self.class_weight = class_loss_weight - self.objectness_weight = objectness_loss_weight - self.anchor_threshold = anchor_threshold - - self.bias = bias - - self.b_cross_entropy = BCEWithLogitsLoss( - pos_weight=torch.tensor([obj_pw]) - ) - self.class_loss = SmoothBCEWithLogitsLoss( - label_smoothing=label_smoothing, - bce_pow=cls_pw, - ) - self.keypoint_loss = KeypointLoss( - n_keypoints=self.n_keypoints, - bce_power=viz_pw, - sigmas=sigmas, - area_factor=area_factor, - regression_loss_weight=keypoint_regression_loss_weight, - visibility_loss_weight=keypoint_visibility_loss_weight, - ) - - self.positive_smooth_const = 1 - 0.5 * label_smoothing - self.negative_smooth_const = 0.5 * label_smoothing - - def prepare( - self, outputs: Packet[Tensor], labels: Labels - ) -> tuple[list[Tensor], KeypointTargetType]: - """Prepares the labels to be in the correct format for loss - calculation. - - @type outputs: Packet[Tensor] - @param outputs: Output from the forward pass. - @type labels: L{Labels} - @param labels: Dictionary containing the labels. - @rtype: tuple[list[Tensor], tuple[list[Tensor], list[Tensor], - list[Tensor], list[tuple[Tensor, Tensor, Tensor, Tensor]], - list[Tensor]]] - @return: Tuple containing the original output and the - postprocessed labels. The processed labels are a tuple - containing the class targets, box targets, keypoint targets, - indices and anchors. Indicies are a tuple containing vectors - of indices for batch, anchor, feature y and feature x - dimensions, respectively. They are all of shape - (n_targets,). The indices are used to index the output - tensors of shape (batch_size, n_anchors, feature_height, - feature_width, n_classes + box_offset + n_keypoints * 3) to - get a tensor of shape (n_targets, n_classes + box_offset + - n_keypoints * 3). 
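
(The indexing scheme described above is plain PyTorch advanced indexing; a self-contained sketch with toy shapes, variable names mirroring the code below:)

import torch

batch_size, n_anchors, feat_h, feat_w, n_out = 2, 3, 8, 8, 10
features = torch.randn(batch_size, n_anchors, feat_h, feat_w, n_out)

# one index vector per dimension, each of shape (n_targets,)
batch_index = torch.tensor([0, 0, 1])
anchor_index = torch.tensor([1, 2, 0])
feature_y_index = torch.tensor([3, 5, 7])
feature_x_index = torch.tensor([4, 4, 0])

# advanced indexing pulls one prediction vector per matched target
pred_subset = features[
    batch_index, anchor_index, feature_y_index, feature_x_index
]
assert pred_subset.shape == (3, n_out)
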
- """ - predictions = self.get_input_tensors(outputs, "features") - - kpt_label = self.get_label(labels, TaskType.KEYPOINTS) - bbox_label = self.get_label(labels, TaskType.BOUNDINGBOX) - - targets = torch.zeros( - (kpt_label.shape[0], self.n_keypoints * 3 + self.box_offset + 1) - ) - targets[:, :2] = kpt_label[:, :2] - targets[:, 2 : self.box_offset + 1] = box_convert( - bbox_label[:, 2:], "xywh", "cxcywh" - ) - - # insert keypoints - for i in range(1, 4): - targets[:, self.box_offset + i :: 3] = kpt_label[:, i + 1 :: 3] - - n_targets = targets.shape[0] - - class_targets: list[Tensor] = [] - box_targets: list[Tensor] = [] - keypoint_targets: list[Tensor] = [] - indices: list[tuple[Tensor, Tensor, Tensor, Tensor]] = [] - anchors: list[Tensor] = [] - - anchor_indices = ( - torch.arange( - self.n_anchors, device=targets.device, dtype=torch.float32 - ) - .reshape(self.n_anchors, 1) - .repeat(1, n_targets) - .unsqueeze(-1) - ) - targets = torch.cat( - (targets.repeat(self.n_anchors, 1, 1), anchor_indices), 2 - ) - - xy_deltas = ( - torch.tensor( - [[0, 0], [1, 0], [0, 1], [-1, 0], [0, -1]], - device=targets.device, - ).float() - * self.bias - ) - - for i in range(self.n_heads): - anchor = self.anchors[i] - feature_height, feature_width = predictions[i].shape[2:4] - scaled_targets, xy_shifts = match_to_anchor( - targets, - anchor, - xy_deltas, - feature_width, - feature_height, - self.n_keypoints, - self.anchor_threshold, - self.bias, - self.box_offset, - ) - - batch_index, cls = scaled_targets[:, :2].long().T - box_xy = scaled_targets[:, 2:4] - box_wh = scaled_targets[:, 4:6] - box_xy_deltas = (box_xy - xy_shifts).long() - feature_x_index = box_xy_deltas[:, 0].clamp_(0, feature_width - 1) - feature_y_index = box_xy_deltas[:, 1].clamp_(0, feature_height - 1) - - anchor_indices = scaled_targets[:, -1].long() - indices.append( - ( - batch_index, - anchor_indices, - feature_y_index, - feature_x_index, - ) - ) - class_targets.append(cls) - box_targets.append(torch.cat((box_xy - box_xy_deltas, box_wh), 1)) - anchors.append(anchor[anchor_indices]) - - keypoint_targets.append( - self._create_keypoint_target(scaled_targets, box_xy_deltas) - ) - - return predictions, ( - class_targets, - box_targets, - keypoint_targets, - indices, - anchors, - ) - - def forward( - self, - predictions: list[Tensor], - targets: KeypointTargetType, - ) -> tuple[Tensor, dict[str, Tensor]]: - device = predictions[0].device - sub_losses = { - "bboxes": torch.tensor(0.0, device=device), - "objectness": torch.tensor(0.0, device=device), - "class": torch.tensor(0.0, device=device), - "kpt_visibility": torch.tensor(0.0, device=device), - "kpt_regression": torch.tensor(0.0, device=device), - } - - for ( - pred, - class_target, - box_target, - kpt_target, - index, - anchor, - balance, - ) in zip(predictions, *targets, self.balance): - obj_targets = torch.zeros_like(pred[..., 0], device=device) - n_targets = len(class_target) - - if n_targets > 0: - pred_subset = pred[index] - - bbox_cx_cy, bbox_w_h, _ = process_bbox_predictions( - pred_subset, anchor.to(device) - ) - bbox_loss, bbox_iou = compute_iou_loss( - torch.cat((bbox_cx_cy, bbox_w_h), dim=1), - box_target, - iou_type="ciou", - bbox_format="cxcywh", - reduction="mean", - ) - - sub_losses["bboxes"] += bbox_loss * self.bbox_weight - - area = box_target[:, 2] * box_target[:, 3] - - _, kpt_sublosses = self.keypoint_loss.forward( - pred_subset[:, self.box_offset + self.n_classes :], - kpt_target.to(device), - area.to(device), - ) - for name, kpt_subloss in kpt_sublosses.items(): - 
sub_losses[name] += kpt_subloss - - obj_targets[index] = (self.min_objectness_iou) + ( - 1 - self.min_objectness_iou - ) * bbox_iou.squeeze(-1).to(obj_targets.dtype) - - if self.n_classes > 1: - sub_losses["class"] += ( - self.class_loss.forward( - pred_subset[ - :, - self.box_offset : self.box_offset - + self.n_classes, - ], - class_target, - ) - * self.class_weight - ) - - sub_losses["objectness"] += ( - self.b_cross_entropy.forward(pred[..., 4], obj_targets) - * balance - * self.objectness_weight - ) - - loss = cast(Tensor, sum(sub_losses.values())).reshape([]) - return loss, {name: loss.detach() for name, loss in sub_losses.items()} - - def _create_keypoint_target( - self, scaled_targets: Tensor, box_xy_deltas: Tensor - ): - keypoint_target = scaled_targets[:, self.box_offset + 1 : -1] - for j in range(self.n_keypoints): - idx = 3 * j - keypoint_coords = keypoint_target[:, idx : idx + 2] - visibility = keypoint_target[:, idx + 2] - - keypoint_mask = visibility != 0 - keypoint_coords[keypoint_mask] -= box_xy_deltas[keypoint_mask] - - keypoint_target[:, idx : idx + 2] = keypoint_coords - keypoint_target[:, idx + 2] = visibility - - return keypoint_target diff --git a/luxonis_train/attached_modules/losses/keypoint_loss.py b/luxonis_train/attached_modules/losses/keypoint_loss.py deleted file mode 100644 index 1327d460..00000000 --- a/luxonis_train/attached_modules/losses/keypoint_loss.py +++ /dev/null @@ -1,112 +0,0 @@ -from typing import Any - -import torch -from torch import Tensor - -from luxonis_train.enums import TaskType -from luxonis_train.utils import ( - get_sigmas, - get_with_default, - process_keypoints_predictions, -) - -from .base_loss import BaseLoss -from .bce_with_logits import BCEWithLogitsLoss - - -# TODO: Make it work on its own -class KeypointLoss(BaseLoss[Tensor, Tensor]): - supported_tasks: list[TaskType] = [TaskType.KEYPOINTS] - - def __init__( - self, - n_keypoints: int, - bce_power: float = 1.0, - sigmas: list[float] | None = None, - area_factor: float | None = None, - regression_loss_weight: float = 1.0, - visibility_loss_weight: float = 1.0, - **kwargs: Any, - ): - """Keypoint based loss that is computed from OKS-based - regression and visibility loss. - - @type n_keypoints: int - @param n_keypoints: Number of keypoints. - @type bce_power: float - @param bce_power: Power used for BCE visibility loss. Defaults - to C{1.0}. - @param sigmas: Sigmas used for OKS. If None then use COCO ones - if possible or default ones. Defaults to C{None}. - @type area_factor: float | None - @param area_factor: Factor by which we multiply bbox area. If - None then use default one. Defaults to C{None}. - @type regression_loss_weight: float - @param regression_loss_weight: Weight of regression loss. - Defaults to C{1.0}. - @type visibility_loss_weight: float - @param visibility_loss_weight: Weight of visibility loss. - Defaults to C{1.0}. - """ - - super().__init__(**kwargs) - self.b_cross_entropy = BCEWithLogitsLoss( - pos_weight=torch.tensor([bce_power]), **kwargs - ) - self.sigmas = get_sigmas(sigmas, n_keypoints, caller_name=self.name) - self.area_factor = get_with_default( - area_factor, "bbox area scaling", self.name, default=0.53 - ) - self.regression_loss_weight = regression_loss_weight - self.visibility_loss_weight = visibility_loss_weight - - def forward( - self, prediction: Tensor, target: Tensor, area: Tensor - ) -> tuple[Tensor, dict[str, Tensor]]: - """Computes the keypoint loss and visibility loss for a given - prediction and target. 
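
(The objectness targets built in `forward` above are soft labels interpolated from the predicted-box IoU rather than hard ones. A toy sketch, assuming the IoU values are already computed:)

import torch

min_objectness_iou = 0.0
bbox_iou = torch.tensor([0.3, 0.9])      # e.g. CIoU of the matched boxes

obj_targets = torch.zeros(2, 3, 4, 4)    # (batch, anchor, grid_y, grid_x)
index = (
    torch.tensor([0, 1]),                # batch
    torch.tensor([0, 2]),                # anchor
    torch.tensor([1, 3]),                # grid y
    torch.tensor([2, 0]),                # grid x
)

# matched cells receive an IoU-scaled soft target instead of a hard 1.0
obj_targets[index] = min_objectness_iou + (1 - min_objectness_iou) * bbox_iou
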
- - @type prediction: Tensor - @param prediction: Predicted tensor of shape C{[n_detections, - n_keypoints * 3]}. - @type target: Tensor - @param target: Target tensor of shape C{[n_detections, - n_keypoints * 3]}. - @type area: Tensor - @param area: Area tensor of shape C{[n_detections]}. - @rtype: tuple[Tensor, dict[str, Tensor]] - @return: A tuple containing the total loss tensor of shape - C{[1,]} and a dictionary with the regression loss and - visibility loss tensors. - """ - sigmas = self.sigmas.to(prediction.device) - - pred_x, pred_y, pred_v = process_keypoints_predictions(prediction) - target_x = target[:, 0::3] - target_y = target[:, 1::3] - target_visibility = (target[:, 2::3] > 0).float() - - visibility_loss = ( - self.b_cross_entropy.forward(pred_v, target_visibility) - * self.visibility_loss_weight - ) - scales = area * self.area_factor - - distance = (target_x - pred_x) ** 2 + (target_y - pred_y) ** 2 - normalized_distance = ( - distance / (2 * sigmas**2) / (scales.view(-1, 1) + 1e-9) / 2 - ) - - regression_loss = 1 - torch.exp(-normalized_distance) - regression_loss = (regression_loss * target_visibility).sum(dim=1) / ( - target_visibility.sum(dim=1) + 1e-9 - ) - regression_loss = regression_loss.mean() - regression_loss *= self.regression_loss_weight - - total_loss = regression_loss + visibility_loss - - return total_loss, { - "kpt_regression": regression_loss, - "kpt_visibility": visibility_loss, - } diff --git a/luxonis_train/core/utils/archive_utils.py b/luxonis_train/core/utils/archive_utils.py index c8904f08..dbcc214a 100644 --- a/luxonis_train/core/utils/archive_utils.py +++ b/luxonis_train/core/utils/archive_utils.py @@ -116,7 +116,7 @@ def _get_classes( node_task = "boundingbox" case "SegmentationHead" | "BiSeNetHead": node_task = "segmentation" - case "ImplicitKeypointBBoxHead" | "EfficientKeypointBBoxHead": + case "EfficientKeypointBBoxHead": node_task = "keypoints" case _: # pragma: no cover raise ValueError("Node does not map to a default task.") @@ -152,14 +152,6 @@ def _get_head_specific_parameters( parameters["is_softmax"] = getattr( ImplementedHeadsIsSoxtmaxed, head_name ).value - elif head_name == "ImplicitKeypointBBoxHead": - parameters["subtype"] = ObjectDetectionSubtypeYOLO.YOLOv7.value - head_node = nodes[head_alias] - parameters["iou_threshold"] = head_node.iou_thres - parameters["conf_threshold"] = head_node.conf_thres - parameters["max_det"] = head_node.max_det - parameters["n_keypoints"] = head_node.n_keypoints - parameters["anchors"] = head_node.anchors.tolist() elif head_name == "EfficientKeypointBBoxHead": # or appropriate subtype head_node = nodes[head_alias] @@ -210,8 +202,6 @@ def _get_head_outputs( return [output["name"] for output in outputs] elif head_type in ["SegmentationHead", "BiSeNetHead"]: return [outputs[0]["name"]] - elif head_type == "ImplicitKeypointBBoxHead": - return [outputs[0]["name"]] elif head_type == "EfficientKeypointBBoxHead": return [outputs[0]["name"]] else: diff --git a/luxonis_train/nodes/blocks/__init__.py b/luxonis_train/nodes/blocks/__init__.py index c35186e1..ce0181c9 100644 --- a/luxonis_train/nodes/blocks/__init__.py +++ b/luxonis_train/nodes/blocks/__init__.py @@ -8,10 +8,6 @@ DropPath, EfficientDecoupledBlock, FeatureFusionBlock, - KeypointBlock, - LearnableAdd, - LearnableMulAddConv, - LearnableMultiply, RepVGGBlock, SpatialPyramidPoolingBlock, SqueezeExciteBlock, @@ -32,10 +28,6 @@ "AttentionRefinmentBlock", "SpatialPyramidPoolingBlock", "FeatureFusionBlock", - "LearnableAdd", - "LearnableMultiply", - 
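
(The regression term removed above is an OKS-style penalty: squared keypoint distances are normalized by per-keypoint sigmas and the scaled box area, mapped through `1 - exp(-d)`, and averaged over visible keypoints only. A standalone numeric sketch with two keypoints:)

import torch

sigmas = torch.tensor([0.05, 0.08])        # per-keypoint OKS sigmas
scales = torch.tensor([0.10])              # bbox area * area_factor
target_visibility = torch.tensor([[1.0, 0.0]])

distance = torch.tensor([[0.001, 0.004]])  # (tx - px)^2 + (ty - py)^2
normalized = distance / (2 * sigmas**2) / (scales.view(-1, 1) + 1e-9) / 2

regression = 1 - torch.exp(-normalized)
regression = (regression * target_visibility).sum(dim=1) / (
    target_visibility.sum(dim=1) + 1e-9
)
print(regression.mean())  # only the visible keypoint contributes
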
"LearnableMulAddConv", - "KeypointBlock", "BasicResNetBlock", "Bottleneck", "UpscaleOnline", diff --git a/luxonis_train/nodes/blocks/blocks.py b/luxonis_train/nodes/blocks/blocks.py index 79a8f738..5059c651 100644 --- a/luxonis_train/nodes/blocks/blocks.py +++ b/luxonis_train/nodes/blocks/blocks.py @@ -654,86 +654,6 @@ def forward(self, x1: Tensor, x2: Tensor) -> Tensor: return out -class LearnableAdd(nn.Module): - """Implicit add block.""" - - def __init__(self, channel: int): - super().__init__() - self.channel = channel - self.implicit = nn.Parameter(torch.zeros(1, channel, 1, 1)) - nn.init.normal_(self.implicit, std=0.02) - - def forward(self, x: Tensor) -> Tensor: - return self.implicit.expand_as(x) + x - - -class LearnableMultiply(nn.Module): - """Implicit multiply block.""" - - def __init__(self, channel: int): - super().__init__() - self.channel = channel - self.implicit = nn.Parameter(torch.ones(1, channel, 1, 1)) - nn.init.normal_(self.implicit, mean=1.0, std=0.02) - - def forward(self, x: Tensor) -> Tensor: - return self.implicit.expand_as(x) * x - - -class LearnableMulAddConv(nn.Module): - def __init__( - self, - add_channel: int, - mul_channel: int, - conv_in_channel: int, - conv_out_channel: int, - ): - super().__init__() - self.add = LearnableAdd(add_channel) - self.mul = LearnableMultiply(mul_channel) - self.conv = nn.Conv2d(conv_in_channel, conv_out_channel, 1) - - def forward(self, x: Tensor) -> Tensor: - return self.mul(self.conv(self.add(x))) - - -class KeypointBlock(nn.Module): - """Keypoint head block for keypoint predictions.""" - - def __init__(self, in_channels: int, out_channels: int): - super().__init__() - layers: list[nn.Module] = [] - for i in range(6): - depth_wise_conv = ConvModule( - in_channels, - in_channels, - kernel_size=3, - padding=autopad(3), - groups=math.gcd(in_channels, in_channels), - activation=nn.SiLU(), - ) - conv = ( - ConvModule( - in_channels, - in_channels, - kernel_size=1, - padding=autopad(1), - activation=nn.SiLU(), - ) - if i < 5 - else nn.Conv2d(in_channels, out_channels, 1) - ) - - layers.append(depth_wise_conv) - layers.append(conv) - - self.block = nn.Sequential(*layers) - - def forward(self, x: Tensor) -> Tensor: - out = self.block(x) - return out - - T = TypeVar("T", int, tuple[int, ...]) diff --git a/luxonis_train/nodes/enums/head_categorization.py b/luxonis_train/nodes/enums/head_categorization.py index 90f75725..606ed872 100644 --- a/luxonis_train/nodes/enums/head_categorization.py +++ b/luxonis_train/nodes/enums/head_categorization.py @@ -6,7 +6,6 @@ class ImplementedHeads(Enum): ClassificationHead = "ClassificationParser" EfficientBBoxHead = "YOLO" - ImplicitKeypointBBoxHead = "YoloDetectionNetwork" EfficientKeypointBBoxHead = "YoloDetectionNetwork" SegmentationHead = "SegmentationParser" BiSeNetHead = "SegmentationParser" @@ -17,7 +16,6 @@ class ImplementedHeadsIsSoxtmaxed(Enum): ClassificationHead = False EfficientBBoxHead = None - ImplicitKeypointBBoxHead = None EfficientKeypointBBoxHead = None SegmentationHead = False BiSeNetHead = False diff --git a/luxonis_train/nodes/heads/__init__.py b/luxonis_train/nodes/heads/__init__.py index e188f188..9d04c310 100644 --- a/luxonis_train/nodes/heads/__init__.py +++ b/luxonis_train/nodes/heads/__init__.py @@ -3,7 +3,6 @@ from .ddrnet_segmentation_head import DDRNetSegmentationHead from .efficient_bbox_head import EfficientBBoxHead from .efficient_keypoint_bbox_head import EfficientKeypointBBoxHead -from .implicit_keypoint_bbox_head import ImplicitKeypointBBoxHead from 
.segmentation_head import SegmentationHead __all__ = [ @@ -11,7 +10,6 @@ "ClassificationHead", "EfficientBBoxHead", "EfficientKeypointBBoxHead", - "ImplicitKeypointBBoxHead", "SegmentationHead", "DDRNetSegmentationHead", ] diff --git a/luxonis_train/nodes/heads/implicit_keypoint_bbox_head.py b/luxonis_train/nodes/heads/implicit_keypoint_bbox_head.py deleted file mode 100644 index 114c6f82..00000000 --- a/luxonis_train/nodes/heads/implicit_keypoint_bbox_head.py +++ /dev/null @@ -1,282 +0,0 @@ -import logging -import math -from typing import Any, cast - -import torch -from torch import Tensor, nn - -from luxonis_train.enums import TaskType -from luxonis_train.nodes.base_node import BaseNode -from luxonis_train.nodes.blocks import KeypointBlock, LearnableMulAddConv -from luxonis_train.utils import ( - Packet, - non_max_suppression, - process_bbox_predictions, - process_keypoints_predictions, -) - -logger = logging.getLogger(__name__) - - -class ImplicitKeypointBBoxHead( - BaseNode[list[Tensor], tuple[list[Tensor], Tensor]] -): - tasks = [TaskType.KEYPOINTS, TaskType.BOUNDINGBOX] - in_channels: list[int] - - def __init__( - self, - n_heads: int = 3, - anchors: list[list[float]] | None = None, - init_coco_biases: bool = True, - conf_thres: float = 0.25, - iou_thres: float = 0.45, - max_det: int = 300, - **kwargs: Any, - ): - """Head for object and keypoint detection. - - Adapted from U{YOLOv7: Trainable bag-of-freebies sets new state-of-the-art for real-time - object detectors}. - - TODO: more technical documentation - - @type n_heads: int - @param n_heads: Number of output heads. Defaults to C{3}. - B{Note:} Should be same also on neck in most cases. - @type anchors: list[list[float]] | None - @param anchors: Anchors used for object detection. - @type init_coco_biases: bool - @param init_coco_biases: Whether to use COCO bias and weight - @type conf_thres: float - @param conf_thres: Threshold for confidence. Defaults to C{0.25}. - @type iou_thres: float - @param iou_thres: Threshold for IoU. Defaults to C{0.45}. - @type max_det: int - @param max_det: Maximum number of detections retained after NMS. Defaults to C{300}. - """ - super().__init__(**kwargs) - - self.conf_thres = conf_thres - self.iou_thres = iou_thres - self.max_det = max_det - - self.n_heads = n_heads - if len(self.in_channels) < self.n_heads: - logger.warning( - f"Head '{self.name}' was set to use {self.n_heads} heads, " - f"but received only {len(self.in_channels)} inputs. " - f"Changing number of heads to {len(self.in_channels)}." - ) - self.n_heads = len(self.in_channels) - - if anchors is None: - logger.info("No anchors provided, generating them automatically.") - anchors, recall = self.dataset_metadata.autogenerate_anchors( - self.n_heads - ) - logger.info( - f"Anchors generated. 
Best possible recall: {recall:.2f}" - ) - - self.box_offset = 5 - self.n_det_out = self.n_classes + self.box_offset - self.n_kpt_out = 3 * self.n_keypoints - self.n_out = self.n_det_out + self.n_kpt_out - self.n_anchors = len(anchors[0]) // 2 - self.grid: list[Tensor] = [] - - self.anchors = torch.tensor(anchors).float().view(self.n_heads, -1, 2) - self.anchor_grid = self.anchors.clone().view( - self.n_heads, 1, -1, 1, 1, 2 - ) - - self.channel_list, self.stride = self._fit_to_n_heads(self.in_channels) - - self.learnable_mul_add_conv = nn.ModuleList( - LearnableMulAddConv( - add_channel=in_channels, - mul_channel=self.n_det_out * self.n_anchors, - conv_in_channel=in_channels, - conv_out_channel=self.n_det_out * self.n_anchors, - ) - for in_channels in self.channel_list - ) - - self.kpt_heads = nn.ModuleList( - KeypointBlock( - in_channels=in_channels, - out_channels=self.n_kpt_out * self.n_anchors, - ) - for in_channels in self.channel_list - ) - - self.anchors /= self.stride.view(-1, 1, 1) - self._check_anchor_order() - - if init_coco_biases: - self._initialize_weights_and_biases() - - def forward(self, inputs: list[Tensor]) -> tuple[list[Tensor], Tensor]: - predictions: list[Tensor] = [] - features: list[Tensor] = [] - - self.anchor_grid = self.anchor_grid.to(inputs[0].device) - - for i in range(self.n_heads): - feat = cast( - Tensor, - torch.cat( - ( - self.learnable_mul_add_conv[i](inputs[i]), - self.kpt_heads[i](inputs[i]), - ), - axis=1, - ), # type: ignore - ) - - batch_size, _, feature_height, feature_width = feat.shape - if i >= len(self.grid): - self.grid.append( - self._construct_grid(feature_width, feature_height).to( - feat.device - ) - ) - - feat = feat.reshape( - batch_size, - self.n_anchors, - self.n_out, - feature_height, - feature_width, - ).permute(0, 1, 3, 4, 2) - - features.append(feat) - predictions.append( - self._build_predictions( - feat, self.anchor_grid[i], self.grid[i], self.stride[i] - ) - ) - - return features, torch.cat(predictions, dim=1) - - def wrap(self, output: tuple[list[Tensor], Tensor]) -> Packet[Tensor]: - features, predictions = output - - if self.export: - return {"boxes_and_keypoints": [predictions]} - - if self.training: - return {"features": features} - - nms = non_max_suppression( - predictions, - n_classes=self.n_classes, - conf_thres=self.conf_thres, - iou_thres=self.iou_thres, - bbox_format="cxcywh", - max_det=self.max_det, - ) - - return { - "boundingbox": [detection[:, :6] for detection in nms], - "keypoints": [ - detection[:, 6:].reshape(-1, self.n_keypoints, 3) - for detection in nms - ], - "features": features, - } - - def _build_predictions( - self, feat: Tensor, anchor_grid: Tensor, grid: Tensor, stride: Tensor - ) -> Tensor: - batch_size = feat.shape[0] - bbox = feat[..., : self.box_offset + self.n_classes] - keypoints = feat[..., self.box_offset + self.n_classes :] - - box_cxcy, box_wh, box_tail = process_bbox_predictions( - bbox, anchor_grid - ) - grid = grid.to(box_cxcy.device) - stride = stride.to(box_cxcy.device) - box_cxcy = (box_cxcy + grid) * stride - out_bbox = torch.cat((box_cxcy, box_wh, box_tail), dim=-1) - - grid_x = grid[..., 0:1] - grid_y = grid[..., 1:2] - kpt_x, kpt_y, kpt_vis = process_keypoints_predictions(keypoints) - kpt_x = (kpt_x + grid_x) * stride - kpt_y = (kpt_y + grid_y) * stride - kpt_vis_sig = kpt_vis.sigmoid() - out_kpt = torch.cat((kpt_x, kpt_y, kpt_vis_sig), dim=-1) - out_kpt = out_kpt.reshape(*kpt_x.shape[:-1], -1) - out = torch.cat((out_bbox, out_kpt), dim=-1) - - return out.reshape(batch_size, -1, 
self.n_out) - - def _infer_bbox( - self, bbox: Tensor, stride: Tensor, grid: Tensor, anchor_grid: Tensor - ) -> Tensor: - out_bbox = bbox.sigmoid() - out_bbox_xy = (out_bbox[..., 0:2] * 2.0 - 0.5 + grid) * stride - out_bbox_wh = (out_bbox[..., 2:4] * 2) ** 2 * anchor_grid.view( - 1, self.n_anchors, 1, 1, 2 - ) - return torch.cat((out_bbox_xy, out_bbox_wh, out_bbox[..., 4:]), dim=-1) - - def _fit_to_n_heads( - self, channel_list: list[int] - ) -> tuple[list[int], Tensor]: - out_channel_list = channel_list[: self.n_heads] - stride = torch.tensor( - [ - self.original_in_shape[1] / h - for h in cast(list[int], self.in_height)[: self.n_heads] - ], - dtype=torch.int, - ) - return out_channel_list, stride - - def _initialize_weights_and_biases(self, class_freq: Tensor | None = None): - for m in self.modules(): - if isinstance(m, nn.Conv2d): - nn.init.kaiming_normal_( - m.weight, mode="fan_out", nonlinearity="relu" - ) - elif isinstance(m, nn.BatchNorm2d): - m.eps = 1e-3 - m.momentum = 0.03 - elif isinstance( - m, (nn.Hardswish, nn.LeakyReLU, nn.ReLU, nn.ReLU6) - ): - m.inplace = True - - for mi, s in zip(self.learnable_mul_add_conv, self.stride): - b = mi.conv.bias.view(self.n_anchors, -1) - b.data[:, 4] += math.log(8 / (640 / s) ** 2) - b.data[:, 5:] += ( - math.log(0.6 / (self.n_classes - 0.99)) - if class_freq is None - else torch.log(class_freq / class_freq.sum()) - ) - mi.conv.bias = torch.nn.Parameter(b.view(-1), requires_grad=True) - - def _construct_grid(self, feature_width: int, feature_height: int): - grid_y, grid_x = torch.meshgrid( - [torch.arange(feature_height), torch.arange(feature_width)], - indexing="ij", - ) - return ( - torch.stack((grid_x, grid_y), 2) - .view((1, 1, feature_height, feature_width, 2)) - .float() - ) - - def _check_anchor_order(self): - a = self.anchor_grid.prod(-1).view(-1) - delta_a = a[-1] - a[0] - delta_s = self.stride[-1] - self.stride[0] - if delta_a.sign() != delta_s.sign(): - logger.warning("Reversing anchor order") - self.anchors[:] = self.anchors.flip(0) - self.anchor_grid[:] = self.anchor_grid.flip(0) diff --git a/luxonis_train/utils/__init__.py b/luxonis_train/utils/__init__.py index 164f7da5..1231bc10 100644 --- a/luxonis_train/utils/__init__.py +++ b/luxonis_train/utils/__init__.py @@ -1,13 +1,10 @@ from .boundingbox import ( anchors_for_fpn_features, - anchors_from_dataset, bbox2dist, bbox_iou, compute_iou_loss, dist2bbox, - match_to_anchor, non_max_suppression, - process_bbox_predictions, ) from .dataset_metadata import DatasetMetadata from .exceptions import IncompatibleException @@ -18,7 +15,7 @@ to_shape_packet, ) from .graph import traverse_graph -from .keypoints import get_sigmas, process_keypoints_predictions +from .keypoints import get_sigmas from .tracker import LuxonisTrackerPL from .types import AttachIndexType, Kwargs, Labels, Packet @@ -34,16 +31,12 @@ "to_shape_packet", "get_with_default", "LuxonisTrackerPL", - "match_to_anchor", "dist2bbox", "bbox2dist", "bbox_iou", "non_max_suppression", - "anchors_from_dataset", "anchors_for_fpn_features", - "process_bbox_predictions", "compute_iou_loss", - "process_keypoints_predictions", "get_sigmas", "traverse_graph", ] diff --git a/luxonis_train/utils/boundingbox.py b/luxonis_train/utils/boundingbox.py index 8c5a9d40..c8f69802 100644 --- a/luxonis_train/utils/boundingbox.py +++ b/luxonis_train/utils/boundingbox.py @@ -2,7 +2,6 @@ from typing import Literal, TypeAlias import torch -from scipy.cluster.vq import kmeans from torch import Tensor from torchvision.ops import ( batched_nms, @@ -12,96 
+11,10 @@ generalized_box_iou, ) -from luxonis_train.enums import TaskType -from luxonis_train.loaders import BaseLoaderTorch - IoUType: TypeAlias = Literal["none", "giou", "diou", "ciou", "siou"] BBoxFormatType: TypeAlias = Literal["xyxy", "xywh", "cxcywh"] -def match_to_anchor( - targets: Tensor, - anchor: Tensor, - xy_shifts: Tensor, - scale_width: int, - scale_height: int, - n_keypoints: int, - anchor_threshold: float, - bias: float, - box_offset: int = 5, -) -> tuple[Tensor, Tensor]: - """Matches targets to anchors. - - 1. Scales the targets to the size of the feature map - 2. Matches the targets to the anchor, filtering out targets whose aspect - ratio is too far from the anchor's aspect ratio. - - @type targets: Tensor - @param targets: Targets in xyxy format - @type anchor: Tensor - @param anchor: Anchor boxes - @type xy_shifts: Tensor - @param xy_shifts: Shifts in x and y direction - @type scale_width: int - @param scale_width: Width of the feature map - @type scale_height: int - @param scale_height: Height of the feature map - @type n_keypoints: int - @param n_keypoints: Number of keypoints - @type anchor_threshold: float - @param anchor_threshold: Threshold for anchor filtering - @type bias: float - @param bias: Bias for anchor filtering - @type box_offset: int - @param box_offset: Offset for box. Defaults to 5. - - @rtype: tuple[Tensor, Tensor] - @return: Scaled targets and shifts. - """ - - # The boxes and keypoints need to be scaled to the size of the features - # First two indices are batch index and class label, - # last index is anchor index. Those are not scaled. - scale_length = 3 * n_keypoints + box_offset + 2 - scales = torch.ones(scale_length, device=targets.device) - - # Scale box and keypoint coordinates, but not visibility - for i in range(n_keypoints): - scales[box_offset + 1 + 3 * i] = scale_width - scales[box_offset + 2 + 3 * i] = scale_height - - scales[2 : box_offset + 1] = torch.tensor( - [scale_width, scale_height, scale_width, scale_height] - ) - - scaled_targets = targets * scales - - if targets.size(1) == 0: - return targets[0], torch.zeros(1, device=targets.device) - - wh_to_anchor_ratio = scaled_targets[:, :, 4:6] / anchor.unsqueeze(1) - ratio_mask = ( - torch.max(wh_to_anchor_ratio, 1.0 / wh_to_anchor_ratio).max(2)[0] - < anchor_threshold - ) - - filtered_targets = scaled_targets[ratio_mask] - - box_xy = filtered_targets[:, 2:4] - box_wh = torch.tensor([scale_width, scale_height]) - box_xy - - def decimal_part(x: Tensor) -> Tensor: - return x % 1.0 - - x, y = ((decimal_part(box_xy) < bias) & (box_xy > 1.0)).T - w, h = ((decimal_part(box_wh) < bias) & (box_wh > 1.0)).T - mask = torch.stack((torch.ones_like(x), x, y, w, h)) - final_targets = filtered_targets.repeat((len(xy_shifts), 1, 1))[mask] - - shifts = xy_shifts.unsqueeze(1).repeat((1, len(box_xy), 1))[mask] - return final_targets, shifts - - def dist2bbox( distance: Tensor, anchor_points: Tensor, @@ -411,123 +324,6 @@ def non_max_suppression( return output -def anchors_from_dataset( - loader: BaseLoaderTorch, - n_anchors: int = 9, - n_generations: int = 1000, - ratio_threshold: float = 4.0, -) -> tuple[Tensor, float]: - """Generates anchors based on bounding box annotations present in - provided data loader. It uses K-Means for initial proposals which - are then refined with genetic algorithm. - - @type loader: L{torch.utils.data.DataLoader} - @param loader: Data loader. - @type n_anchors: int - @param n_anchors: Number of anchors, this is normally n_heads * 3 - which generates 3 anchors per layer. 
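
(The core of the removed `match_to_anchor` is the aspect-ratio gate: a target survives only if neither of its sides is more than `anchor_threshold` times larger or smaller than the anchor's. A compact sketch of just that filter:)

import torch

anchor_threshold = 4.0
target_wh = torch.tensor([[10.0, 20.0], [64.0, 4.0]])
anchor_wh = torch.tensor([12.0, 16.0])

ratio = target_wh / anchor_wh
# the symmetric ratio is large when either side badly over- or under-shoots
worst = torch.max(ratio, 1.0 / ratio).max(dim=1).values
keep = worst < anchor_threshold
print(keep)  # tensor([ True, False])
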
Defaults to 9. - @type n_generations: int - @param n_generations: Number of iterations for anchor improvement - with genetic algorithm. Defaults to 1000. - @type ratio_threshold: float - @param ratio_threshold: Minimum threshold for ratio. Defaults to - 4.0. - @rtype: tuple[Tensor, float] - @return: Proposed anchors and the best possible recall. - """ - - widths: list[Tensor] = [] - for _, labels in loader: - for tensor, task_type in labels.values(): - if task_type == TaskType.BOUNDINGBOX: - curr_wh = tensor[:, 4:] - widths.append(curr_wh) - _, h, w = loader.input_shape - img_size = torch.tensor([w, h]) - wh = torch.vstack(widths) * img_size - - # filter out small objects (w or h < 2 pixels) - wh = wh[(wh >= 2).any(1)] - - try: - assert n_anchors <= len( - wh - ), "More requested anchors than number of bounding boxes." - std = wh.std(0) - proposed_anchors = kmeans(wh / std, n_anchors, iter=30) - proposed_anchors = torch.tensor(proposed_anchors[0]) * std - assert n_anchors == len( - proposed_anchors - ), "KMeans returned insufficient number of points" - except Exception: - print("Fallback to random anchor init") - proposed_anchors = ( - torch.sort(torch.rand(n_anchors * 2))[0].reshape(n_anchors, 2) - * img_size - ) - - proposed_anchors = proposed_anchors[ - torch.argsort(proposed_anchors.prod(1)) - ] # sort small to large - - def calc_best_anchor_ratio(anchors: Tensor, wh: Tensor) -> Tensor: - """Calculate how well most suitable anchor box matches each - target bbox.""" - symmetric_size_ratios = torch.min( - wh[:, None] / anchors[None], anchors[None] / wh[:, None] - ) - worst_side_size_ratio = symmetric_size_ratios.min(-1).values - best_anchor_ratio = worst_side_size_ratio.max(-1).values - return best_anchor_ratio - - def calc_best_possible_recall(anchors: Tensor, wh: Tensor) -> Tensor: - """Calculate best possible recall if every bbox is matched to an - appropriate anchor.""" - best_anchor_ratio = calc_best_anchor_ratio(anchors, wh) - best_possible_recall = ( - (best_anchor_ratio > 1 / ratio_threshold).float().mean() - ) - return best_possible_recall - - def anchor_fitness(anchors: Tensor, wh: Tensor) -> Tensor: - """Fitness function used for anchor evolve.""" - best_anchor_ratio = calc_best_anchor_ratio(anchors, wh) - return ( - best_anchor_ratio - * (best_anchor_ratio > 1 / ratio_threshold).float() - ).mean() - - # Genetic algorithm - best_fitness = anchor_fitness(proposed_anchors, wh) - anchor_shape = proposed_anchors.shape - mutation_probability = 0.9 - mutation_noise_mean = 1 - mutation_noise_std = 0.1 - for _ in range(n_generations): - anchor_mutation = torch.ones(anchor_shape) - anchor_mutation = ( - (torch.rand(anchor_shape) < mutation_probability) - * torch.randn(anchor_shape) - * mutation_noise_std - + mutation_noise_mean - ).clip(0.3, 3.0) - - mutated_anchors = (proposed_anchors.clone() * anchor_mutation).clip( - min=2.0 - ) - mutated_fitness = anchor_fitness(mutated_anchors, wh) - if mutated_fitness > best_fitness: - best_fitness = mutated_fitness - proposed_anchors = mutated_anchors.clone() - - proposed_anchors = proposed_anchors[ - torch.argsort(proposed_anchors.prod(1)) - ] # sort small to large - recall = calc_best_possible_recall(proposed_anchors, wh) - - return proposed_anchors, recall.item() - - def anchors_for_fpn_features( features: list[Tensor], strides: Tensor, @@ -605,26 +401,6 @@ def anchors_for_fpn_features( ) -def process_bbox_predictions( - bbox: Tensor, anchor: Tensor -) -> tuple[Tensor, Tensor, Tensor]: - """Transforms bbox predictions to correct format. 
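
(The removed generator whitens box sizes, clusters them with SciPy's k-means, and scores proposals by best possible recall before the genetic refinement. A trimmed sketch of those two steps on toy data, using the same `scipy.cluster.vq.kmeans` call:)

import torch
from scipy.cluster.vq import kmeans

wh = torch.rand(200, 2) * 100 + 2            # toy box sizes in pixels
std = wh.std(0)
centroids, _ = kmeans((wh / std).numpy(), 9, iter=30)
proposed_anchors = torch.tensor(centroids) * std

# best possible recall: fraction of boxes with a "close enough" anchor
ratios = torch.min(
    wh[:, None] / proposed_anchors[None], proposed_anchors[None] / wh[:, None]
)
best_ratio = ratios.min(-1).values.max(-1).values
recall = (best_ratio > 1 / 4.0).float().mean()
print(proposed_anchors.shape, recall)
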
- - @type bbox: Tensor - @param bbox: Bbox predictions - @type anchor: Tensor - @param anchor: Anchor boxes - @rtype: tuple[Tensor, Tensor, Tensor] - @return: xy and wh predictions and tail. The tail is anything after - xywh. - """ - out_bbox = bbox.sigmoid() - out_bbox_xy = out_bbox[..., 0:2] * 2.0 - 0.5 - out_bbox_wh = (out_bbox[..., 2:4] * 2) ** 2 * anchor - out_bbox_tail = out_bbox[..., 4:] - return out_bbox_xy, out_bbox_wh, out_bbox_tail - - def compute_iou_loss( pred_bboxes: Tensor, target_bboxes: Tensor, diff --git a/luxonis_train/utils/dataset_metadata.py b/luxonis_train/utils/dataset_metadata.py index 22b81618..3a9cecdf 100644 --- a/luxonis_train/utils/dataset_metadata.py +++ b/luxonis_train/utils/dataset_metadata.py @@ -1,5 +1,4 @@ from luxonis_train.loaders import BaseLoaderTorch -from luxonis_train.utils import anchors_from_dataset class DatasetMetadata: @@ -112,31 +111,6 @@ def classes(self, task: str | None = None) -> list[str]: ) return class_names - def autogenerate_anchors( - self, n_heads: int - ) -> tuple[list[list[float]], float]: - """Automatically generates anchors for the provided dataset. - - @type n_heads: int - @param n_heads: Number of heads to generate anchors for. - @rtype: tuple[list[list[float]], float] - @return: List of anchors in [-1,6] format and recall of the - anchors. - @raises RuntimeError: If the dataset loader was not provided - during initialization. - """ - if self._loader is None: - raise RuntimeError( - "Cannot generate anchors without a dataset loader. " - "Please provide a dataset loader to the constructor " - "or call `set_loader` method." - ) - - proposed_anchors, recall = anchors_from_dataset( - self._loader, n_anchors=n_heads * 3 - ) - return proposed_anchors.reshape(-1, 6).tolist(), recall - @classmethod def from_loader(cls, loader: BaseLoaderTorch) -> "DatasetMetadata": """Creates a L{DatasetMetadata} object from a L{LuxonisDataset}. diff --git a/luxonis_train/utils/keypoints.py b/luxonis_train/utils/keypoints.py index 9fbc741d..8073c399 100644 --- a/luxonis_train/utils/keypoints.py +++ b/luxonis_train/utils/keypoints.py @@ -6,24 +6,6 @@ logger = logging.getLogger(__name__) -def process_keypoints_predictions( - keypoints: Tensor, -) -> tuple[Tensor, Tensor, Tensor]: - """Extracts x, y and visibility from keypoints predictions. - - @type keypoints: Tensor - @param keypoints: Keypoints predictions. The last dimension must be divisible by 3 - and is expected to be in format [x1, y1, v1, x2, y2, v2, ...]. - - @rtype: tuple[Tensor, Tensor, Tensor] - @return: x, y and visibility tensors. 
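
(Taken together, the two removed helpers decode raw YOLOv7-style outputs: sigmoid box offsets (`* 2 - 0.5` for the center, `(* 2) ** 2 * anchor` for width and height) plus strided slicing of the keypoint triplets. A joint sketch on a dummy prediction:)

import torch

pred = torch.randn(7, 5 + 6)        # cx, cy, w, h, obj + 2 keypoints
anchor = torch.tensor([12.0, 16.0])

box = pred[..., :5].sigmoid()
box_xy = box[..., 0:2] * 2.0 - 0.5            # center offsets in (-0.5, 1.5)
box_wh = (box[..., 2:4] * 2) ** 2 * anchor    # wh in (0, 4) * anchor

kpts = pred[..., 5:]
kpt_x, kpt_y, kpt_v = kpts[..., ::3], kpts[..., 1::3], kpts[..., 2::3]
print(box_xy.shape, box_wh.shape, kpt_x.shape)  # (7, 2) three times
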
- """ - x = keypoints[..., ::3] - y = keypoints[..., 1::3] - visibility = keypoints[..., 2::3] - return x, y, visibility - - def get_sigmas( sigmas: list[float] | None, n_keypoints: int, diff --git a/tests/configs/archive_config.yaml b/tests/configs/archive_config.yaml index 71589f4d..73766823 100644 --- a/tests/configs/archive_config.yaml +++ b/tests/configs/archive_config.yaml @@ -12,10 +12,6 @@ model: inputs: - EfficientRep - - name: ImplicitKeypointBBoxHead - inputs: - - EfficientRep - - name: SegmentationHead inputs: - EfficientRep diff --git a/tests/configs/parking_lot_config.yaml b/tests/configs/parking_lot_config.yaml index bf0b9da3..78711178 100644 --- a/tests/configs/parking_lot_config.yaml +++ b/tests/configs/parking_lot_config.yaml @@ -16,14 +16,6 @@ model: inputs: - neck - - name: ImplicitKeypointBBoxHead - alias: car-detection-head - inputs: - - neck - task: - keypoints: car-keypoints - boundingbox: car-boundingbox - - name: EfficientKeypointBBoxHead alias: motorbike-detection-head task: @@ -65,14 +57,10 @@ model: attached_to: vehicle-type-segmentation-head - name: CrossEntropyLoss attached_to: color-segmentation-head - - name: ImplicitKeypointBBoxLoss - attached_to: car-detection-head - name: EfficientKeypointBBoxLoss attached_to: motorbike-detection-head metrics: - - name: ObjectKeypointSimilarity - attached_to: car-detection-head - name: MeanAveragePrecisionKeypoints attached_to: motorbike-detection-head - name: MeanAveragePrecision @@ -88,16 +76,6 @@ model: attached_to: brand-segmentation-head visualizers: - - name: MultiVisualizer - alias: multi-visualizer-car - attached_to: car-detection-head - params: - visualizers: - - name: KeypointVisualizer - params: - nonvisible_color: blue - - name: BBoxVisualizer - - name: MultiVisualizer alias: multi-visualizer-motorbike attached_to: motorbike-detection-head diff --git a/tests/integration/parking_lot.json b/tests/integration/parking_lot.json index 28ca0a61..c8842c1f 100644 --- a/tests/integration/parking_lot.json +++ b/tests/integration/parking_lot.json @@ -90,16 +90,6 @@ ], "layout": "NCHW" }, - { - "name": "car-detection-head/boxes_and_keypoints/0", - "dtype": "float32", - "shape": [ - 1, - 5040, - 24 - ], - "layout": "NCD" - }, { "name": "color-segmentation-head/color-segmentation/0", "dtype": "float32", @@ -179,25 +169,6 @@ "bbox-head/boundingbox/2" ] }, - { - "name": "car-detection-head", - "parser": "YoloDetectionNetwork", - "metadata": { - "postprocessor_path": null, - "classes": [ - "car" - ], - "n_classes": 1, - "iou_threshold": 0.45, - "conf_threshold": 0.25, - "max_det": 300, - "subtype": "yolov7", - "n_keypoints": 6 - }, - "outputs": [ - "car-detection-head/boxes_and_keypoints/0" - ] - }, { "name": "motorbike-detection-head", "parser": "YoloDetectionNetwork", diff --git a/tests/integration/test_detection.py b/tests/integration/test_detection.py index c88851ba..45e83f0a 100644 --- a/tests/integration/test_detection.py +++ b/tests/integration/test_detection.py @@ -26,14 +26,6 @@ def get_opts_backbone(backbone: str) -> dict[str, Any]: }, "inputs": [backbone], }, - { - "name": "ImplicitKeypointBBoxHead", - "task": { - "keypoints": "car-keypoints", - "boundingbox": "car-boundingbox", - }, - "inputs": [backbone], - }, ], "losses": [ { @@ -45,10 +37,6 @@ def get_opts_backbone(backbone: str) -> dict[str, Any]: "attached_to": "EfficientKeypointBBoxHead", "params": {"area_factor": 0.5}, }, - { - "name": "ImplicitKeypointBBoxLoss", - "attached_to": "ImplicitKeypointBBoxHead", - }, ], "metrics": [ { @@ -60,11 +48,6 @@ def 
get_opts_backbone(backbone: str) -> dict[str, Any]: "alias": "EfficientKeypointBBoxHead-MaP", "attached_to": "EfficientKeypointBBoxHead", }, - { - "name": "MeanAveragePrecisionKeypoints", - "alias": "ImplicitKeypointBBoxHead-MaP", - "attached_to": "ImplicitKeypointBBoxHead", - }, ], } } diff --git a/tests/integration/test_simple.py b/tests/integration/test_simple.py index 7b2f0f91..3d489c4a 100644 --- a/tests/integration/test_simple.py +++ b/tests/integration/test_simple.py @@ -115,7 +115,6 @@ def test_custom_tasks( ), "Config JSON not found in the archive." generated_config = json.loads(extracted_cfg.read().decode()) - del generated_config["model"]["heads"][1]["metadata"]["anchors"] assert generated_config == correct_archive_config diff --git a/tests/unittests/test_utils/test_boxutils.py b/tests/unittests/test_utils/test_boxutils.py index 8df1ab74..6056cf8e 100644 --- a/tests/unittests/test_utils/test_boxutils.py +++ b/tests/unittests/test_utils/test_boxutils.py @@ -10,7 +10,6 @@ bbox_iou, compute_iou_loss, dist2bbox, - process_bbox_predictions, ) @@ -105,21 +104,6 @@ def test_compute_iou_loss(): assert 0 <= iou.min() and iou.max() <= 1 -def test_process_bbox_predictions(): - bbox = generate_random_bboxes(10, 64, 64, "xywh") - data = torch.rand(10, 4) - prediction = torch.concat([bbox, data], dim=-1) - anchor = torch.rand(10, 2) - - out_bbox_xy, out_bbox_wh, out_bbox_tail = process_bbox_predictions( - prediction, anchor - ) - - assert out_bbox_xy.shape == (10, 2) - assert out_bbox_wh.shape == (10, 2) - assert out_bbox_tail.shape == (10, 4) - - def test_anchors_for_fpn_features(): features = [torch.rand(1, 256, 14, 14), torch.rand(1, 256, 28, 28)] strides = torch.tensor([8, 16]) diff --git a/tests/unittests/test_utils/test_dataset_metadata.py b/tests/unittests/test_utils/test_dataset_metadata.py index 8dba11a8..daf01725 100644 --- a/tests/unittests/test_utils/test_dataset_metadata.py +++ b/tests/unittests/test_utils/test_dataset_metadata.py @@ -45,9 +45,3 @@ def test_class_names(metadata): metadata._classes["segmentation"] = ["car", "person", "tree"] with pytest.raises(RuntimeError): metadata.classes() - - -def test_no_loader(): - metadata = DatasetMetadata() - with pytest.raises(RuntimeError): - metadata.autogenerate_anchors(3) diff --git a/tests/unittests/test_utils/test_keypoints.py b/tests/unittests/test_utils/test_keypoints.py index 3d20dae6..5763386f 100644 --- a/tests/unittests/test_utils/test_keypoints.py +++ b/tests/unittests/test_utils/test_keypoints.py @@ -1,10 +1,6 @@ import pytest -import torch -from luxonis_train.utils.keypoints import ( - get_sigmas, - process_keypoints_predictions, -) +from luxonis_train.utils.keypoints import get_sigmas def test_get_sigmas(): @@ -14,11 +10,3 @@ def test_get_sigmas(): get_sigmas(sigmas, 2) assert len(get_sigmas(None, 17)) == 17 assert len(get_sigmas(None, 5)) == 5 - - -def test_process_keypoints_predictions(): - keypoints = torch.tensor([[0.1, 0.2, 1.0, 0.4, 0.5, 0.0]]) - x, y, visibility = process_keypoints_predictions(keypoints) - pytest.approx(x[0].tolist(), [0.1, 0.4]) - pytest.approx(y[0].tolist(), [0.2, 0.5]) - pytest.approx(visibility[0].tolist(), [1.0, 0.0])
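
With the implicit head and its loss removed, the anchor-free `EfficientKeypointBBoxHead` / `EfficientKeypointBBoxLoss` pair is the remaining keypoint-detection path. A minimal override in the style of `get_opts_backbone` above (a hypothetical helper; the keys mirror the surviving entries in `tests/integration/test_detection.py`):

from typing import Any


def get_keypoint_opts(backbone: str) -> dict[str, Any]:
    return {
        "model": {
            "nodes": [
                {"name": backbone},
                {
                    "name": "EfficientKeypointBBoxHead",
                    "inputs": [backbone],
                },
            ],
            "losses": [
                {
                    "name": "EfficientKeypointBBoxLoss",
                    "attached_to": "EfficientKeypointBBoxHead",
                },
            ],
            "metrics": [
                {
                    "name": "MeanAveragePrecisionKeypoints",
                    "attached_to": "EfficientKeypointBBoxHead",
                },
            ],
        }
    }
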