Commit e6c97f3

Instance Segmentation Head (#144)

Co-authored-by: Martin Kozlovsky <[email protected]>
Co-authored-by: klemen1999 <[email protected]>
3 people authored Jan 25, 2025
1 parent 84ec507 commit e6c97f3
Showing 36 changed files with 2,264 additions and 268 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/tests.yaml
@@ -33,7 +33,7 @@ jobs:
         run: pip install -e .[dev]

       - name: Install latest luxonis-ml
-        run: pip install luxonis-ml[all]@git+https://github.com/luxonis/luxonis-ml.git@main --upgrade --no-deps --force-reinstall
+        run: pip install luxonis-ml[all]@git+https://github.com/luxonis/luxonis-ml.git@main --upgrade --force-reinstall

       - name: Authenticate to Google Cloud
         id: google-auth
4 changes: 4 additions & 0 deletions configs/detection_heavy_model.yaml
@@ -1,4 +1,5 @@
 # Example configuration for training a predefined heavy detection model
+# NOTE: This example downloads pretrained COCO weights and training parameters are already prepared for fine tuning

 model:
   name: detection_heavy
@@ -22,6 +23,9 @@ trainer:
     keep_aspect_ratio: true
     normalize:
       active: true
+      params:
+        mean: [0., 0., 0.]
+        std: [1, 1, 1]

   batch_size: 8
   epochs: &epochs 300
3 changes: 3 additions & 0 deletions configs/detection_light_model.yaml
@@ -23,6 +23,9 @@ trainer:
     keep_aspect_ratio: true
     normalize:
       active: true
+      params:
+        mean: [0., 0., 0.]
+        std: [1, 1, 1]

   batch_size: 8
   epochs: &epochs 300
54 changes: 54 additions & 0 deletions configs/instance_segmentation_heavy_model.yaml
@@ -0,0 +1,54 @@
# Example configuration for training a predefined heavy instance segmentation model

model:
  name: instance_segmentation_heavy
  predefined_model:
    name: InstanceSegmentationModel
    params:
      variant: heavy
      loss_params:
        bbox_loss_weight: 60 # Should be 7.5 * accumulate_grad_batches for best results
        class_loss_weight: 4 # Should be 0.5 * accumulate_grad_batches for best results
        dfl_loss_weight: 12 # Should be 1.5 * accumulate_grad_batches for best results

loader:
  params:
    dataset_name: coco_test

trainer:
  preprocessing:
    train_image_size: [384, 512]
    keep_aspect_ratio: true
    normalize:
      active: true
      params:
        mean: [0., 0., 0.]
        std: [1, 1, 1]

  batch_size: 8
  epochs: &epochs 300
  accumulate_grad_batches: 8 # For best results, always accumulate gradients to effectively use 64 batch size
  n_workers: 8
  validation_interval: 10
  n_log_images: 8

  callbacks:
    - name: EMACallback
      params:
        decay: 0.9999
        use_dynamic_decay: True
        decay_tau: 2000
    - name: ExportOnTrainEnd
    - name: TestOnTrainEnd

  training_strategy:
    name: "TripleLRSGDStrategy"
    params:
      warmup_epochs: 3
      warmup_bias_lr: 0.1
      warmup_momentum: 0.8
      lr: 0.01
      lre: 0.0001
      momentum: 0.937
      weight_decay: 0.0005
      nesterov: True
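The weight comments in this config encode one scaling rule. A minimal sketch of that arithmetic, assuming only what the comments themselves state:

```python
# Sketch (not part of the commit): each loss weight is its per-batch baseline
# multiplied by accumulate_grad_batches, so accumulating gradients over
# 8 batches of 8 images behaves like one batch of 64.
base_weights = {"bbox": 7.5, "class": 0.5, "dfl": 1.5}  # per-batch baselines
accumulate_grad_batches = 8
batch_size = 8

scaled = {k: v * accumulate_grad_batches for k, v in base_weights.items()}
print(scaled)  # {'bbox': 60.0, 'class': 4.0, 'dfl': 12.0} -- matches the config
print(batch_size * accumulate_grad_batches)  # 64, the effective batch size
```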
54 changes: 54 additions & 0 deletions configs/instance_segmentation_light_model.yaml
@@ -0,0 +1,54 @@
# Example configuration for training a predefined light instance segmentation model

model:
  name: instance_segmentation_light
  predefined_model:
    name: InstanceSegmentationModel
    params:
      variant: light
      loss_params:
        bbox_loss_weight: 60 # Should be 7.5 * accumulate_grad_batches for best results
        class_loss_weight: 4 # Should be 0.5 * accumulate_grad_batches for best results
        dfl_loss_weight: 12 # Should be 1.5 * accumulate_grad_batches for best results

loader:
  params:
    dataset_name: coco_test

trainer:
  preprocessing:
    train_image_size: [384, 512]
    keep_aspect_ratio: true
    normalize:
      active: true
      params:
        mean: [0., 0., 0.]
        std: [1, 1, 1]

  batch_size: 8
  epochs: &epochs 300
  accumulate_grad_batches: 8 # For best results, always accumulate gradients to effectively use 64 batch size
  n_workers: 8
  validation_interval: 10
  n_log_images: 8

  callbacks:
    - name: EMACallback
      params:
        decay: 0.9999
        use_dynamic_decay: True
        decay_tau: 2000
    - name: ExportOnTrainEnd
    - name: TestOnTrainEnd

  training_strategy:
    name: "TripleLRSGDStrategy"
    params:
      warmup_epochs: 3
      warmup_bias_lr: 0.1
      warmup_momentum: 0.8
      lr: 0.01
      lre: 0.0001
      momentum: 0.937
      weight_decay: 0.0005
      nesterov: True
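The `TripleLRSGDStrategy` block is parameterized like a YOLO-style warmup schedule (`warmup_epochs`, `warmup_bias_lr`, `lr`). The sketch below is a hypothetical illustration of such a linear warmup; the strategy's actual implementation is not part of this diff, so the interpolation and the bias-group behavior are assumptions:

```python
# Hypothetical sketch: biases warm up downward from warmup_bias_lr, other
# parameter groups warm up upward from 0, both reaching lr after
# warmup_epochs. Treat this as an assumption, not the verified strategy.
warmup_epochs = 3
warmup_bias_lr = 0.1
lr = 0.01

def warmup(epoch: float, start: float, target: float) -> float:
    """Linearly interpolate from `start` to `target` over warmup_epochs."""
    if epoch >= warmup_epochs:
        return target
    return start + (target - start) * (epoch / warmup_epochs)

print(warmup(0.0, warmup_bias_lr, lr))  # 0.1   -- bias group at the start
print(warmup(1.5, 0.0, lr))             # 0.005 -- weight group halfway
print(warmup(3.0, 0.0, lr))             # 0.01  -- warmup finished
```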
42 changes: 27 additions & 15 deletions configs/keypoint_bbox_heavy_model.yaml
@@ -6,6 +6,13 @@ model:
     name: KeypointDetectionModel
     params:
       variant: heavy
+      loss_params:
+        iou_type: "siou"
+        n_warmup_epochs: 0 # No assigner warmup
+        iou_loss_weight: 60 # Should be 7.5 * accumulate_grad_batches for best results
+        class_loss_weight: 4 # Should be 0.5 * accumulate_grad_batches for best results
+        regr_kpts_loss_weight: 96 # Should be 12 * accumulate_grad_batches for best results
+        vis_kpts_loss_weight: 16 # Should be 2 * accumulate_grad_batches for best results

 loader:
   params:
@@ -17,29 +24,34 @@ trainer:
     keep_aspect_ratio: true
     normalize:
       active: true
+      params:
+        mean: [0., 0., 0.]
+        std: [1, 1, 1]

   batch_size: 8
-  epochs: &epochs 200
+  epochs: &epochs 300
   n_workers: 4
   validation_interval: 10
   n_log_images: 8
+  accumulate_grad_batches: 8 # For best results, always accumulate gradients to effectively use 64 batch size

   callbacks:
+    - name: EMACallback
+      params:
+        decay: 0.9999
+        use_dynamic_decay: True
+        decay_tau: 2000
     - name: ExportOnTrainEnd
     - name: TestOnTrainEnd

-  optimizer:
-    name: SGD
-    params:
-      lr: 0.006
-      momentum: 0.937
-      dampening: 0.0
-      nesterov: true
-
-  scheduler:
-    name: CosineAnnealingLR
-    params:
-      T_max: *epochs
-      eta_min: 0.00001
-      last_epoch: -1
+  training_strategy:
+    name: "TripleLRSGDStrategy"
+    params:
+      warmup_epochs: 3
+      warmup_bias_lr: 0.1
+      warmup_momentum: 0.8
+      lr: 0.01
+      lre: 0.0001
+      momentum: 0.937
+      weight_decay: 0.0005
+      nesterov: True
42 changes: 27 additions & 15 deletions configs/keypoint_bbox_light_model.yaml
@@ -6,6 +6,13 @@ model:
     name: KeypointDetectionModel
     params:
       variant: light
+      loss_params:
+        iou_type: "siou"
+        n_warmup_epochs: 0 # No assigner warmup
+        iou_loss_weight: 60 # Should be 7.5 * accumulate_grad_batches for best results
+        class_loss_weight: 4 # Should be 0.5 * accumulate_grad_batches for best results
+        regr_kpts_loss_weight: 96 # Should be 12 * accumulate_grad_batches for best results
+        vis_kpts_loss_weight: 16 # Should be 2 * accumulate_grad_batches for best results

 loader:
   params:
@@ -17,29 +24,34 @@ trainer:
     keep_aspect_ratio: true
     normalize:
       active: true
+      params:
+        mean: [0., 0., 0.]
+        std: [1, 1, 1]

   batch_size: 8
-  epochs: &epochs 200
+  epochs: &epochs 300
   n_workers: 4
   validation_interval: 10
   n_log_images: 8
+  accumulate_grad_batches: 8 # For best results, always accumulate gradients to effectively use 64 batch size

   callbacks:
+    - name: EMACallback
+      params:
+        decay: 0.9999
+        use_dynamic_decay: True
+        decay_tau: 2000
     - name: ExportOnTrainEnd
     - name: TestOnTrainEnd

-  optimizer:
-    name: SGD
-    params:
-      lr: 0.006
-      momentum: 0.937
-      dampening: 0.0
-      nesterov: true
-
-  scheduler:
-    name: CosineAnnealingLR
-    params:
-      T_max: *epochs
-      eta_min: 0.00001
-      last_epoch: -1
+  training_strategy:
+    name: "TripleLRSGDStrategy"
+    params:
+      warmup_epochs: 3
+      warmup_bias_lr: 0.1
+      warmup_momentum: 0.8
+      lr: 0.01
+      lre: 0.0001
+      momentum: 0.937
+      weight_decay: 0.0005
+      nesterov: True
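The `EMACallback` parameters (`decay`, `use_dynamic_decay`, `decay_tau`) suggest an exponential moving average of model weights whose decay ramps up over training. A hypothetical sketch, assuming the common YOLOv5-style ramp; the callback's real formula is not shown in this diff:

```python
import math

# Assumed dynamic-decay schedule: the effective decay rises from ~0 toward
# its nominal value, so the EMA tracks the raw weights closely early on and
# changes slowly late in training.
decay, decay_tau = 0.9999, 2000

def effective_decay(step: int) -> float:
    return decay * (1 - math.exp(-step / decay_tau))

print(effective_decay(100))    # ~0.049 -- early: EMA follows the model quickly
print(effective_decay(10000))  # ~0.993 -- late: EMA is nearly frozen
```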
6 changes: 0 additions & 6 deletions luxonis_train/assigners/tal_assigner.py
@@ -254,10 +254,4 @@ def _get_final_assignments(
             torch.full_like(assigned_scores, 0),
         )

-        assigned_labels = torch.where(
-            mask_pos_sum.bool(),
-            assigned_labels,
-            torch.full_like(assigned_labels, self.n_classes),
-        )
-
         return assigned_labels, assigned_bboxes, assigned_scores
32 changes: 30 additions & 2 deletions luxonis_train/attached_modules/losses/README.md
@@ -12,6 +12,8 @@ List of all the available loss functions.
 - [`AdaptiveDetectionLoss`](#adaptivedetectionloss)
 - [`EfficientKeypointBBoxLoss`](#efficientkeypointbboxloss)
 - [`FOMOLocalizationLoss`](#fomolocalizationloss)
+- [`PrecisionDFLDetectionLoss`](#precisiondfldetectionloss)
+- [`PrecisionDFLSegmentationLoss`](#precisiondflsegmentationloss)

 ## `CrossEntropyLoss`

@@ -97,7 +99,7 @@ Keypoint Similarity Loss](https://arxiv.org/ftp/arxiv/papers/2204/2204.06806.pdf)
 | `class_loss_weight`     | `float`               | `1.0`         | Weight used for the classification sub-loss |
 | `iou_loss_weight`       | `float`               | `2.5`         | Weight used for the `IoU` sub-loss |
 | `regr_kpts_loss_weight` | `float`               | `1.5`         | Weight used for the `OKS` sub-loss |
-| `vis_kpts_loss_weight`  | `float`               | `1.0`         | Weight used for the keypoint visibility sub-loss |
+| `vis_kpts_loss_weight`  | `float`               | `2.0`         | Weight used for the keypoint visibility sub-loss |
 | `sigmas`                | `list[float] \| None` | `None`        | Sigmas used in `KeypointLoss` for `OKS` metric. If `None` then use COCO ones if possible or default ones |
 | `area_factor`           | `float \| None`       | `None`        | Factor by which we multiply bounding box area which is used in `KeypointLoss`. If `None` then use default one |

@@ -120,4 +122,30 @@ Adapted from [here](https://arxiv.org/abs/2108.07610).

 | Key             | Type    | Default value | Description |
 | --------------- | ------- | ------------- | ----------- |
-| `object_weight` | `float` | `1000`        | Weight for the objects in the loss calculation. Training with a larger `object_weight` in the loss parameters may result in more false positives (FP), but it will improve accuracy. |
+| `object_weight` | `float` | `500`         | Weight for the objects in the loss calculation. Training with a larger `object_weight` in the loss parameters may result in more false positives (FP), but it will improve accuracy. |

## `PrecisionDFLDetectionLoss`

Adapted from [here](https://arxiv.org/pdf/2207.02696.pdf) and [here](https://arxiv.org/pdf/2209.02976.pdf).

**Parameters:**

| Key                 | Type    | Default value | Description                                 |
| ------------------- | ------- | ------------- | ------------------------------------------- |
| `tal_topk`          | `int`   | `10`          | Number of anchors considered in selection.  |
| `class_loss_weight` | `float` | `0.5`         | Weight for the classification loss.         |
| `bbox_loss_weight`  | `float` | `7.5`         | Weight for the bounding-box loss.           |
| `dfl_loss_weight`   | `float` | `1.5`         | Weight for the DFL loss.                    |

## `PrecisionDFLSegmentationLoss`

Adapted from [here](https://arxiv.org/pdf/2207.02696.pdf) and [here](https://arxiv.org/pdf/2209.02976.pdf).

**Parameters:**

| Key                 | Type    | Default value | Description                                        |
| ------------------- | ------- | ------------- | -------------------------------------------------- |
| `tal_topk`          | `int`   | `10`          | Number of anchors considered in selection.         |
| `class_loss_weight` | `float` | `0.5`         | Weight for the classification loss.                |
| `bbox_loss_weight`  | `float` | `7.5`         | Weight for the bounding-box and segmentation loss. |
| `dfl_loss_weight`   | `float` | `1.5`         | Weight for the DFL loss.                           |
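Both new losses weight a DFL (Distribution Focal Loss) term via `dfl_loss_weight`. Below is a self-contained sketch of the standard DFL formulation from the linked papers; it is illustrative and not copied from the luxonis-train implementation:

```python
import torch
import torch.nn.functional as F

def dfl_loss(pred_dist: torch.Tensor, target: torch.Tensor) -> torch.Tensor:
    """pred_dist: (N, n_bins) logits over discretized box offsets;
    target: (N,) continuous offsets in [0, n_bins - 1]."""
    n_bins = pred_dist.shape[-1]
    tl = target.floor().long().clamp(0, n_bins - 2)  # left neighbouring bin
    tr = tl + 1                                      # right neighbouring bin
    wr = target - tl.float()                         # weight toward right bin
    wl = 1.0 - wr                                    # weight toward left bin
    # Cross-entropy against both neighbouring bins, weighted by proximity,
    # teaches the head a sharp distribution around the true offset.
    loss_l = F.cross_entropy(pred_dist, tl, reduction="none")
    loss_r = F.cross_entropy(pred_dist, tr, reduction="none")
    return (loss_l * wl + loss_r * wr).mean()

pred = torch.randn(4, 16)  # 16 bins is a common choice for DFL heads
tgt = torch.rand(4) * 15   # continuous targets inside the bin range
print(dfl_loss(pred, tgt))
```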
4 changes: 4 additions & 0 deletions luxonis_train/attached_modules/losses/__init__.py
@@ -7,6 +7,8 @@
 from .ohem_bce_with_logits import OHEMBCEWithLogitsLoss
 from .ohem_cross_entropy import OHEMCrossEntropyLoss
 from .ohem_loss import OHEMLoss
+from .precision_dfl_detection_loss import PrecisionDFLDetectionLoss
+from .precision_dfl_segmentation_loss import PrecisionDFLSegmentationLoss
 from .reconstruction_segmentation_loss import ReconstructionSegmentationLoss
 from .sigmoid_focal_loss import SigmoidFocalLoss
 from .smooth_bce_with_logits import SmoothBCEWithLogitsLoss
@@ -26,4 +28,6 @@
     "OHEMCrossEntropyLoss",
     "OHEMBCEWithLogitsLoss",
     "FOMOLocalizationLoss",
+    "PrecisionDFLDetectionLoss",
+    "PrecisionDFLSegmentationLoss",
 ]
@@ -56,9 +56,9 @@ def __init__(
         @type reduction: Literal["sum", "mean"]
         @param reduction: Reduction type for loss.
         @type class_loss_weight: float
-        @param class_loss_weight: Weight of classification loss.
+        @param class_loss_weight: Weight of classification loss. Defaults to 1.0. For optimal results, multiply with accumulate_grad_batches.
         @type iou_loss_weight: float
-        @param iou_loss_weight: Weight of IoU loss.
+        @param iou_loss_weight: Weight of IoU loss. Defaults to 2.5. For optimal results, multiply with accumulate_grad_batches.
         """
         super().__init__(**kwargs)

@@ -133,6 +133,11 @@ def forward(
         assigned_scores: Tensor,
         mask_positive: Tensor,
     ) -> tuple[Tensor, dict[str, Tensor]]:
+        assigned_labels = torch.where(
+            mask_positive > 0,
+            assigned_labels,
+            torch.full_like(assigned_labels, self.n_classes),
+        )
         one_hot_label = F.one_hot(assigned_labels.long(), self.n_classes + 1)[
             ..., :-1
         ]
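The `torch.where` block added here (moved out of the assigner, whose matching deletion in `tal_assigner.py` appears earlier in this diff) maps unmatched anchors to an extra background index that the existing one-hot encoding then drops. A standalone illustration of that interaction:

```python
import torch
import torch.nn.functional as F

# Unmatched anchors get label n_classes (a background index); dropping the
# last one-hot column leaves them with an all-zero classification target.
n_classes = 3
assigned_labels = torch.tensor([0, 2, 1])
mask_positive = torch.tensor([1, 0, 1])  # the middle anchor is unmatched

assigned_labels = torch.where(
    mask_positive > 0,
    assigned_labels,
    torch.full_like(assigned_labels, n_classes),
)
one_hot_label = F.one_hot(assigned_labels.long(), n_classes + 1)[..., :-1]
print(one_hot_label)
# tensor([[1, 0, 0],
#         [0, 0, 0],   <- background anchor contributes no class target
#         [0, 1, 0]])
```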