Commit a008829

Merge branch 'main' into feature/embeddings-models

kozlov721 committed Jan 25, 2025
2 parents 01de24b + e6c97f3 commit a008829
Showing 42 changed files with 2,356 additions and 318 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/tests.yaml
@@ -33,7 +33,7 @@ jobs:
        run: pip install -e .[dev]

      - name: Install latest luxonis-ml
-        run: pip install luxonis-ml[all]@git+https://github.com/luxonis/luxonis-ml.git@main --upgrade --no-deps --force-reinstall
+        run: pip install luxonis-ml[all]@git+https://github.com/luxonis/luxonis-ml.git@main --upgrade --force-reinstall

      - name: Authenticate to Google Cloud
        id: google-auth
68 changes: 42 additions & 26 deletions configs/README.md
@@ -202,39 +202,40 @@ loader:

Here you can configure everything related to the actual training of the model.

| Key | Type | Default value | Description |
| ------------------------- | ---------------------------------------------- | ------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
| `seed` | `int` | `None` | Seed for reproducibility |
| `deterministic` | `bool \| "warn" \| None` | `None` | Whether PyTorch should use deterministic backend |
| `batch_size` | `int` | `32` | Batch size used for training |
| `accumulate_grad_batches` | `int` | `1` | Number of batches for gradient accumulation |
| `gradient_clip_val` | `NonNegativeFloat \| None` | `None` | Value for gradient clipping. If `None`, gradient clipping is disabled. Clipping can help prevent exploding gradients. |
| `gradient_clip_algorithm` | `Literal["norm", "value"] \| None` | `None` | Algorithm to use for gradient clipping. Options are `"norm"` (clip by norm) or `"value"` (clip element-wise). |
| `use_weighted_sampler` | `bool` | `False` | Whether to use `WeightedRandomSampler` for training, only works with classification tasks |
| `epochs` | `int` | `100` | Number of training epochs |
| `n_workers` | `int` | `4` | Number of workers for data loading |
| `validation_interval` | `int` | `5` | Frequency of computing metrics on validation data |
| `n_log_images` | `int` | `4` | Maximum number of images to visualize and log |
| `skip_last_batch` | `bool` | `True` | Whether to skip last batch while training |
| `accelerator` | `Literal["auto", "cpu", "gpu"]` | `"auto"` | What accelerator to use for training |
| `devices` | `int \| list[int] \| str` | `"auto"` | Either specify how many devices to use (int), list specific devices, or use "auto" for automatic configuration based on the selected accelerator |
| `matmul_precision` | `Literal["medium", "high", "highest"] \| None` | `None` | Sets the internal precision of float32 matrix multiplications |
| `strategy` | `Literal["auto", "ddp"]` | `"auto"` | What strategy to use for training |
| `n_sanity_val_steps` | `int` | `2` | Number of sanity validation steps performed before training |
| `profiler` | `Literal["simple", "advanced"] \| None` | `None` | PL profiler for GPU/CPU/RAM utilization analysis |
| `verbose` | `bool` | `True` | Print all intermediate results to console |
| `pin_memory` | `bool` | `True` | Whether to pin memory in the `DataLoader` |
| `save_top_k` | `-1 \| NonNegativeInt` | `3` | Save top K checkpoints based on validation loss when training |
| `n_validation_batches` | `PositiveInt \| None` | `None` | Limits the number of validation/test batches and makes the val/test loaders deterministic |
| `smart_cfg_auto_populate` | `bool` | `True` | Automatically populate sensible default values for missing config fields and log warnings |
| `resume_training` | `bool` | `False` | Whether to resume training from a checkpoint. Loads not only the weights from `model.weights`, but also the optimizer state, scheduler state, and other training parameters, so training continues seamlessly. |

```yaml
trainer:
  accelerator: "auto"
  devices: "auto"
  strategy: "auto"
  resume_training: true
  n_sanity_val_steps: 1
  profiler: null
  verbose: true
@@ -250,6 +251,21 @@ trainer:
  smart_cfg_auto_populate: true
```

### Model Fine-Tuning Options

#### 1. **Example: Fine-Tuning with a Custom Configuration**

- Do **not** set the `resume_training` flag to `true`.
- Specify a **new LR** in the config (e.g., `0.1`), overriding the previous LR (e.g., `0.001`).
- Training starts at the new LR with the optimizer and scheduler reset; you can even use different optimizers or schedulers than in the base run (see the sketch below).
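
Below is a minimal sketch of such a config. The checkpoint path is hypothetical, and the `optimizer` block simply follows the same `name`/`params` pattern used by the other config sections in this commit; adapt both to your setup:

```yaml
model:
  weights: path/to/base_run.ckpt # hypothetical checkpoint from the base run

trainer:
  resume_training: false # weights are loaded, but the optimizer and scheduler start fresh
  optimizer:
    name: SGD
    params:
      lr: 0.1 # new LR overriding the base run's 0.001
```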

#### 2. **Example: Resuming Training Continuously**

- Use the `resume_training` flag to continue training from the last checkpoint specified in `model.weights`.
- The LR starts at the value where the previous run ended, maintaining scheduler continuity (e.g., a combined LR plot shows a seamless curve).
- For example:
  - Resuming training with extended epochs (e.g., 400 after 300), an adjusted `T_max` (e.g., 400 after 300 for cosine annealing), and a smaller `eta_min` (e.g., 10x lower than before) will continue from the final learning rate (LR) of the previous run, ignore the initial LR specified in the config, and finish at the new `eta_min`. A sketch of such a config follows this list.
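
Under the same assumptions (hypothetical checkpoint path; a `scheduler` block following the same `name`/`params` pattern as the other config sections shown here), resuming a 300-epoch cosine-annealing run for another 100 epochs might look like this:

```yaml
model:
  weights: path/to/last.ckpt # hypothetical checkpoint from the previous run

trainer:
  resume_training: true # restores optimizer and scheduler state along with the weights
  epochs: 400 # extended from the original 300
  scheduler:
    name: CosineAnnealingLR
    params:
      T_max: 400 # adjusted to the new epoch count
      eta_min: 0.00001 # e.g. 10x smaller than in the base run
```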

### Smart Configuration Auto-population

When setting `trainer.smart_cfg_auto_populate = True`, the following set of rules will be applied:
4 changes: 4 additions & 0 deletions configs/detection_heavy_model.yaml
@@ -1,4 +1,5 @@
# Example configuration for training a predefined heavy detection model
# NOTE: This example downloads pretrained COCO weights, and the training parameters are already prepared for fine-tuning

model:
  name: detection_heavy
@@ -22,6 +23,9 @@ trainer:
    keep_aspect_ratio: true
    normalize:
      active: true
      params:
        mean: [0., 0., 0.]
        std: [1, 1, 1]

  batch_size: 8
  epochs: &epochs 300
3 changes: 3 additions & 0 deletions configs/detection_light_model.yaml
@@ -23,6 +23,9 @@ trainer:
    keep_aspect_ratio: true
    normalize:
      active: true
      params:
        mean: [0., 0., 0.]
        std: [1, 1, 1]

  batch_size: 8
  epochs: &epochs 300
54 changes: 54 additions & 0 deletions configs/instance_segmentation_heavy_model.yaml
@@ -0,0 +1,54 @@
# Example configuration for training a predefined heavy instance segmentation model

model:
  name: instance_segmentation_heavy
  predefined_model:
    name: InstanceSegmentationModel
    params:
      variant: heavy
      loss_params:
        bbox_loss_weight: 60 # Should be 7.5 * accumulate_grad_batches for best results
        class_loss_weight: 4 # Should be 0.5 * accumulate_grad_batches for best results
        dfl_loss_weight: 12 # Should be 1.5 * accumulate_grad_batches for best results

loader:
  params:
    dataset_name: coco_test

trainer:
  preprocessing:
    train_image_size: [384, 512]
    keep_aspect_ratio: true
    normalize:
      active: true
      params:
        mean: [0., 0., 0.]
        std: [1, 1, 1]

  batch_size: 8
  epochs: &epochs 300
  accumulate_grad_batches: 8 # For best results, always accumulate gradients to effectively use 64 batch size
  n_workers: 8
  validation_interval: 10
  n_log_images: 8

  callbacks:
    - name: EMACallback
      params:
        decay: 0.9999
        use_dynamic_decay: True
        decay_tau: 2000
    - name: ExportOnTrainEnd
    - name: TestOnTrainEnd

  training_strategy:
    name: "TripleLRSGDStrategy"
    params:
      warmup_epochs: 3
      warmup_bias_lr: 0.1
      warmup_momentum: 0.8
      lr: 0.01
      lre: 0.0001
      momentum: 0.937
      weight_decay: 0.0005
      nesterov: True
54 changes: 54 additions & 0 deletions configs/instance_segmentation_light_model.yaml
@@ -0,0 +1,54 @@
# Example configuration for training a predefined light instance segmentation model

model:
  name: instance_segmentation_light
  predefined_model:
    name: InstanceSegmentationModel
    params:
      variant: light
      loss_params:
        bbox_loss_weight: 60 # Should be 7.5 * accumulate_grad_batches for best results
        class_loss_weight: 4 # Should be 0.5 * accumulate_grad_batches for best results
        dfl_loss_weight: 12 # Should be 1.5 * accumulate_grad_batches for best results

loader:
  params:
    dataset_name: coco_test

trainer:
  preprocessing:
    train_image_size: [384, 512]
    keep_aspect_ratio: true
    normalize:
      active: true
      params:
        mean: [0., 0., 0.]
        std: [1, 1, 1]

  batch_size: 8
  epochs: &epochs 300
  accumulate_grad_batches: 8 # For best results, always accumulate gradients to effectively use 64 batch size
  n_workers: 8
  validation_interval: 10
  n_log_images: 8

  callbacks:
    - name: EMACallback
      params:
        decay: 0.9999
        use_dynamic_decay: True
        decay_tau: 2000
    - name: ExportOnTrainEnd
    - name: TestOnTrainEnd

  training_strategy:
    name: "TripleLRSGDStrategy"
    params:
      warmup_epochs: 3
      warmup_bias_lr: 0.1
      warmup_momentum: 0.8
      lr: 0.01
      lre: 0.0001
      momentum: 0.937
      weight_decay: 0.0005
      nesterov: True