Unified Cores and Cleanup (#57)

luxonis · Aug 19, 2024 · 9b17a70 · 9b17a70
1 parent 4af2fab
commit 9b17a70
Show file tree

Hide file tree

Showing 45 changed files with 1,277 additions and 1,891 deletions.
diff --git a/.github/workflows/tests.yaml b/.github/workflows/tests.yaml
@@ -45,6 +45,15 @@ jobs:
       if: matrix.os == 'macOS-latest'
       run: pip install -e .[dev]
 
+    - name: Authenticate to Google Cloud
+      id: google-auth
+      uses: google-github-actions/auth@v2
+      with:
+        credentials_json: ${{ secrets.GOOGLE_APPLICATION_CREDENTIALS }}
+        create_credentials_file: true
+        export_environment_variables: true
+        token_format: access_token
+
     - name: Run tests with coverage [Ubuntu]
       if: matrix.os == 'ubuntu-latest' && matrix.version == '3.10'
       run: pytest tests --cov=luxonis_train --cov-report xml --junit-xml pytest.xml

diff --git a/.gitignore b/.gitignore
@@ -148,3 +148,8 @@ models_venv/*
 # vscode settings
 .vscode
 tests/data
+mlartifacts
+mlruns
+wandb
+tests/_data
+tests/integration/_test-output
diff --git a/configs/README.md b/configs/README.md
@@ -29,16 +29,15 @@ You can create your own config or use/edit one of the examples.
 
 ## Top-level Options
 
-| Key           | Type                  | Default value | Description                                   |
-| ------------- | --------------------- | ------------- | --------------------------------------------- |
-| use_rich_text | bool                  | True          | whether to use rich text for console printing |
-| model         | [Model](#model)       |               | model section                                 |
-| dataset       | [dataset](#dataset)   |               | dataset section                               |
-| train         | [train](#train)       |               | train section                                 |
-| tracker       | [tracker](#tracker)   |               | tracker section                               |
-| trainer       | [trainer](#trainer)   |               | trainer section                               |
-| exporter      | [exporter](#exporter) |               | exporter section                              |
-| tuner         | [tuner](#tuner)       |               | tuner section                                 |
+| Key      | Type                  | Default value | Description      |
+| -------- | --------------------- | ------------- | ---------------- |
+| model    | [Model](#model)       |               | model section    |
+| dataset  | [dataset](#dataset)   |               | dataset section  |
+| train    | [train](#train)       |               | train section    |
+| tracker  | [tracker](#tracker)   |               | tracker section  |
+| trainer  | [trainer](#trainer)   |               | trainer section  |
+| exporter | [exporter](#exporter) |               | exporter section |
+| tuner    | [tuner](#tuner)       |               | tuner section    |
 
 ## Model
 
@@ -214,9 +213,9 @@ Here you can define configuration for exporting.
 | ---------------------- | --------------------------------- | --------------- | ----------------------------------------------------------------------------------------------- |
 | export_save_directory  | str                               | "output_export" | Where to save the exported files.                                                               |
 | input_shape            | list\[int\] \| None               | None            | Input shape of the model. If not provided, inferred from the dataset.                           |
-| export_model_name      | str                               | "model"         | Name of the exported model.                                                                     |
-| data_type              | Literal\["INT8", "FP16", "FP32"\] | "FP16"          | Data type of the exported model.                                                                |
+| data_type              | Literal\["INT8", "FP16", "FP32"\] | "FP16"          | Data type of the exported model. Only used for conversion to BLOB.                              |
 | reverse_input_channels | bool                              | True            | Whether to reverse the image channels in the exported model. Relevant for `.blob` export        |
+| upload                 | bool                              | True            | Whether to upload the files created during export to the current tracker.                       |
 | scale_values           | list\[float\] \| None             | None            | What scale values to use for input normalization. If not provided, inferred from augmentations. |
 | mean_values            | list\[float\] \| None             | None            | What mean values to use for input normalizations. If not provided, inferred from augmentations. |
 | upload_directory       | str \| None                       | None            | Where to upload the exported models.                                                            |

diff --git a/configs/classification_model.yaml b/configs/classification_model.yaml
@@ -1,8 +1,5 @@
 # Example configuration for training a predefined classification model
 
-
-use_rich_text: True
-
 model:
   name: cifar10_classification
   predefined_model:

diff --git a/configs/detection_model.yaml b/configs/detection_model.yaml
@@ -1,8 +1,5 @@
 # Example configuration for training a predefined detection model
 
-
-use_rich_text: True
-
 model:
   name: coco_detection
   predefined_model:

diff --git a/configs/efficient_coco_model.yaml b/configs/efficient_coco_model.yaml
@@ -95,7 +95,7 @@ trainer:
   batch_size: 4
   accumulate_grad_batches: 1
   epochs: 200
-  num_workers: 0
+  num_workers: 4
   train_metrics_interval: -1
   validation_interval: 10
   num_log_images: 8

diff --git a/configs/example_export.yaml b/configs/example_export.yaml
@@ -1,8 +1,5 @@
 # Example configuration for exporting a predefined segmentation model
 
-
-use_rich_text: True
-
 model:
   name: coco_segmentation
   weights: null  # specify a path to the weights here

diff --git a/configs/example_multi_input.yaml b/configs/example_multi_input.yaml
@@ -8,8 +8,6 @@ loader:
   # Needs to be set for visualizers and evaluators to work.
   image_source: left
 
-use_rich_text: True
-
 model:
   name: example_multi_input
   nodes:

diff --git a/configs/example_tuning.yaml b/configs/example_tuning.yaml
@@ -1,8 +1,5 @@
 # Example configuration for tuning a predefined segmentation model
 
-
-use_rich_text: True
-
 model:
   name: coco_segmentation
   predefined_model:
@@ -31,8 +28,8 @@ trainer:
       - name: Flip
 
   batch_size: 4
-  epochs: &epochs 10
-  validation_interval: 1
+  epochs: &epochs 100
+  validation_interval: 10
   num_log_images: 8
 
   scheduler:

diff --git a/configs/keypoint_bbox_model.yaml b/configs/keypoint_bbox_model.yaml
@@ -1,8 +1,5 @@
 # Example configuration for training a predefined keypoint-detection model
 
-
-use_rich_text: True
-
 model:
   name: coco_keypoints
   predefined_model:

diff --git a/configs/segmentation_model.yaml b/configs/segmentation_model.yaml
@@ -1,8 +1,5 @@
 # Example configuration for training a predefined segmentation model
 
-
-use_rich_text: True
-
 model:
   name: coco_segmentation
   predefined_model:

diff --git a/luxonis_train/__main__.py b/luxonis_train/__main__.py
@@ -5,7 +5,6 @@
 
 import typer
 import yaml
-from luxonis_ml.data.__main__ import inspect as lxml_inspect
 from luxonis_ml.enums import SplitType
 
 app = typer.Typer(
@@ -49,35 +48,35 @@ def train(
     opts: OptsType = None,
 ):
     """Start training."""
-    from luxonis_train.core import Trainer
+    from luxonis_train.core import LuxonisModel
 
-    Trainer(config, opts, resume=resume).train()
+    LuxonisModel(config, opts).train(resume_weights=resume)
 
 
 @app.command()
-def eval(
+def test(
     config: ConfigType = None, view: ViewType = SplitType.VAL, opts: OptsType = None
 ):
     """Evaluate model."""
-    from luxonis_train.core import Trainer
+    from luxonis_train.core import LuxonisModel
 
-    Trainer(config, opts).test(view=view.value)
+    LuxonisModel(config, opts).test(view=view.value)
 
 
 @app.command()
 def tune(config: ConfigType = None, opts: OptsType = None):
     """Start hyperparameter tuning."""
-    from luxonis_train.core import Tuner
+    from luxonis_train.core import LuxonisModel
 
-    Tuner(config, opts).tune()
+    LuxonisModel(config, opts).tune()
 
 
 @app.command()
 def export(config: ConfigType = None, opts: OptsType = None):
     """Export model."""
-    from luxonis_train.core import Exporter
+    from luxonis_train.core import LuxonisModel
 
-    Exporter(config, opts).export()
+    LuxonisModel(config, opts).export()
 
 
 @app.command()
@@ -88,9 +87,9 @@ def infer(
     opts: OptsType = None,
 ):
     """Run inference."""
-    from luxonis_train.core import Inferer
+    from luxonis_train.core import LuxonisModel
 
-    Inferer(config, opts, view=view.value, save_dir=save_dir).infer()
+    LuxonisModel(config, opts).infer(view=view.value, save_dir=save_dir)
 
 
 @app.command()
@@ -110,6 +109,7 @@ def inspect(
 ):
     """Inspect dataset."""
     from lightning.pytorch import seed_everything
+    from luxonis_ml.data.__main__ import inspect as lxml_inspect
 
     from luxonis_train.utils.config import Config
 
@@ -133,9 +133,7 @@ def inspect(
         lxml_inspect(
             name=cfg.loader.params["dataset_name"],
             view=view,
-            aug_config=Path(
-                f.name,
-            ),
+            aug_config=f.name,
         )
 
 
@@ -151,9 +149,9 @@ def archive(
     opts: OptsType = None,
 ):
     """Generate NN archive."""
-    from luxonis_train.core import Archiver
+    from luxonis_train.core import LuxonisModel
 
-    Archiver(str(config), opts).archive(executable)
+    LuxonisModel(str(config), opts).archive(executable)
 
 
 def version_callback(value: bool):

diff --git a/luxonis_train/callbacks/README.md b/luxonis_train/callbacks/README.md
@@ -21,7 +21,6 @@ List of supported callbacks from `lightning.pytorch`.
 - [LearningRateMonitor](https://lightning.ai/docs/pytorch/stable/api/lightning.pytorch.callbacks.LearningRateMonitor.html#lightning.pytorch.callbacks.LearningRateMonitor)
 - [ModelCheckpoint](https://lightning.ai/docs/pytorch/stable/api/lightning.pytorch.callbacks.ModelCheckpoint.html#lightning.pytorch.callbacks.ModelCheckpoint)
 - [RichModelSummary](https://lightning.ai/docs/pytorch/stable/api/lightning.pytorch.callbacks.RichModelSummary.html#lightning.pytorch.callbacks.RichModelSummary)
-  - Added automatically if `use_rich_text` is set to `True` in [config](../../configs/README.md#topleveloptions).
 
 ## ExportOnTrainEnd
 
@@ -36,7 +35,6 @@ Performs export on train end with best weights according to the validation loss.
 ## LuxonisProgressBar
 
 Custom rich text progress bar based on RichProgressBar from Pytorch Lightning.
-Added automatically if `use_rich_text` is set to `True` in [config](../../configs/README.md#topleveloptions).
 
 ## MetadataLogger
 

diff --git a/luxonis_train/callbacks/archive_on_train_end.py b/luxonis_train/callbacks/archive_on_train_end.py
@@ -1,29 +1,20 @@
 import logging
-import os
-from pathlib import Path
-from typing import cast
 
 import lightning.pytorch as pl
 
-from luxonis_train.utils.config import Config
+import luxonis_train
 from luxonis_train.utils.registry import CALLBACKS
-from luxonis_train.utils.tracker import LuxonisTrackerPL
+
+logger = logging.getLogger(__name__)
 
 
 @CALLBACKS.register_module()
 class ArchiveOnTrainEnd(pl.Callback):
-    def __init__(self, upload_to_mlflow: bool = False):
-        """Callback that performs archiving of onnx or exported model at the end of
-        training/export. TODO: description.
-
-        @type upload_to_mlflow: bool
-        @param upload_to_mlflow: If set to True, overrides the upload url in Archiver
-            with currently active MLFlow run (if present).
-        """
-        super().__init__()
-        self.upload_to_mlflow = upload_to_mlflow
-
-    def on_train_end(self, trainer: pl.Trainer, pl_module: pl.LightningModule) -> None:
+    def on_train_end(
+        self,
+        _: pl.Trainer,
+        pl_module: "luxonis_train.models.LuxonisLightningModule",
+    ) -> None:
         """Archives the model on train end.
 
         @type trainer: L{pl.Trainer}
@@ -32,41 +23,24 @@ def on_train_end(self, trainer: pl.Trainer, pl_module: pl.LightningModule) -> No
         @param pl_module: Pytorch Lightning module.
         @note: If no best model path or exported ONNX model is found, an error is logged and archiving is skipped.
         """
-        from luxonis_train.core.archiver import Archiver
 
-        model_checkpoint_callbacks = [
-            c
-            for c in trainer.callbacks  # type: ignore
-            if isinstance(c, pl.callbacks.ModelCheckpoint)  # type: ignore
-        ]
-
-        # NOTE: assume that first checkpoint callback is based on val loss
-        best_model_path = model_checkpoint_callbacks[0].best_model_path
+        best_model_path = pl_module.core.get_min_loss_checkpoint_path()
         if not best_model_path:
-            raise RuntimeError(
+            logger.error(
                 "No best model path found. "
                 "Please make sure that ModelCheckpoint callback is present "
-                "and at least one validation epoch has been performed."
+                "and at least one validation epoch has been performed. "
+                "Skipping model archiving."
             )
-        cfg: Config = pl_module.cfg
-        cfg.model.weights = best_model_path
-        if self.upload_to_mlflow:
-            if cfg.tracker.is_mlflow:
-                tracker = cast(LuxonisTrackerPL, trainer.logger)
-                new_upload_url = f"mlflow://{tracker.project_id}/{tracker.run_id}"
-                cfg.archiver.upload_url = new_upload_url
-            else:
-                logging.getLogger(__name__).warning(
-                    "`upload_to_mlflow` is set to True, "
-                    "but there is  no MLFlow active run, skipping."
-                )
-
-        onnx_path = str(Path(best_model_path).parent.with_suffix(".onnx"))
-        if not os.path.exists(onnx_path):
-            raise FileNotFoundError(
-                "Model executable not found. Make sure to run exporter callback before archiver callback"
+            return
+
+        onnx_path = pl_module.core._exported_models.get("onnx")
+        if onnx_path is None:
+            logger.error(
+                "Model executable not found. "
+                "Make sure to run exporter callback before archiver callback. "
+                "Skipping model archiving."
             )
+            return
 
-        archiver = Archiver(cfg=cfg)
-
-        archiver.archive(onnx_path)
+        pl_module.core.archive(onnx_path)