diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS new file mode 100644 index 00000000..a6eef919 --- /dev/null +++ b/.github/CODEOWNERS @@ -0,0 +1 @@ +* @luxonis/ML-Reviewers diff --git a/.github/labeler.yaml b/.github/labeler.yaml new file mode 100644 index 00000000..33749bd5 --- /dev/null +++ b/.github/labeler.yaml @@ -0,0 +1,32 @@ +tests: + - changed-files: + - any-glob-to-any-file: 'tests/*' + - head-branch: + - 'test/*' + - 'tests/*' + +DevOps: + - changed-files: + - any-glob-to-any-file: '.github/*' + +CLI: + - changed-files: + - any-glob-to-any-file: '**/__main__.py' + +release: + - base-branch: 'main' + +enhancement: + - head-branch: + - 'feature/*' + - 'feat/*' + - 'enhancement/*' + +fix: + - head-branch: + - 'fix/*' + - 'bug/*' + - 'hotfix/*' + - 'issue/*' + - 'bugfix/*' + - 'patch/*' diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml new file mode 100644 index 00000000..6dbf1a87 --- /dev/null +++ b/.github/workflows/ci.yaml @@ -0,0 +1,175 @@ +name: CI + +on: + pull_request: + branches: [ dev, main ] + paths: + - 'luxonis_train/**' + - 'tests/**' + - .github/workflows/ci.yaml + - '!**/*.md' + - '!luxonis_train/__main__.py' + +permissions: + pull-requests: write + contents: write + checks: write + +jobs: + assigner: + runs-on: ubuntu-latest + steps: + - name: Auto-assign + uses: toshimaru/auto-author-assign@v2.1.1 + + labeler: + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v4 + with: + ref: ${{ github.head_ref }} + + - name: Labeler + uses: actions/labeler@v5 + with: + configuration-path: .github/labeler.yaml + + pre-commit: + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v4 + with: + ref: ${{ github.head_ref }} + + - name: Run pre-commit + uses: pre-commit/action@v3.0.1 + + docs: + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v4 + with: + ref: ${{ github.head_ref }} + + - name: Install dependencies + run: | + sudo apt update + sudo apt install -y 
pandoc + pip install pydoctor + curl -L "https://raw.githubusercontent.com/luxonis/python-api-analyzer-to-json/main/gen-docs.py" -o "gen-docs.py" + + - name: Build docs + run: python gen-docs.py luxonis_train + + type-check: + needs: + - pre-commit + - docs + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v4 + with: + ref: ${{ github.head_ref }} + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: '3.10' + cache: pip + + - name: Install dependencies + run: pip install -e .[dev] + + - name: Type check + uses: jakebailey/pyright-action@v2 + with: + version: '1.1.380' + level: warning + warnings: true + python-version: '3.10' + project: pyproject.toml + + tests: + needs: + - type-check + strategy: + fail-fast: false + matrix: + os: [ubuntu-latest, windows-latest] + + runs-on: ${{ matrix.os }} + + steps: + - name: Checkout + uses: actions/checkout@v4 + with: + ref: ${{ github.head_ref }} + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: '3.10' + cache: pip + + - name: Install dependencies + run: pip install -e .[dev] + + - name: Authenticate to Google Cloud + id: google-auth + uses: google-github-actions/auth@v2 + with: + credentials_json: ${{ secrets.GOOGLE_APPLICATION_CREDENTIALS }} + create_credentials_file: true + export_environment_variables: true + token_format: access_token + + - name: Run pytest + uses: pavelzw/pytest-action@v2 + env: + LUXONISML_BUCKET: luxonis-test-bucket + PYTORCH_MPS_HIGH_WATERMARK_RATIO: 0.0 + with: + emoji: false + custom-arguments: --junit-xml pytest.xml --cov luxonis_train --cov-report xml + + - name: Create Test Report + uses: EnricoMi/publish-unit-test-result-action@v2 + if: matrix.os == 'ubuntu-latest' + with: + files: pytest.xml + + - name: Generate coverage badge + uses: tj-actions/coverage-badge-py@v2 + if: matrix.os == 'ubuntu-latest' + with: + output: media/coverage_badge.svg + + - name: Generate coverage report + uses: 
orgoro/coverage@v3.2 + if: matrix.os == 'ubuntu-latest' + with: + coverageFile: coverage.xml + token: ${{ secrets.GITHUB_TOKEN }} + thresholdAll: 0.9 + thresholdNew: 0.8 + + - name: Commit coverage badge + if: matrix.os == 'ubuntu-latest' + run: | + git config --global user.name 'GitHub Actions' + git config --global user.email 'actions@github.com' + git diff --quiet media/coverage_badge.svg || { + git add media/coverage_badge.svg + git commit -m "[Automated] Updated coverage badge" + } + + - name: Push changes + uses: ad-m/github-push-action@master + if: matrix.os == 'ubuntu-latest' + with: + branch: ${{ github.head_ref }} + diff --git a/.github/workflows/docs.yaml b/.github/workflows/docs.yaml deleted file mode 100644 index f3c69761..00000000 --- a/.github/workflows/docs.yaml +++ /dev/null @@ -1,26 +0,0 @@ -name: Docs - -on: - pull_request: - branches: [ dev, main ] - paths: - - 'luxonis_train/**' - - .github/workflows/docs.yaml - -jobs: - docs: - runs-on: ubuntu-latest - steps: - - name: Checkout - uses: actions/checkout@v4 - with: - ref: ${{ github.head_ref }} - - - name: Install dependencies - run: | - pip install pydoctor - curl -L "https://raw.githubusercontent.com/luxonis/python-api-analyzer-to-json/main/gen-docs.py" -o "gen-docs.py" - - - name: Build docs - run: | - python gen-docs.py luxonis_train diff --git a/.github/workflows/pre-commit.yaml b/.github/workflows/pre-commit.yaml deleted file mode 100644 index ce6b816b..00000000 --- a/.github/workflows/pre-commit.yaml +++ /dev/null @@ -1,13 +0,0 @@ -name: pre-commit - -on: - pull_request: - branches: [dev, main] - -jobs: - pre-commit: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v3 - - uses: actions/setup-python@v3 - - uses: pre-commit/action@v3.0.0 diff --git a/.github/workflows/tests.yaml b/.github/workflows/tests.yaml deleted file mode 100644 index a0999d9b..00000000 --- a/.github/workflows/tests.yaml +++ /dev/null @@ -1,126 +0,0 @@ -name: Tests - -on: - pull_request: - branches: [ dev, 
main ] - paths: - - 'luxonis_train/**/**.py' - - 'tests/**/**.py' - - .github/workflows/tests.yaml - -jobs: - run_tests: - strategy: - fail-fast: false - matrix: - os: [ubuntu-latest, windows-latest] - version: ['3.10'] - - runs-on: ${{ matrix.os }} - - steps: - - name: Checkout - uses: actions/checkout@v4 - with: - ref: ${{ github.head_ref }} - - - name: Set up Python - uses: actions/setup-python@v5 - with: - python-version: ${{ matrix.version }} - cache: pip - - - name: Install dependencies [Ubuntu] - if: matrix.os == 'ubuntu-latest' - run: | - sudo apt update - sudo apt install -y pandoc - pip install -e .[dev] - - - name: Install dependencies [Windows] - if: matrix.os == 'windows-latest' - run: pip install -e .[dev] - - - name: Install dependencies [macOS] - if: matrix.os == 'macOS-latest' - run: pip install -e .[dev] - - - name: Authenticate to Google Cloud - id: google-auth - uses: google-github-actions/auth@v2 - with: - credentials_json: ${{ secrets.GOOGLE_APPLICATION_CREDENTIALS }} - create_credentials_file: true - export_environment_variables: true - token_format: access_token - - - name: Run tests with coverage [Ubuntu] - if: matrix.os == 'ubuntu-latest' && matrix.version == '3.10' - run: pytest tests --cov=luxonis_train --cov-report xml --junit-xml pytest.xml - - - name: Run tests [Windows, macOS] - env: - PYTORCH_MPS_HIGH_WATERMARK_RATIO: 0.0 - if: matrix.os != 'ubuntu-latest' || matrix.version != '3.10' - run: pytest tests --junit-xml pytest.xml - - - name: Generate coverage badge [Ubuntu] - if: matrix.os == 'ubuntu-latest' && matrix.version == '3.10' - run: coverage-badge -o media/coverage_badge.svg -f - - - name: Generate coverage report [Ubuntu] - if: matrix.os == 'ubuntu-latest' && matrix.version == '3.10' - uses: orgoro/coverage@v3.1 - with: - coverageFile: coverage.xml - token: ${{ secrets.GITHUB_TOKEN }} - - - name: Commit coverage badge [Ubuntu] - if: matrix.os == 'ubuntu-latest' && matrix.version == '3.10' - run: | - git config --global 
user.name 'GitHub Actions' - git config --global user.email 'actions@github.com' - git diff --quiet media/coverage_badge.svg || { - git add media/coverage_badge.svg - git commit -m "[Automated] Updated coverage badge" - } - - - name: Push changes [Ubuntu] - if: matrix.os == 'ubuntu-latest' && matrix.version == '3.10' - uses: ad-m/github-push-action@master - with: - branch: ${{ github.head_ref }} - - - name: Upload Test Results - if: always() - uses: actions/upload-artifact@v4 - with: - name: Test Results [${{ matrix.os }}] (Python ${{ matrix.version }}) - path: pytest.xml - retention-days: 10 - if-no-files-found: error - - publish-test-results: - name: "Publish Tests Results" - needs: run_tests - runs-on: ubuntu-latest - permissions: - checks: write - pull-requests: write - if: always() - - steps: - - name: Checkout - uses: actions/checkout@v4 - with: - ref: ${{ github.head_ref }} - - - name: Download Artifacts - uses: actions/download-artifact@v4 - with: - path: artifacts - - - name: Publish Test Results - uses: EnricoMi/publish-unit-test-result-action@v2 - with: - files: "artifacts/**/*.xml" diff --git a/.gitignore b/.gitignore index 7f182cf4..03ba884c 100644 --- a/.gitignore +++ b/.gitignore @@ -152,5 +152,5 @@ mlartifacts mlruns wandb tests/_data -tests/integration/_test-output +tests/integration/save-directory data diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 3f95fc26..3d68c872 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,12 +1,11 @@ repos: - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.1.8 + rev: v0.6.4 hooks: - id: ruff args: [--fix, --exit-non-zero-on-fix] types_or: [python, pyi, jupyter] - id: ruff-format - args: [--line-length, '88'] types_or: [python, pyi, jupyter] - repo: https://github.com/PyCQA/docformatter @@ -14,7 +13,7 @@ repos: hooks: - id: docformatter additional_dependencies: [tomli] - args: [--in-place, --black, --style=epytext] + args: [--in-place, --style=epytext] - repo: 
https://github.com/pre-commit/pre-commit-hooks rev: v4.4.0 @@ -28,4 +27,3 @@ repos: - id: mdformat additional_dependencies: - mdformat-gfm - - mdformat-toc diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index d113518b..20fd3607 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -3,25 +3,45 @@ **This guide is intended for our internal development team.** It outlines our workflow and standards for contributing to this project. -## Table of Contents +## Table Of Contents +- [Pre-requisites](#pre-requisites) - [Pre-commit Hooks](#pre-commit-hooks) - [Documentation](#documentation) +- [Type Checking](#type-checking) - [Editor Support](#editor-support) - [Tests](#tests) - [GitHub Actions](#github-actions) - [Making and Reviewing Changes](#making-and-reviewing-changes) -- [Notes](#notes) + +## Pre-requisites + +Clone the repository and navigate to the root directory: + +```bash +git clone git@github.com:luxonis/luxonis-train.git +cd luxonis-train +``` + +Install the development dependencies by running `pip install -r requirements-dev.txt` or install the package with the `dev` extra flag: + +```bash +pip install -e .[dev] +``` + +> \[!NOTE\] +> This will install the package in editable mode (`-e`), +> so you can make changes to the code and run them immediately. ## Pre-commit Hooks We use pre-commit hooks to ensure code quality and consistency: -1. Install pre-commit (see [pre-commit.com](https://pre-commit.com/#install)). +1. Install `pre-commit` (see [pre-commit.com](https://pre-commit.com/#install)). 1. Clone the repository and run `pre-commit install` in the root directory. -1. The pre-commit hook will now run automatically on `git commit`. +1. The `pre-commit` hook will now run automatically on `git commit`. - If the hook fails, it will print an error message and abort the commit. - - It will also modify the files in-place to fix any issues it can. + - Some hooks will also modify the files in-place to fix found issues. 
## Documentation @@ -29,52 +49,75 @@ We use the [Epytext](https://epydoc.sourceforge.net/epytext.html) markup languag To verify that your documentation is formatted correctly, follow these steps: 1. Download [`get-docs.py`](https://github.com/luxonis/python-api-analyzer-to-json/blob/main/gen-docs.py) script -1. Run `python3 get-docs.py luxonis_ml` in the root directory. +1. Run `python3 gen-docs.py luxonis_train` in the root directory. - If the script runs successfully and produces `docs.json` file, your documentation is formatted correctly. - - **NOTE:** If the script fails, it might not give the specific error message. In that case, you can run - the script for each file individually until you find the one that is causing the error. -### Editor Support +> \[!NOTE\] +> If the script fails, it might not give a specific error message. +> In that case, you can run the script for each file individually +> until you find the one that is causing the error. + +**Editor Support:** - **PyCharm** - built in support for generating `epytext` docstrings -- **Visual Studie Code** - [AI Docify](https://marketplace.visualstudio.com/items?itemName=AIC.docify) extension offers support for `epytext` +- **Visual Studio Code** - [AI Docify](https://marketplace.visualstudio.com/items?itemName=AIC.docify) extension offers support for `epytext` - **NeoVim** - [vim-python-docstring](https://github.com/pixelneo/vim-python-docstring) supports `epytext` style +## Type Checking + +The codebase is type-checked using [pyright](https://github.com/microsoft/pyright) `v1.1.380`. 
To run type checking, use the following command in the root project directory: + +```bash +pyright --warnings --level warning --pythonversion 3.10 luxonis_train +``` + +**Editor Support:** + +- **PyCharm** - [Pyright](https://plugins.jetbrains.com/plugin/24145-pyright) extension +- **Visual Studio Code** - [Pyright](https://marketplace.visualstudio.com/items?itemName=ms-pyright.pyright) extension +- **NeoVim** - [LSP-Config](https://github.com/neovim/nvim-lspconfig) plugin with the [pyright configuration](https://github.com/neovim/nvim-lspconfig/blob/master/doc/server_configurations.md#pyright) + ## Tests We use [pytest](https://docs.pytest.org/en/stable/) for testing. -The tests are located in the `tests` directory. You can run the tests locally with: +The tests are located in the `tests` directory. To run the tests with coverage, use the following command: ```bash -pytest tests --cov=luxonis_train +pytest --cov=luxonis_train --cov-report=html ``` -This command will run all tests and print a coverage report. The coverage report -is only informational for now, but we may enforce a minimum coverage in the future. +This command will run all tests and generate an HTML coverage report. + +> \[!TIP\] +> The coverage report will be saved to the `htmlcov` directory. +> If you want to inspect the coverage in more detail, open `htmlcov/index.html` in a browser. + +> \[!TIP\] +> You can choose to run only the unit tests or only the integration tests by adding `-m unit` or `-m integration` to the `pytest` command. -**If a new feature is added, a new test should be added to cover it.** +> \[!IMPORTANT\] +> If a new feature is added, a new test should be added to cover it. +> The minimum overall test coverage for a PR to be merged is 90%. +> The minimum coverage for new files is 80%. ## GitHub Actions Our GitHub Actions workflow is run when a new PR is opened. -It first checks that the pre-commit hook passes and that the documentation builds successfully. 
-The tests are run only if the pre-commit hook and documentation build pass. -Successful tests are required for merging a PR. -1. Checks and tests are run automatically when you open a pull request. -1. For the tests to run, the [pre-commit](#pre-commit-hooks) hook must pass and - the [documentation](#documentation) must be built successfully. -1. Review the GitHub Actions output if your PR fails. -1. Fix any issues to ensure that all checks and tests pass. +1. First, the [pre-commit](#pre-commit-hooks) hooks must pass and the [documentation](#documentation) must be built successfully. +1. Next, the [type checking](#type-checking) is run. +1. If all previous checks pass, the [tests](#tests) are run. + +> \[!TIP\] +> Review the GitHub Actions output if your PR fails. + +> \[!IMPORTANT\] +> Successful completion of all the workflow checks is required for merging a PR. -## Making and Reviewing Changes +## Making and Submitting Changes 1. Make changes in a new branch. 1. Test your changes locally. -1. Commit (pre-commit hook will run). -1. Push to your branch and create a pull request. Always request a review from: - - [Martin Kozlovský](https://github.com/kozlov721) - - [Matija Teršek](https://github.com/tersekmatija) - - [Conor Simmons](https://github.com/conorsim) -1. Any other relevant team members can be added as reviewers as well. +1. Commit your changes (pre-commit hooks will run). +1. Push your branch and create a pull request. 1. The team will review and merge your PR. diff --git a/configs/README.md b/configs/README.md index a85d5221..96444f66 100644 --- a/configs/README.md +++ b/configs/README.md @@ -147,16 +147,16 @@ Here you can change everything related to actual training of the model. 
| accumulate_grad_batches | int | 1 | number of batches for gradient accumulation | | use_weighted_sampler | bool | False | bool if use WeightedRandomSampler for training, only works with classification tasks | | epochs | int | 100 | number of training epochs | -| num_workers | int | 2 | number of workers for data loading | +| n_workers | int | 2 | number of workers for data loading | | train_metrics_interval | int | -1 | frequency of computing metrics on train data, -1 if don't perform | | validation_interval | int | 1 | frequency of computing metrics on validation data | -| num_log_images | int | 4 | maximum number of images to visualize and log | +| n_log_images | int | 4 | maximum number of images to visualize and log | | skip_last_batch | bool | True | whether to skip last batch while training | | accelerator | Literal\["auto", "cpu", "gpu"\] | "auto" | What accelerator to use for training. | | devices | int \| list\[int\] \| str | "auto" | Either specify how many devices to use (int), list specific devices, or use "auto" for automatic configuration based on the selected accelerator | | matmul_precision | Literal\["medium", "high", "highest"\] \| None | None | Sets the internal precision of float32 matrix multiplications. | | strategy | Literal\["auto", "ddp"\] | "auto" | What strategy to use for training. | -| num_sanity_val_steps | int | 2 | Number of sanity validation steps performed before training. | +| n_sanity_val_steps | int | 2 | Number of sanity validation steps performed before training. | | profiler | Literal\["simple", "advanced"\] \| None | None | PL profiler for GPU/CPU/RAM utilization analysis | | verbose | bool | True | Print all intermediate results to console. 
| diff --git a/configs/classification_model.yaml b/configs/classification_model.yaml index be5a5006..4db7a9b1 100644 --- a/configs/classification_model.yaml +++ b/configs/classification_model.yaml @@ -25,9 +25,9 @@ trainer: batch_size: 4 epochs: &epochs 200 - num_workers: 4 + n_workers: 4 validation_interval: 10 - num_log_images: 8 + n_log_images: 8 callbacks: - name: ExportOnTrainEnd diff --git a/configs/coco_model.yaml b/configs/coco_model.yaml index 9af25feb..23516bea 100644 --- a/configs/coco_model.yaml +++ b/configs/coco_model.yaml @@ -7,7 +7,7 @@ model: - name: EfficientRep params: channels_list: [64, 128, 256, 512, 1024] - num_repeats: [1, 6, 12, 18, 6] + n_repeats: [1, 6, 12, 18, 6] depth_mul: 0.33 width_mul: 0.33 @@ -16,7 +16,7 @@ model: - EfficientRep params: channels_list: [256, 128, 128, 256, 256, 512] - num_repeats: [12, 12, 12, 12] + n_repeats: [12, 12, 12, 12] depth_mul: 0.33 width_mul: 0.33 @@ -108,16 +108,16 @@ trainer: devices: auto strategy: auto - num_sanity_val_steps: 1 + n_sanity_val_steps: 1 profiler: null verbose: True batch_size: 4 accumulate_grad_batches: 1 epochs: &epochs 200 - num_workers: 8 + n_workers: 8 train_metrics_interval: -1 validation_interval: 10 - num_log_images: 8 + n_log_images: 8 skip_last_batch: True log_sub_losses: True save_top_k: 3 diff --git a/configs/detection_model.yaml b/configs/detection_model.yaml index 45c3431e..7bc87eef 100644 --- a/configs/detection_model.yaml +++ b/configs/detection_model.yaml @@ -20,9 +20,9 @@ trainer: batch_size: 4 epochs: &epochs 200 - num_workers: 4 + n_workers: 4 validation_interval: 10 - num_log_images: 8 + n_log_images: 8 callbacks: - name: ExportOnTrainEnd diff --git a/configs/efficient_coco_model.yaml b/configs/efficient_coco_model.yaml index 64aa48e0..f2c9db5d 100644 --- a/configs/efficient_coco_model.yaml +++ b/configs/efficient_coco_model.yaml @@ -5,7 +5,7 @@ model: - name: EfficientRep params: channels_list: [64, 128, 256, 512, 1024] - num_repeats: [1, 6, 12, 18, 6] + n_repeats: 
[1, 6, 12, 18, 6] depth_mul: 0.33 width_mul: 0.33 @@ -14,7 +14,7 @@ model: - EfficientRep params: channels_list: [256, 128, 128, 256, 256, 512] - num_repeats: [12, 12, 12, 12] + n_repeats: [12, 12, 12, 12] depth_mul: 0.33 width_mul: 0.33 @@ -91,14 +91,14 @@ loader: trainer: - num_sanity_val_steps: 1 + n_sanity_val_steps: 1 batch_size: 4 accumulate_grad_batches: 1 epochs: 200 - num_workers: 4 + n_workers: 4 train_metrics_interval: -1 validation_interval: 10 - num_log_images: 8 + n_log_images: 8 save_top_k: 3 preprocessing: diff --git a/configs/example_export.yaml b/configs/example_export.yaml index f86f1dfa..51f768dc 100644 --- a/configs/example_export.yaml +++ b/configs/example_export.yaml @@ -22,9 +22,9 @@ trainer: batch_size: 4 epochs: &epochs 200 - num_workers: 4 + n_workers: 4 validation_interval: 10 - num_log_images: 8 + n_log_images: 8 optimizer: name: SGD diff --git a/configs/example_multi_input.yaml b/configs/example_multi_input.yaml index d185f37e..9632ed43 100644 --- a/configs/example_multi_input.yaml +++ b/configs/example_multi_input.yaml @@ -97,9 +97,9 @@ tracker: trainer: batch_size: 1 epochs: 10 - num_workers: 4 + n_workers: 4 validation_interval: 10 - num_log_images: 4 + n_log_images: 4 callbacks: - name: ExportOnTrainEnd diff --git a/configs/example_tuning.yaml b/configs/example_tuning.yaml index b350ea2f..d8c9027d 100644 --- a/configs/example_tuning.yaml +++ b/configs/example_tuning.yaml @@ -30,7 +30,7 @@ trainer: batch_size: 4 epochs: &epochs 100 validation_interval: 10 - num_log_images: 8 + n_log_images: 8 scheduler: name: CosineAnnealingLR diff --git a/configs/keypoint_bbox_model.yaml b/configs/keypoint_bbox_model.yaml index 5b1ebb2d..51554f73 100644 --- a/configs/keypoint_bbox_model.yaml +++ b/configs/keypoint_bbox_model.yaml @@ -18,9 +18,9 @@ trainer: batch_size: 4 epochs: &epochs 200 - num_workers: 4 + n_workers: 4 validation_interval: 10 - num_log_images: 8 + n_log_images: 8 callbacks: - name: ExportOnTrainEnd diff --git 
a/configs/resnet_model.yaml b/configs/resnet_model.yaml index e8353870..bb9f8f62 100644 --- a/configs/resnet_model.yaml +++ b/configs/resnet_model.yaml @@ -36,9 +36,9 @@ loader: trainer: batch_size: 4 epochs: &epochs 200 - num_workers: 4 + n_workers: 4 validation_interval: 10 - num_log_images: 8 + n_log_images: 8 preprocessing: train_image_size: [&height 224, &width 224] diff --git a/configs/segmentation_model.yaml b/configs/segmentation_model.yaml index a822d7c1..b403a75e 100644 --- a/configs/segmentation_model.yaml +++ b/configs/segmentation_model.yaml @@ -21,9 +21,9 @@ trainer: batch_size: 4 epochs: &epochs 200 - num_workers: 4 + n_workers: 4 validation_interval: 10 - num_log_images: 8 + n_log_images: 8 callbacks: - name: ExportOnTrainEnd diff --git a/luxonis_train/__init__.py b/luxonis_train/__init__.py index 60d8d501..ebc4a719 100644 --- a/luxonis_train/__init__.py +++ b/luxonis_train/__init__.py @@ -1,7 +1,11 @@ +__version__ = "0.0.1" + + from .attached_modules import * from .core import * +from .loaders import * from .models import * from .nodes import * +from .optimizers import * +from .schedulers import * from .utils import * - -__version__ = "0.0.1" diff --git a/luxonis_train/__main__.py b/luxonis_train/__main__.py index 454e9525..c3164227 100644 --- a/luxonis_train/__main__.py +++ b/luxonis_train/__main__.py @@ -41,7 +41,9 @@ class _ViewType(str, Enum): ), ] -ViewType = Annotated[_ViewType, typer.Option(help="Which dataset view to use.")] +ViewType = Annotated[ + _ViewType, typer.Option(help="Which dataset view to use.") +] SaveDirType = Annotated[ Optional[Path], @@ -53,7 +55,8 @@ class _ViewType(str, Enum): def train( config: ConfigType = None, resume: Annotated[ - Optional[str], typer.Option(help="Resume training from this checkpoint.") + Optional[str], + typer.Option(help="Resume training from this checkpoint."), ] = None, opts: OptsType = None, ): @@ -65,7 +68,9 @@ def train( @app.command() def test( - config: ConfigType = None, view: ViewType = 
_ViewType.VAL, opts: OptsType = None + config: ConfigType = None, + view: ViewType = _ViewType.VAL, + opts: OptsType = None, ): """Evaluate model.""" from luxonis_train.core import LuxonisModel @@ -115,13 +120,26 @@ def inspect( case_sensitive=False, ), ] = "train", # type: ignore + size_multiplier: Annotated[ + float, + typer.Option( + ..., + "--size-multiplier", + "-s", + help=( + "Multiplier for the image size. " + "By default the images are shown in their original size." + ), + show_default=False, + ), + ] = 1.0, opts: OptsType = None, ): """Inspect dataset.""" from lightning.pytorch import seed_everything from luxonis_ml.data.__main__ import inspect as lxml_inspect - from luxonis_train.utils.config import Config + from luxonis_train.utils import Config cfg = Config.get_config(config, opts) if cfg.trainer.seed is not None: @@ -144,6 +162,7 @@ def inspect( name=cfg.loader.params["dataset_name"], view=[view], aug_config=f.name, + size_multiplier=size_multiplier, ) @@ -166,7 +185,7 @@ def archive( def version_callback(value: bool): if value: - typer.echo(f"LuxonisTrain Version: {version(__package__)}") + typer.echo(f"LuxonisTrain Version: {version('luxonis_train')}") raise typer.Exit() @@ -175,7 +194,9 @@ def common( _: Annotated[ bool, typer.Option( - "--version", callback=version_callback, help="Show version and exit." 
+ "--version", + callback=version_callback, + help="Show version and exit.", ), ] = False, source: Annotated[ diff --git a/luxonis_train/utils/assigners/__init__.py b/luxonis_train/assigners/__init__.py similarity index 100% rename from luxonis_train/utils/assigners/__init__.py rename to luxonis_train/assigners/__init__.py diff --git a/luxonis_train/utils/assigners/atts_assigner.py b/luxonis_train/assigners/atts_assigner.py similarity index 84% rename from luxonis_train/utils/assigners/atts_assigner.py rename to luxonis_train/assigners/atts_assigner.py index 9a0466da..269496fa 100644 --- a/luxonis_train/utils/assigners/atts_assigner.py +++ b/luxonis_train/assigners/atts_assigner.py @@ -49,9 +49,10 @@ def forward( @type pred_bboxes: Tensor @param pred_bboxes: Predicted bboxes of shape [bs, n_anchors, 4] @rtype: tuple[Tensor, Tensor, Tensor, Tensor, Tensor] - @return: Assigned labels of shape [bs, n_anchors], assigned bboxes of shape [bs, - n_anchors, 4], assigned scores of shape [bs, n_anchors, n_classes] and - output positive mask of shape [bs, n_anchors]. + @return: Assigned labels of shape [bs, n_anchors], assigned + bboxes of shape [bs, n_anchors, 4], assigned scores of shape + [bs, n_anchors, n_classes] and output positive mask of shape + [bs, n_anchors]. 
""" self.n_anchors = anchor_bboxes.size(0) @@ -61,9 +62,13 @@ def forward( if self.n_max_boxes == 0: device = gt_bboxes.device return ( - torch.full([self.bs, self.n_anchors], self.n_classes).to(device), + torch.full([self.bs, self.n_anchors], self.n_classes).to( + device + ), torch.zeros([self.bs, self.n_anchors, 4]).to(device), - torch.zeros([self.bs, self.n_anchors, self.n_classes]).to(device), + torch.zeros([self.bs, self.n_anchors, self.n_classes]).to( + device + ), torch.zeros([self.bs, self.n_anchors]).to(device), torch.zeros([self.bs, self.n_anchors]).to(device), ) @@ -78,7 +83,10 @@ def forward( gt_centers = self._get_bbox_center(gt_bboxes_flat) anchor_centers = self._get_bbox_center(anchor_bboxes) distances = ( - (gt_centers[:, None, :] - anchor_centers[None, :, :]).pow(2).sum(-1).sqrt() + (gt_centers[:, None, :] - anchor_centers[None, :, :]) + .pow(2) + .sum(-1) + .sqrt() ) distances = distances.reshape([self.bs, -1, self.n_anchors]) @@ -103,15 +111,18 @@ def forward( ) # Generate final assignments based on masks - assigned_labels, assigned_bboxes, assigned_scores = self._get_final_assignments( + ( + assigned_labels, + assigned_bboxes, + assigned_scores, + ) = self._get_final_assignments( gt_labels, gt_bboxes, assigned_gt_idx, mask_pos_sum ) # Soft label with IoU - if pred_bboxes is not None: - ious = batch_iou(gt_bboxes, pred_bboxes) * mask_pos - ious = ious.max(dim=-2)[0].unsqueeze(-1) - assigned_scores *= ious + ious = batch_iou(gt_bboxes, pred_bboxes) * mask_pos + ious = ious.max(dim=-2)[0].unsqueeze(-1) + assigned_scores *= ious out_mask_positive = mask_pos_sum.bool() @@ -141,12 +152,13 @@ def _select_topk_candidates( @type mask_gt: Tensor @param mask_gt: Mask for valid GT per image. @rtype: tuple[Tensor, Tensor] - @return: Mask of selected anchors and indices of selected anchors. + @return: Mask of selected anchors and indices of selected + anchors. 
""" mask_gt = mask_gt.repeat(1, 1, self.topk).bool() level_distances = torch.split(distances, n_level_bboxes, dim=-1) - is_in_topk_list = [] - topk_idxs = [] + is_in_topk_list: list[Tensor] = [] + topk_idxs: list[Tensor] = [] start_idx = 0 for per_level_distances, per_level_boxes in zip( level_distances, n_level_bboxes @@ -158,18 +170,20 @@ def _select_topk_candidates( ) topk_idxs.append(per_level_topk_idxs + start_idx) per_level_topk_idxs = torch.where( - mask_gt, per_level_topk_idxs, torch.zeros_like(per_level_topk_idxs) + mask_gt, + per_level_topk_idxs, + torch.zeros_like(per_level_topk_idxs), + ) + is_in_topk = F.one_hot(per_level_topk_idxs, per_level_boxes).sum( + dim=-2 ) - is_in_topk = F.one_hot(per_level_topk_idxs, per_level_boxes).sum(dim=-2) is_in_topk = torch.where( is_in_topk > 1, torch.zeros_like(is_in_topk), is_in_topk ) is_in_topk_list.append(is_in_topk.to(distances.dtype)) start_idx = end_idx - is_in_topk_list = torch.cat(is_in_topk_list, dim=-1) - topk_idxs = torch.cat(topk_idxs, dim=-1) - return is_in_topk_list, topk_idxs + return torch.cat(is_in_topk_list, dim=-1), torch.cat(topk_idxs, dim=-1) def _get_positive_samples( self, @@ -177,14 +191,18 @@ def _get_positive_samples( topk_idxs: Tensor, overlaps: Tensor, ) -> Tensor: - """Computes threshold and returns mask for samples over threshold. + """Computes threshold and returns mask for samples over + threshold. 
@type is_in_topk: Tensor - @param is_in_topk: Mask of selected anchors [bx, n_max_boxes, n_anchors] + @param is_in_topk: Mask of selected anchors [bx, n_max_boxes, + n_anchors] @type topk_idxs: Tensor - @param topk_idxs: Indices of selected anchors [bx, n_max_boxes, topK * n_levels] + @param topk_idxs: Indices of selected anchors [bx, n_max_boxes, + topK * n_levels] @type overlaps: Tensor - @param overlaps: IoUs between GTs and anchors [bx, n_max_boxes, n_anchors] + @param overlaps: IoUs between GTs and anchors [bx, n_max_boxes, + n_anchors] @rtype: Tensor @return: Mask of positive samples [bx, n_max_boxes, n_anchors] """ @@ -199,14 +217,17 @@ def _get_positive_samples( assist_idxs = assist_idxs[:, None] flatten_idxs = topk_idxs + assist_idxs candidate_overlaps = _candidate_overlaps.reshape(-1)[flatten_idxs] - candidate_overlaps = candidate_overlaps.reshape([self.bs, self.n_max_boxes, -1]) + candidate_overlaps = candidate_overlaps.reshape( + [self.bs, self.n_max_boxes, -1] + ) overlaps_mean_per_gt = candidate_overlaps.mean(dim=-1, keepdim=True) overlaps_std_per_gt = candidate_overlaps.std(dim=-1, keepdim=True) overlaps_thr_per_gt = overlaps_mean_per_gt + overlaps_std_per_gt is_pos = torch.where( - _candidate_overlaps > overlaps_thr_per_gt.repeat([1, 1, self.n_anchors]), + _candidate_overlaps + > overlaps_thr_per_gt.repeat([1, 1, self.n_anchors]), is_in_topk, torch.zeros_like(is_in_topk), ) @@ -230,15 +251,18 @@ def _get_final_assignments( @type mask_pos_sum: Tensor @param mask_pos_sum: Mask of matched GTs [bs, n_anchors] @rtype: tuple[Tensor, Tensor, Tensor] - @return: Assigned labels of shape [bs, n_anchors], assigned bboxes of shape [bs, - n_anchors, 4], assigned scores of shape [bs, n_anchors, n_classes]. + @return: Assigned labels of shape [bs, n_anchors], assigned + bboxes of shape [bs, n_anchors, 4], assigned scores of shape + [bs, n_anchors, n_classes]. 
""" # assigned target labels batch_idx = torch.arange( self.bs, dtype=gt_labels.dtype, device=gt_labels.device ) batch_idx = batch_idx[..., None] - assigned_gt_idx = (assigned_gt_idx + batch_idx * self.n_max_boxes).long() + assigned_gt_idx = ( + assigned_gt_idx + batch_idx * self.n_max_boxes + ).long() assigned_labels = gt_labels.flatten()[assigned_gt_idx.flatten()] assigned_labels = assigned_labels.reshape([self.bs, self.n_anchors]) assigned_labels = torch.where( @@ -252,7 +276,9 @@ def _get_final_assignments( assigned_bboxes = assigned_bboxes.reshape([self.bs, self.n_anchors, 4]) # assigned target scores - assigned_scores = F.one_hot(assigned_labels.long(), self.n_classes + 1).float() + assigned_scores = F.one_hot( + assigned_labels.long(), self.n_classes + 1 + ).float() assigned_scores = assigned_scores[:, :, : self.n_classes] return assigned_labels, assigned_bboxes, assigned_scores diff --git a/luxonis_train/utils/assigners/tal_assigner.py b/luxonis_train/assigners/tal_assigner.py similarity index 87% rename from luxonis_train/utils/assigners/tal_assigner.py rename to luxonis_train/assigners/tal_assigner.py index 08b5b461..ea228eba 100644 --- a/luxonis_train/utils/assigners/tal_assigner.py +++ b/luxonis_train/assigners/tal_assigner.py @@ -66,9 +66,10 @@ def forward( @type mask_gt: Tensor @param mask_gt: Mask for valid GTs [bs, n_max_boxes, 1] @rtype: tuple[Tensor, Tensor, Tensor, Tensor, Tensor] - @return: Assigned labels of shape [bs, n_anchors], assigned bboxes of shape [bs, - n_anchors, 4], assigned scores of shape [bs, n_anchors, n_classes] and - output mask of shape [bs, n_anchors] + @return: Assigned labels of shape [bs, n_anchors], assigned + bboxes of shape [bs, n_anchors, 4], assigned scores of shape + [bs, n_anchors, n_classes] and output mask of shape [bs, + n_anchors] """ self.bs = pred_scores.size(0) self.n_max_boxes = gt_bboxes.size(1) @@ -76,7 +77,9 @@ def forward( if self.n_max_boxes == 0: device = gt_bboxes.device return ( - 
torch.full_like(pred_scores[..., 0], self.n_classes).to(device), + torch.full_like(pred_scores[..., 0], self.n_classes).to( + device + ), torch.zeros_like(pred_bboxes).to(device), torch.zeros_like(pred_scores).to(device), torch.zeros_like(pred_scores[..., 0]).to(device), @@ -105,7 +108,11 @@ def forward( ) # Generate final targets based on masks - assigned_labels, assigned_bboxes, assigned_scores = self._get_final_assignments( + ( + assigned_labels, + assigned_bboxes, + assigned_scores, + ) = self._get_final_assignments( gt_labels, gt_bboxes, assigned_gt_idx, mask_pos_sum ) @@ -137,7 +144,8 @@ def _get_alignment_metric( gt_labels: Tensor, gt_bboxes: Tensor, ): - """Calculates anchor alignment metric and IoU between GTs and predicted bboxes. + """Calculates anchor alignment metric and IoU between GTs and + predicted bboxes. @type pred_scores: Tensor @param pred_scores: Predicted scores [bs, n_anchors, 1] @@ -151,7 +159,9 @@ def _get_alignment_metric( pred_scores = pred_scores.permute(0, 2, 1) gt_labels = gt_labels.to(torch.long) ind = torch.zeros([2, self.bs, self.n_max_boxes], dtype=torch.long) - ind[0] = torch.arange(end=self.bs).view(-1, 1).repeat(1, self.n_max_boxes) + ind[0] = ( + torch.arange(end=self.bs).view(-1, 1).repeat(1, self.n_max_boxes) + ) ind[1] = gt_labels.squeeze(-1) bbox_scores = pred_scores[ind[0], ind[1]] @@ -169,24 +179,30 @@ def _select_topk_candidates( """Selects k anchors based on provided metrics tensor. @type metrics: Tensor - @param metrics: Metrics tensor of shape [bs, n_max_boxes, n_anchors] + @param metrics: Metrics tensor of shape [bs, n_max_boxes, + n_anchors] @type largest: bool - @param largest: Flag if should keep largest topK. Defaults to True. + @param largest: Flag if should keep largest topK. Defaults to + True. 
@type topk_mask: Tensor - @param topk_mask: Mask for valid GTs of shape [bs, n_max_boxes, topk] + @param topk_mask: Mask for valid GTs of shape [bs, n_max_boxes, + topk] @rtype: Tensor - @return: Mask of selected anchors of shape [bs, n_max_boxes, n_anchors] + @return: Mask of selected anchors of shape [bs, n_max_boxes, + n_anchors] """ - num_anchors = metrics.shape[-1] + n_anchors = metrics.shape[-1] topk_metrics, topk_idxs = torch.topk( metrics, self.topk, dim=-1, largest=largest ) if topk_mask is None: - topk_mask = (topk_metrics.max(dim=-1, keepdim=True)[0] > self.eps).tile( - [1, 1, self.topk] - ) - topk_idxs = torch.where(topk_mask, topk_idxs, torch.zeros_like(topk_idxs)) - is_in_topk = F.one_hot(topk_idxs, num_anchors).sum(dim=-2) + topk_mask = ( + topk_metrics.max(dim=-1, keepdim=True)[0] > self.eps + ).tile([1, 1, self.topk]) + topk_idxs = torch.where( + topk_mask, topk_idxs, torch.zeros_like(topk_idxs) + ) + is_in_topk = F.one_hot(topk_idxs, n_anchors).sum(dim=-2) is_in_topk = torch.where( is_in_topk > 1, torch.zeros_like(is_in_topk), is_in_topk ) @@ -210,8 +226,9 @@ def _get_final_assignments( @type mask_pos_sum: Tensor @param mask_pos_sum: Mask of matched GTs [bs, n_anchors] @rtype: tuple[Tensor, Tensor, Tensor] - @return: Assigned labels of shape [bs, n_anchors], assigned bboxes of shape [bs, - n_anchors, 4], assigned scores of shape [bs, n_anchors, n_classes]. + @return: Assigned labels of shape [bs, n_anchors], assigned + bboxes of shape [bs, n_anchors, 4], assigned scores of shape + [bs, n_anchors, n_classes]. 
""" # assigned target labels batch_ind = torch.arange( @@ -228,7 +245,9 @@ def _get_final_assignments( assigned_scores = F.one_hot(assigned_labels, self.n_classes) mask_pos_scores = mask_pos_sum[:, :, None].repeat(1, 1, self.n_classes) assigned_scores = torch.where( - mask_pos_scores > 0, assigned_scores, torch.full_like(assigned_scores, 0) + mask_pos_scores > 0, + assigned_scores, + torch.full_like(assigned_scores, 0), ) assigned_labels = torch.where( diff --git a/luxonis_train/utils/assigners/utils.py b/luxonis_train/assigners/utils.py similarity index 88% rename from luxonis_train/utils/assigners/utils.py rename to luxonis_train/assigners/utils.py index fadf5f8e..fe9fba4b 100644 --- a/luxonis_train/utils/assigners/utils.py +++ b/luxonis_train/assigners/utils.py @@ -2,7 +2,7 @@ import torch.nn.functional as F from torch import Tensor -from luxonis_train.utils.boxutils import bbox_iou +from luxonis_train.utils import bbox_iou def candidates_in_gt( @@ -20,7 +20,9 @@ def candidates_in_gt( @return: Mask for anchors inside any GT bbox """ n_anchors = anchor_centers.size(0) - anchor_centers = anchor_centers.unsqueeze(0).repeat(gt_bboxes.size(0), 1, 1) + anchor_centers = anchor_centers.unsqueeze(0).repeat( + gt_bboxes.size(0), 1, 1 + ) gt_bboxes_lt = gt_bboxes[:, :2].unsqueeze(1).repeat(1, n_anchors, 1) gt_bboxes_rb = gt_bboxes[:, 2:].unsqueeze(1).repeat(1, n_anchors, 1) bbox_delta_lt = anchor_centers - gt_bboxes_lt @@ -33,12 +35,15 @@ def candidates_in_gt( def fix_collisions( mask_pos: Tensor, overlaps: Tensor, n_max_boxes: int ) -> tuple[Tensor, Tensor, Tensor]: - """If an anchor is assigned to multiple GTs, the one with highest IoU is selected. + """If an anchor is assigned to multiple GTs, the one with highest + IoU is selected. 
@type mask_pos: Tensor - @param mask_pos: Mask of assigned anchors [bs, n_max_boxes, n_anchors] + @param mask_pos: Mask of assigned anchors [bs, n_max_boxes, + n_anchors] @type overlaps: Tensor - @param overlaps: IoUs between GTs and anchors [bx, n_max_boxes, n_anchors] + @param overlaps: IoUs between GTs and anchors [bx, n_max_boxes, + n_anchors] @type n_max_boxes: int @param n_max_boxes: Number of maximum boxes per image @rtype: tuple[Tensor, Tensor, Tensor] @@ -46,7 +51,9 @@ def fix_collisions( """ mask_pos_sum = mask_pos.sum(dim=-2) if mask_pos_sum.max() > 1: - mask_multi_gts = (mask_pos_sum.unsqueeze(1) > 1).repeat([1, n_max_boxes, 1]) + mask_multi_gts = (mask_pos_sum.unsqueeze(1) > 1).repeat( + [1, n_max_boxes, 1] + ) max_overlaps_idx = overlaps.argmax(dim=1) is_max_overlaps = F.one_hot(max_overlaps_idx, n_max_boxes) is_max_overlaps = is_max_overlaps.permute(0, 2, 1).to(overlaps.dtype) @@ -57,8 +64,8 @@ def fix_collisions( def batch_iou(batch1: Tensor, batch2: Tensor) -> Tensor: - """Calculates IoU for each pair of bboxes in the batch. Bboxes must be in xyxy - format. + """Calculates IoU for each pair of bboxes in the batch. Bboxes must + be in xyxy format. 
@type batch1: Tensor @param batch1: Tensor of shape C{[bs, N, 4]} diff --git a/luxonis_train/attached_modules/base_attached_module.py b/luxonis_train/attached_modules/base_attached_module.py index 17a4c277..904120a2 100644 --- a/luxonis_train/attached_modules/base_attached_module.py +++ b/luxonis_train/attached_modules/base_attached_module.py @@ -1,13 +1,15 @@ import logging from abc import ABC +from contextlib import suppress from typing import Generic +from luxonis_ml.data import LabelType from luxonis_ml.utils.registry import AutoRegisterMeta -from torch import Tensor, nn +from torch import Size, Tensor, nn from typing_extensions import TypeVarTuple, Unpack from luxonis_train.nodes import BaseNode -from luxonis_train.utils.types import IncompatibleException, Labels, LabelType, Packet +from luxonis_train.utils import IncompatibleException, Labels, Packet logger = logging.getLogger(__name__) @@ -15,7 +17,11 @@ class BaseAttachedModule( - nn.Module, Generic[Unpack[Ts]], ABC, metaclass=AutoRegisterMeta, register=False + nn.Module, + Generic[Unpack[Ts]], + ABC, + metaclass=AutoRegisterMeta, + register=False, ): """Base class for all modules that are attached to a L{LuxonisNode}. @@ -58,21 +64,38 @@ def __init__(self, *, node: BaseNode | None = None): self._node = node self._epoch = 0 - self._required_labels: tuple[LabelType, ...] | None = None - if self._node and self.supported_labels and self.node.tasks: + self.required_labels: list[LabelType] = [] + if self._node and self.supported_labels: + module_supported = [ + label.value + if isinstance(label, LabelType) + else f"({' + '.join(label)})" + for label in self.supported_labels + ] + module_supported = f"[{', '.join(module_supported)}]" + if not self.node.tasks: + raise IncompatibleException( + f"Module '{self.name}' requires one of the following " + f"labels or combinations of labels: {module_supported}, " + f"but is connected to node '{self.node.name}' which does not specify any tasks." 
+ ) node_tasks = set(self.node.tasks) for required_labels in self.supported_labels: if isinstance(required_labels, LabelType): - required_labels = (required_labels,) + required_labels = [required_labels] + else: + required_labels = list(required_labels) if set(required_labels) <= node_tasks: - self._required_labels = required_labels + self.required_labels = required_labels break else: - raise ValueError( - f"Module {self.name} supports labels {self.supported_labels}, " - f"but is connected to node {self.node.name} which does not support any of them. " - f"{self.node.name} supports {list(self.node_tasks.keys())}." + node_supported = [task.value for task in self.node.tasks] + raise IncompatibleException( + f"Module '{self.name}' requires one of the following labels or combinations of labels: {module_supported}, " + f"but is connected to node '{self.node.name}' which does not support any of them. " + f"{self.node.name} supports {node_supported}." ) + self._check_node_type_override() @property def name(self) -> str: @@ -83,7 +106,8 @@ def node(self) -> BaseNode: """Reference to the node that this module is attached to. @type: L{BaseNode} - @raises RuntimeError: If the node was not provided during initialization. + @raises RuntimeError: If the node was not provided during + initialization. """ if self._node is None: raise RuntimeError( @@ -93,20 +117,63 @@ def node(self) -> BaseNode: return self._node @property - def required_labels(self) -> tuple[LabelType, ...]: - if self._required_labels is None: - raise ValueError(f"{self.name} does not require any labels.") - return self._required_labels + def n_keypoints(self) -> int: + """Getter for the number of keypoints. + + @type: int + @raises ValueError: If the node does not support keypoints. + @raises RuntimeError: If the node doesn't define any task. + """ + return self.node.n_keypoints + + @property + def n_classes(self) -> int: + """Getter for the number of classes. 
+ + @type: int + @raises RuntimeError: If the node doesn't define any task. + @raises ValueError: If the number of classes is different for + different tasks. In that case, use the L{get_n_classes} + method. + """ + return self.node.n_classes + + @property + def original_in_shape(self) -> Size: + """Getter for the original input shape as [N, H, W]. + + @type: Size + """ + return self.node.original_in_shape + + @property + def class_names(self) -> list[str]: + """Getter for the class names. + + @type: list[str] + @raises RuntimeError: If the node doesn't define any task. + @raises ValueError: If the class names are different for + different tasks. In that case, use the L{get_class_names} + method. + """ + return self.node.class_names @property def node_tasks(self) -> dict[LabelType, str]: + """Getter for the tasks of the attached node. + + @type: dict[LabelType, str] + @raises RuntimeError: If the node does not have the `tasks` attribute set. + """ if self.node._tasks is None: - raise ValueError("Node must have the `tasks` attribute specified.") + raise RuntimeError( + "Node must have the `tasks` attribute specified." + ) return self.node._tasks def get_label( self, labels: Labels, label_type: LabelType | None = None - ) -> tuple[Tensor, LabelType]: + ) -> Tensor: """Extracts a specific label from the labels dictionary. 
If the label type is not provided, the first label that matches the @@ -114,11 +181,11 @@ def get_label( Example:: >>> # supported_labels = [LabelType.SEGMENTATION] - >>> labels = {"segmentation": ..., "boundingbox": ...} + >>> labels = {"segmentation": seg_tensor, "boundingbox": bbox_tensor} >>> get_label(labels) - (..., LabelType.SEGMENTATION) # returns the first matching label + seg_tensor # returns the first matching label >>> get_label(labels, LabelType.BOUNDINGBOX) - (..., LabelType.BOUNDINGBOX) # returns the bounding box label + bbox_tensor # returns the bounding box label >>> get_label(labels, LabelType.CLASSIFICATION) IncompatibleException: Label 'classification' is missing from the dataset. @@ -126,13 +193,18 @@ def get_label( @param labels: Labels from the dataset. @type label_type: LabelType | None @param label_type: Type of the label to extract. - @raises IncompatibleException: If the label is not found in the labels dictionary. - @raises NotImplementedError: If the module requires multiple labels. For such cases, - the `prepare` method should be overridden. - @rtype: tuple[Tensor, LabelType] - @return: Extracted label and its type. + @rtype: Tensor + @return: Extracted label + + @raises ValueError: If the module requires multiple labels and the C{label_type} is not provided. + @raises IncompatibleException: If the label is not found in the labels dictionary. """ + return self._get_label(labels, label_type)[0] + + def _get_label( + self, labels: Labels, label_type: LabelType | None = None + ) -> tuple[Tensor, LabelType]: if label_type is None: if len(self.required_labels) == 1: label_type = self.required_labels[0] @@ -145,16 +217,9 @@ def get_label( ) return labels[task_name] - if len(self.required_labels) > 1: - raise NotImplementedError( - f"{self.name} requires multiple labels. You must provide the " - "`label_type` argument to extract the desired label." 
- ) - for label, label_type in labels.values(): - if label_type == self.required_labels[0]: - return label, label_type - raise IncompatibleException.from_missing_task( - self.required_labels[0].value, list(labels.keys()), self.name + raise ValueError( + f"{self.name} requires multiple labels. You must provide the " + "`label_type` argument to extract the desired label." ) def get_input_tensors( @@ -181,33 +246,37 @@ def get_input_tensors( @rtype: list[Tensor] @return: Extracted input tensors - @raises ValueError: If the task type is not supported by the node or if the task - is not present in the inputs. + @raises IncompatibleException: If the task type is not supported by the node. + @raises IncompatibleException: If the task is not present in the inputs. - @raises NotImplementedError: If the module requires multiple labels. + @raises ValueError: If the module requires multiple labels. For such cases, the `prepare` method should be overridden. """ if task_type is not None: if isinstance(task_type, LabelType): if task_type not in self.node_tasks: - raise ValueError( + raise IncompatibleException( f"Task {task_type.value} is not supported by the node " f"{self.node.name}." ) return inputs[self.node_tasks[task_type]] else: if task_type not in inputs: - raise ValueError(f"Task {task_type} is not present in the inputs.") + raise IncompatibleException( + f"Task {task_type} is not present in the inputs." + ) return inputs[task_type] if len(self.required_labels) > 1: - raise NotImplementedError( + raise ValueError( f"{self.name} requires multiple labels, " "you must provide the `task_type` argument to extract the desired input." ) return inputs[self.node_tasks[self.required_labels[0]]] - def prepare(self, inputs: Packet[Tensor], labels: Labels) -> tuple[Unpack[Ts]]: + def prepare( + self, inputs: Packet[Tensor], labels: Labels + ) -> tuple[Unpack[Ts]]: """Prepares node outputs for the forward pass of the module. 
This default implementation selects the output and label based on @@ -223,48 +292,63 @@ def prepare(self, inputs: Packet[Tensor], labels: Labels) -> tuple[Unpack[Ts]]: @rtype: tuple[Unpack[Ts]] @return: Prepared inputs. Should allow the following usage with the - L{forward} method: + L{forward} method:: >>> loss.forward(*loss.prepare(outputs, labels)) - @raises NotImplementedError: If the module requires multiple labels. - @raises IncompatibleException: If the inputs are not compatible with the module. + @raises RuntimeError: If the module requires multiple labels and + is connected to a multi-task node. In this case, the default + implementation cannot be used and the C{prepare} method should be overridden. + + @raises RuntimeError: If the C{tasks} attribute is not set on the node. + @raises RuntimeError: If the C{supported_labels} attribute is not set on the module. """ if self.node._tasks is None: - raise ValueError( + raise RuntimeError( f"{self.node.name} must have the `tasks` attribute specified " f"for {self.name} to make use of the default `prepare` method." ) if self.supported_labels is None: - raise ValueError( + raise RuntimeError( f"{self.name} must have the `supported_labels` attribute " "specified in order to use the default `prepare` method." ) if len(self.supported_labels) > 1: - if len(self.node._tasks) > 1: - raise NotImplementedError( + if len(self.node_tasks) > 1: + raise RuntimeError( f"{self.name} supports more than one label type" f"and is connected to {self.node.name} node " "which is a multi-task node. The default `prepare` " "implementation cannot be used in this case." 
) self.supported_labels = list( - set(self.supported_labels) & set(self.node._tasks) + set(self.supported_labels) & set(self.node_tasks) ) x = self.get_input_tensors(inputs) - label, label_type = self.get_label(labels) + label, label_type = self._get_label(labels) if label_type in [LabelType.CLASSIFICATION, LabelType.SEGMENTATION]: - if isinstance(x, list): - if len(x) == 1: - x = x[0] - else: - logger.warning( - f"Module {self.name} expects a single tensor as input, " - f"but got {len(x)} tensors. Using the last tensor. " - f"If this is not the desired behavior, please override the " - "`prepare` method of the attached module or the `wrap` " - f"method of {self.node.name}." - ) - x = x[-1] + if len(x) == 1: + x = x[0] + else: + logger.warning( + f"Module {self.name} expects a single tensor as input, " + f"but got {len(x)} tensors. Using the last tensor. " + f"If this is not the desired behavior, please override the " + "`prepare` method of the attached module or the `wrap` " + f"method of {self.node.name}." + ) + x = x[-1] return x, label # type: ignore + + def _check_node_type_override(self) -> None: + if "node" not in self.__annotations__: + return + + node_type = self.__annotations__["node"] + with suppress(RuntimeError): + if not isinstance(self.node, node_type): + raise IncompatibleException( + f"Module '{self.name}' is attached to the '{self.node.name}' node, " + f"but '{self.name}' is only compatible with nodes of type '{node_type.__name__}'." 
+ ) diff --git a/luxonis_train/attached_modules/losses/adaptive_detection_loss.py b/luxonis_train/attached_modules/losses/adaptive_detection_loss.py index 6a28bff9..d25825cb 100644 --- a/luxonis_train/attached_modules/losses/adaptive_detection_loss.py +++ b/luxonis_train/attached_modules/losses/adaptive_detection_loss.py @@ -1,31 +1,39 @@ -from typing import Literal, cast +import logging +from typing import Any, Literal, cast import torch import torch.nn.functional as F +from luxonis_ml.data import LabelType from torch import Tensor, nn from torchvision.ops import box_convert +from luxonis_train.assigners import ATSSAssigner, TaskAlignedAssigner from luxonis_train.nodes import EfficientBBoxHead -from luxonis_train.utils.assigners import ATSSAssigner, TaskAlignedAssigner -from luxonis_train.utils.boxutils import ( - IoUType, +from luxonis_train.utils import ( + Labels, + Packet, anchors_for_fpn_features, compute_iou_loss, dist2bbox, ) -from luxonis_train.utils.types import IncompatibleException, Labels, LabelType, Packet +from luxonis_train.utils.boundingbox import IoUType from .base_loss import BaseLoss +logger = logging.getLogger(__name__) -class AdaptiveDetectionLoss(BaseLoss[Tensor, Tensor, Tensor, Tensor, Tensor, Tensor]): + +class AdaptiveDetectionLoss( + BaseLoss[Tensor, Tensor, Tensor, Tensor, Tensor, Tensor] +): node: EfficientBBoxHead supported_labels = [LabelType.BOUNDINGBOX] - class NodePacket(Packet[Tensor]): - features: list[Tensor] - class_scores: Tensor - distributions: Tensor + anchors: Tensor + anchor_points: Tensor + n_anchors_list: list[int] + stride_tensor: Tensor + gt_bboxes_scale: Tensor def __init__( self, @@ -34,7 +42,7 @@ def __init__( reduction: Literal["sum", "mean"] = "mean", class_loss_weight: float = 1.0, iou_loss_weight: float = 2.5, - **kwargs, + **kwargs: Any, ): """BBox loss adapted from U{YOLOv6: A Single-Stage Object Detection Framework for Industrial Applications }. 
It combines IoU based bbox regression loss and varifocal loss @@ -51,23 +59,15 @@ def __init__( @param class_loss_weight: Weight of classification loss. @type iou_loss_weight: float @param iou_loss_weight: Weight of IoU loss. - @type kwargs: dict - @param kwargs: Additional arguments to pass to L{BaseLoss}. """ super().__init__(**kwargs) - if not isinstance(self.node, EfficientBBoxHead): - raise IncompatibleException( - f"Loss `{self.name}` is only " - "compatible with nodes of type `EfficientBBoxHead`." - ) self.iou_type: IoUType = iou_type self.reduction = reduction - self.n_classes = self.node.n_classes self.stride = self.node.stride self.grid_cell_size = self.node.grid_cell_size self.grid_cell_offset = self.node.grid_cell_offset - self.original_img_size = self.node.original_in_shape[1:] + self.original_img_size = self.original_in_shape[1:] self.n_warmup_epochs = n_warmup_epochs self.atts_assigner = ATSSAssigner(topk=9, n_classes=self.n_classes) @@ -79,84 +79,41 @@ def __init__( self.class_loss_weight = class_loss_weight self.iou_loss_weight = iou_loss_weight - self.anchors = None - self.anchor_points = None - self.n_anchors_list = None - self.stride_tensor = None - self.gt_bboxes_scale = None + self._logged_assigner_change = False def prepare( - self, outputs: Packet[Tensor], labels: Labels + self, inputs: Packet[Tensor], labels: Labels ) -> tuple[Tensor, Tensor, Tensor, Tensor, Tensor, Tensor]: - feats = self.get_input_tensors(outputs, "features") - pred_scores = self.get_input_tensors(outputs, "class_scores")[0] - pred_distri = self.get_input_tensors(outputs, "distributions")[0] + feats = self.get_input_tensors(inputs, "features") + pred_scores = self.get_input_tensors(inputs, "class_scores")[0] + pred_distri = self.get_input_tensors(inputs, "distributions")[0] + + target = self.get_label(labels) + batch_size = pred_scores.shape[0] - device = pred_scores.device - target = self.get_label(labels)[0] - if self.gt_bboxes_scale is None: - self.gt_bboxes_scale = 
torch.tensor( - [ - self.original_img_size[1], - self.original_img_size[0], - self.original_img_size[1], - self.original_img_size[0], - ], - device=device, - ) - ( - self.anchors, - self.anchor_points, - self.n_anchors_list, - self.stride_tensor, - ) = anchors_for_fpn_features( - feats, - self.stride, - self.grid_cell_size, - self.grid_cell_offset, - multiply_with_stride=True, - ) - self.anchor_points_strided = self.anchor_points / self.stride_tensor + self._init_parameters(feats) - target = self._preprocess_target(target, batch_size) + target = self._preprocess_bbox_target(target, batch_size) pred_bboxes = dist2bbox(pred_distri, self.anchor_points_strided) gt_labels = target[:, :, :1] gt_xyxy = target[:, :, 1:] mask_gt = (gt_xyxy.sum(-1, keepdim=True) > 0).float() - if self._epoch < self.n_warmup_epochs: - ( - assigned_labels, - assigned_bboxes, - assigned_scores, - mask_positive, - _, - ) = self.atts_assigner( - self.anchors, - self.n_anchors_list, - gt_labels, - gt_xyxy, - mask_gt, - pred_bboxes.detach() * self.stride_tensor, - ) - else: - # TODO: log change of assigner (once common Logger) - ( - assigned_labels, - assigned_bboxes, - assigned_scores, - mask_positive, - _, - ) = self.tal_assigner( - pred_scores.detach(), - pred_bboxes.detach() * self.stride_tensor, - self.anchor_points, - gt_labels, - gt_xyxy, - mask_gt, - ) + ( + assigned_labels, + assigned_bboxes, + assigned_scores, + mask_positive, + _, + ) = self._run_assigner( + gt_labels, + gt_xyxy, + mask_gt, + pred_bboxes, + pred_scores, + ) return ( pred_bboxes, @@ -176,8 +133,12 @@ def forward( assigned_scores: Tensor, mask_positive: Tensor, ): - one_hot_label = F.one_hot(assigned_labels.long(), self.n_classes + 1)[..., :-1] - loss_cls = self.varifocal_loss(pred_scores, assigned_scores, one_hot_label) + one_hot_label = F.one_hot(assigned_labels.long(), self.n_classes + 1)[ + ..., :-1 + ] + loss_cls = self.varifocal_loss( + pred_scores, assigned_scores, one_hot_label + ) if assigned_scores.sum() > 1: 
loss_cls /= assigned_scores.sum() @@ -192,17 +153,77 @@ def forward( bbox_format="xyxy", )[0] - loss = self.class_loss_weight * loss_cls + self.iou_loss_weight * loss_iou + loss = ( + self.class_loss_weight * loss_cls + self.iou_loss_weight * loss_iou + ) sub_losses = {"class": loss_cls.detach(), "iou": loss_iou.detach()} return loss, sub_losses - def _preprocess_target(self, target: Tensor, batch_size: int): - """Preprocess target in shape [batch_size, N, 5] where N is maximum number of - instances in one image.""" + def _init_parameters(self, features: list[Tensor]): + if not hasattr(self, "gt_bboxes_scale"): + self.gt_bboxes_scale = torch.tensor( + [ + self.original_img_size[1], + self.original_img_size[0], + self.original_img_size[1], + self.original_img_size[0], + ], + device=features[0].device, + ) + ( + self.anchors, + self.anchor_points, + self.n_anchors_list, + self.stride_tensor, + ) = anchors_for_fpn_features( + features, + self.stride, + self.grid_cell_size, + self.grid_cell_offset, + multiply_with_stride=True, + ) + self.anchor_points_strided = ( + self.anchor_points / self.stride_tensor + ) + + def _run_assigner( + self, + gt_labels: Tensor, + gt_xyxy: Tensor, + mask_gt: Tensor, + pred_bboxes: Tensor, + pred_scores: Tensor, + ) -> tuple[Tensor, Tensor, Tensor, Tensor, Tensor]: + if self._epoch < self.n_warmup_epochs: + return self.atts_assigner( + self.anchors, + self.n_anchors_list, + gt_labels, + gt_xyxy, + mask_gt, + pred_bboxes.detach() * self.stride_tensor, + ) + else: + self._log_assigner_change() + return self.tal_assigner( + pred_scores.detach(), + pred_bboxes.detach() * self.stride_tensor, + self.anchor_points, + gt_labels, + gt_xyxy, + mask_gt, + ) + + def _preprocess_bbox_target( + self, target: Tensor, batch_size: int + ) -> Tensor: + """Preprocess target in shape [batch_size, N, 5] where N is the + maximum number of instances in one image.""" sample_ids, counts = cast( - tuple[Tensor, Tensor], torch.unique(target[:, 0].int(), 
return_counts=True) + tuple[Tensor, Tensor], + torch.unique(target[:, 0].int(), return_counts=True), ) c_max = int(counts.max()) if counts.numel() > 0 else 0 out_target = torch.zeros(batch_size, c_max, 5, device=target.device) @@ -214,6 +235,16 @@ def _preprocess_target(self, target: Tensor, batch_size: int): out_target[..., 1:] = box_convert(scaled_target, "xywh", "xyxy") return out_target + def _log_assigner_change(self): + if self._logged_assigner_change: + return + + logger.info( + f"Switching to Task Aligned Assigner after {self.n_warmup_epochs} warmup epochs.", + stacklevel=2, + ) + self._logged_assigner_change = True + class VarifocalLoss(nn.Module): def __init__(self, alpha: float = 0.75, gamma: float = 2.0): @@ -236,7 +267,8 @@ def forward( self, pred_score: Tensor, target_score: Tensor, label: Tensor ) -> Tensor: weight = ( - self.alpha * pred_score.pow(self.gamma) * (1 - label) + target_score * label + self.alpha * pred_score.pow(self.gamma) * (1 - label) + + target_score * label ) ce_loss = F.binary_cross_entropy( pred_score.float(), target_score.float(), reduction="none" diff --git a/luxonis_train/attached_modules/losses/base_loss.py b/luxonis_train/attached_modules/losses/base_loss.py index 89ce8d8c..7a69d0d8 100644 --- a/luxonis_train/attached_modules/losses/base_loss.py +++ b/luxonis_train/attached_modules/losses/base_loss.py @@ -17,19 +17,23 @@ class BaseLoss( ): """A base class for all loss functions. - This class defines the basic interface for all loss functions. It utilizes automatic - registration of defined subclasses to a L{LOSSES} registry. + This class defines the basic interface for all loss functions. It + utilizes automatic registration of defined subclasses to a L{LOSSES} + registry. """ @abstractmethod - def forward(self, *args: Unpack[Ts]) -> Tensor | tuple[Tensor, dict[str, Tensor]]: + def forward( + self, *args: Unpack[Ts] + ) -> Tensor | tuple[Tensor, dict[str, Tensor]]: """Forward pass of the loss function. 
@type args: Unpack[Ts] @param args: Prepared inputs from the L{prepare} method. @rtype: Tensor | tuple[Tensor, dict[str, Tensor]] - @return: The main loss and optional a dictionary of sublosses (for logging). - Only the main loss is used for backpropagation. + @return: The main loss and optional a dictionary of sublosses + (for logging). Only the main loss is used for + backpropagation. """ ... @@ -45,8 +49,10 @@ def run( @type labels: L{Labels} @param labels: Labels from the dataset. @rtype: Tensor | tuple[Tensor, dict[str, Tensor]] - @return: The main loss and optional a dictionary of sublosses (for logging). - Only the main loss is used for backpropagation. - @raises IncompatibleException: If the inputs are not compatible with the module. + @return: The main loss and optional a dictionary of sublosses + (for logging). Only the main loss is used for + backpropagation. + @raises IncompatibleException: If the inputs are not compatible + with the module. """ return self(*self.prepare(inputs, labels)) diff --git a/luxonis_train/attached_modules/losses/bce_with_logits.py b/luxonis_train/attached_modules/losses/bce_with_logits.py index 442a89c3..b759d06b 100644 --- a/luxonis_train/attached_modules/losses/bce_with_logits.py +++ b/luxonis_train/attached_modules/losses/bce_with_logits.py @@ -1,4 +1,4 @@ -from typing import Literal +from typing import Any, Literal import torch from luxonis_ml.data import LabelType @@ -15,35 +15,39 @@ def __init__( weight: list[float] | None = None, reduction: Literal["none", "mean", "sum"] = "mean", pos_weight: Tensor | None = None, - **kwargs, + **kwargs: Any, ): - """This loss combines a L{nn.Sigmoid} layer and the L{nn.BCELoss} in one single - class. This version is more numerically stable than using a plain C{Sigmoid} - followed by a {BCELoss} as, by combining the operations into one layer, we take - advantage of the log-sum-exp trick for numerical stability. 
+ """This loss combines a L{nn.Sigmoid} layer and the + L{nn.BCELoss} in one single class. This version is more + numerically stable than using a plain C{Sigmoid} followed by a + {BCELoss} as, by combining the operations into one layer, we + take advantage of the log-sum-exp trick for numerical stability. @type weight: list[float] | None - @param weight: a manual rescaling weight given to the loss of each batch - element. If given, has to be a list of length C{nbatch}. Defaults to - C{None}. + @param weight: a manual rescaling weight given to the loss of + each batch element. If given, has to be a list of length + C{nbatch}. Defaults to C{None}. @type reduction: Literal["none", "mean", "sum"] - @param reduction: Specifies the reduction to apply to the output: C{"none"} | - C{"mean"} | C{"sum"}. C{"none"}: no reduction will be applied, C{"mean"}: - the sum of the output will be divided by the number of elements in the - output, C{"sum"}: the output will be summed. Note: C{size_average} and - C{reduce} are in the process of being deprecated, and in the meantime, - specifying either of those two args will override C{reduction}. Defaults to - C{"mean"}. + @param reduction: Specifies the reduction to apply to the + output: C{"none"} | C{"mean"} | C{"sum"}. C{"none"}: no + reduction will be applied, C{"mean"}: the sum of the output + will be divided by the number of elements in the output, + C{"sum"}: the output will be summed. Note: C{size_average} + and C{reduce} are in the process of being deprecated, and in + the meantime, specifying either of those two args will + override C{reduction}. Defaults to C{"mean"}. @type pos_weight: Tensor | None - @param pos_weight: a weight of positive examples to be broadcasted with target. - Must be a tensor with equal size along the class dimension to the number of - classes. Pay close attention to PyTorch's broadcasting semantics in order to - achieve the desired operations. 
For a target of size [B, C, H, W] (where B - is batch size) pos_weight of size [B, C, H, W] will apply different - pos_weights to each element of the batch or [C, H, W] the same pos_weights - across the batch. To apply the same positive weight along all spacial - dimensions for a 2D multi-class target [C, H, W] use: [C, 1, 1]. Defaults to - C{None}. + @param pos_weight: a weight of positive examples to be + broadcasted with target. Must be a tensor with equal size + along the class dimension to the number of classes. Pay + close attention to PyTorch's broadcasting semantics in order + to achieve the desired operations. For a target of size [B, + C, H, W] (where B is batch size) pos_weight of size [B, C, + H, W] will apply different pos_weights to each element of + the batch or [C, H, W] the same pos_weights across the + batch. To apply the same positive weight along all spacial + dimensions for a 2D multi-class target [C, H, W] use: [C, 1, + 1]. Defaults to C{None}. """ super().__init__(**kwargs) self.criterion = nn.BCEWithLogitsLoss( @@ -53,6 +57,15 @@ def __init__( ) def forward(self, predictions: Tensor, target: Tensor) -> Tensor: + """Computes the BCE loss from logits. + + @type predictions: Tensor + @param predictions: Network predictions of shape (N, C, ...) + @type target: Tensor + @param target: A tensor of the same shape as predictions. + @rtype: Tensor + @return: A scalar tensor. 
+ """ if predictions.shape != target.shape: raise RuntimeError( f"Target tensor dimension ({target.shape}) and preds tensor " diff --git a/luxonis_train/attached_modules/losses/cross_entropy.py b/luxonis_train/attached_modules/losses/cross_entropy.py index 05a0f524..4be0cfdc 100644 --- a/luxonis_train/attached_modules/losses/cross_entropy.py +++ b/luxonis_train/attached_modules/losses/cross_entropy.py @@ -1,5 +1,5 @@ from logging import getLogger -from typing import Literal +from typing import Any, Literal import torch import torch.nn as nn @@ -9,12 +9,11 @@ from .base_loss import BaseLoss logger = getLogger(__name__) -was_logged = False class CrossEntropyLoss(BaseLoss[Tensor, Tensor]): - """This criterion computes the cross entropy loss between input logits and - target.""" + """This criterion computes the cross entropy loss between input + logits and target.""" supported_labels = [LabelType.SEGMENTATION, LabelType.CLASSIFICATION] @@ -24,7 +23,7 @@ def __init__( ignore_index: int = -100, reduction: Literal["none", "mean", "sum"] = "mean", label_smoothing: float = 0.0, - **kwargs, + **kwargs: Any, ): super().__init__(**kwargs) @@ -34,19 +33,19 @@ def __init__( reduction=reduction, label_smoothing=label_smoothing, ) + self._was_logged = False def forward(self, preds: Tensor, target: Tensor) -> Tensor: - global was_logged if preds.ndim == target.ndim: ch_dim = 1 if preds.ndim > 1 else 0 if preds.shape[ch_dim] == 1: - if not was_logged: + if not self._was_logged: logger.warning( "`CrossEntropyLoss` expects at least 2 classes. " "Attempting to fix by adding a dummy channel. " "If you want to be sure, use `BCEWithLogitsLoss` instead." 
) - was_logged = True + self._was_logged = True preds = torch.cat([torch.zeros_like(preds), preds], dim=ch_dim) if target.shape[ch_dim] == 1: target = torch.cat([1 - target, target], dim=ch_dim) diff --git a/luxonis_train/attached_modules/losses/efficient_keypoint_bbox_loss.py b/luxonis_train/attached_modules/losses/efficient_keypoint_bbox_loss.py index 2e6621de..d996dcfd 100644 --- a/luxonis_train/attached_modules/losses/efficient_keypoint_bbox_loss.py +++ b/luxonis_train/attached_modules/losses/efficient_keypoint_bbox_loss.py @@ -1,52 +1,44 @@ -from typing import Literal, cast +from typing import Any, Literal import torch import torch.nn.functional as F -from torch import Tensor, nn -from torchvision.ops import box_convert +from luxonis_ml.data import LabelType +from torch import Tensor -from luxonis_train.attached_modules.metrics.object_keypoint_similarity import ( - get_area_factor, - get_sigmas, -) +from luxonis_train.attached_modules.losses import AdaptiveDetectionLoss from luxonis_train.nodes import EfficientKeypointBBoxHead -from luxonis_train.utils.assigners import ATSSAssigner, TaskAlignedAssigner -from luxonis_train.utils.boxutils import ( - IoUType, - anchors_for_fpn_features, +from luxonis_train.utils import ( + Labels, + Packet, compute_iou_loss, dist2bbox, + get_sigmas, + get_with_default, ) -from luxonis_train.utils.types import IncompatibleException, Labels, LabelType, Packet +from luxonis_train.utils.boundingbox import IoUType -from .base_loss import BaseLoss from .bce_with_logits import BCEWithLogitsLoss -class EfficientKeypointBBoxLoss( - BaseLoss[Tensor, Tensor, Tensor, Tensor, Tensor, Tensor] -): +class EfficientKeypointBBoxLoss(AdaptiveDetectionLoss): node: EfficientKeypointBBoxHead supported_labels = [(LabelType.BOUNDINGBOX, LabelType.KEYPOINTS)] - class NodePacket(Packet[Tensor]): - features: list[Tensor] - class_scores: Tensor - distributions: Tensor + gt_kpts_scale: Tensor def __init__( self, n_warmup_epochs: int = 4, iou_type: IoUType = 
"giou", reduction: Literal["sum", "mean"] = "mean", - class_bbox_loss_weight: float = 1.0, + class_loss_weight: float = 1.0, iou_loss_weight: float = 2.5, viz_pw: float = 1.0, regr_kpts_loss_weight: float = 1.5, vis_kpts_loss_weight: float = 1.0, sigmas: list[float] | None = None, area_factor: float | None = None, - **kwargs, + **kwargs: Any, ): """BBox loss adapted from U{YOLOv6: A Single-Stage Object Detection Framework for Industrial Applications }. It combines IoU based bbox regression loss and varifocal loss @@ -55,12 +47,12 @@ def __init__( @type n_warmup_epochs: int @param n_warmup_epochs: Number of epochs where ATSS assigner is used, after that we switch to TAL assigner. - @type iou_type: L{IoUType} + @type iou_type: Literal["none", "giou", "diou", "ciou", "siou"] @param iou_type: IoU type used for bbox regression loss. @type reduction: Literal["sum", "mean"] @param reduction: Reduction type for loss. - @type class_bbox_loss_weight: float - @param class_bbox_loss_weight: Weight of classification loss for bounding boxes. + @type class_loss_weight: float + @param class_loss_weight: Weight of classification loss for bounding boxes. @type regr_kpts_loss_weight: float @param regr_kpts_loss_weight: Weight of regression loss for keypoints. @type vis_kpts_loss_weight: float @@ -71,153 +63,100 @@ def __init__( @param sigmas: Sigmas used in KeypointLoss for OKS metric. If None then use COCO ones if possible or default ones. Defaults to C{None}. @type area_factor: float | None @param area_factor: Factor by which we multiply bbox area which is used in KeypointLoss. If None then use default one. Defaults to C{None}. - @type kwargs: dict - @param kwargs: Additional arguments to pass to L{BaseLoss}. 
""" - super().__init__(**kwargs) + super().__init__( + n_warmup_epochs=n_warmup_epochs, + iou_type=iou_type, + reduction=reduction, + class_loss_weight=class_loss_weight, + iou_loss_weight=iou_loss_weight, + **kwargs, + ) - if not isinstance(self.node, EfficientKeypointBBoxHead): - raise IncompatibleException( - f"Loss `{self.name}` is only " - "compatible with nodes of type `EfficientKeypointBBoxHead`." - ) - self.iou_type: IoUType = iou_type - self.reduction = reduction - self.n_classes = self.node.n_classes - self.stride = self.node.stride - self.grid_cell_size = self.node.grid_cell_size - self.grid_cell_offset = self.node.grid_cell_offset - self.original_img_size = self.node.original_in_shape[1:] - self.n_heads = self.node.n_heads - self.n_kps = self.node.n_keypoints - - self.b_cross_entropy = BCEWithLogitsLoss(pos_weight=torch.tensor([viz_pw])) + self.b_cross_entropy = BCEWithLogitsLoss( + pos_weight=torch.tensor([viz_pw]) + ) self.sigmas = get_sigmas( - sigmas=sigmas, n_keypoints=self.n_kps, class_name=self.name + sigmas=sigmas, + n_keypoints=self.n_keypoints, + caller_name=self.name, ) - self.area_factor = get_area_factor(area_factor, class_name=self.name) - - self.n_warmup_epochs = n_warmup_epochs - self.atts_assigner = ATSSAssigner(topk=9, n_classes=self.n_classes) - self.tal_assigner = TaskAlignedAssigner( - topk=13, n_classes=self.n_classes, alpha=1.0, beta=6.0 + self.area_factor = get_with_default( + area_factor, "bbox area scaling", self.name, default=0.53 ) - - self.varifocal_loss = VarifocalLoss() - self.class_bbox_loss_weight = class_bbox_loss_weight - self.iou_loss_weight = iou_loss_weight self.regr_kpts_loss_weight = regr_kpts_loss_weight self.vis_kpts_loss_weight = vis_kpts_loss_weight def prepare( - self, outputs: Packet[Tensor], labels: Labels - ) -> tuple[Tensor, Tensor, Tensor, Tensor, Tensor, Tensor, Tensor, Tensor, Tensor]: - feats = self.get_input_tensors(outputs, "features") - pred_scores = self.get_input_tensors(outputs, 
"class_scores")[0] - pred_distri = self.get_input_tensors(outputs, "distributions")[0] - pred_kpts = self.get_input_tensors(outputs, "keypoints_raw")[0] + self, inputs: Packet[Tensor], labels: Labels + ) -> tuple[ + Tensor, Tensor, Tensor, Tensor, Tensor, Tensor, Tensor, Tensor, Tensor + ]: + feats = self.get_input_tensors(inputs, "features") + pred_scores = self.get_input_tensors(inputs, "class_scores")[0] + pred_distri = self.get_input_tensors(inputs, "distributions")[0] + pred_kpts = self.get_input_tensors(inputs, "keypoints_raw")[0] + + target_kpts = self.get_label(labels, LabelType.KEYPOINTS) + target_bbox = self.get_label(labels, LabelType.BOUNDINGBOX) batch_size = pred_scores.shape[0] - device = pred_scores.device - - target_kpts = self.get_label(labels, LabelType.KEYPOINTS)[0] - target_bbox = self.get_label(labels, LabelType.BOUNDINGBOX)[0] n_kpts = (target_kpts.shape[1] - 2) // 3 - gt_bboxes_scale = torch.tensor( - [ - self.original_img_size[1], - self.original_img_size[0], - self.original_img_size[1], - self.original_img_size[0], - ], - device=device, - ) - gt_kpts_scale = torch.tensor( - [ - self.original_img_size[1], - self.original_img_size[0], - ], - device=device, - ) - ( - anchors, - anchor_points, - n_anchors_list, - stride_tensor, - ) = anchors_for_fpn_features( - feats, - self.stride, - self.grid_cell_size, - self.grid_cell_offset, - multiply_with_stride=True, - ) + self._init_parameters(feats) - anchor_points_strided = anchor_points / stride_tensor - pred_bboxes = dist2bbox(pred_distri, anchor_points_strided) + pred_bboxes = dist2bbox(pred_distri, self.anchor_points_strided) pred_kpts = self.dist2kpts_noscale( - anchor_points_strided, pred_kpts.view(batch_size, -1, n_kpts, 3) + self.anchor_points_strided, + pred_kpts.view( + batch_size, + -1, + n_kpts, + 3, + ), ) - target_bbox = self._preprocess_bbox_target( - target_bbox, batch_size, gt_bboxes_scale - ) + target_bbox = self._preprocess_bbox_target(target_bbox, batch_size) gt_bbox_labels = 
target_bbox[:, :, :1] gt_xyxy = target_bbox[:, :, 1:] mask_gt = (gt_xyxy.sum(-1, keepdim=True) > 0).float() - - if self._epoch < self.n_warmup_epochs: - ( - assigned_labels, - assigned_bboxes, - assigned_scores, - mask_positive, - assigned_gt_idx, - ) = self.atts_assigner( - anchors, - n_anchors_list, - gt_bbox_labels, - gt_xyxy, - mask_gt, - pred_bboxes.detach() * stride_tensor, - ) - else: - ( - assigned_labels, - assigned_bboxes, - assigned_scores, - mask_positive, - assigned_gt_idx, - ) = self.tal_assigner( - pred_scores.detach(), - pred_bboxes.detach() * stride_tensor, - anchor_points, - gt_bbox_labels, - gt_xyxy, - mask_gt, - ) + ( + assigned_labels, + assigned_bboxes, + assigned_scores, + mask_positive, + assigned_gt_idx, + ) = self._run_assigner( + gt_bbox_labels, + gt_xyxy, + mask_gt, + pred_bboxes, + pred_scores, + ) batched_kpts = self._preprocess_kpts_target( - target_kpts, batch_size, gt_kpts_scale + target_kpts, batch_size, self.gt_kpts_scale ) assigned_gt_idx_expanded = assigned_gt_idx.unsqueeze(-1).unsqueeze(-1) selected_keypoints = batched_kpts.gather( - 1, assigned_gt_idx_expanded.expand(-1, -1, self.n_kps, 3) + 1, assigned_gt_idx_expanded.expand(-1, -1, self.n_keypoints, 3) ) xy_components = selected_keypoints[:, :, :, :2] - normalized_xy = xy_components / stride_tensor.view(1, -1, 1, 1) + normalized_xy = xy_components / self.stride_tensor.view(1, -1, 1, 1) selected_keypoints = torch.cat( (normalized_xy, selected_keypoints[:, :, :, 2:]), dim=-1 ) gt_kpt = selected_keypoints[mask_positive] pred_kpts = pred_kpts[mask_positive] - assigned_bboxes = assigned_bboxes / stride_tensor + assigned_bboxes = assigned_bboxes / self.stride_tensor area = ( - assigned_bboxes[mask_positive][:, 0] - assigned_bboxes[mask_positive][:, 2] + assigned_bboxes[mask_positive][:, 0] + - assigned_bboxes[mask_positive][:, 2] ) * ( - assigned_bboxes[mask_positive][:, 1] - assigned_bboxes[mask_positive][:, 3] + assigned_bboxes[mask_positive][:, 1] + - 
assigned_bboxes[mask_positive][:, 3] ) return ( @@ -256,8 +195,12 @@ def forward( ).mean() visibility_loss = self.b_cross_entropy.forward(pred_kpts[..., 2], mask) - one_hot_label = F.one_hot(assigned_labels.long(), self.n_classes + 1)[..., :-1] - loss_cls = self.varifocal_loss(pred_scores, assigned_scores, one_hot_label) + one_hot_label = F.one_hot(assigned_labels.long(), self.n_classes + 1)[ + ..., :-1 + ] + loss_cls = self.varifocal_loss( + pred_scores, assigned_scores, one_hot_label + ) if assigned_scores.sum() > 1: loss_cls /= assigned_scores.sum() @@ -273,7 +216,7 @@ def forward( )[0] loss = ( - self.class_bbox_loss_weight * loss_cls + self.class_loss_weight * loss_cls + self.iou_loss_weight * loss_iou + regression_loss * self.regr_kpts_loss_weight + visibility_loss * self.vis_kpts_loss_weight @@ -288,49 +231,32 @@ def forward( return loss, sub_losses - def _preprocess_bbox_target( - self, bbox_target: Tensor, batch_size: int, scale_tensor: Tensor - ) -> Tensor: - """Preprocess target bboxes in shape [batch_size, N, 5] where N is maximum - number of instances in one image.""" - sample_ids, counts = cast( - tuple[Tensor, Tensor], - torch.unique(bbox_target[:, 0].int(), return_counts=True), - ) - c_max = int(counts.max()) if counts.numel() > 0 else 0 - out_target = torch.zeros(batch_size, c_max, 5, device=bbox_target.device) - out_target[:, :, 0] = -1 - for id, count in zip(sample_ids, counts): - out_target[id, :count] = bbox_target[bbox_target[:, 0] == id][:, 1:] - - scaled_target = out_target[:, :, 1:5] * scale_tensor - out_target[..., 1:] = box_convert(scaled_target, "xywh", "xyxy") - return out_target - def _preprocess_kpts_target( self, kpts_target: Tensor, batch_size: int, scale_tensor: Tensor ) -> Tensor: - """Preprocesses the target keypoints in shape [batch_size, N, n_keypoints, 3] - where N is the maximum number of keypoints in one image.""" + """Preprocesses the target keypoints in shape [batch_size, N, + n_keypoints, 3] where N is the maximum number 
of keypoints in + one image.""" _, counts = torch.unique(kpts_target[:, 0].int(), return_counts=True) max_kpts = int(counts.max()) if counts.numel() > 0 else 0 batched_keypoints = torch.zeros( - (batch_size, max_kpts, self.n_kps, 3), device=kpts_target.device + (batch_size, max_kpts, self.n_keypoints, 3), + device=kpts_target.device, ) for i in range(batch_size): keypoints_i = kpts_target[kpts_target[:, 0] == i] scaled_keypoints_i = keypoints_i[:, 2:].clone() - batched_keypoints[i, : keypoints_i.shape[0]] = scaled_keypoints_i.view( - -1, self.n_kps, 3 + batched_keypoints[i, : keypoints_i.shape[0]] = ( + scaled_keypoints_i.view(-1, self.n_keypoints, 3) ) batched_keypoints[i, :, :, :2] *= scale_tensor[:2] return batched_keypoints def dist2kpts_noscale(self, anchor_points: Tensor, kpts: Tensor) -> Tensor: - """Adjusts and scales predicted keypoints relative to anchor points without - considering image stride.""" + """Adjusts and scales predicted keypoints relative to anchor + points without considering image stride.""" adj_kpts = kpts.clone() scale = 2.0 x_adj = anchor_points[:, [0]] - 0.5 @@ -341,32 +267,13 @@ def dist2kpts_noscale(self, anchor_points: Tensor, kpts: Tensor) -> Tensor: adj_kpts[..., 1] += y_adj return adj_kpts - -class VarifocalLoss(nn.Module): - def __init__(self, alpha: float = 0.75, gamma: float = 2.0): - """Varifocal Loss is a loss function for training a dense object detector to predict - the IoU-aware classification score, inspired by focal loss. - Code is adapted from: U{https://github.com/Nioolek/PPYOLOE_pytorch/blob/master/ppyoloe/models/losses.py} - - @type alpha: float - @param alpha: alpha parameter in focal loss, default is 0.75. - @type gamma: float - @param gamma: gamma parameter in focal loss, default is 2.0. 
- """ - - super().__init__() - - self.alpha = alpha - self.gamma = gamma - - def forward( - self, pred_score: Tensor, target_score: Tensor, label: Tensor - ) -> Tensor: - weight = ( - self.alpha * pred_score.pow(self.gamma) * (1 - label) + target_score * label - ) - ce_loss = F.binary_cross_entropy( - pred_score.float(), target_score.float(), reduction="none" + def _init_parameters(self, features: list[Tensor]): + device = features[0].device + super()._init_parameters(features) + self.gt_kpts_scale = torch.tensor( + [ + self.original_img_size[1], + self.original_img_size[0], + ], + device=device, ) - loss = (ce_loss * weight).sum() - return loss diff --git a/luxonis_train/attached_modules/losses/implicit_keypoint_bbox_loss.py b/luxonis_train/attached_modules/losses/implicit_keypoint_bbox_loss.py index d174c555..8c9230ae 100644 --- a/luxonis_train/attached_modules/losses/implicit_keypoint_bbox_loss.py +++ b/luxonis_train/attached_modules/losses/implicit_keypoint_bbox_loss.py @@ -1,17 +1,20 @@ -from typing import cast +import logging +from typing import Any, cast import torch +from luxonis_ml.data import LabelType from torch import Tensor from torchvision.ops import box_convert from luxonis_train.attached_modules.losses.keypoint_loss import KeypointLoss from luxonis_train.nodes import ImplicitKeypointBBoxHead -from luxonis_train.utils.boxutils import ( +from luxonis_train.utils import ( + Labels, + Packet, compute_iou_loss, match_to_anchor, process_bbox_predictions, ) -from luxonis_train.utils.types import IncompatibleException, Labels, LabelType, Packet from .base_loss import BaseLoss from .bce_with_logits import BCEWithLogitsLoss @@ -25,7 +28,10 @@ list[Tensor], ] +logger = logging.getLogger(__name__) + +# TODO: BROKEN! 
class ImplicitKeypointBBoxLoss(BaseLoss[list[Tensor], KeypointTargetType]): node: ImplicitKeypointBBoxHead supported_labels = [(LabelType.BOUNDINGBOX, LabelType.KEYPOINTS)] @@ -47,10 +53,10 @@ def __init__( anchor_threshold: float = 4.0, bias: float = 0.5, balance: list[float] | None = None, - **kwargs, + **kwargs: Any, ): - """Joint loss for keypoint and box predictions for cases where the keypoints and - boxes are inherently linked. + """Joint loss for keypoint and box predictions for cases where + the keypoints and boxes are inherently linked. Based on U{YOLO-Pose: Enhancing YOLO for Multi Person Pose Estimation Using Object Keypoint Similarity Loss}. @@ -89,34 +95,29 @@ def __init__( super().__init__(**kwargs) - if not isinstance(self.node, ImplicitKeypointBBoxHead): - raise IncompatibleException( - f"Loss `{self.name}` is only " - "compatible with nodes of type `ImplicitKeypointBBoxHead`." - ) - self.n_classes = self.node.n_classes - self.n_keypoints = self.node.n_keypoints self.n_anchors = self.node.n_anchors - self.num_heads = self.node.num_heads + self.n_heads = self.node.n_heads self.box_offset = self.node.box_offset self.anchors = self.node.anchors self.balance = balance or [4.0, 1.0, 0.4] - if len(self.balance) < self.num_heads: - raise ValueError( - f"Balance list must have at least {self.num_heads} elements." + if len(self.balance) < self.n_heads: + logger.warning( + f"Balance list must have at least {self.n_heads} elements." + "Filling the rest with 1.0." 
) + self.balance += [1.0] * (self.n_heads - len(self.balance)) self.min_objectness_iou = min_objectness_iou self.bbox_weight = bbox_loss_weight self.class_weight = class_loss_weight self.objectness_weight = objectness_loss_weight - self.kpt_visibility_weight = keypoint_visibility_loss_weight - self.keypoint_regression_loss_weight = keypoint_regression_loss_weight self.anchor_threshold = anchor_threshold self.bias = bias - self.b_cross_entropy = BCEWithLogitsLoss(pos_weight=torch.tensor([obj_pw])) + self.b_cross_entropy = BCEWithLogitsLoss( + pos_weight=torch.tensor([obj_pw]) + ) self.class_loss = SmoothBCEWithLogitsLoss( label_smoothing=label_smoothing, bce_pow=cls_pw, @@ -126,6 +127,8 @@ def __init__( bce_power=viz_pw, sigmas=sigmas, area_factor=area_factor, + regression_loss_weight=keypoint_regression_loss_weight, + visibility_loss_weight=keypoint_visibility_loss_weight, ) self.positive_smooth_const = 1 - 0.5 * label_smoothing @@ -134,38 +137,44 @@ def __init__( def prepare( self, outputs: Packet[Tensor], labels: Labels ) -> tuple[list[Tensor], KeypointTargetType]: - """Prepares the labels to be in the correct format for loss calculation. + """Prepares the labels to be in the correct format for loss + calculation. @type outputs: Packet[Tensor] @param outputs: Output from the forward pass. @type labels: L{Labels} @param labels: Dictionary containing the labels. - @rtype: tuple[list[Tensor], tuple[list[Tensor], list[Tensor], list[Tensor], - list[tuple[Tensor, Tensor, Tensor, Tensor]], list[Tensor]]] - @return: Tuple containing the original output and the postprocessed labels. The - processed labels are a tuple containing the class targets, box targets, - keypoint targets, indices and anchors. Indicies are a tuple containing - vectors of indices for batch, anchor, feature y and feature x dimensions, - respectively. They are all of shape (n_targets,). 
The indices are used to - index the output tensors of shape (batch_size, n_anchors, feature_height, - feature_width, n_classes + box_offset + n_keypoints * 3) to get a tensor of - shape (n_targets, n_classes + box_offset + n_keypoints * 3). + @rtype: tuple[list[Tensor], tuple[list[Tensor], list[Tensor], + list[Tensor], list[tuple[Tensor, Tensor, Tensor, Tensor]], + list[Tensor]]] + @return: Tuple containing the original output and the + postprocessed labels. The processed labels are a tuple + containing the class targets, box targets, keypoint targets, + indices and anchors. Indicies are a tuple containing vectors + of indices for batch, anchor, feature y and feature x + dimensions, respectively. They are all of shape + (n_targets,). The indices are used to index the output + tensors of shape (batch_size, n_anchors, feature_height, + feature_width, n_classes + box_offset + n_keypoints * 3) to + get a tensor of shape (n_targets, n_classes + box_offset + + n_keypoints * 3). """ predictions = self.get_input_tensors(outputs, "features") - kpts = self.get_label(labels, LabelType.KEYPOINTS)[0] - boxes = self.get_label(labels, LabelType.BOUNDINGBOX)[0] + kpt_label = self.get_label(labels, LabelType.KEYPOINTS) + bbox_label = self.get_label(labels, LabelType.BOUNDINGBOX) - nkpts = (kpts.shape[1] - 2) // 3 - targets = torch.zeros((len(boxes), nkpts * 3 + self.box_offset + 1)) - targets[:, :2] = boxes[:, :2] + targets = torch.zeros( + (kpt_label.shape[0], self.n_keypoints * 3 + self.box_offset + 1) + ) + targets[:, :2] = kpt_label[:, :2] targets[:, 2 : self.box_offset + 1] = box_convert( - boxes[:, 2:], "xywh", "cxcywh" + bbox_label[:, 2:], "xywh", "cxcywh" ) - targets[:, self.box_offset + 1 :: 3] = kpts[:, 2::3] # insert kp x coordinates - targets[:, self.box_offset + 2 :: 3] = kpts[:, 3::3] # insert kp y coordinates - targets[:, self.box_offset + 3 :: 3] = kpts[:, 4::3] # insert kp visibility + # insert keypoints + for i in range(1, 4): + targets[:, self.box_offset + i :: 
3] = kpt_label[:, i + 1 :: 3] n_targets = targets.shape[0] @@ -176,21 +185,26 @@ def prepare( anchors: list[Tensor] = [] anchor_indices = ( - torch.arange(self.n_anchors, device=targets.device, dtype=torch.float32) + torch.arange( + self.n_anchors, device=targets.device, dtype=torch.float32 + ) .reshape(self.n_anchors, 1) .repeat(1, n_targets) .unsqueeze(-1) ) - targets = torch.cat((targets.repeat(self.n_anchors, 1, 1), anchor_indices), 2) + targets = torch.cat( + (targets.repeat(self.n_anchors, 1, 1), anchor_indices), 2 + ) xy_deltas = ( torch.tensor( - [[0, 0], [1, 0], [0, 1], [-1, 0], [0, -1]], device=targets.device + [[0, 0], [1, 0], [0, 1], [-1, 0], [0, -1]], + device=targets.device, ).float() * self.bias ) - for i in range(self.num_heads): + for i in range(self.n_heads): anchor = self.anchors[i] feature_height, feature_width = predictions[i].shape[2:4] scaled_targets, xy_shifts = match_to_anchor( @@ -251,9 +265,15 @@ def forward( "kpt_regression": torch.tensor(0.0, device=device), } - for pred, class_target, box_target, kpt_target, index, anchor, balance in zip( - predictions, *targets, self.balance - ): + for ( + pred, + class_target, + box_target, + kpt_target, + index, + anchor, + balance, + ) in zip(predictions, *targets, self.balance): obj_targets = torch.zeros_like(pred[..., 0], device=device) n_targets = len(class_target) @@ -280,13 +300,8 @@ def forward( kpt_target.to(device), area.to(device), ) - - sub_losses["kpt_regression"] += ( - kpt_sublosses["regression"] * self.keypoint_regression_loss_weight - ) - sub_losses["kpt_visibility"] += ( - kpt_sublosses["visibility"] * self.kpt_visibility_weight - ) + for name, kpt_subloss in kpt_sublosses.items(): + sub_losses[name] += kpt_subloss obj_targets[index] = (self.min_objectness_iou) + ( 1 - self.min_objectness_iou @@ -295,11 +310,10 @@ def forward( if self.n_classes > 1: sub_losses["class"] += ( self.class_loss.forward( - [ - pred_subset[ - :, - self.box_offset : self.box_offset + self.n_classes, - ] + 
pred_subset[ + :, + self.box_offset : self.box_offset + + self.n_classes, ], class_target, ) @@ -315,7 +329,9 @@ def forward( loss = cast(Tensor, sum(sub_losses.values())).reshape([]) return loss, {name: loss.detach() for name, loss in sub_losses.items()} - def _create_keypoint_target(self, scaled_targets: Tensor, box_xy_deltas: Tensor): + def _create_keypoint_target( + self, scaled_targets: Tensor, box_xy_deltas: Tensor + ): keypoint_target = scaled_targets[:, self.box_offset + 1 : -1] for j in range(self.n_keypoints): idx = 3 * j diff --git a/luxonis_train/attached_modules/losses/keypoint_loss.py b/luxonis_train/attached_modules/losses/keypoint_loss.py index d5ca278f..c17ac7a1 100644 --- a/luxonis_train/attached_modules/losses/keypoint_loss.py +++ b/luxonis_train/attached_modules/losses/keypoint_loss.py @@ -1,17 +1,20 @@ +from typing import Any + import torch +from luxonis_ml.data import LabelType from torch import Tensor -from luxonis_train.attached_modules.metrics.object_keypoint_similarity import ( - get_area_factor, +from luxonis_train.utils import ( get_sigmas, + get_with_default, + process_keypoints_predictions, ) -from luxonis_train.utils.boxutils import process_keypoints_predictions -from luxonis_train.utils.types import Labels, LabelType, Packet from .base_loss import BaseLoss from .bce_with_logits import BCEWithLogitsLoss +# TODO: Make it work on its own class KeypointLoss(BaseLoss[Tensor, Tensor]): supported_labels = [LabelType.KEYPOINTS] @@ -21,73 +24,89 @@ def __init__( bce_power: float = 1.0, sigmas: list[float] | None = None, area_factor: float | None = None, - **kwargs, + regression_loss_weight: float = 1.0, + visibility_loss_weight: float = 1.0, + **kwargs: Any, ): - """Keypoint based loss that is computed from OKS-based regression and visibility - loss. + """Keypoint based loss that is computed from OKS-based + regression and visibility loss. @type n_keypoints: int @param n_keypoints: Number of keypoints. 
@type bce_power: float - @param bce_power: Power used for BCE visibility loss. Defaults to C{1.0}. - @param sigmas: Sigmas used for OKS. If None then use COCO ones if possible or - default ones. Defaults to C{None}. + @param bce_power: Power used for BCE visibility loss. Defaults + to C{1.0}. + @param sigmas: Sigmas used for OKS. If None then use COCO ones + if possible or default ones. Defaults to C{None}. @type area_factor: float | None - @param area_factor: Factor by which we multiply bbox area. If None then use - default one. Defaults to C{None}. + @param area_factor: Factor by which we multiply bbox area. If + None then use default one. Defaults to C{None}. + @type regression_loss_weight: float + @param regression_loss_weight: Weight of regression loss. + Defaults to C{1.0}. + @type visibility_loss_weight: float + @param visibility_loss_weight: Weight of visibility loss. + Defaults to C{1.0}. """ super().__init__(**kwargs) self.b_cross_entropy = BCEWithLogitsLoss( pos_weight=torch.tensor([bce_power]), **kwargs ) - self.sigmas = get_sigmas( - sigmas=sigmas, n_keypoints=n_keypoints, class_name=self.name + self.sigmas = get_sigmas(sigmas, n_keypoints, caller_name=self.name) + self.area_factor = get_with_default( + area_factor, "bbox area scaling", self.name, default=0.53 ) - self.area_factor = get_area_factor(area_factor, class_name=self.name) - - def prepare(self, inputs: Packet[Tensor], labels: Labels) -> tuple[Tensor, Tensor]: - return torch.cat(inputs["keypoints"], dim=0), self.get_label(labels)[0] + self.regression_loss_weight = regression_loss_weight + self.visibility_loss_weight = visibility_loss_weight def forward( self, prediction: Tensor, target: Tensor, area: Tensor ) -> tuple[Tensor, dict[str, Tensor]]: - """Computes the keypoint loss and visibility loss for a given prediction and - target. + """Computes the keypoint loss and visibility loss for a given + prediction and target. 
@type prediction: Tensor - @param prediction: Predicted tensor of shape C{[n_detections, n_keypoints * 3]}. + @param prediction: Predicted tensor of shape C{[n_detections, + n_keypoints * 3]}. @type target: Tensor - @param target: Target tensor of shape C{[n_detections, n_keypoints * 3]}. + @param target: Target tensor of shape C{[n_detections, + n_keypoints * 3]}. @type area: Tensor @param area: Area tensor of shape C{[n_detections]}. @rtype: tuple[Tensor, dict[str, Tensor]] - @return: A tuple containing the total loss tensor of shape C{[1,]} and a - dictionary with the regression loss and visibility loss tensors. + @return: A tuple containing the total loss tensor of shape + C{[1,]} and a dictionary with the regression loss and + visibility loss tensors. """ - device = prediction.device - sigmas = self.sigmas.to(device) + sigmas = self.sigmas.to(prediction.device) pred_x, pred_y, pred_v = process_keypoints_predictions(prediction) - gt_x = target[:, 0::3] - gt_y = target[:, 1::3] - gt_v = (target[:, 2::3] > 0).float() + target_x = target[:, 0::3] + target_y = target[:, 1::3] + target_visibility = (target[:, 2::3] > 0).float() - visibility_loss = self.b_cross_entropy.forward(pred_v, gt_v) + visibility_loss = ( + self.b_cross_entropy.forward(pred_v, target_visibility) + * self.visibility_loss_weight + ) scales = area * self.area_factor - d = (gt_x - pred_x) ** 2 + (gt_y - pred_y) ** 2 - e = d / (2 * sigmas**2) / (scales.view(-1, 1) + 1e-9) / 2 + distance = (target_x - pred_x) ** 2 + (target_y - pred_y) ** 2 + normalized_distance = ( + distance / (2 * sigmas**2) / (scales.view(-1, 1) + 1e-9) / 2 + ) - regression_loss_unreduced = 1 - torch.exp(-e) - regression_loss_reduced = (regression_loss_unreduced * gt_v).sum(dim=1) / ( - gt_v.sum(dim=1) + 1e-9 + regression_loss = 1 - torch.exp(-normalized_distance) + regression_loss = (regression_loss * target_visibility).sum(dim=1) / ( + target_visibility.sum(dim=1) + 1e-9 ) - regression_loss = regression_loss_reduced.mean() + 
regression_loss = regression_loss.mean() + regression_loss *= self.regression_loss_weight total_loss = regression_loss + visibility_loss return total_loss, { - "regression": regression_loss, - "visibility": visibility_loss, + "kpt_regression": regression_loss, + "kpt_visibility": visibility_loss, } diff --git a/luxonis_train/attached_modules/losses/sigmoid_focal_loss.py b/luxonis_train/attached_modules/losses/sigmoid_focal_loss.py index f3affc74..884d4863 100644 --- a/luxonis_train/attached_modules/losses/sigmoid_focal_loss.py +++ b/luxonis_train/attached_modules/losses/sigmoid_focal_loss.py @@ -1,4 +1,4 @@ -from typing import Literal +from typing import Any, Literal from luxonis_ml.data import LabelType from torch import Tensor @@ -15,7 +15,7 @@ def __init__( alpha: float = 0.25, gamma: float = 2.0, reduction: Literal["none", "mean", "sum"] = "mean", - **kwargs, + **kwargs: Any, ): """Focal loss from U{Focal Loss for Dense Object Detection }. @@ -37,7 +37,11 @@ def __init__( def forward(self, preds: Tensor, target: Tensor) -> Tensor: loss = sigmoid_focal_loss( - preds, target, alpha=self.alpha, gamma=self.gamma, reduction=self.reduction + preds, + target, + alpha=self.alpha, + gamma=self.gamma, + reduction=self.reduction, ) return loss diff --git a/luxonis_train/attached_modules/losses/smooth_bce_with_logits.py b/luxonis_train/attached_modules/losses/smooth_bce_with_logits.py index ac976428..edc2bf98 100644 --- a/luxonis_train/attached_modules/losses/smooth_bce_with_logits.py +++ b/luxonis_train/attached_modules/losses/smooth_bce_with_logits.py @@ -1,4 +1,4 @@ -from typing import Literal +from typing import Any, Literal import torch from luxonis_ml.data import LabelType @@ -17,31 +17,32 @@ def __init__( bce_pow: float = 1.0, weight: list[float] | None = None, reduction: Literal["mean", "sum", "none"] = "mean", - **kwargs, + **kwargs: Any, ): """BCE with logits loss and label smoothing. @type label_smoothing: float - @param label_smoothing: Label smoothing factor. 
Defaults to C{0.0}. + @param label_smoothing: Label smoothing factor. Defaults to + C{0.0}. @type bce_pow: float @param bce_pow: Weight for positive samples. Defaults to C{1.0}. @type weight: list[float] | None - @param weight: a manual rescaling weight given to the loss of each batch - element. If given, it has to be a list of length C{nbatch}. + @param weight: a manual rescaling weight given to the loss of + each batch element. If given, it has to be a list of length + C{nbatch}. @type reduction: Literal["mean", "sum", "none"] - @param reduction: Specifies the reduction to apply to the output: C{'none'} | - C{'mean'} | C{'sum'}. C{'none'}: no reduction will be applied, C{'mean'}: - the sum of the output will be divided by the number of elements in the - output, C{'sum'}: the output will be summed. Note: C{size_average} and - C{reduce} are in the process of being deprecated, and in the meantime, - specifying either of those two args will override C{reduction}. Defaults to - C{'mean'}. - @type kwargs: dict - @param kwargs: Additional arguments to pass to L{BaseLoss}. + @param reduction: Specifies the reduction to apply to the + output: C{'none'} | C{'mean'} | C{'sum'}. C{'none'}: no + reduction will be applied, C{'mean'}: the sum of the output + will be divided by the number of elements in the output, + C{'sum'}: the output will be summed. Note: C{size_average} + and C{reduce} are in the process of being deprecated, and in + the meantime, specifying either of those two args will + override C{reduction}. Defaults to C{'mean'}. 
""" super().__init__(**kwargs) - self.negative_smooth_const = 1.0 - 0.5 * label_smoothing - self.positive_smooth_const = 0.5 * label_smoothing + self.positive_smooth_const = 1.0 - label_smoothing + self.negative_smooth_const = label_smoothing self.criterion = BCEWithLogitsLoss( pos_weight=torch.tensor( [bce_pow], @@ -50,24 +51,26 @@ def __init__( reduction=reduction, ) - def forward(self, predictions: list[Tensor], target: Tensor) -> Tensor: + def forward(self, predictions: Tensor, target: Tensor) -> Tensor: """Computes the BCE loss with label smoothing. - @type predictions: list[Tensor] - @param predictions: List of tensors of shape (N, n_classes), containing the - predicted class scores. + @type predictions: Tensor + @param predictions: Network predictions of shape (N, C, ...) @type target: Tensor - @param target: A tensor of shape (N,), containing the ground-truth class labels + @param target: A tensor of the same shape as predictions. @rtype: Tensor @return: A scalar tensor. """ - prediction = predictions[0] - smoothed_target = torch.full_like( - prediction, - self.negative_smooth_const, - device=prediction.device, - ) - smoothed_target[ - torch.arange(target.shape[0]), target - ] = self.positive_smooth_const - return self.criterion.forward(prediction, smoothed_target) + if predictions.shape != target.shape: + raise RuntimeError( + f"Target tensor dimension ({target.shape}) and predictions tensor " + f"dimension ({predictions.shape}) should be the same." 
+ ) + + if self.negative_smooth_const != 0.0: + target = ( + target * self.positive_smooth_const + + (1 - target) * self.negative_smooth_const + ) + + return self.criterion(predictions, target) diff --git a/luxonis_train/attached_modules/losses/softmax_focal_loss.py b/luxonis_train/attached_modules/losses/softmax_focal_loss.py index 14f32e54..43c844f3 100644 --- a/luxonis_train/attached_modules/losses/softmax_focal_loss.py +++ b/luxonis_train/attached_modules/losses/softmax_focal_loss.py @@ -1,6 +1,5 @@ -# TODO: document - -from typing import Literal +import logging +from typing import Any, Literal import torch from luxonis_ml.data import LabelType @@ -10,21 +9,26 @@ from .cross_entropy import CrossEntropyLoss +logger = logging.getLogger(__name__) + +# TODO: Add support for multi-class tasks class SoftmaxFocalLoss(BaseLoss[Tensor, Tensor]): supported_labels = [LabelType.SEGMENTATION, LabelType.CLASSIFICATION] def __init__( self, - alpha: float | list[float] = 0.25, + alpha: float = 0.25, gamma: float = 2.0, reduction: Literal["none", "mean", "sum"] = "mean", - **kwargs, + **kwargs: Any, ): - """Focal loss implementation for multi-class/multi-label tasks using Softmax. + """Focal loss implementation for binary classification and + segmentation tasks using Softmax. - @type alpha: float | list[float] - @param alpha: Weighting factor for the rare class. Defaults to C{0.25}. + @type alpha: float + @param alpha: Weighting factor for the rare class. Defaults to + C{0.25}. @type gamma: float @param gamma: Focusing parameter. Defaults to C{2.0}. 
@type reduction: Literal["none", "mean", "sum"] @@ -40,13 +44,7 @@ def __init__( def forward(self, predictions: Tensor, target: Tensor) -> Tensor: ce_loss = self.ce_criterion.forward(predictions, target) pt = torch.exp(-ce_loss) - loss = ce_loss * ((1 - pt) ** self.gamma) - - if isinstance(self.alpha, float) and self.alpha >= 0: - loss = self.alpha * loss - elif isinstance(self.alpha, list): - alpha_t = torch.tensor(self.alpha)[target] - loss = alpha_t * loss + loss = ce_loss * ((1 - pt) ** self.gamma) * self.alpha if self.reduction == "mean": loss = loss.mean() diff --git a/luxonis_train/attached_modules/metrics/__init__.py b/luxonis_train/attached_modules/metrics/__init__.py index 9e73e4ac..b1dc40ea 100644 --- a/luxonis_train/attached_modules/metrics/__init__.py +++ b/luxonis_train/attached_modules/metrics/__init__.py @@ -1,8 +1,8 @@ from .base_metric import BaseMetric -from .common import Accuracy, F1Score, JaccardIndex, Precision, Recall from .mean_average_precision import MeanAveragePrecision from .mean_average_precision_keypoints import MeanAveragePrecisionKeypoints from .object_keypoint_similarity import ObjectKeypointSimilarity +from .torchmetrics import Accuracy, F1Score, JaccardIndex, Precision, Recall __all__ = [ "Accuracy", diff --git a/luxonis_train/attached_modules/metrics/base_metric.py b/luxonis_train/attached_modules/metrics/base_metric.py index b2e456c9..a4109d2d 100644 --- a/luxonis_train/attached_modules/metrics/base_metric.py +++ b/luxonis_train/attached_modules/metrics/base_metric.py @@ -5,8 +5,8 @@ from typing_extensions import TypeVarTuple, Unpack from luxonis_train.attached_modules import BaseAttachedModule +from luxonis_train.utils import Labels, Packet from luxonis_train.utils.registry import METRICS -from luxonis_train.utils.types import Labels, Packet Ts = TypeVarTuple("Ts") @@ -19,8 +19,9 @@ class BaseMetric( ): """A base class for all metrics. - This class defines the basic interface for all metrics. 
It utilizes automatic - registration of defined subclasses to a L{METRICS} registry. + This class defines the basic interface for all metrics. It utilizes + automatic registration of defined subclasses to a L{METRICS} + registry. """ @abstractmethod @@ -33,7 +34,9 @@ def update(self, *args: Unpack[Ts]) -> None: ... @abstractmethod - def compute(self) -> Tensor | tuple[Tensor, dict[str, Tensor]] | dict[str, Tensor]: + def compute( + self, + ) -> Tensor | tuple[Tensor, dict[str, Tensor]] | dict[str, Tensor]: """Computes the metric. @rtype: Tensor | tuple[Tensor, dict[str, Tensor]] | dict[str, Tensor] @@ -48,12 +51,14 @@ def compute(self) -> Tensor | tuple[Tensor, dict[str, Tensor]] | dict[str, Tenso def run_update(self, outputs: Packet[Tensor], labels: Labels) -> None: """Calls the metric's update method. - Validates and prepares the inputs, then calls the metric's update method. + Validates and prepares the inputs, then calls the metric's + update method. @type outputs: Packet[Tensor] @param outputs: The outputs of the model. @type labels: Labels - @param labels: The labels of the model. @raises L{IncompatibleException}: If the - inputs are not compatible with the module. + @param labels: The labels of the model. @raises + L{IncompatibleException}: If the inputs are not compatible + with the module. 
""" self.update(*self.prepare(outputs, labels)) diff --git a/luxonis_train/attached_modules/metrics/common.py b/luxonis_train/attached_modules/metrics/common.py deleted file mode 100644 index 97e8a7ec..00000000 --- a/luxonis_train/attached_modules/metrics/common.py +++ /dev/null @@ -1,92 +0,0 @@ -import logging - -import torchmetrics -from luxonis_ml.data import LabelType -from torch import Tensor - -from .base_metric import BaseMetric - -logger = logging.getLogger(__name__) - - -class TorchMetricWrapper(BaseMetric): - def __init__(self, **kwargs): - super().__init__(node=kwargs.pop("node", None)) - task = kwargs.get("task") - - if self.node.n_classes > 1: - if task == "binary": - raise ValueError( - f"Task type set to '{task}', but the dataset has more than 1 class. " - f"Set the `task` parameter for {self.name} to either 'multiclass' or 'multilabel'." - ) - task = "multiclass" - else: - if task == "multiclass": - raise ValueError( - f"Task type set to '{task}', but the dataset has only 1 class. " - f"Set the `task` parameter for {self.name} to 'binary'." - ) - task = "binary" - if "task" not in kwargs: - logger.warning( - f"Task type not specified for {self.name}, assuming '{task}'. " - "If this is not correct, please set the `task` parameter explicitly." - ) - kwargs["task"] = task - self._task = task - - if self._task == "multiclass": - if "num_classes" not in kwargs: - if self.node is None: - raise ValueError( - "Either `node` or `num_classes` must be provided to " - "multiclass torchmetrics." - ) - kwargs["num_classes"] = self.node.n_classes - elif self._task == "multilabel": - if "num_labels" not in kwargs: - if self.node is None: - raise ValueError( - "Either `node` or `num_labels` must be provided to " - "multilabel torchmetrics." 
- ) - kwargs["num_labels"] = self.node.n_classes - - self.metric = self.Metric(**kwargs) - - def update(self, preds, target, *args, **kwargs) -> None: - if self._task in ["multiclass"]: - target = target.argmax(dim=1) - self.metric.update(preds, target, *args, **kwargs) - - def compute(self) -> Tensor: - return self.metric.compute() - - def reset(self) -> None: - self.metric.reset() - - -class Accuracy(TorchMetricWrapper): - supported_labels = [LabelType.CLASSIFICATION, LabelType.SEGMENTATION] - Metric = torchmetrics.Accuracy - - -class F1Score(TorchMetricWrapper): - supported_labels = [LabelType.CLASSIFICATION, LabelType.SEGMENTATION] - Metric = torchmetrics.F1Score - - -class JaccardIndex(TorchMetricWrapper): - supported_labels = [LabelType.CLASSIFICATION, LabelType.SEGMENTATION] - Metric = torchmetrics.JaccardIndex - - -class Precision(TorchMetricWrapper): - supported_labels = [LabelType.CLASSIFICATION, LabelType.SEGMENTATION] - Metric = torchmetrics.Precision - - -class Recall(TorchMetricWrapper): - supported_labels = [LabelType.CLASSIFICATION, LabelType.SEGMENTATION] - Metric = torchmetrics.Recall diff --git a/luxonis_train/attached_modules/metrics/mean_average_precision.py b/luxonis_train/attached_modules/metrics/mean_average_precision.py index ffdf5e22..6d51f55b 100644 --- a/luxonis_train/attached_modules/metrics/mean_average_precision.py +++ b/luxonis_train/attached_modules/metrics/mean_average_precision.py @@ -1,23 +1,29 @@ +from typing import Any + import torchmetrics.detection as detection +from luxonis_ml.data import LabelType from torch import Tensor from torchvision.ops import box_convert -from luxonis_train.utils.types import Labels, LabelType, Packet +from luxonis_train.utils import Labels, Packet from .base_metric import BaseMetric -class MeanAveragePrecision(BaseMetric): - """Compute the Mean-Average-Precision (mAP) and Mean-Average-Recall (mAR) for object - detection predictions. 
+class MeanAveragePrecision( + BaseMetric[list[dict[str, Tensor]], list[dict[str, Tensor]]] +): + """Compute the Mean-Average-Precision (mAP) and Mean-Average-Recall + (mAR) for object detection predictions. - Adapted from U{Mean-Average-Precision (mAP) and Mean-Average-Recall (mAR) + Adapted from U{Mean-Average-Precision (mAP) and Mean-Average-Recall + (mAR) }. """ supported_labels = [LabelType.BOUNDINGBOX] - def __init__(self, **kwargs): + def __init__(self, **kwargs: Any): super().__init__(**kwargs) self.metric = detection.MeanAveragePrecision() @@ -29,12 +35,12 @@ def update( self.metric.update(outputs, labels) def prepare( - self, outputs: Packet[Tensor], labels: Labels + self, inputs: Packet[Tensor], labels: Labels ) -> tuple[list[dict[str, Tensor]], list[dict[str, Tensor]]]: - box_label = self.get_label(labels)[0] - output_nms = self.get_input_tensors(outputs) + box_label = self.get_label(labels) + output_nms = self.get_input_tensors(inputs) - image_size = self.node.original_in_shape[1:] + image_size = self.original_in_shape[1:] output_list: list[dict[str, Tensor]] = [] label_list: list[dict[str, Tensor]] = [] @@ -51,7 +57,9 @@ def prepare( curr_bboxs = box_convert(curr_label[:, 2:], "xywh", "xyxy") curr_bboxs[:, 0::2] *= image_size[1] curr_bboxs[:, 1::2] *= image_size[0] - label_list.append({"boxes": curr_bboxs, "labels": curr_label[:, 1].int()}) + label_list.append( + {"boxes": curr_bboxs, "labels": curr_label[:, 1].int()} + ) return output_list, label_list @@ -59,11 +67,21 @@ def reset(self) -> None: self.metric.reset() def compute(self) -> tuple[Tensor, dict[str, Tensor]]: - metric_dict = self.metric.compute() + metric_dict: dict[str, Tensor] = self.metric.compute() del metric_dict["classes"] del metric_dict["map_per_class"] del metric_dict["mar_100_per_class"] + for key in list(metric_dict.keys()): + if "map" in key: + map = metric_dict[key] + mar_key = key.replace("map", "mar") + if mar_key in metric_dict: + mar = metric_dict[mar_key] + 
metric_dict[key.replace("map", "f1")] = ( + 2 * (map * mar) / (map + mar) + ) + map = metric_dict.pop("map") return map, metric_dict diff --git a/luxonis_train/attached_modules/metrics/mean_average_precision_keypoints.py b/luxonis_train/attached_modules/metrics/mean_average_precision_keypoints.py index 0d558b43..3b34c242 100644 --- a/luxonis_train/attached_modules/metrics/mean_average_precision_keypoints.py +++ b/luxonis_train/attached_modules/metrics/mean_average_precision_keypoints.py @@ -3,21 +3,20 @@ from typing import Any, Literal import torch +from luxonis_ml.data import LabelType from pycocotools.coco import COCO from pycocotools.cocoeval import COCOeval from torch import Tensor from torchvision.ops import box_convert -from luxonis_train.attached_modules.metrics.object_keypoint_similarity import ( - get_area_factor, - get_sigmas, -) -from luxonis_train.utils.types import Labels, LabelType, Packet +from luxonis_train.utils import Labels, Packet, get_sigmas, get_with_default from .base_metric import BaseMetric -class MeanAveragePrecisionKeypoints(BaseMetric): +class MeanAveragePrecisionKeypoints( + BaseMetric[list[dict[str, Tensor]], list[dict[str, Tensor]]] +): """Mean Average Precision metric for keypoints. Uses C{OKS} as IoU measure. @@ -48,15 +47,14 @@ def __init__( box_format: Literal["xyxy", "xywh", "cxcywh"] = "xyxy", **kwargs, ): - """Implementation of the mean average precision metric for keypoint detections. + """Implementation of the mean average precision metric for + keypoint detections. Adapted from: U{https://github.com/Lightning-AI/torchmetrics/blob/v1.0.1/src/ torchmetrics/detection/mean_ap.py}. - @license: Apache-2.0 License + @license: Apache License, Version 2.0 - @type num_keypoints: int - @param num_keypoints: Number of keypoints. @type sigmas: list[float] | None @param sigmas: Sigma for each keypoint to weigh its importance, if C{None}, then use COCO if possible otherwise defaults. Defaults to C{None}. 
@@ -66,15 +64,15 @@ def __init__( @param max_dets: Maximum number of detections to be considered per image. Defaults to C{20}. @type box_format: Literal["xyxy", "xywh", "cxcywh"] @param box_format: Input bbox format. - @type kwargs: Any - @param kwargs: Additional arguments to pass to L{BaseMetric}. """ super().__init__(**kwargs) - self.n_keypoints = self.node.n_keypoints - - self.sigmas = get_sigmas(sigmas, self.n_keypoints, self.name) - self.area_factor = get_area_factor(area_factor, self.name) + self.sigmas = get_sigmas( + sigmas, self.n_keypoints, caller_name=self.name + ) + self.area_factor = get_with_default( + area_factor, "bbox area scaling", self.name, default=0.53 + ) self.max_dets = max_dets allowed_box_formats = ("xyxy", "xywh", "cxcywh") @@ -93,12 +91,16 @@ def __init__( self.add_state("groundtruth_labels", default=[], dist_reduce_fx=None) self.add_state("groundtruth_area", default=[], dist_reduce_fx=None) self.add_state("groundtruth_crowds", default=[], dist_reduce_fx=None) - self.add_state("groundtruth_keypoints", default=[], dist_reduce_fx=None) + self.add_state( + "groundtruth_keypoints", default=[], dist_reduce_fx=None + ) - def prepare(self, outputs: Packet[Tensor], labels: Labels): + def prepare( + self, inputs: Packet[Tensor], labels: Labels + ) -> tuple[list[dict[str, Tensor]], list[dict[str, Tensor]]]: assert self.node.tasks is not None - kpts = self.get_label(labels, LabelType.KEYPOINTS)[0] - boxes = self.get_label(labels, LabelType.BOUNDINGBOX)[0] + kpts = self.get_label(labels, LabelType.KEYPOINTS) + boxes = self.get_label(labels, LabelType.BOUNDINGBOX) nkpts = (kpts.shape[1] - 2) // 3 label = torch.zeros((len(boxes), nkpts * 3 + 6)) @@ -108,19 +110,21 @@ def prepare(self, outputs: Packet[Tensor], labels: Labels): label[:, 7::3] = kpts[:, 3::3] # y label[:, 8::3] = kpts[:, 4::3] # visiblity - output_list_kpt_map = [] - label_list_kpt_map = [] - image_size = self.node.original_in_shape[1:] + output_list_kpt_map: list[dict[str, Tensor]] = [] 
+ label_list_kpt_map: list[dict[str, Tensor]] = [] + image_size = self.original_in_shape[1:] - output_kpts = self.get_input_tensors(outputs, LabelType.KEYPOINTS) - output_bboxes = self.get_input_tensors(outputs, LabelType.BOUNDINGBOX) + output_kpts = self.get_input_tensors(inputs, LabelType.KEYPOINTS) + output_bboxes = self.get_input_tensors(inputs, LabelType.BOUNDINGBOX) for i in range(len(output_kpts)): output_list_kpt_map.append( { "boxes": output_bboxes[i][:, :4], "scores": output_bboxes[i][:, 4], "labels": output_bboxes[i][:, 5].int(), - "keypoints": output_kpts[i].reshape(-1, self.n_keypoints * 3), + "keypoints": output_kpts[i].reshape( + -1, self.n_keypoints * 3 + ), } ) @@ -223,7 +227,9 @@ def compute(self) -> tuple[Tensor, dict[str, Tensor]]: coco_target.createIndex() coco_preds.createIndex() - self.coco_eval = COCOeval(coco_target, coco_preds, iouType="keypoints") + self.coco_eval = COCOeval( + coco_target, coco_preds, iouType="keypoints" + ) self.coco_eval.params.kpt_oks_sigmas = self.sigmas.cpu().numpy() self.coco_eval.params.maxDets = [self.max_dets] @@ -254,20 +260,24 @@ def _get_coco_format( crowds: list[Tensor] | None = None, area: list[Tensor] | None = None, ) -> dict[str, list[dict[str, Any]]]: - """Transforms and returns all cached targets or predictions in COCO format. + """Transforms and returns all cached targets or predictions in + COCO format. - Format is defined at U{https://cocodataset.org/#format-data}. + Format is defined at U{ + https://cocodataset.org/#format-data}. 
""" - images = [] - annotations = [] - annotation_id = 1 # has to start with 1, otherwise COCOEval results are wrong + images: list[dict[str, int]] = [] + annotations: list[dict[str, Any]] = [] + annotation_id = ( + 1 # has to start with 1, otherwise COCOEval results are wrong + ) for image_id, (image_boxes, image_kpts, image_labels) in enumerate( zip(boxes, keypoints, labels) ): - image_boxes_list = image_boxes.cpu().tolist() - image_kpts_list = image_kpts.cpu().tolist() - image_labels_list = image_labels.cpu().tolist() + image_boxes_list: list[list[float]] = image_boxes.cpu().tolist() + image_kpts_list: list[list[float]] = image_kpts.cpu().tolist() + image_labels_list: list[int] = image_labels.cpu().tolist() images.append({"id": image_id}) @@ -297,8 +307,12 @@ def _get_coco_format( else: area_stat = image_box[2] * image_box[3] * self.area_factor - num_keypoints = len( - [i for i in range(2, len(image_kpt), 3) if image_kpt[i] != 0] + n_keypoints = len( + [ + i + for i in range(2, len(image_kpt), 3) + if image_kpt[i] != 0 + ] ) # number of annotated keypoints annotation = { "id": annotation_id, @@ -307,14 +321,18 @@ def _get_coco_format( "area": area_stat, "category_id": image_label, "iscrowd": ( - crowds[image_id][k].cpu().tolist() if crowds is not None else 0 + crowds[image_id][k].cpu().tolist() + if crowds is not None + else 0 ), "keypoints": image_kpt, - "num_keypoints": num_keypoints, + "num_keypoints": n_keypoints, } if scores is not None: score = scores[image_id][k].cpu().tolist() + # `tolist` returns a number for scalar tensors, + # the name is misleading if not isinstance(score, float): raise ValueError( f"Invalid input score of sample {image_id}, element {k}" @@ -325,9 +343,15 @@ def _get_coco_format( annotation_id += 1 classes = [{"id": i, "name": str(i)} for i in self._get_classes()] - return {"images": images, "annotations": annotations, "categories": classes} + return { + "images": images, + "annotations": annotations, + "categories": classes, + } - 
def _get_safe_item_values(self, item: dict[str, Tensor]) -> tuple[Tensor, Tensor]: + def _get_safe_item_values( + self, item: dict[str, Tensor] + ) -> tuple[Tensor, Tensor]: """Convert and return the boxes.""" boxes = self._fix_empty_tensors(item["boxes"]) if boxes.numel() > 0: @@ -336,7 +360,8 @@ def _get_safe_item_values(self, item: dict[str, Tensor]) -> tuple[Tensor, Tensor return boxes, keypoints def _get_classes(self) -> list[int]: - """Return a list of unique classes found in ground truth and detection data.""" + """Return a list of unique classes found in ground truth and + detection data.""" if len(self.pred_labels) > 0 or len(self.groundtruth_labels) > 0: return ( torch.cat(self.pred_labels + self.groundtruth_labels) @@ -348,7 +373,8 @@ def _get_classes(self) -> list[int]: @staticmethod def _fix_empty_tensors(input_tensor: Tensor) -> Tensor: - """Empty tensors can cause problems in DDP mode, this methods corrects them.""" + """Empty tensors can cause problems in DDP mode, this methods + corrects them.""" if input_tensor.numel() == 0 and input_tensor.ndim == 1: return input_tensor.unsqueeze(0) return input_tensor diff --git a/luxonis_train/attached_modules/metrics/object_keypoint_similarity.py b/luxonis_train/attached_modules/metrics/object_keypoint_similarity.py index 4cbd1cac..503a00ad 100644 --- a/luxonis_train/attached_modules/metrics/object_keypoint_similarity.py +++ b/luxonis_train/attached_modules/metrics/object_keypoint_similarity.py @@ -1,11 +1,13 @@ import logging +from typing import Any import torch +from luxonis_ml.data import LabelType from scipy.optimize import linear_sum_assignment from torch import Tensor from torchvision.ops import box_convert -from luxonis_train.utils.types import Labels, LabelType, Packet +from luxonis_train.utils import Labels, Packet, get_sigmas, get_with_default from .base_metric import BaseMetric @@ -33,46 +35,46 @@ def __init__( sigmas: list[float] | None = None, area_factor: float | None = None, use_cocoeval_oks: 
bool = True, - **kwargs, + **kwargs: Any, ) -> None: - """Object Keypoint Similarity metric for evaluating keypoint predictions. + """Object Keypoint Similarity metric for evaluating keypoint + predictions. - @type n_keypoints: int - @param n_keypoints: Number of keypoints. @type sigmas: list[float] | None - @param sigmas: Sigma for each keypoint to weigh its importance, if C{None}, then - use COCO if possible otherwise defaults. Defaults to C{None}. + @param sigmas: Sigma for each keypoint to weigh its importance, + if C{None}, then use COCO if possible otherwise defaults. + Defaults to C{None}. @type area_factor: float | None - @param area_factor: Factor by which we multiply bbox area. If None then use - default one. Defaults to C{None}. + @param area_factor: Factor by which we multiply bbox area. If + None then use default one. Defaults to C{None}. @type use_cocoeval_oks: bool - @param use_cocoeval_oks: Whether to use same OKS formula as in COCOeval or use - the one from definition. Defaults to C{True}. + @param use_cocoeval_oks: Whether to use same OKS formula as in + COCOeval or use the one from definition. Defaults to + C{True}. """ super().__init__(**kwargs) - if n_keypoints is None and self.node is None: - raise ValueError( - f"Either `n_keypoints` or `node` must be provided to {self.name}." 
- ) - self.n_keypoints = n_keypoints or self.node.n_keypoints - - self.sigmas = get_sigmas(sigmas, self.n_keypoints, self.name) - self.area_factor = get_area_factor(area_factor, self.name) + self.sigmas = get_sigmas( + sigmas, self.n_keypoints, caller_name=self.name + ) + self.area_factor = get_with_default( + area_factor, "bbox area scaling", self.name, default=0.53 + ) self.use_cocoeval_oks = use_cocoeval_oks self.add_state("pred_keypoints", default=[], dist_reduce_fx=None) - self.add_state("groundtruth_keypoints", default=[], dist_reduce_fx=None) + self.add_state( + "groundtruth_keypoints", default=[], dist_reduce_fx=None + ) self.add_state("groundtruth_scales", default=[], dist_reduce_fx=None) def prepare( - self, outputs: Packet[Tensor], labels: Labels + self, inputs: Packet[Tensor], labels: Labels ) -> tuple[list[dict[str, Tensor]], list[dict[str, Tensor]]]: - assert self.node.tasks is not None - kpts_labels = self.get_label(labels, LabelType.KEYPOINTS)[0] - bbox_labels = self.get_label(labels, LabelType.BOUNDINGBOX)[0] - num_keypoints = (kpts_labels.shape[1] - 2) // 3 - label = torch.zeros((len(bbox_labels), num_keypoints * 3 + 6)) + kpts_labels = self.get_label(labels, LabelType.KEYPOINTS) + bbox_labels = self.get_label(labels, LabelType.BOUNDINGBOX) + n_keypoints = (kpts_labels.shape[1] - 2) // 3 + label = torch.zeros((len(bbox_labels), n_keypoints * 3 + 6)) label[:, :2] = bbox_labels[:, :2] label[:, 2:6] = box_convert(bbox_labels[:, 2:], "xywh", "xyxy") label[:, 6::3] = kpts_labels[:, 2::3] # insert kp x coordinates @@ -81,10 +83,10 @@ def prepare( output_list_oks = [] label_list_oks = [] - image_size = self.node.original_in_shape[1:] + image_size = self.original_in_shape[1:] for i, pred_kpt in enumerate( - self.get_input_tensors(outputs, LabelType.KEYPOINTS) + self.get_input_tensors(inputs, LabelType.KEYPOINTS) ): output_list_oks.append({"keypoints": pred_kpt}) @@ -97,8 +99,12 @@ def prepare( curr_kpts[:, 1::3] *= image_size[0] curr_bboxs_widths = 
curr_bboxs[:, 2] - curr_bboxs[:, 0] curr_bboxs_heights = curr_bboxs[:, 3] - curr_bboxs[:, 1] - curr_scales = curr_bboxs_widths * curr_bboxs_heights * self.area_factor - label_list_oks.append({"keypoints": curr_kpts, "scales": curr_scales}) + curr_scales = ( + curr_bboxs_widths * curr_bboxs_heights * self.area_factor + ) + label_list_oks.append( + {"keypoints": curr_kpts, "scales": curr_scales} + ) return output_list_oks, label_list_oks @@ -129,11 +135,11 @@ def update( width and height are unnormalized. """ for item in preds: - keypoints = fix_empty_tensors(item["keypoints"]) + keypoints = self._fix_empty_tensors(item["keypoints"]) self.pred_keypoints.append(keypoints) for item in target: - keypoints = fix_empty_tensors(item["keypoints"]) + keypoints = self._fix_empty_tensors(item["keypoints"]) self.groundtruth_keypoints.append(keypoints) self.groundtruth_scales.append(item["scales"]) @@ -144,10 +150,14 @@ def compute(self) -> Tensor: image_mean_oks = torch.zeros(len(self.groundtruth_keypoints)) for i, (pred_kpts, gt_kpts, gt_scales) in enumerate( zip( - self.pred_keypoints, self.groundtruth_keypoints, self.groundtruth_scales + self.pred_keypoints, + self.groundtruth_keypoints, + self.groundtruth_scales, ) ): - gt_kpts = torch.reshape(gt_kpts, (-1, self.n_keypoints, 3)) # [N, K, 3] + gt_kpts = torch.reshape( + gt_kpts, (-1, self.n_keypoints, 3) + ) # [N, K, 3] image_ious = compute_oks( pred_kpts, @@ -159,13 +169,23 @@ def compute(self) -> Tensor: gt_indices, pred_indices = linear_sum_assignment( image_ious.cpu().numpy(), maximize=True ) - matched_ious = [image_ious[n, m] for n, m in zip(gt_indices, pred_indices)] + matched_ious = [ + image_ious[n, m] for n, m in zip(gt_indices, pred_indices) + ] image_mean_oks[i] = torch.tensor(matched_ious).mean() final_oks = image_mean_oks.nanmean() return final_oks + @staticmethod + def _fix_empty_tensors(input_tensor: Tensor) -> Tensor: + """Empty tensors can cause problems in DDP mode, this methods + corrects them.""" + if 
input_tensor.numel() == 0 and input_tensor.ndim == 1: + return input_tensor.unsqueeze(0) + return input_tensor + def compute_oks( pred: Tensor, @@ -174,7 +194,8 @@ def compute_oks( sigmas: Tensor, use_cocoeval_oks: bool, ) -> Tensor: - """Compute Object Keypoint Similarity between every GT and prediction. + """Compute Object Keypoint Similarity between every GT and + prediction. @type pred: Tensor[N, K, 3] @param pred: Predicted keypoints. @@ -183,11 +204,11 @@ def compute_oks( @type scales: Tensor[M] @param scales: Scales of the bounding boxes. @type sigmas: Tensor - @param sigmas: Sigma for each keypoint to weigh its importance, if C{None}, then use - same weights for all. + @param sigmas: Sigma for each keypoint to weigh its importance, if + C{None}, then use same weights for all. @type use_cocoeval_oks: bool - @param use_cocoeval_oks: Whether to use same OKS formula as in COCOeval or use the - one from definition. + @param use_cocoeval_oks: Whether to use same OKS formula as in + COCOeval or use the one from definition. @rtype: Tensor @return: Object Keypoint Similarity every pred and gt [M, N] """ @@ -211,73 +232,3 @@ def compute_oks( return (torch.exp(-oks) * kpt_mask[:, None]).sum(-1) / ( kpt_mask.sum(-1)[:, None] + eps ) - - -def fix_empty_tensors(input_tensor: Tensor) -> Tensor: - """Empty tensors can cause problems in DDP mode, this methods corrects them.""" - if input_tensor.numel() == 0 and input_tensor.ndim == 1: - return input_tensor.unsqueeze(0) - return input_tensor - - -def get_sigmas( - sigmas: list[float] | None, n_keypoints: int, class_name: str | None -) -> Tensor: - """Validate and set the sigma values.""" - if sigmas is not None: - if len(sigmas) == n_keypoints: - return torch.tensor(sigmas, dtype=torch.float32) - else: - error_msg = "The length of the sigmas list must be the same as the number of keypoints." 
- if class_name: - error_msg = f"[{class_name}] {error_msg}" - raise ValueError(error_msg) - else: - if n_keypoints == 17: - warn_msg = "Default COCO sigmas are being used." - if class_name: - warn_msg = f"[{class_name}] {warn_msg}" - logger.warning(warn_msg) - return torch.tensor( - [ - 0.026, - 0.025, - 0.025, - 0.035, - 0.035, - 0.079, - 0.079, - 0.072, - 0.072, - 0.062, - 0.062, - 0.107, - 0.107, - 0.087, - 0.087, - 0.089, - 0.089, - ], - dtype=torch.float32, - ) - else: - warn_msg = "Default sigma of 0.04 is being used for each keypoint." - if class_name: - warn_msg = f"[{class_name}] {warn_msg}" - logger.warning(warn_msg) - return torch.tensor([0.04] * n_keypoints, dtype=torch.float32) - - -def get_area_factor(area_factor: float | None, class_name: str | None) -> float: - """Set the default area factor if not defined.""" - factor = 0.53 - if area_factor is None: - warn_msg = ( - f"Default area_factor of {factor} is being used for bbox area scaling." - ) - if class_name: - warn_msg = f"[{class_name}] {warn_msg}" - logger.warning(warn_msg) - return factor - else: - return area_factor diff --git a/luxonis_train/attached_modules/metrics/torchmetrics.py b/luxonis_train/attached_modules/metrics/torchmetrics.py new file mode 100644 index 00000000..a8797a13 --- /dev/null +++ b/luxonis_train/attached_modules/metrics/torchmetrics.py @@ -0,0 +1,114 @@ +import logging +from contextlib import suppress +from typing import Any + +import torchmetrics +from luxonis_ml.data import LabelType +from torch import Tensor + +from .base_metric import BaseMetric + +logger = logging.getLogger(__name__) + + +class TorchMetricWrapper(BaseMetric[Tensor]): + Metric: type[torchmetrics.Metric] + + def __init__(self, **kwargs: Any): + super().__init__(node=kwargs.pop("node", None)) + task = kwargs.get("task") + if task is None: + if "num_classes" in kwargs: + if kwargs["num_classes"] == 1: + task = "binary" + else: + task = "multiclass" + elif "num_labels" in kwargs: + task = "multilabel" + 
else: + with suppress(RuntimeError, ValueError): + if self.n_classes == 1: + task = "binary" + else: + task = "multiclass" + + if task is None: + raise ValueError( + f"'{self.name}' does not have the 'task' parameter set. " + "and it is not possible to infer it from the other arguments. " + "You can either set the 'task' parameter explicitly, provide either 'num_classes' or 'num_labels' argument, " + "or use this metric with a node. " + "The 'task' can be one of 'binary', 'multiclass', or 'multilabel'. " + ) + self._task = task + kwargs["task"] = task + + n_classes: int | None = kwargs.get( + "num_classes", kwargs.get("num_labels") + ) + + if n_classes is None: + with suppress(RuntimeError, ValueError): + n_classes = self.n_classes + + if n_classes is None and task != "binary": + arg_name = "num_classes" if task == "multiclass" else "num_labels" + raise ValueError( + f"'{self.name}' metric does not have the '{arg_name}' parameter set " + "and it is not possible to infer it from the other arguments. " + "You can either set the '{arg_name}' parameter explicitly, or use this metric with a node." + ) + + if task == "binary" and n_classes is not None and n_classes > 1: + raise ValueError( + f"Task type set to '{task}', but the dataset has more than 1 class. " + f"Set the `task` argument of '{self.name}' to either 'multiclass' or 'multilabel'." + ) + elif task != "binary" and n_classes == 1: + raise ValueError( + f"Task type set to '{task}', but the dataset has only 1 class. " + f"Set the `task` argument of '{self.name}' to 'binary'." 
+ ) + + if task == "multiclass": + kwargs["num_classes"] = n_classes + elif task == "multilabel": + kwargs["num_labels"] = n_classes + + self.metric = self.Metric(**kwargs) + + def update(self, preds: Tensor, target: Tensor) -> None: + if self._task in ["multiclass"]: + target = target.argmax(dim=1) + self.metric.update(preds, target) + + def compute(self) -> Tensor: + return self.metric.compute() + + def reset(self) -> None: + self.metric.reset() + + +class Accuracy(TorchMetricWrapper): + supported_labels = [LabelType.CLASSIFICATION, LabelType.SEGMENTATION] + Metric = torchmetrics.Accuracy + + +class F1Score(TorchMetricWrapper): + supported_labels = [LabelType.CLASSIFICATION, LabelType.SEGMENTATION] + Metric = torchmetrics.F1Score + + +class JaccardIndex(TorchMetricWrapper): + supported_labels = [LabelType.CLASSIFICATION, LabelType.SEGMENTATION] + Metric = torchmetrics.JaccardIndex + + +class Precision(TorchMetricWrapper): + supported_labels = [LabelType.CLASSIFICATION, LabelType.SEGMENTATION] + Metric = torchmetrics.Precision + + +class Recall(TorchMetricWrapper): + supported_labels = [LabelType.CLASSIFICATION, LabelType.SEGMENTATION] + Metric = torchmetrics.Recall diff --git a/luxonis_train/attached_modules/visualizers/base_visualizer.py b/luxonis_train/attached_modules/visualizers/base_visualizer.py index 5fa6db62..817a09d5 100644 --- a/luxonis_train/attached_modules/visualizers/base_visualizer.py +++ b/luxonis_train/attached_modules/visualizers/base_visualizer.py @@ -4,8 +4,8 @@ from typing_extensions import TypeVarTuple, Unpack from luxonis_train.attached_modules import BaseAttachedModule +from luxonis_train.utils import Labels, Packet from luxonis_train.utils.registry import VISUALIZERS -from luxonis_train.utils.types import Labels, Packet Ts = TypeVarTuple("Ts") @@ -17,8 +17,9 @@ class BaseVisualizer( ): """A base class for all visualizers. - This class defines the basic interface for all visualizers. 
It utilizes automatic - registration of defined subclasses to the L{VISUALIZERS} registry. + This class defines the basic interface for all visualizers. It + utilizes automatic registration of defined subclasses to the + L{VISUALIZERS} registry. """ @abstractmethod @@ -27,7 +28,12 @@ def forward( label_canvas: Tensor, prediction_canvas: Tensor, *args: Unpack[Ts], - ) -> Tensor | tuple[Tensor, Tensor] | tuple[Tensor, list[Tensor]] | list[Tensor]: + ) -> ( + Tensor + | tuple[Tensor, Tensor] + | tuple[Tensor, list[Tensor]] + | list[Tensor] + ): """Forward pass of the visualizer. Takes an image and the prepared inputs from the `prepare` method and @@ -62,4 +68,6 @@ def run( inputs: Packet[Tensor], labels: Labels, ) -> Tensor | tuple[Tensor, Tensor] | tuple[Tensor, list[Tensor]]: - return self(label_canvas, prediction_canvas, *self.prepare(inputs, labels)) + return self( + label_canvas, prediction_canvas, *self.prepare(inputs, labels) + ) diff --git a/luxonis_train/attached_modules/visualizers/bbox_visualizer.py b/luxonis_train/attached_modules/visualizers/bbox_visualizer.py index df3ac933..e544bf06 100644 --- a/luxonis_train/attached_modules/visualizers/bbox_visualizer.py +++ b/luxonis_train/attached_modules/visualizers/bbox_visualizer.py @@ -1,12 +1,16 @@ import logging import torch +from luxonis_ml.data import LabelType from torch import Tensor -from luxonis_train.utils.types import LabelType - from .base_visualizer import BaseVisualizer -from .utils import Color, draw_bounding_box_labels, draw_bounding_boxes, get_color +from .utils import ( + Color, + draw_bounding_box_labels, + draw_bounding_boxes, + get_color, +) class BBoxVisualizer(BaseVisualizer[list[Tensor], Tensor]): @@ -25,39 +29,50 @@ def __init__( ): """Visualizer for bounding box predictions. - Creates a visualization of the bounding box predictions and labels. + Creates a visualization of the bounding box predictions and + labels. 
@type labels: dict[int, str] | list[str] | None - @param labels: Either a dictionary mapping class indices to names, or a list of - names. If list is provided, the label mapping is done by index. By default, - no labels are drawn. + @param labels: Either a dictionary mapping class indices to + names, or a list of names. If list is provided, the label + mapping is done by index. By default, no labels are drawn. @type draw_labels: bool - @param draw_labels: Whether or not to draw labels. Defaults to C{True}. + @param draw_labels: Whether or not to draw labels. Defaults to + C{True}. @type colors: dict[int, Color] | list[Color] | None - @param colors: Either a dictionary mapping class indices to colors, or a list of - colors. If list is provided, the color mapping is done by index. By default, - random colors are used. + @param colors: Either a dictionary mapping class indices to + colors, or a list of colors. If list is provided, the color + mapping is done by index. By default, random colors are + used. @type fill: bool - @param fill: Whether or not to fill the bounding boxes. Defaults to C{False}. + @param fill: Whether or not to fill the bounding boxes. Defaults + to C{False}. @type width: int | None - @param width: The width of the bounding box lines. Defaults to C{1}. + @param width: The width of the bounding box lines. Defaults to + C{1}. @type font: str | None - @param font: A filename containing a TrueType font. Defaults to C{None}. + @param font: A filename containing a TrueType font. Defaults to + C{None}. @type font_size: int | None - @param font_size: The font size to use for the labels. Defaults to C{None}. + @param font_size: The font size to use for the labels. Defaults + to C{None}. 
""" super().__init__(**kwargs) if isinstance(labels, list): labels = {i: label for i, label in enumerate(labels)} self.bbox_labels = labels or { - i: label for i, label in enumerate(self.node.class_names) + i: label for i, label in enumerate(self.class_names) } if colors is None: - colors = {label: get_color(i) for i, label in self.bbox_labels.items()} + colors = { + label: get_color(i) for i, label in self.bbox_labels.items() + } if isinstance(colors, list): - colors = {self.bbox_labels[i]: color for i, color in enumerate(colors)} + colors = { + self.bbox_labels[i]: color for i, color in enumerate(colors) + } self.colors = colors self.fill = fill self.width = width @@ -159,16 +174,17 @@ def forward( predictions: list[Tensor], targets: Tensor, ) -> tuple[Tensor, Tensor]: - """Creates a visualization of the bounding box predictions and labels. + """Creates a visualization of the bounding box predictions and + labels. @type label_canvas: Tensor @param label_canvas: The canvas containing the labels. @type prediction_canvas: Tensor @param prediction_canvas: The canvas containing the predictions. @type prediction: Tensor - @param prediction: The predicted bounding boxes. The shape should be [N, 6], - where N is the number of bounding boxes and the last dimension is [x1, y1, - x2, y2, class, conf]. + @param prediction: The predicted bounding boxes. The shape + should be [N, 6], where N is the number of bounding boxes + and the last dimension is [x1, y1, x2, y2, class, conf]. @type targets: Tensor @param targets: The target bounding boxes. """ diff --git a/luxonis_train/attached_modules/visualizers/classification_visualizer.py b/luxonis_train/attached_modules/visualizers/classification_visualizer.py index 20a5710e..9d26172b 100644 --- a/luxonis_train/attached_modules/visualizers/classification_visualizer.py +++ b/luxonis_train/attached_modules/visualizers/classification_visualizer.py @@ -23,8 +23,8 @@ def __init__( """Visualizer for classification tasks. 
@type include_plot: bool - @param include_plot: Whether to include a plot of the class probabilities in the - visualization. Defaults to C{True}. + @param include_plot: Whether to include a plot of the class + probabilities in the visualization. Defaults to C{True}. """ super().__init__(**kwargs) self.include_plot = include_plot @@ -34,19 +34,21 @@ def __init__( def _get_class_name(self, pred: Tensor) -> str: idx = int((pred.argmax()).item()) - if self.node.class_names is None: + if self.class_names is None: return str(idx) - return self.node.class_names[idx] + return self.class_names[idx] - def _generate_plot(self, prediction: Tensor, width: int, height: int) -> Tensor: - prediction = prediction.softmax(-1).detach().cpu().numpy() + def _generate_plot( + self, prediction: Tensor, width: int, height: int + ) -> Tensor: + pred = prediction.softmax(-1).detach().cpu().numpy() fig, ax = plt.subplots(figsize=(width / 100, height / 100)) - ax.bar(np.arange(len(prediction)), prediction) - ax.set_xticks(np.arange(len(prediction))) - if self.node.class_names is not None: - ax.set_xticklabels(self.node.class_names, rotation=90) + ax.bar(np.arange(len(pred)), pred) + ax.set_xticks(np.arange(len(pred))) + if self.class_names is not None: + ax.set_xticklabels(self.class_names, rotation=90) else: - ax.set_xticklabels(np.arange(1, len(prediction) + 1)) + ax.set_xticklabels(np.arange(1, len(pred) + 1)) ax.set_ylim(0, 1) ax.set_xlabel("Class") ax.set_ylabel("Probability") @@ -88,7 +90,9 @@ def forward( overlay[i] = numpy_to_torch_img(arr) if self.include_plot: plots[i] = self._generate_plot( - prediction, prediction_canvas.shape[3], prediction_canvas.shape[2] + prediction, + prediction_canvas.shape[3], + prediction_canvas.shape[2], ) if self.include_plot: diff --git a/luxonis_train/attached_modules/visualizers/keypoint_visualizer.py b/luxonis_train/attached_modules/visualizers/keypoint_visualizer.py index 287d5e1c..53b9cb88 100644 --- 
a/luxonis_train/attached_modules/visualizers/keypoint_visualizer.py +++ b/luxonis_train/attached_modules/visualizers/keypoint_visualizer.py @@ -22,18 +22,20 @@ def __init__( """Visualizer for keypoints. @type visibility_threshold: float - @param visibility_threshold: Threshold for visibility of keypoints. If the - visibility of a keypoint is below this threshold, it is considered as not - visible. Defaults to C{0.5}. + @param visibility_threshold: Threshold for visibility of + keypoints. If the visibility of a keypoint is below this + threshold, it is considered as not visible. Defaults to + C{0.5}. @type connectivity: list[tuple[int, int]] | None - @param connectivity: List of tuples of keypoint indices that define the - connections in the skeleton. Defaults to C{None}. + @param connectivity: List of tuples of keypoint indices that + define the connections in the skeleton. Defaults to C{None}. @type visible_color: L{Color} - @param visible_color: Color of visible keypoints. Either a string or a tuple of - RGB values. Defaults to C{"red"}. + @param visible_color: Color of visible keypoints. Either a + string or a tuple of RGB values. Defaults to C{"red"}. @type nonvisible_color: L{Color} | None - @param nonvisible_color: Color of nonvisible keypoints. If C{None}, nonvisible - keypoints are not drawn. Defaults to C{None}. + @param nonvisible_color: Color of nonvisible keypoints. If + C{None}, nonvisible keypoints are not drawn. Defaults to + C{None}. 
""" super().__init__(**kwargs) self.visibility_threshold = visibility_threshold @@ -62,7 +64,9 @@ def draw_predictions( if nonvisible_color is not None: _kwargs = deepcopy(kwargs) _kwargs["colors"] = nonvisible_color - nonvisible_kpts = prediction[..., :2] * mask.unsqueeze(-1).float() + nonvisible_kpts = ( + prediction[..., :2] * mask.unsqueeze(-1).float() + ) viz[i] = draw_keypoints( viz[i].clone(), nonvisible_kpts[..., :2], diff --git a/luxonis_train/attached_modules/visualizers/multi_visualizer.py b/luxonis_train/attached_modules/visualizers/multi_visualizer.py index c7925ecc..b7ecbfbb 100644 --- a/luxonis_train/attached_modules/visualizers/multi_visualizer.py +++ b/luxonis_train/attached_modules/visualizers/multi_visualizer.py @@ -7,7 +7,8 @@ class MultiVisualizer(BaseVisualizer[Packet[Tensor], Labels]): - """Special type of visualizer that combines multiple visualizers together. + """Special type of visualizer that combines multiple visualizers + together. All the visualizers are applied in the order they are provided and they all draw on the same canvas. 
@@ -25,14 +26,16 @@ def __init__(self, visualizers: list[Kwargs], **kwargs): self.visualizers = [] for item in visualizers: visualizer_params = item.get("params", {}) - visualizer = VISUALIZERS.get(item["name"])(**visualizer_params, **kwargs) + visualizer = VISUALIZERS.get(item["name"])( + **visualizer_params, **kwargs + ) self.visualizers.append(visualizer) def prepare( - self, output: Packet[Tensor], label: Labels, idx: int = 0 + self, inputs: Packet[Tensor], label: Labels, idx: int = 0 ) -> tuple[Packet[Tensor], Labels]: self._idx = idx - return output, label + return inputs, label def forward( self, @@ -42,12 +45,16 @@ def forward( labels: Labels, ) -> tuple[Tensor, Tensor]: for visualizer in self.visualizers: - match visualizer.run(label_canvas, prediction_canvas, outputs, labels): + match visualizer.run( + label_canvas, prediction_canvas, outputs, labels + ): case Tensor() as prediction_viz: prediction_canvas = prediction_viz case (Tensor(data=label_viz), Tensor(data=prediction_viz)): label_canvas = label_viz prediction_canvas = prediction_viz case _: - raise NotImplementedError + raise NotImplementedError( + "Unexpected return type from visualizer." 
+ ) return label_canvas, prediction_canvas diff --git a/luxonis_train/attached_modules/visualizers/segmentation_visualizer.py b/luxonis_train/attached_modules/visualizers/segmentation_visualizer.py index 85b93ce1..15e2fd09 100644 --- a/luxonis_train/attached_modules/visualizers/segmentation_visualizer.py +++ b/luxonis_train/attached_modules/visualizers/segmentation_visualizer.py @@ -1,12 +1,16 @@ import logging import torch +from luxonis_ml.data import LabelType from torch import Tensor -from luxonis_train.utils.types import LabelType - from .base_visualizer import BaseVisualizer -from .utils import Color, draw_segmentation_labels, get_color, seg_output_to_bool +from .utils import ( + Color, + draw_segmentation_labels, + get_color, + seg_output_to_bool, +) logger = logging.getLogger(__name__) log_disable = False @@ -98,7 +102,8 @@ def forward( targets: Tensor, **kwargs, ) -> tuple[Tensor, Tensor]: - """Creates a visualization of the segmentation predictions and labels. + """Creates a visualization of the segmentation predictions and + labels. @type label_canvas: Tensor @param label_canvas: The canvas to draw the labels on. @@ -146,7 +151,9 @@ def _adjust_colors( if not log_disable: if colors is None: - logger.warning("No colors provided. Using random colors instead.") + logger.warning( + "No colors provided. Using random colors instead." + ) elif data.size(1) != len(colors): logger.warning( f"Number of colors ({len(colors)}) does not match number of " diff --git a/luxonis_train/attached_modules/visualizers/utils.py b/luxonis_train/attached_modules/visualizers/utils.py index c55b12ce..402ab98f 100644 --- a/luxonis_train/attached_modules/visualizers/utils.py +++ b/luxonis_train/attached_modules/visualizers/utils.py @@ -19,7 +19,7 @@ draw_segmentation_masks, ) -from luxonis_train.utils.config import Config +from luxonis_train.utils import Config Color = str | tuple[int, int, int] """Color type alias. 
@@ -44,13 +44,14 @@ def figure_to_torch(fig: Figure, width: int, height: int) -> Tensor: def torch_img_to_numpy( img: Tensor, reverse_colors: bool = False ) -> npt.NDArray[np.uint8]: - """Converts a torch image (CHW) to a numpy array (HWC). Optionally also converts - colors. + """Converts a torch image (CHW) to a numpy array (HWC). Optionally + also converts colors. @type img: Tensor @param img: Torch image (CHW) @type reverse_colors: bool - @param reverse_colors: Whether to reverse colors (RGB to BGR). Defaults to False. + @param reverse_colors: Whether to reverse colors (RGB to BGR). + Defaults to False. @rtype: npt.NDArray[np.uint8] @return: Numpy image (HWC) """ @@ -129,8 +130,8 @@ def draw_bounding_box_labels(img: Tensor, label: Tensor, **kwargs) -> Tensor: @type img: Tensor @param img: Image to draw on. @type label: Tensor - @param label: Bounding box label. The shape should be (n_instances, 4), where the - last dimension is (x, y, w, h). + @param label: Bounding box label. The shape should be (n_instances, + 4), where the last dimension is (x, y, w, h). @type kwargs: dict @param kwargs: Additional arguments to pass to L{torchvision.utils.draw_bounding_boxes}. @@ -150,10 +151,11 @@ def draw_keypoint_labels(img: Tensor, label: Tensor, **kwargs) -> Tensor: @type img: Tensor @param img: Image to draw on. @type label: Tensor - @param label: Keypoint label. The shape should be (n_instances, 3), where the last - dimension is (x, y, visibility). + @param label: Keypoint label. The shape should be (n_instances, 3), + where the last dimension is (x, y, visibility). @type kwargs: dict - @param kwargs: Additional arguments to pass to L{torchvision.utils.draw_keypoints}. + @param kwargs: Additional arguments to pass to + L{torchvision.utils.draw_keypoints}. @rtype: Tensor @return: Image with keypoint labels drawn on. 
""" @@ -191,7 +193,8 @@ def unnormalize( std: list[float] | float | None = None, to_uint8: bool = False, ) -> Tensor: - """Unnormalizes an image back to original values, optionally converts it to uint8. + """Unnormalizes an image back to original values, optionally + converts it to uint8. @type img: Tensor @param img: Image to unnormalize. @@ -304,9 +307,12 @@ def get_color(seed: int) -> Color: # # TEST: def combine_visualizations( - visualization: Tensor | tuple[Tensor, Tensor] | tuple[Tensor, list[Tensor]], + visualization: Tensor + | tuple[Tensor, Tensor] + | tuple[Tensor, list[Tensor]], ) -> Tensor: - """Default way of combining multiple visualizations into one final image.""" + """Default way of combining multiple visualizations into one final + image.""" def resize_to_match( fst: Tensor, @@ -315,7 +321,7 @@ def resize_to_match( keep_size: Literal["larger", "smaller", "first", "second"] = "larger", resize_along: Literal["width", "height", "exact"] = "height", keep_aspect_ratio: bool = True, - ): + ) -> tuple[Tensor, Tensor]: """Resizes two images so they have the same size. Resizes two images so they can be concateneted together. 
It's possible to @@ -411,7 +417,9 @@ def resize_to_match( case Tensor() as viz: return viz case (Tensor(data=viz_labels), Tensor(data=viz_predictions)): - viz_labels, viz_predictions = resize_to_match(viz_labels, viz_predictions) + viz_labels, viz_predictions = resize_to_match( + viz_labels, viz_predictions + ) return torch.cat([viz_labels, viz_predictions], dim=-1) case (Tensor(data=_), [*viz]) if isinstance(viz, list) and all( diff --git a/luxonis_train/callbacks/__init__.py b/luxonis_train/callbacks/__init__.py index 4c7f7824..95f860a1 100644 --- a/luxonis_train/callbacks/__init__.py +++ b/luxonis_train/callbacks/__init__.py @@ -1,9 +1,13 @@ from lightning.pytorch.callbacks import ( DeviceStatsMonitor, EarlyStopping, + GradientAccumulationScheduler, LearningRateMonitor, ModelCheckpoint, + ModelPruning, RichModelSummary, + StochasticWeightAveraging, + Timer, ) from luxonis_train.utils.registry import CALLBACKS @@ -26,6 +30,10 @@ CALLBACKS.register_module(module=ModelCheckpoint) CALLBACKS.register_module(module=RichModelSummary) CALLBACKS.register_module(module=DeviceStatsMonitor) +CALLBACKS.register_module(module=GradientAccumulationScheduler) +CALLBACKS.register_module(module=StochasticWeightAveraging) +CALLBACKS.register_module(module=Timer) +CALLBACKS.register_module(module=ModelPruning) __all__ = [ diff --git a/luxonis_train/callbacks/archive_on_train_end.py b/luxonis_train/callbacks/archive_on_train_end.py index d9e7b298..30949e4e 100644 --- a/luxonis_train/callbacks/archive_on_train_end.py +++ b/luxonis_train/callbacks/archive_on_train_end.py @@ -26,12 +26,12 @@ def on_train_end( """ path = self.get_checkpoint(pl_module) - if path is None: + if path is None: # pragma: no cover logger.warning("Skipping model archiving.") return onnx_path = pl_module.core._exported_models.get("onnx") - if onnx_path is None: + if onnx_path is None: # pragma: no cover logger.error( "Model executable not found. " "Make sure to run exporter callback before archiver callback. 
" diff --git a/luxonis_train/callbacks/export_on_train_end.py b/luxonis_train/callbacks/export_on_train_end.py index 261c4ef6..e727e81f 100644 --- a/luxonis_train/callbacks/export_on_train_end.py +++ b/luxonis_train/callbacks/export_on_train_end.py @@ -25,7 +25,7 @@ def on_train_end( @param pl_module: Pytorch Lightning module. """ path = self.get_checkpoint(pl_module) - if path is None: + if path is None: # pragma: no cover logger.warning("Skipping model export.") return diff --git a/luxonis_train/callbacks/gpu_stats_monitor.py b/luxonis_train/callbacks/gpu_stats_monitor.py index 9479d4d2..a189ed3f 100644 --- a/luxonis_train/callbacks/gpu_stats_monitor.py +++ b/luxonis_train/callbacks/gpu_stats_monitor.py @@ -27,11 +27,11 @@ import pytorch_lightning as pl import torch -from lightning.pytorch.accelerators import CUDAAccelerator # type: ignore -from pytorch_lightning.utilities import rank_zero_only -from pytorch_lightning.utilities.exceptions import ( - MisconfigurationException, # type: ignore +from lightning.pytorch.accelerators.cuda import CUDAAccelerator +from lightning_fabric.utilities.exceptions import ( + MisconfigurationException, # noqa: F401 ) +from pytorch_lightning.utilities import rank_zero_only from pytorch_lightning.utilities.parsing import AttributeDict from pytorch_lightning.utilities.types import STEP_OUTPUT @@ -40,49 +40,6 @@ @CALLBACKS.register_module() class GPUStatsMonitor(pl.Callback): - """Automatically monitors and logs GPU stats during training stage. - C{GPUStatsMonitor} is a callback and in order to use it you need to assign a logger - in the C{Trainer}. - - Args: - memory_utilization: Set to C{True} to monitor used, free and percentage of memory - utilization at the start and end of each step. Default: C{True}. - gpu_utilization: Set to C{True} to monitor percentage of GPU utilization - at the start and end of each step. Default: C{True}. - intra_step_time: Set to C{True} to monitor the time of each step. Default: {False}. 
- inter_step_time: Set to C{True} to monitor the time between the end of one step - and the start of the next step. Default: C{False}. - fan_speed: Set to C{True} to monitor percentage of fan speed. Default: C{False}. - temperature: Set to C{True} to monitor the memory and gpu temperature in degree Celsius. - Default: C{False}. - - Raises: - MisconfigurationException: - If NVIDIA driver is not installed, not running on GPUs, or C{Trainer} has no logger. - - Example:: - - >>> from pytorch_lightning import Trainer - >>> from pytorch_lightning.callbacks import GPUStatsMonitor - >>> gpu_stats = GPUStatsMonitor() # doctest: +SKIP - >>> trainer = Trainer(callbacks=[gpu_stats]) # doctest: +SKIP - - GPU stats are mainly based on C{nvidia-smi --query-gpu} command. The description of the queries is as follows: - - - **fan.speed** – The fan speed value is the percent of maximum speed that the device's fan is currently - intended to run at. It ranges from 0 to 100 %. Note: The reported speed is the intended fan speed. - If the fan is physically blocked and unable to spin, this output will not match the actual fan speed. - Many parts do not report fan speeds because they rely on cooling via fans in the surrounding enclosure. - - **memory.used** – Total memory allocated by active contexts. - - **memory.free** – Total free memory. - - **utilization.gpu** – Percent of time over the past sample period during which one or more kernels was - executing on the GPU. The sample period may be between 1 second and 1/6 second depending on the product. - - **utilization.memory** – Percent of time over the past sample period during which global (device) memory was - being read or written. The sample period may be between 1 second and 1/6 second depending on the product. - - **temperature.gpu** – Core GPU temperature, in degrees C. - - **temperature.memory** – HBM memory temperature, in degrees C. 
- """ - def __init__( self, memory_utilization: bool = True, @@ -92,6 +49,40 @@ def __init__( fan_speed: bool = False, temperature: bool = False, ): + """Automatically monitors and logs GPU stats during training + stage. C{GPUStatsMonitor} is a callback and in order to use it + you need to assign a logger in the C{Trainer}. + + GPU stats are mainly based on C{nvidia-smi --query-gpu} command. The description of the queries is as follows: + + - C{fan.speed} – The fan speed value is the percent of maximum speed that the device's fan is currently + intended to run at. It ranges from 0 to 100 %. Note: The reported speed is the intended fan speed. + If the fan is physically blocked and unable to spin, this output will not match the actual fan speed. + Many parts do not report fan speeds because they rely on cooling via fans in the surrounding enclosure. + - C{memory.used} – Total memory allocated by active contexts. + - C{memory.free} – Total free memory. + - C{utilization.gpu} – Percent of time over the past sample period during which one or more kernels was + executing on the GPU. The sample period may be between 1 second and 1/6 second depending on the product. + - C{utilization.memory} – Percent of time over the past sample period during which global (device) memory was + being read or written. The sample period may be between 1 second and 1/6 second depending on the product. + - C{temperature.gpu} – Core GPU temperature, in degrees C. + - C{temperature.memory} – HBM memory temperature, in degrees C. + + @type memory_utilization: bool + @param memory_utilization: Set to C{True} to monitor used, free and percentage of memory utilization at the start and end of each step. Defaults to C{True}. + @type gpu_utilization: bool + @param gpu_utilization: Set to C{True} to monitor percentage of GPU utilization at the start and end of each step. Defaults to C{True}. + @type intra_step_time: bool + @param intra_step_time: Set to C{True} to monitor the time of each step. 
Defaults to {False}. + @type inter_step_time: bool + @param inter_step_time: Set to C{True} to monitor the time between the end of one step and the start of the next step. Defaults to C{False}. + @type fan_speed: bool + @param fan_speed: Set to C{True} to monitor percentage of fan speed. Defaults to C{False}. + @type temperature: bool + @param temperature: Set to C{True} to monitor the memory and gpu temperature in degree Celsius. Defaults to C{False}. + @raises MisconfigurationException: If NVIDIA driver is not installed, not running on GPUs, or C{Trainer} has no logger. + """ + super().__init__() if shutil.which("nvidia-smi") is None: @@ -167,7 +158,9 @@ def on_train_batch_start( gpu_stat_keys = self._get_gpu_stat_keys() gpu_stats = self._get_gpu_stats([k for k, _ in gpu_stat_keys]) - logs = self._parse_gpu_stats(self._device_ids, gpu_stats, gpu_stat_keys) + logs = self._parse_gpu_stats( + self._device_ids, gpu_stats, gpu_stat_keys + ) if self._log_stats.inter_step_time and self._snap_inter_step_time: # First log at beginning of second step @@ -193,9 +186,13 @@ def on_train_batch_end( if not trainer._logger_connector.should_update_logs: return - gpu_stat_keys = self._get_gpu_stat_keys() + self._get_gpu_device_stat_keys() + gpu_stat_keys = ( + self._get_gpu_stat_keys() + self._get_gpu_device_stat_keys() + ) gpu_stats = self._get_gpu_stats([k for k, _ in gpu_stat_keys]) - logs = self._parse_gpu_stats(self._device_ids, gpu_stats, gpu_stat_keys) + logs = self._parse_gpu_stats( + self._device_ids, gpu_stats, gpu_stat_keys + ) if self._log_stats.intra_step_time and self._snap_intra_step_time: logs["batch_time/intra_step (ms)"] = ( @@ -213,7 +210,9 @@ def _get_gpu_ids(device_ids: List[int]) -> List[str]: cuda_visible_devices: List[str] = os.getenv( "CUDA_VISIBLE_DEVICES", default=default ).split(",") - return [cuda_visible_devices[device_id].strip() for device_id in device_ids] + return [ + cuda_visible_devices[device_id].strip() for device_id in device_ids + ] def 
_get_gpu_stats(self, queries: List[str]) -> List[List[float]]: if not queries: @@ -251,7 +250,9 @@ def _to_float(x: str) -> float: @staticmethod def _parse_gpu_stats( - device_ids: List[int], stats: List[List[float]], keys: List[Tuple[str, str]] + device_ids: List[int], + stats: List[List[float]], + keys: List[Tuple[str, str]], ) -> Dict[str, float]: """Parse the gpu stats into a loggable dict.""" logs = {} @@ -288,6 +289,8 @@ def _get_gpu_device_stat_keys(self) -> List[Tuple[str, str]]: stat_keys.append(("fan.speed", "%")) if self._log_stats.temperature: - stat_keys.extend([("temperature.gpu", "°C"), ("temperature.memory", "°C")]) + stat_keys.extend( + [("temperature.gpu", "°C"), ("temperature.memory", "°C")] + ) return stat_keys diff --git a/luxonis_train/callbacks/luxonis_progress_bar.py b/luxonis_train/callbacks/luxonis_progress_bar.py index d14fcf08..b8bf6512 100644 --- a/luxonis_train/callbacks/luxonis_progress_bar.py +++ b/luxonis_train/callbacks/luxonis_progress_bar.py @@ -3,7 +3,11 @@ import lightning.pytorch as pl import tabulate -from lightning.pytorch.callbacks import ProgressBar, RichProgressBar, TQDMProgressBar +from lightning.pytorch.callbacks import ( + ProgressBar, + RichProgressBar, + TQDMProgressBar, +) from rich.console import Console from rich.table import Table @@ -14,7 +18,6 @@ class BaseLuxonisProgressBar(ABC, ProgressBar): def get_metrics( self, trainer: pl.Trainer, pl_module: pl.LightningModule ) -> dict[str, int | str | float | dict[str, float]]: - # NOTE: there might be a cleaner way of doing this items = super().get_metrics(trainer, pl_module) items.pop("v_num", None) if trainer.training and pl_module.training_step_outputs: @@ -30,7 +33,8 @@ def print_results( ) -> None: """Prints results to the console. - This includes the stage name, loss value, and tables with metrics. + This includes the stage name, loss value, and tables with + metrics. @type stage: str @param stage: Stage name. 
@@ -39,12 +43,13 @@ def print_results( @type metrics: Mapping[str, Mapping[str, int | str | float]] @param metrics: Metrics in format {table_name: table}. """ - pass + ... @CALLBACKS.register_module() class LuxonisTQDMProgressBar(TQDMProgressBar, BaseLuxonisProgressBar): - """Custom text progress bar based on TQDMProgressBar from Pytorch Lightning.""" + """Custom text progress bar based on TQDMProgressBar from Pytorch + Lightning.""" def __init__(self): super().__init__(leave=True) @@ -71,7 +76,8 @@ def _print_table( @type key_name: str @param key_name: Name of the key column. Defaults to C{"Name"}. @type value_name: str - @param value_name: Name of the value column. Defaults to C{"Value"}. + @param value_name: Name of the value column. Defaults to + C{"Value"}. """ self._rule(title) print( @@ -100,14 +106,15 @@ def print_results( @CALLBACKS.register_module() class LuxonisRichProgressBar(RichProgressBar, BaseLuxonisProgressBar): - """Custom rich text progress bar based on RichProgressBar from Pytorch Lightning.""" + """Custom rich text progress bar based on RichProgressBar from + Pytorch Lightning.""" def __init__(self): super().__init__(leave=True) @property def console(self) -> Console: - if self._console is None: + if self._console is None: # pragma: no cover raise RuntimeError( "Console is not initialized for the `LuxonisRichProgressBar`. " "Consider setting `tracker.use_rich_progress_bar` to `False` in the configuration." @@ -130,7 +137,8 @@ def print_table( @type key_name: str @param key_name: Name of the key column. Defaults to C{"Name"}. @type value_name: str - @param value_name: Name of the value column. Defaults to C{"Value"}. + @param value_name: Name of the value column. Defaults to + C{"Value"}. 
""" rich_table = Table( title=title, @@ -140,10 +148,7 @@ def print_table( rich_table.add_column(key_name, style="magenta") rich_table.add_column(value_name, style="white") for name, value in table.items(): - if isinstance(value, float): - rich_table.add_row(name, f"{value:.5f}") - else: - rich_table.add_row(name, str(value)) + rich_table.add_row(name, f"{value:.5f}") self.console.print(rich_table) def print_results( @@ -153,7 +158,9 @@ def print_results( metrics: Mapping[str, Mapping[str, int | str | float]], ) -> None: self.console.rule(f"{stage}", style="bold magenta") - self.console.print(f"[bold magenta]Loss:[/bold magenta] [white]{loss}[/white]") + self.console.print( + f"[bold magenta]Loss:[/bold magenta] [white]{loss}[/white]" + ) self.console.print("[bold magenta]Metrics:[/bold magenta]") for table_name, table in metrics.items(): self.print_table(table_name, table) diff --git a/luxonis_train/callbacks/metadata_logger.py b/luxonis_train/callbacks/metadata_logger.py index 45ff8717..ab29f7d0 100644 --- a/luxonis_train/callbacks/metadata_logger.py +++ b/luxonis_train/callbacks/metadata_logger.py @@ -6,7 +6,7 @@ import yaml import luxonis_train -from luxonis_train.utils.config import Config +from luxonis_train.utils import Config from luxonis_train.utils.registry import CALLBACKS @@ -15,8 +15,9 @@ class MetadataLogger(pl.Callback): def __init__(self, hyperparams: list[str]): """Callback that logs training metadata. - Metadata include all defined hyperparameters together with git hashes of - luxonis-ml and luxonis-train packages. Also stores this information locally. + Metadata include all defined hyperparameters together with git + hashes of luxonis-ml and luxonis-train packages. Also stores + this information locally. @type hyperparams: list[str] @param hyperparams: List of hyperparameters to log. 
@@ -25,30 +26,44 @@ def __init__(self, hyperparams: list[str]): self.hyperparams = hyperparams def on_fit_start( - self, _: pl.Trainer, pl_module: "luxonis_train.models.LuxonisLightningModule" + self, + _: pl.Trainer, + pl_module: "luxonis_train.models.LuxonisLightningModule", ) -> None: cfg: Config = pl_module.cfg hparams = {key: cfg.get(key) for key in self.hyperparams} - # try to get luxonis-ml and luxonis-train git commit hashes (if installed as editable) luxonis_ml_hash = self._get_editable_package_git_hash("luxonis_ml") - if luxonis_ml_hash: + if luxonis_ml_hash: # pragma: no cover hparams["luxonis_ml"] = luxonis_ml_hash - luxonis_train_hash = self._get_editable_package_git_hash("luxonis_train") - if luxonis_train_hash: + luxonis_train_hash = self._get_editable_package_git_hash( + "luxonis_train" + ) + if luxonis_train_hash: # pragma: no cover hparams["luxonis_train"] = luxonis_train_hash pl_module.logger.log_hyperparams(hparams) - # also save metadata locally - with open(osp.join(pl_module.save_dir, "metadata.yaml"), "w+") as f: + with open(osp.join(pl_module.save_dir, "metadata.yaml"), "w") as f: yaml.dump(hparams, f, default_flow_style=False) @staticmethod - def _get_editable_package_git_hash(package_name: str) -> str | None: + def _get_editable_package_git_hash( + package_name: str, + ) -> str | None: # pragma: no cover + """Get git hash of an editable package. + + @type package_name: str + @param package_name: Name of the package. + @rtype: str or None + @return: Git hash of the package or None if the package is not + installed in editable mode. + """ try: distribution = pkg_resources.get_distribution(package_name) + if distribution.location is None: + return None package_location = osp.join(distribution.location, package_name) # remove any additional folders in path (e.g. 
"/src") diff --git a/luxonis_train/callbacks/module_freezer.py b/luxonis_train/callbacks/module_freezer.py index 4f73ff30..de0afa99 100644 --- a/luxonis_train/callbacks/module_freezer.py +++ b/luxonis_train/callbacks/module_freezer.py @@ -13,7 +13,8 @@ def __init__(self, frozen_modules: list[tuple[nn.Module, int]]): """Callback that freezes parts of the model. @type frozen_modules: list[tuple[nn.Module, int]] - @param frozen_modules: List of tuples of modules and epochs to freeze until. + @param frozen_modules: List of tuples of modules and epochs to + freeze until. """ super().__init__() self.frozen_modules = frozen_modules diff --git a/luxonis_train/callbacks/needs_checkpoint.py b/luxonis_train/callbacks/needs_checkpoint.py index 30355e82..b3de6aed 100644 --- a/luxonis_train/callbacks/needs_checkpoint.py +++ b/luxonis_train/callbacks/needs_checkpoint.py @@ -10,7 +10,9 @@ class NeedsCheckpoint(pl.Callback): def __init__( - self, preferred_checkpoint: Literal["metric", "loss"] = "metric", **kwargs + self, + preferred_checkpoint: Literal["metric", "loss"] = "metric", + **kwargs, ): super().__init__(**kwargs) self.preferred_checkpoint = preferred_checkpoint @@ -40,7 +42,8 @@ def _get_checkpoint( ) return path - def _get_other_type(self, checkpoint_type: str) -> str: + @staticmethod + def _get_other_type(checkpoint_type: str) -> str: if checkpoint_type == "loss": return "metric" return "loss" diff --git a/luxonis_train/callbacks/test_on_train_end.py b/luxonis_train/callbacks/test_on_train_end.py index f2bb09ec..a60a16dd 100644 --- a/luxonis_train/callbacks/test_on_train_end.py +++ b/luxonis_train/callbacks/test_on_train_end.py @@ -27,4 +27,6 @@ def on_train_end( for callback in trainer.callbacks: # type: ignore if isinstance(callback, ModelCheckpoint): if hash(callback.monitor) in best_paths: - callback.best_model_path = best_paths[hash(callback.monitor)] + callback.best_model_path = best_paths[ + hash(callback.monitor) + ] diff --git 
a/luxonis_train/callbacks/upload_checkpoint.py b/luxonis_train/callbacks/upload_checkpoint.py index 29da59ef..b9753e94 100644 --- a/luxonis_train/callbacks/upload_checkpoint.py +++ b/luxonis_train/callbacks/upload_checkpoint.py @@ -12,7 +12,8 @@ @CALLBACKS.register_module() class UploadCheckpoint(pl.Callback): - """Callback that uploads best checkpoint based on the validation loss.""" + """Callback that uploads best checkpoint based on the validation + loss.""" def __init__(self): """Constructs `UploadCheckpoint`. @@ -43,7 +44,9 @@ def on_save_checkpoint( if curr_best_checkpoint not in self.last_best_checkpoints: self.logger.info("Uploading checkpoint...") temp_filename = ( - Path(curr_best_checkpoint).parent.with_suffix(".ckpt").name + Path(curr_best_checkpoint) + .parent.with_suffix(".ckpt") + .name ) torch.save(checkpoint, temp_filename) module.logger.upload_artifact(temp_filename, typ="weights") diff --git a/luxonis_train/core/core.py b/luxonis_train/core/core.py index c683773c..cffa3ff1 100644 --- a/luxonis_train/core/core.py +++ b/luxonis_train/core/core.py @@ -3,7 +3,7 @@ import threading from logging import getLogger from pathlib import Path -from typing import Any, Literal +from typing import Any, Literal, Mapping, overload import lightning.pytorch as pl import lightning_utilities.core.rank_zero as rank_zero_module @@ -16,15 +16,17 @@ from luxonis_ml.nn_archive import ArchiveGenerator from luxonis_ml.nn_archive.config import CONFIG_VERSION from luxonis_ml.utils import LuxonisFileSystem, reset_logging, setup_logging +from typeguard import typechecked from luxonis_train.attached_modules.visualizers import get_unnormalized_images -from luxonis_train.callbacks import LuxonisRichProgressBar, LuxonisTQDMProgressBar +from luxonis_train.callbacks import ( + LuxonisRichProgressBar, + LuxonisTQDMProgressBar, +) +from luxonis_train.loaders import BaseLoaderTorch, collate_fn from luxonis_train.models import LuxonisLightningModule -from luxonis_train.utils.config 
import Config -from luxonis_train.utils.general import DatasetMetadata -from luxonis_train.utils.loaders import BaseLoaderTorch, collate_fn +from luxonis_train.utils import Config, DatasetMetadata, LuxonisTrackerPL from luxonis_train.utils.registry import LOADERS -from luxonis_train.utils.tracker import LuxonisTrackerPL from .utils.export_utils import ( blobconverter_export, @@ -41,8 +43,8 @@ class LuxonisModel: """Common logic of the core components. - This class contains common logic of the core components (trainer, evaluator, - exporter, etc.). + This class contains common logic of the core components (trainer, + evaluator, exporter, etc.). """ def __init__( @@ -80,6 +82,7 @@ def __init__( self.cfg.tracker.save_directory, self.tracker.run_name ) self.log_file = osp.join(self.run_save_dir, "luxonis_train.log") + self.error_message = None # NOTE: to add the file handler (we only get the save dir now, # but we want to use the logger before) @@ -89,10 +92,16 @@ def __init__( # NOTE: overriding logger in pl so it uses our logger to log device info rank_zero_module.log = logger - deterministic = False if self.cfg.trainer.seed is not None: pl.seed_everything(self.cfg.trainer.seed, workers=True) - deterministic = True + + self.pl_trainer = create_trainer( + self.cfg.trainer, + logger=self.tracker, + callbacks=LuxonisRichProgressBar() + if self.cfg.trainer.use_rich_progress_bar + else LuxonisTQDMProgressBar(), + ) self.train_augmentations = Augmentations( image_size=self.cfg.trainer.preprocessing.train_image_size, @@ -114,15 +123,6 @@ def __init__( only_normalize=True, ) - self.pl_trainer = create_trainer( - self.cfg, - logger=self.tracker, - deterministic=deterministic, - callbacks=LuxonisRichProgressBar() - if self.cfg.trainer.use_rich_progress_bar - else LuxonisTQDMProgressBar(), - ) - self.loaders: dict[str, BaseLoaderTorch] = {} for view in ["train", "val", "test"]: loader_name = self.cfg.loader.name @@ -155,27 +155,31 @@ def __init__( sampler = None # TODO: 
implement weighted sampler if self.cfg.trainer.use_weighted_sampler: - raise NotImplementedError("Weighted sampler is not implemented yet.") + raise NotImplementedError( + "Weighted sampler is not implemented yet." + ) self.pytorch_loaders = { view: torch_data.DataLoader( self.loaders[view], batch_size=self.cfg.trainer.batch_size, - num_workers=self.cfg.trainer.num_workers, + num_workers=self.cfg.trainer.n_workers, collate_fn=collate_fn, shuffle=view == "train", drop_last=( - self.cfg.trainer.skip_last_batch if view == "train" else False + self.cfg.trainer.skip_last_batch + if view == "train" + else False ), pin_memory=self.cfg.trainer.pin_memory, sampler=sampler if view == "train" else None, ) for view in ["train", "val", "test"] } - self.error_message = None - self.dataset_metadata = DatasetMetadata.from_loader(self.loaders["train"]) - self.dataset_metadata.set_loader(self.pytorch_loaders["train"]) + self.dataset_metadata = DatasetMetadata.from_loader( + self.loaders["train"] + ) self.cfg.save_data(osp.join(self.run_save_dir, "config.yaml")) @@ -195,7 +199,7 @@ def _train(self, resume: str | None, *args, **kwargs): status = "success" try: self.pl_trainer.fit(*args, ckpt_path=resume, **kwargs) - except Exception as e: + except Exception as e: # pragma: no cover logger.exception("Encountered an exception during training.") status = "failed" raise e @@ -211,29 +215,34 @@ def train( @type new_thread: bool @param new_thread: Runs training in new thread if set to True. @type resume_weights: str | None - @param resume_weights: Path to checkpoint to resume training from. + @param resume_weights: Path to the checkpoint from which to to + resume the training. 
""" if self.cfg.trainer.matmul_precision is not None: logger.info( f"Setting matmul precision to {self.cfg.trainer.matmul_precision}" ) - torch.set_float32_matmul_precision(self.cfg.trainer.matmul_precision) + torch.set_float32_matmul_precision( + self.cfg.trainer.matmul_precision + ) if resume_weights is not None: resume_weights = str( LuxonisFileSystem.download(resume_weights, self.run_save_dir) ) - def graceful_exit(signum: int, _): - logger.info(f"{signal.Signals(signum).name} received, stopping training...") + def graceful_exit(signum: int, _): # pragma: no cover + logger.info( + f"{signal.Signals(signum).name} received, stopping training..." + ) ckpt_path = osp.join(self.run_save_dir, "resume.ckpt") self.pl_trainer.save_checkpoint(ckpt_path) self.tracker.upload_artifact( ckpt_path, typ="checkpoints", name="resume.ckpt" ) self.tracker._finalize(status="failed") - exit(0) + exit() signal.signal(signal.SIGTERM, graceful_exit) @@ -249,7 +258,7 @@ def graceful_exit(signum: int, _): logger.info("Training finished") logger.info(f"Checkpoints saved in: {self.run_save_dir}") - else: + else: # pragma: no cover # Every time exception happens in the Thread, this hook will activate def thread_exception_hook(args): self.error_message = str(args.exc_value) @@ -269,7 +278,10 @@ def thread_exception_hook(args): self.thread.start() def export( - self, onnx_save_path: str | None = None, *, weights: str | Path | None = None + self, + onnx_save_path: str | None = None, + *, + weights: str | Path | None = None, ) -> None: """Runs export. 
@@ -290,8 +302,12 @@ def export( export_save_dir = Path(self.run_save_dir, "export") export_save_dir.mkdir(parents=True, exist_ok=True) - export_path = export_save_dir / (self.cfg.exporter.name or self.cfg.model.name) - onnx_save_path = onnx_save_path or str(export_path.with_suffix(".onnx")) + export_path = export_save_dir / ( + self.cfg.exporter.name or self.cfg.model.name + ) + onnx_save_path = onnx_save_path or str( + export_path.with_suffix(".onnx") + ) with replace_weights(self.lightning_module, weights): output_names = self.lightning_module.export_onnx( @@ -301,7 +317,9 @@ def export( try_onnx_simplify(onnx_save_path) self._exported_models["onnx"] = Path(onnx_save_path) - scale_values, mean_values, reverse_channels = get_preprocessing(self.cfg) + scale_values, mean_values, reverse_channels = get_preprocessing( + self.cfg + ) if self.cfg.exporter.blobconverter.active: try: @@ -313,7 +331,9 @@ def export( str(export_save_dir), onnx_save_path, ) - self._exported_models["blob"] = export_path.with_suffix(".blob") + self._exported_models["blob"] = export_path.with_suffix( + ".blob" + ) except ImportError: logger.error("Failed to import `blobconverter`") logger.warning( @@ -340,36 +360,52 @@ def export( for path in self._exported_models.values(): if self.cfg.exporter.upload_to_run: self.tracker.upload_artifact(path, typ="export") - if self.cfg.exporter.upload_url is not None: + if self.cfg.exporter.upload_url is not None: # pragma: no cover LuxonisFileSystem.upload(path, self.cfg.exporter.upload_url) with open(export_path.with_suffix(".yaml"), "w") as f: yaml.dump(modelconverter_config, f) if self.cfg.exporter.upload_to_run: self.tracker.upload_artifact(f.name, name=f.name, typ="export") - if self.cfg.exporter.upload_url is not None: + if self.cfg.exporter.upload_url is not None: # pragma: no cover LuxonisFileSystem.upload(f.name, self.cfg.exporter.upload_url) + @overload def test( - self, new_thread: bool = False, view: Literal["train", "test", "val"] = "val" - ) 
-> None: + self, + new_thread: Literal[False] = ..., + view: Literal["train", "test", "val"] = "val", + ) -> Mapping[str, float]: ... + + @overload + def test( + self, + new_thread: Literal[True] = ..., + view: Literal["train", "test", "val"] = "val", + ) -> None: ... + + @typechecked + def test( + self, + new_thread: bool = False, + view: Literal["train", "val", "test"] = "val", + ) -> Mapping[str, float] | None: """Runs testing. @type new_thread: bool @param new_thread: Runs testing in a new thread if set to True. @type view: Literal["train", "test", "val"] @param view: Which view to run the testing on. Defauls to "val". + @rtype: Mapping[str, float] | None + @return: If new_thread is False, returns a dictionary test + results. """ - if view not in self.pytorch_loaders: - raise ValueError( - f"View {view} is not valid. Valid views are: 'train', 'val', 'test'." - ) loader = self.pytorch_loaders[view] if not new_thread: - self.pl_trainer.test(self.lightning_module, loader) - else: + return self.pl_trainer.test(self.lightning_module, loader)[0] + else: # pragma: no cover self.thread = threading.Thread( target=self.pl_trainer.test, args=(self.lightning_module, loader), @@ -377,22 +413,24 @@ def test( ) self.thread.start() - def infer(self, view: str = "val", save_dir: str | Path | None = None) -> None: + @typechecked + def infer( + self, + view: Literal["train", "val", "test"] = "val", + save_dir: str | Path | None = None, + ) -> None: """Runs inference. @type view: str - @param view: Which split to run the inference on. Valid values are: 'train', - 'val', 'test'. Defaults to "val". + @param view: Which split to run the inference on. Valid values + are: 'train', 'val', 'test'. Defaults to "val". @type save_dir: str | Path | None - @param save_dir: Directory where to save the visualizations. If not specified, - visualizations will be rendered on the screen. + @param save_dir: Directory where to save the visualizations. 
If + not specified, visualizations will be rendered on the + screen. """ self.lightning_module.eval() - if view not in self.pytorch_loaders: - raise ValueError( - f"View {view} is not valid. Valid views are: 'train', 'val', 'test'." - ) for inputs, labels in self.pytorch_loaders[view]: images = get_unnormalized_images(self.cfg, inputs) outputs = self.lightning_module.forward( @@ -418,18 +456,24 @@ def _objective(trial: optuna.trial.Trial) -> float: **tracker_params, ) - run_save_dir = osp.join(cfg_tracker.save_directory, child_tracker.run_name) + run_save_dir = osp.join( + cfg_tracker.save_directory, child_tracker.run_name + ) assert self.cfg.tuner is not None - curr_params = get_trial_params(all_augs, self.cfg.tuner.params, trial) + curr_params = get_trial_params( + all_augs, self.cfg.tuner.params, trial + ) curr_params["model.predefined_model"] = None cfg_copy = self.cfg.model_copy(deep=True) + # manually remove Normalize so it doesn't + # get duplicated when creating new cfg instance cfg_copy.trainer.preprocessing.augmentations = [ a for a in cfg_copy.trainer.preprocessing.augmentations if a.name != "Normalize" - ] # manually remove Normalize so it doesn't duplicate it when creating new cfg instance + ] cfg = Config.get_config(cfg_copy.model_dump(), curr_params) child_tracker.log_hyperparams(curr_params) @@ -449,18 +493,16 @@ def _objective(trial: optuna.trial.Trial) -> float: else LuxonisTQDMProgressBar() ] - pruner_callback = PyTorchLightningPruningCallback(trial, monitor="val/loss") + pruner_callback = PyTorchLightningPruningCallback( + trial, monitor="val/loss" + ) callbacks.append(pruner_callback) - deterministic = False - if self.cfg.trainer.seed: + + if self.cfg.trainer.seed is not None: pl.seed_everything(cfg.trainer.seed, workers=True) - deterministic = True pl_trainer = create_trainer( - cfg, - logger=child_tracker, - callbacks=callbacks, - deterministic=deterministic, + cfg.trainer, logger=child_tracker, callbacks=callbacks ) try: @@ -475,7 +517,9 @@ 
def _objective(trial: optuna.trial.Trial) -> float: except optuna.TrialPruned as e: logger.info(e) - if "val/loss" not in pl_trainer.callback_metrics: + if ( + "val/loss" not in pl_trainer.callback_metrics + ): # pragma: no cover raise ValueError( "No validation loss found. " "This can happen if `TestOnTrainEnd` callback is used." @@ -485,9 +529,13 @@ def _objective(trial: optuna.trial.Trial) -> float: cfg_tuner = self.cfg.tuner if cfg_tuner is None: - raise ValueError("You have to specify the `tuner` section in config.") + raise ValueError( + "You have to specify the `tuner` section in config." + ) - all_augs = [a.name for a in self.cfg.trainer.preprocessing.augmentations] + all_augs = [ + a.name for a in self.cfg.trainer.preprocessing.augmentations + ] rank = rank_zero_only.rank cfg_tracker = self.cfg.tracker tracker_params = cfg_tracker.model_dump() @@ -499,7 +547,7 @@ def _objective(trial: optuna.trial.Trial) -> float: is_sweep=False, **tracker_params, ) - if self.parent_tracker.is_mlflow: + if self.parent_tracker.is_mlflow: # pragma: no cover # Experiment needs to be interacted with to create actual MLFlow run self.parent_tracker.experiment["mlflow"].active_run() @@ -515,7 +563,7 @@ def _objective(trial: optuna.trial.Trial) -> float: if cfg_tuner.storage.active: if cfg_tuner.storage.storage_type == "local": storage = "sqlite:///study_local.db" - else: + else: # pragma: no cover storage = "postgresql://{}:{}@{}:{}/{}".format( self.cfg.ENVIRON.POSTGRES_USER, self.cfg.ENVIRON.POSTGRES_PASSWORD, @@ -540,7 +588,7 @@ def _objective(trial: optuna.trial.Trial) -> float: self.parent_tracker.log_hyperparams(study.best_params) - if self.cfg.tracker.is_wandb: + if self.cfg.tracker.is_wandb: # pragma: no cover # If wandb used then init parent tracker separately at the end wandb_parent_tracker = LuxonisTrackerPL( rank=rank_zero_only.rank, @@ -555,8 +603,8 @@ def archive(self, path: str | Path | None = None) -> Path: """Generates an NN Archive out of a model executable. 
@type path: str | Path | None - @param path: Path to the model executable. If not specified, the model will be - exported first. + @param path: Path to the model executable. If not specified, the + model will be exported first. @rtype: Path @return: Path to the generated NN Archive. """ @@ -583,8 +631,12 @@ def _mult(lst: list[float | int]) -> list[float]: return [round(x * 255.0, 5) for x in lst] preprocessing = { # TODO: keep preprocessing same for each input? - "mean": _mult(self.cfg.trainer.preprocessing.normalize.params["mean"]), - "scale": _mult(self.cfg.trainer.preprocessing.normalize.params["std"]), + "mean": _mult( + self.cfg.trainer.preprocessing.normalize.params["mean"] + ), + "scale": _mult( + self.cfg.trainer.preprocessing.normalize.params["std"] + ), "reverse_channels": self.cfg.trainer.preprocessing.train_rgb, "interleaved_to_planar": False, # TODO: make it modifiable? } @@ -642,8 +694,10 @@ def _mult(lst: list[float | int]) -> list[float]: logger.info(f"NN Archive saved to {archive_path}") - if self.cfg.archiver.upload_url is not None: - LuxonisFileSystem.upload(archive_path, self.cfg.archiver.upload_url) + if self.cfg.archiver.upload_url is not None: # pragma: no cover + LuxonisFileSystem.upload( + archive_path, self.cfg.archiver.upload_url + ) if self.cfg.archiver.upload_to_run: self.tracker.upload_artifact(archive_path, typ="archive") @@ -655,14 +709,15 @@ def get_status(self) -> tuple[int, int]: """Get current status of training. @rtype: tuple[int, int] - @return: First element is current epoch, second element is total number of - epochs. + @return: First element is current epoch, second element is total + number of epochs. """ return self.lightning_module.get_status() @rank_zero_only def get_status_percentage(self) -> float: - """Return percentage of current training, takes into account early stopping. + """Return percentage of current training, takes into account + early stopping. 
@rtype: float @return: Percentage of current training in range 0-100. @@ -671,7 +726,8 @@ def get_status_percentage(self) -> float: @rank_zero_only def get_error_message(self) -> str | None: - """Return error message if one occurs while running in thread, otherwise None. + """Return error message if one occurs while running in thread, + otherwise None. @rtype: str | None @return: Error message @@ -680,10 +736,12 @@ def get_error_message(self) -> str | None: @rank_zero_only def get_min_loss_checkpoint_path(self) -> str | None: - """Return best checkpoint path with respect to minimal validation loss. + """Return best checkpoint path with respect to minimal + validation loss. @rtype: str - @return: Path to best checkpoint with respect to minimal validation loss + @return: Path to best checkpoint with respect to minimal + validation loss """ if not self.pl_trainer.checkpoint_callbacks: return None @@ -691,10 +749,12 @@ def get_min_loss_checkpoint_path(self) -> str | None: @rank_zero_only def get_best_metric_checkpoint_path(self) -> str | None: - """Return best checkpoint path with respect to best validation metric. + """Return best checkpoint path with respect to best validation + metric. 
@rtype: str - @return: Path to best checkpoint with respect to best validation metric + @return: Path to best checkpoint with respect to best validation + metric """ if len(self.pl_trainer.checkpoint_callbacks) < 2: return None diff --git a/luxonis_train/core/utils/archive_utils.py b/luxonis_train/core/utils/archive_utils.py index 72cdefc7..96c2bcde 100644 --- a/luxonis_train/core/utils/archive_utils.py +++ b/luxonis_train/core/utils/archive_utils.py @@ -15,7 +15,7 @@ ImplementedHeads, ImplementedHeadsIsSoxtmaxed, ) -from luxonis_train.utils.config import Config +from luxonis_train.utils import Config logger = logging.getLogger(__name__) @@ -63,7 +63,7 @@ def _from_onnx_dtype(dtype: int) -> DataType: TensorProto.FLOAT: "float32", TensorProto.FLOAT16: "float16", } - if dtype not in dtype_map: + if dtype not in dtype_map: # pragma: no cover raise ValueError(f"Unsupported ONNX data type: `{dtype}`") return DataType(dtype_map[dtype]) @@ -72,7 +72,7 @@ def _from_onnx_dtype(dtype: int) -> DataType: def _load_onnx_model(onnx_path: Path) -> onnx.ModelProto: try: return onnx.load(str(onnx_path)) - except Exception as e: + except Exception as e: # pragma: no cover raise ValueError(f"Failed to load ONNX model: `{onnx_path}`") from e @@ -98,7 +98,9 @@ def _get_onnx_inputs(onnx_path: Path) -> dict[str, MetadataDict]: for inp in model.graph.input: shape = [dim.dim_value for dim in inp.type.tensor_type.shape.dim] inputs[inp.name]["shape"] = shape - inputs[inp.name]["dtype"] = _from_onnx_dtype(inp.type.tensor_type.elem_type) + inputs[inp.name]["dtype"] = _from_onnx_dtype( + inp.type.tensor_type.elem_type + ) return inputs @@ -116,7 +118,7 @@ def _get_classes( node_task = "segmentation" case "ImplicitKeypointBBoxHead" | "EfficientKeypointBBoxHead": node_task = "keypoints" - case _: + case _: # pragma: no cover raise ValueError("Node does not map to a default task.") return classes.get(node_task, []) @@ -137,7 +139,9 @@ def _get_head_specific_parameters( parameters = {} if head_name 
== "ClassificationHead": - parameters["is_softmax"] = getattr(ImplementedHeadsIsSoxtmaxed, head_name).value + parameters["is_softmax"] = getattr( + ImplementedHeadsIsSoxtmaxed, head_name + ).value elif head_name == "EfficientBBoxHead": parameters["subtype"] = ObjectDetectionSubtypeYOLO.YOLOv6.value head_node = nodes[head_alias] @@ -145,7 +149,9 @@ def _get_head_specific_parameters( parameters["conf_threshold"] = head_node.conf_thres parameters["max_det"] = head_node.max_det elif head_name in ["SegmentationHead", "BiSeNetHead"]: - parameters["is_softmax"] = getattr(ImplementedHeadsIsSoxtmaxed, head_name).value + parameters["is_softmax"] = getattr( + ImplementedHeadsIsSoxtmaxed, head_name + ).value elif head_name == "ImplicitKeypointBBoxHead": parameters["subtype"] = ObjectDetectionSubtypeYOLO.YOLOv7.value head_node = nodes[head_alias] @@ -161,18 +167,21 @@ def _get_head_specific_parameters( parameters["conf_threshold"] = head_node.conf_thres parameters["max_det"] = head_node.max_det parameters["n_keypoints"] = head_node.n_keypoints - else: + else: # pragma: no cover raise ValueError("Unknown head name") return parameters -def _get_head_outputs(outputs: list[dict], head_name: str, head_type: str) -> list[str]: +def _get_head_outputs( + outputs: list[dict], head_name: str, head_type: str +) -> list[str]: """Get model outputs in a head-specific format. @type outputs: list[dict] @param outputs: List of NN Archive outputs. @type head_name: str - @param head_name: Type of the head (e.g. 'EfficientBBoxHead') or its custom alias. + @param head_name: Type of the head (e.g. 'EfficientBBoxHead') or its + custom alias. @type head_type: str @param head_name: Type of the head (e.g. 'EfficientBBoxHead'). 
@rtype: list[str] @@ -238,7 +247,9 @@ def get_heads( task = str(next(iter(task.values()))) classes = _get_classes(node_name, task, class_dict) - head_outputs = _get_head_outputs(outputs, node_alias, node_name) + head_outputs = _get_head_outputs( + outputs, node_alias, node_name + ) head_dict = { "parser": parser, "metadata": { diff --git a/luxonis_train/core/utils/export_utils.py b/luxonis_train/core/utils/export_utils.py index 3b34a912..b4863f1b 100644 --- a/luxonis_train/core/utils/export_utils.py +++ b/luxonis_train/core/utils/export_utils.py @@ -42,7 +42,7 @@ def try_onnx_simplify(onnx_path: str) -> None: model_onnx = onnx.load(onnx_path) onnx_model, check = onnxsim.simplify(model_onnx) if not check: - raise RuntimeError("ONNX simplify failed.") + raise RuntimeError("ONNX simplify failed.") # pragma: no cover onnx.save(onnx_model, onnx_path) logger.info(f"ONNX model saved to {onnx_path}") @@ -52,7 +52,7 @@ def try_onnx_simplify(onnx_path: str) -> None: "`onnxsim` not installed. Skipping ONNX model simplification. " "Ensure `onnxsim` is installed in your environment." ) - except RuntimeError: + except RuntimeError: # pragma: no cover logger.error( "Failed to simplify ONNX model. Proceeding without simplification." 
) @@ -100,7 +100,7 @@ def blobconverter_export( logger.info("Converting ONNX to .blob") - optimizer_params = [] + optimizer_params: list[str] = [] if scale_values: optimizer_params.append(f"--scale_values={scale_values}") if mean_values: @@ -111,7 +111,7 @@ def blobconverter_export( blob_path = blobconverter.from_onnx( model=onnx_path, optimizer_params=optimizer_params, - data_type=cfg.data_type, + data_type=cfg.data_type.upper(), shaves=cfg.blobconverter.shaves, version=cfg.blobconverter.version, use_cache=False, diff --git a/luxonis_train/core/utils/train_utils.py b/luxonis_train/core/utils/train_utils.py index 3a45a85b..73b615cb 100644 --- a/luxonis_train/core/utils/train_utils.py +++ b/luxonis_train/core/utils/train_utils.py @@ -1,9 +1,11 @@ +from typing import Any + import lightning.pytorch as pl -from luxonis_train.utils.config import Config +from luxonis_train.utils.config import TrainerConfig -def create_trainer(cfg: Config, **kwargs) -> pl.Trainer: +def create_trainer(cfg: TrainerConfig, **kwargs: Any) -> pl.Trainer: """Creates Pytorch Lightning trainer. @type cfg: Config @@ -13,13 +15,14 @@ def create_trainer(cfg: Config, **kwargs) -> pl.Trainer: @return: Pytorch Lightning trainer. 
""" return pl.Trainer( - accelerator=cfg.trainer.accelerator, - devices=cfg.trainer.devices, - strategy=cfg.trainer.strategy, - max_epochs=cfg.trainer.epochs, - accumulate_grad_batches=cfg.trainer.accumulate_grad_batches, - check_val_every_n_epoch=cfg.trainer.validation_interval, - num_sanity_val_steps=cfg.trainer.num_sanity_val_steps, - profiler=cfg.trainer.profiler, + accelerator=cfg.accelerator, + devices=cfg.devices, + strategy=cfg.strategy, + max_epochs=cfg.epochs, + accumulate_grad_batches=cfg.accumulate_grad_batches, + check_val_every_n_epoch=cfg.validation_interval, + num_sanity_val_steps=cfg.n_sanity_val_steps, + profiler=cfg.profiler, + deterministic=cfg.deterministic, **kwargs, ) diff --git a/luxonis_train/core/utils/tune_utils.py b/luxonis_train/core/utils/tune_utils.py index e2fe692e..d9d6c4c0 100644 --- a/luxonis_train/core/utils/tune_utils.py +++ b/luxonis_train/core/utils/tune_utils.py @@ -61,17 +61,23 @@ def get_trial_params( case "int", [int(low), int(high), *tail]: step = tail[0] if tail else 1 if not isinstance(step, int): - raise ValueError(f"Step for int type must be int, but got {step}") + raise ValueError( + f"Step for int type must be int, but got {step}" + ) new_value = trial.suggest_int(key_name, low, high, step=step) case "loguniform", [float(low), float(high)]: new_value = trial.suggest_loguniform(key_name, low, high) case "uniform", [float(low), float(high)]: new_value = trial.suggest_uniform(key_name, low, high) case _, _: - raise KeyError(f"Combination of {key_type} and {value} not supported") + raise KeyError( + f"Combination of {key_type} and {value} not supported" + ) new_params[key_name] = new_value if len(new_params) == 0: - raise ValueError("No paramteres to tune. Specify them under `tuner.params`.") + raise ValueError( + "No paramteres to tune. Specify them under `tuner.params`." 
+ ) return new_params diff --git a/luxonis_train/utils/loaders/__init__.py b/luxonis_train/loaders/__init__.py similarity index 100% rename from luxonis_train/utils/loaders/__init__.py rename to luxonis_train/loaders/__init__.py diff --git a/luxonis_train/utils/loaders/base_loader.py b/luxonis_train/loaders/base_loader.py similarity index 65% rename from luxonis_train/utils/loaders/base_loader.py rename to luxonis_train/loaders/base_loader.py index 5e884955..b6b8a863 100644 --- a/luxonis_train/utils/loaders/base_loader.py +++ b/luxonis_train/loaders/base_loader.py @@ -1,16 +1,17 @@ from abc import ABC, abstractmethod import torch -from luxonis_ml.data import Augmentations +from luxonis_ml.data import Augmentations, LabelType from luxonis_ml.utils.registry import AutoRegisterMeta from torch import Size, Tensor from torch.utils.data import Dataset from luxonis_train.utils.registry import LOADERS -from luxonis_train.utils.types import Labels, LabelType +from luxonis_train.utils.types import Labels LuxonisLoaderTorchOutput = tuple[dict[str, Tensor], Labels] -"""LuxonisLoaderTorchOutput is a tuple of source tensors and corresponding labels.""" +"""LuxonisLoaderTorchOutput is a tuple of source tensors and +corresponding labels.""" class BaseLoaderTorch( @@ -20,8 +21,8 @@ class BaseLoaderTorch( register=False, registry=LOADERS, ): - """Base abstract loader class that enforces LuxonisLoaderTorchOutput output label - structure.""" + """Base abstract loader class that enforces LuxonisLoaderTorchOutput + output label structure.""" def __init__( self, @@ -38,6 +39,8 @@ def image_source(self) -> str: """Name of the input image group. Example: 'image' + + @type: str """ if self._image_source is None: raise ValueError("image_source is not set") @@ -47,39 +50,46 @@ def image_source(self) -> str: @abstractmethod def input_shapes(self) -> dict[str, Size]: """ - Shape of each loader group (sub-element), WITHOUT batch dimension. 
+ Shape (c, h, w) of each loader group (sub-element), WITHOUT batch dimension. Examples: - 1. Single image input:: - { - 'image': torch.Size([3, 224, 224]), - } - - 2. Image and segmentation input:: - { - 'image': torch.Size([3, 224, 224]), - 'segmentation': torch.Size([1, 224, 224]), - } - - 3. Left image, right image and disparity input:: - { - 'left': torch.Size([3, 224, 224]), - 'right': torch.Size([3, 224, 224]), - 'disparity': torch.Size([1, 224, 224]), - } - - 4. Image, keypoints, and point cloud input:: - { - 'image': torch.Size([3, 224, 224]), - 'keypoints': torch.Size([17, 2]), - 'point_cloud': torch.Size([20000, 3]), - } - - @rtype: dict[str, Size] - @return: A dictionary mapping group names to their shapes. + 1. Single image input:: + { + 'image': torch.Size([3, 224, 224]), + } + + 2. Image and segmentation input:: + { + 'image': torch.Size([3, 224, 224]), + 'segmentation': torch.Size([1, 224, 224]), + } + + 3. Left image, right image and disparity input:: + { + 'left': torch.Size([3, 224, 224]), + 'right': torch.Size([3, 224, 224]), + 'disparity': torch.Size([1, 224, 224]), + } + + 4. Image, keypoints, and point cloud input:: + { + 'image': torch.Size([3, 224, 224]), + 'keypoints': torch.Size([17, 2]), + 'point_cloud': torch.Size([20000, 3]), + } + + @type: dict[str, Size] """ ... + @property + def input_shape(self) -> Size: + """Shape (c, h, w) of the input tensor, WITHOUT batch dimension. + + @type: torch.Size + """ + return self.input_shapes[self.image_source] + @abstractmethod def __len__(self) -> int: """Returns length of the dataset.""" @@ -106,11 +116,12 @@ def get_classes(self) -> dict[str, list[str]]: ... def get_n_keypoints(self) -> dict[str, int] | None: - """Returns the dictionary defining the semantic skeleton for each class using - keypoints. + """Returns the dictionary defining the semantic skeleton for + each class using keypoints. @rtype: Dict[str, Dict] - @return: A dictionary mapping classes to their skeleton definitions. 
+ @return: A dictionary mapping classes to their skeleton + definitions. """ return None @@ -121,19 +132,21 @@ def collate_fn( """Default collate function used for training. @type batch: list[LuxonisLoaderTorchOutput] - @param batch: List of loader outputs (dict of Tensors) and labels (dict of Tensors) - in the LuxonisLoaderTorchOutput format. + @param batch: List of loader outputs (dict of Tensors) and labels + (dict of Tensors) in the LuxonisLoaderTorchOutput format. @rtype: tuple[dict[str, Tensor], dict[LabelType, Tensor]] - @return: Tuple of inputs and annotations in the format expected by the model. + @return: Tuple of inputs and annotations in the format expected by + the model. """ inputs: tuple[dict[str, Tensor], ...] labels: tuple[Labels, ...] inputs, labels = zip(*batch) - out_inputs = {k: torch.stack([i[k] for i in inputs], 0) for k in inputs[0].keys()} - out_labels = {task: {} for task in labels[0].keys()} + out_inputs = { + k: torch.stack([i[k] for i in inputs], 0) for k in inputs[0].keys() + } - out_labels = {} + out_labels: Labels = {} for task in labels[0].keys(): label_type = labels[0][task][1] diff --git a/luxonis_train/utils/loaders/luxonis_loader_torch.py b/luxonis_train/loaders/luxonis_loader_torch.py similarity index 98% rename from luxonis_train/utils/loaders/luxonis_loader_torch.py rename to luxonis_train/loaders/luxonis_loader_torch.py index 328f87be..8286a7a2 100644 --- a/luxonis_train/utils/loaders/luxonis_loader_torch.py +++ b/luxonis_train/loaders/luxonis_loader_torch.py @@ -156,7 +156,9 @@ def _parse_dataset( f"Supported types are: {', '.join(DatasetType.__members__)}." 
) - logger.info(f"Parsing dataset from {dataset_dir} with name '{dataset_name}'") + logger.info( + f"Parsing dataset from {dataset_dir} with name '{dataset_name}'" + ) return LuxonisParser( dataset_dir, diff --git a/luxonis_train/models/luxonis_lightning.py b/luxonis_train/models/luxonis_lightning.py index a3671dac..2bbf8ca9 100644 --- a/luxonis_train/models/luxonis_lightning.py +++ b/luxonis_train/models/luxonis_lightning.py @@ -1,6 +1,7 @@ from collections import defaultdict from collections.abc import Mapping from logging import getLogger +from pathlib import Path from typing import Literal, cast import lightning.pytorch as pl @@ -17,21 +18,32 @@ BaseMetric, BaseVisualizer, ) -from luxonis_train.attached_modules.metrics.common import TorchMetricWrapper +from luxonis_train.attached_modules.metrics.torchmetrics import ( + TorchMetricWrapper, +) from luxonis_train.attached_modules.visualizers import ( combine_visualizations, get_unnormalized_images, ) -from luxonis_train.callbacks import ( - BaseLuxonisProgressBar, - ModuleFreezer, -) +from luxonis_train.callbacks import BaseLuxonisProgressBar, ModuleFreezer from luxonis_train.nodes import BaseNode +from luxonis_train.utils import ( + DatasetMetadata, + Kwargs, + Labels, + LuxonisTrackerPL, + Packet, + to_shape_packet, + traverse_graph, +) from luxonis_train.utils.config import AttachedModuleConfig, Config -from luxonis_train.utils.general import DatasetMetadata, to_shape_packet, traverse_graph -from luxonis_train.utils.registry import CALLBACKS, OPTIMIZERS, SCHEDULERS, Registry -from luxonis_train.utils.tracker import LuxonisTrackerPL -from luxonis_train.utils.types import Kwargs, Labels, Packet +from luxonis_train.utils.graph import Graph +from luxonis_train.utils.registry import ( + CALLBACKS, + OPTIMIZERS, + SCHEDULERS, + Registry, +) from .luxonis_output import LuxonisOutput @@ -105,13 +117,13 @@ def __init__( @type save_dir: str @param save_dir: Directory to save checkpoints. 
@type input_shapes: dict[str, Size] - @param input_shapes: Dictionary of input shapes. Keys are input names, values - are shapes. + @param input_shapes: Dictionary of input shapes. Keys are input + names, values are shapes. @type dataset_metadata: L{DatasetMetadata} | None @param dataset_metadata: Dataset metadata. @type kwargs: Any - @param kwargs: Additional arguments to pass to the L{LightningModule} - constructor. + @param kwargs: Additional arguments to pass to the + L{LightningModule} constructor. """ super().__init__(**kwargs) @@ -123,18 +135,24 @@ def __init__( self.image_source = cfg.loader.image_source self.dataset_metadata = dataset_metadata or DatasetMetadata() self.frozen_nodes: list[tuple[nn.Module, int]] = [] - self.graph: dict[str, list[str]] = {} + self.graph: Graph = {} self.loader_input_shapes: dict[str, dict[str, Size]] = {} self.node_input_sources: dict[str, list[str]] = defaultdict(list) self.loss_weights: dict[str, float] = {} self.main_metric: str | None = None self.save_dir = save_dir self.test_step_outputs: list[Mapping[str, Tensor | float | int]] = [] - self.training_step_outputs: list[Mapping[str, Tensor | float | int]] = [] - self.validation_step_outputs: list[Mapping[str, Tensor | float | int]] = [] + self.training_step_outputs: list[ + Mapping[str, Tensor | float | int] + ] = [] + self.validation_step_outputs: list[ + Mapping[str, Tensor | float | int] + ] = [] self.losses: dict[str, dict[str, BaseLoss]] = defaultdict(dict) self.metrics: dict[str, dict[str, BaseMetric]] = defaultdict(dict) - self.visualizers: dict[str, dict[str, BaseVisualizer]] = defaultdict(dict) + self.visualizers: dict[str, dict[str, BaseVisualizer]] = defaultdict( + dict + ) self._logged_images = 0 @@ -152,7 +170,9 @@ def __init__( elif isinstance(node_cfg.freezing.unfreeze_after, int): unfreeze_after = node_cfg.freezing.unfreeze_after else: - unfreeze_after = int(node_cfg.freezing.unfreeze_after * epochs) + unfreeze_after = int( + 
node_cfg.freezing.unfreeze_after * epochs + ) frozen_nodes.append((node_name, unfreeze_after)) if node_cfg.task is not None: @@ -172,8 +192,14 @@ def __init__( node_cfg.task = {next(iter(Node.tasks)): node_cfg.task} else: - node_cfg.task = {**Node._process_tasks(Node.tasks), **node_cfg.task} - nodes[node_name] = (Node, {**node_cfg.params, "_tasks": node_cfg.task}) + node_cfg.task = { + **Node._process_tasks(Node.tasks), + **node_cfg.task, + } + nodes[node_name] = ( + Node, + {**node_cfg.params, "_tasks": node_cfg.task}, + ) # Handle inputs for this node if node_cfg.input_sources: @@ -241,7 +267,7 @@ def __init__( @property def core(self) -> "luxonis_train.core.LuxonisModel": """Returns the core model.""" - if self._core is None: + if self._core is None: # pragma: no cover raise ValueError("Core reference is not set.") return self._core @@ -251,12 +277,12 @@ def _initiate_nodes( ) -> nn.ModuleDict: """Initializes all the nodes in the model. - Traverses the graph and initiates each node using outputs of the preceding - nodes. + Traverses the graph and initiates each node using outputs of the + preceding nodes. @type nodes: dict[str, tuple[type[LuxonisNode], Kwargs]] - @param nodes: Dictionary of nodes to be initiated. Keys are node names, values - are tuples of node class and node kwargs. + @param nodes: Dictionary of nodes to be initiated. Keys are node + names, values are tuples of node class and node kwargs. @rtype: L{nn.ModuleDict}[str, L{LuxonisNode}] @return: Dictionary of initiated nodes. """ @@ -268,9 +294,10 @@ def _initiate_nodes( for source_name, shape in shapes.items() } - for node_name, (Node, node_kwargs), node_input_names, _ in traverse_graph( - self.graph, nodes - ): + for node_name, ( + Node, + node_kwargs, + ), node_input_names, _ in traverse_graph(self.graph, nodes): node_dummy_inputs: list[Packet[Tensor]] = [] """List of dummy input packets for the node. @@ -313,23 +340,27 @@ def forward( ) -> LuxonisOutput: """Forward pass of the model. 
- Traverses the graph and step-by-step computes the outputs of each node. Each - next node is computed only when all of its predecessors are computed. Once the - outputs are not needed anymore, they are removed from the memory. + Traverses the graph and step-by-step computes the outputs of + each node. Each next node is computed only when all of its + predecessors are computed. Once the outputs are not needed + anymore, they are removed from the memory. @type inputs: L{Tensor} @param inputs: Input tensor. @type task_labels: L{TaskLabels} | None @param task_labels: Labels dictionary. Defaults to C{None}. @type images: L{Tensor} | None - @param images: Canvas tensor for visualizers. Defaults to C{None}. + @param images: Canvas tensor for visualizers. Defaults to + C{None}. @type compute_loss: bool - @param compute_loss: Whether to compute losses. Defaults to C{True}. + @param compute_loss: Whether to compute losses. Defaults to + C{True}. @type compute_metrics: bool - @param compute_metrics: Whether to update metrics. Defaults to C{True}. + @param compute_metrics: Whether to update metrics. Defaults to + C{True}. @type compute_visualizations: bool - @param compute_visualizations: Whether to compute visualizations. Defaults to - C{False}. + @param compute_visualizations: Whether to compute + visualizations. Defaults to C{False}. @rtype: L{LuxonisOutput} @return: Output of the model. 
""" @@ -353,11 +384,19 @@ def forward( outputs = node.run(node_inputs) computed[node_name] = outputs - if compute_loss and node_name in self.losses and labels is not None: + if ( + compute_loss + and node_name in self.losses + and labels is not None + ): for loss_name, loss in self.losses[node_name].items(): losses[node_name][loss_name] = loss.run(outputs, labels) - if compute_metrics and node_name in self.metrics and labels is not None: + if ( + compute_metrics + and node_name in self.metrics + and labels is not None + ): for metric in self.metrics[node_name].values(): metric.run_update(outputs, labels) @@ -367,7 +406,9 @@ def forward( and images is not None and labels is not None ): - for viz_name, visualizer in self.visualizers[node_name].items(): + for viz_name, visualizer in self.visualizers[ + node_name + ].items(): viz = combine_visualizations( visualizer.run( images, @@ -420,7 +461,7 @@ def compute_metrics(self) -> dict[str, dict[str, Tensor]]: computed_submetrics = {metric_name: metric_value} case dict(submetrics): computed_submetrics = submetrics - case unknown: + case unknown: # pragma: no cover raise ValueError( f"Metric {metric_name} returned unexpected value of " f"type {type(unknown)}." @@ -435,7 +476,8 @@ def export_onnx(self, save_path: str, **kwargs) -> list[str]: @type save_path: str @param save_path: Path where the exported model will be saved. @type kwargs: Any - @param kwargs: Additional arguments for the L{torch.onnx.export} method. + @param kwargs: Additional arguments for the L{torch.onnx.export} + method. @rtype: list[str] @return: List of output names. 
""" @@ -448,7 +490,8 @@ def export_onnx(self, save_path: str, **kwargs) -> list[str]: } inputs_deep_clone = { - k: torch.zeros(elem.shape).to(self.device) for k, elem in inputs.items() + k: torch.zeros(elem.shape).to(self.device) + for k, elem in inputs.items() } inputs_for_onnx = {"inputs": inputs_deep_clone} @@ -519,22 +562,26 @@ def export_forward(inputs) -> tuple[Tensor, ...]: def process_losses( self, - losses_dict: dict[str, dict[str, Tensor | tuple[Tensor, dict[str, Tensor]]]], + losses_dict: dict[ + str, dict[str, Tensor | tuple[Tensor, dict[str, Tensor]]] + ], ) -> tuple[Tensor, dict[str, Tensor]]: """Processes individual losses from the model run. - Goes over the computed losses and computes the final loss as a weighted sum of - all the losses. + Goes over the computed losses and computes the final loss as a + weighted sum of all the losses. - @type losses_dict: dict[str, dict[str, Tensor | tuple[Tensor, dict[str, - Tensor]]]] - @param losses_dict: Dictionary of computed losses. Each node can have multiple - losses attached. The first key identifies the node, the second key - identifies the specific loss. Values are either single tensors or tuples of - tensors and sublosses. + @type losses_dict: dict[str, dict[str, Tensor | tuple[Tensor, + dict[str, Tensor]]]] + @param losses_dict: Dictionary of computed losses. Each node can + have multiple losses attached. The first key identifies the + node, the second key identifies the specific loss. Values + are either single tensors or tuples of tensors and + sublosses. @rtype: tuple[Tensor, dict[str, Tensor]] - @return: Tuple of final loss and dictionary of processed sublosses. The - dictionary is in a format of {loss_name: loss_value}. + @return: Tuple of final loss and dictionary of processed + sublosses. The dictionary is in a format of {loss_name: + loss_value}. 
""" final_loss = torch.zeros(1, device=self.device) training_step_output: dict[str, Tensor] = {} @@ -548,9 +595,9 @@ def process_losses( loss *= self.loss_weights[loss_name] final_loss += loss - training_step_output[ - f"loss/{node_name}/{loss_name}" - ] = loss.detach().cpu() + training_step_output[f"loss/{node_name}/{loss_name}"] = ( + loss.detach().cpu() + ) if self.cfg.trainer.log_sub_losses and sublosses: for subloss_name, subloss_value in sublosses.items(): training_step_output[ @@ -559,10 +606,14 @@ def process_losses( training_step_output["loss"] = final_loss.detach().cpu() return final_loss, training_step_output - def training_step(self, train_batch: tuple[dict[str, Tensor], Labels]) -> Tensor: + def training_step( + self, train_batch: tuple[dict[str, Tensor], Labels] + ) -> Tensor: """Performs one step of training with provided batch.""" outputs = self.forward(*train_batch) - assert outputs.losses, "Losses are empty, check if you have defined any loss" + assert ( + outputs.losses + ), "Losses are empty, check if you have defined any loss" loss, training_step_output = self.process_losses(outputs.losses) self.training_step_outputs.append(training_step_output) @@ -605,7 +656,8 @@ def get_status(self) -> tuple[int, int]: return self.current_epoch, self.cfg.trainer.epochs def get_status_percentage(self) -> float: - """Returns percentage of current training, takes into account early stopping.""" + """Returns percentage of current training, takes into account + early stopping.""" if self._trainer.early_stopping_callback: # model haven't yet stop from early stopping callback if self._trainer.early_stopping_callback.stopped_epoch == 0: @@ -616,11 +668,13 @@ def get_status_percentage(self) -> float: return (self.current_epoch / self.cfg.trainer.epochs) * 100 def _evaluation_step( - self, mode: Literal["test", "val"], batch: tuple[dict[str, Tensor], Labels] + self, + mode: Literal["test", "val"], + batch: tuple[dict[str, Tensor], Labels], ) -> dict[str, Tensor]: 
inputs, labels = batch images = None - if self._logged_images < self.cfg.trainer.num_log_images: + if self._logged_images < self.cfg.trainer.n_log_images: images = get_unnormalized_images(self.cfg, inputs) outputs = self.forward( inputs, @@ -638,7 +692,7 @@ def _evaluation_step( for viz_name, viz_batch in visualizations.items(): logged_images = self._logged_images for viz in viz_batch: - if logged_images >= self.cfg.trainer.num_log_images: + if logged_images >= self.cfg.trainer.n_log_images: break self.logger.log_image( f"{mode}/visualizations/{node_name}/{viz_name}/{logged_images}", @@ -662,7 +716,9 @@ def _evaluation_epoch_end(self, mode: Literal["test", "val"]) -> None: logger.info("Metrics computed.") for node_name, metrics in computed_metrics.items(): for metric_name, metric_value in metrics.items(): - metric_results[node_name][metric_name] = metric_value.cpu().item() + metric_results[node_name][metric_name] = ( + metric_value.cpu().item() + ) self.log( f"{mode}/metric/{node_name}/{metric_name}", metric_value, @@ -682,7 +738,9 @@ def _evaluation_epoch_end(self, mode: Literal["test", "val"]) -> None: def configure_callbacks(self) -> list[pl.Callback]: """Configures Pytorch Lightning callbacks.""" self.min_val_loss_checkpoints_path = f"{self.save_dir}/min_val_loss" - self.best_val_metric_checkpoints_path = f"{self.save_dir}/best_val_metric" + self.best_val_metric_checkpoints_path = ( + f"{self.save_dir}/best_val_metric" + ) model_name = self.cfg.model.name callbacks: list[pl.Callback] = [ @@ -716,14 +774,17 @@ def configure_callbacks(self) -> list[pl.Callback]: for callback in self.cfg.trainer.callbacks: if callback.active: - callbacks.append(CALLBACKS.get(callback.name)(**callback.params)) + callbacks.append( + CALLBACKS.get(callback.name)(**callback.params) + ) return callbacks def configure_optimizers( self, ) -> tuple[ - list[torch.optim.Optimizer], list[torch.optim.lr_scheduler._LRScheduler] + list[torch.optim.Optimizer], + 
list[torch.optim.lr_scheduler._LRScheduler], ]: """Configures model optimizers and schedulers.""" cfg_optimizer = self.cfg.trainer.optimizer @@ -739,18 +800,20 @@ def configure_optimizers( return [optimizer], [scheduler] - def load_checkpoint(self, path: str | None) -> None: + def load_checkpoint(self, path: str | Path | None) -> None: """Loads checkpoint weights from provided path. - Loads the checkpoints gracefully, ignoring keys that are not found in the model - state dict or in the checkpoint. + Loads the checkpoints gracefully, ignoring keys that are not + found in the model state dict or in the checkpoint. @type path: str | None - @param path: Path to the checkpoint. If C{None}, no checkpoint will be loaded. + @param path: Path to the checkpoint. If C{None}, no checkpoint + will be loaded. """ if path is None: return + path = str(path) checkpoint = torch.load(path, map_location=self.device) if "state_dict" not in checkpoint: @@ -809,7 +872,9 @@ def _init_attached_module( return module_name, node_name @staticmethod - def _to_module_dict(modules: dict[str, dict[str, nn.Module]]) -> nn.ModuleDict: + def _to_module_dict( + modules: dict[str, dict[str, nn.Module]], + ) -> nn.ModuleDict: return nn.ModuleDict( { node_name: nn.ModuleDict(node_modules) @@ -819,7 +884,9 @@ def _to_module_dict(modules: dict[str, dict[str, nn.Module]]) -> nn.ModuleDict: @property def _progress_bar(self) -> BaseLuxonisProgressBar: - return cast(BaseLuxonisProgressBar, self._trainer.progress_bar_callback) + return cast( + BaseLuxonisProgressBar, self._trainer.progress_bar_callback + ) @rank_zero_only def _print_results( @@ -829,16 +896,20 @@ def _print_results( logger.info(f"{stage} loss: {loss:.4f}") - self._progress_bar.print_results(stage=stage, loss=loss, metrics=metrics) + self._progress_bar.print_results( + stage=stage, loss=loss, metrics=metrics + ) if self.main_metric is not None: main_metric_node, main_metric_name = self.main_metric.split("/") main_metric = 
metrics[main_metric_node][main_metric_name] - logger.info(f"{stage} main metric ({self.main_metric}): {main_metric:.4f}") + logger.info( + f"{stage} main metric ({self.main_metric}): {main_metric:.4f}" + ) def _is_train_eval_epoch(self) -> bool: - """Checks if train eval should be performed on current epoch based on configured - train_metrics_interval.""" + """Checks if train eval should be performed on current epoch + based on configured train_metrics_interval.""" train_metrics_interval = self.cfg.trainer.train_metrics_interval # add +1 to current_epoch because starting epoch is at 0 return ( diff --git a/luxonis_train/models/luxonis_output.py b/luxonis_train/models/luxonis_output.py index d69943fc..3cf59329 100644 --- a/luxonis_train/models/luxonis_output.py +++ b/luxonis_train/models/luxonis_output.py @@ -3,8 +3,7 @@ from torch import Tensor -from luxonis_train.utils.general import to_shape_packet -from luxonis_train.utils.types import Packet +from luxonis_train.utils import Packet, to_shape_packet @dataclass diff --git a/luxonis_train/models/predefined_models/base_predefined_model.py b/luxonis_train/models/predefined_models/base_predefined_model.py index 33ababdc..9388f345 100644 --- a/luxonis_train/models/predefined_models/base_predefined_model.py +++ b/luxonis_train/models/predefined_models/base_predefined_model.py @@ -1,4 +1,4 @@ -from abc import ABC, abstractproperty +from abc import ABC, abstractmethod from luxonis_ml.utils.registry import AutoRegisterMeta @@ -17,21 +17,21 @@ class BasePredefinedModel( registry=MODELS, register=False, ): - @abstractproperty - def nodes(self) -> list[ModelNodeConfig]: - ... + @property + @abstractmethod + def nodes(self) -> list[ModelNodeConfig]: ... - @abstractproperty - def losses(self) -> list[LossModuleConfig]: - ... + @property + @abstractmethod + def losses(self) -> list[LossModuleConfig]: ... - @abstractproperty - def metrics(self) -> list[MetricModuleConfig]: - ... 
+ @property + @abstractmethod + def metrics(self) -> list[MetricModuleConfig]: ... - @abstractproperty - def visualizers(self) -> list[AttachedModuleConfig]: - ... + @property + @abstractmethod + def visualizers(self) -> list[AttachedModuleConfig]: ... def generate_model( self, diff --git a/luxonis_train/models/predefined_models/classification_model.py b/luxonis_train/models/predefined_models/classification_model.py index c9d782eb..e390b667 100644 --- a/luxonis_train/models/predefined_models/classification_model.py +++ b/luxonis_train/models/predefined_models/classification_model.py @@ -1,13 +1,13 @@ from dataclasses import dataclass, field from typing import Literal +from luxonis_train.utils import Kwargs from luxonis_train.utils.config import ( AttachedModuleConfig, LossModuleConfig, MetricModuleConfig, ModelNodeConfig, ) -from luxonis_train.utils.types import Kwargs from .base_predefined_model import BasePredefinedModel @@ -15,7 +15,7 @@ @dataclass class ClassificationModel(BasePredefinedModel): backbone: str = "MicroNet" - task: Literal["multiclass", "multilabel"] = "multilabel" + task: Literal["multiclass", "multilabel"] = "multiclass" backbone_params: Kwargs = field(default_factory=dict) head_params: Kwargs = field(default_factory=dict) loss_params: Kwargs = field(default_factory=dict) diff --git a/luxonis_train/models/predefined_models/detection_model.py b/luxonis_train/models/predefined_models/detection_model.py index e9db4462..94c4487f 100644 --- a/luxonis_train/models/predefined_models/detection_model.py +++ b/luxonis_train/models/predefined_models/detection_model.py @@ -1,12 +1,12 @@ from dataclasses import dataclass, field +from luxonis_train.utils import Kwargs from luxonis_train.utils.config import ( AttachedModuleConfig, LossModuleConfig, MetricModuleConfig, ModelNodeConfig, ) -from luxonis_train.utils.types import Kwargs from .base_predefined_model import BasePredefinedModel @@ -47,7 +47,9 @@ def nodes(self) -> list[ModelNodeConfig]: 
name="EfficientBBoxHead", alias="detection_head", freezing=self.head_params.pop("freezing", {}), - inputs=["detection_neck"] if self.use_neck else ["detection_backbone"], + inputs=["detection_neck"] + if self.use_neck + else ["detection_backbone"], params=self.head_params, task=self.task_name, ) diff --git a/luxonis_train/models/predefined_models/keypoint_detection_model.py b/luxonis_train/models/predefined_models/keypoint_detection_model.py index 588911c6..670b00b1 100644 --- a/luxonis_train/models/predefined_models/keypoint_detection_model.py +++ b/luxonis_train/models/predefined_models/keypoint_detection_model.py @@ -1,13 +1,13 @@ from dataclasses import dataclass, field from typing import Literal +from luxonis_train.utils import Kwargs from luxonis_train.utils.config import ( AttachedModuleConfig, LossModuleConfig, MetricModuleConfig, ModelNodeConfig, ) -from luxonis_train.utils.types import Kwargs from .base_predefined_model import BasePredefinedModel @@ -21,7 +21,7 @@ class KeypointDetectionModel(BasePredefinedModel): loss_params: Kwargs = field(default_factory=dict) head_type: Literal[ "ImplicitKeypointBBoxHead", "EfficientKeypointBBoxHead" - ] = "ImplicitKeypointBBoxHead" + ] = "EfficientKeypointBBoxHead" kpt_visualizer_params: Kwargs = field(default_factory=dict) bbox_visualizer_params: Kwargs = field(default_factory=dict) bbox_task_name: str | None = None @@ -50,7 +50,7 @@ def nodes(self) -> list[ModelNodeConfig]: task = {} if self.bbox_task_name is not None: - task["bbox"] = self.bbox_task_name + task["boundingbox"] = self.bbox_task_name if self.kpt_task_name is not None: task["keypoints"] = self.kpt_task_name diff --git a/luxonis_train/models/predefined_models/segmentation_model.py b/luxonis_train/models/predefined_models/segmentation_model.py index b5e81f76..d1076239 100644 --- a/luxonis_train/models/predefined_models/segmentation_model.py +++ b/luxonis_train/models/predefined_models/segmentation_model.py @@ -1,13 +1,13 @@ from dataclasses import 
dataclass, field from typing import Literal +from luxonis_train.utils import Kwargs from luxonis_train.utils.config import ( AttachedModuleConfig, LossModuleConfig, MetricModuleConfig, ModelNodeConfig, ) -from luxonis_train.utils.types import Kwargs from .base_predefined_model import BasePredefinedModel diff --git a/luxonis_train/nodes/README.md b/luxonis_train/nodes/README.md index 2f147e23..60e5971c 100644 --- a/luxonis_train/nodes/README.md +++ b/luxonis_train/nodes/README.md @@ -77,7 +77,7 @@ Adapted from [here](https://arxiv.org/pdf/2209.02976.pdf). | Key | Type | Default value | Description | | ------------- | ----------- | --------------------------- | --------------------------------------------------- | | channels_list | List\[int\] | \[64, 128, 256, 512, 1024\] | List of number of channels for each block | -| num_repeats | List\[int\] | \[1, 6, 12, 18, 6\] | List of number of repeats of RepVGGBlock | +| n_repeats | List\[int\] | \[1, 6, 12, 18, 6\] | List of number of repeats of RepVGGBlock | | in_channels | int | 3 | Number of input channels, should be 3 in most cases | | depth_mul | int | 0.33 | Depth multiplier | | width_mul | int | 0.25 | Width multiplier | @@ -145,9 +145,9 @@ Adapted from [here](https://arxiv.org/pdf/2209.02976.pdf). 
| Key | Type | Default value | Description | | ------------- | ---------------- | ------------------------------------------------------- | ----------------------------------------- | -| num_heads | Literal\[2,3,4\] | 3 ***Note:** Should be same also on head in most cases* | Number of output heads | +| n_heads | Literal\[2,3,4\] | 3 ***Note:** Should be same also on head in most cases* | Number of output heads | | channels_list | List\[int\] | \[256, 128, 128, 256, 256, 512\] | List of number of channels for each block | -| num_repeats | List\[int\] | \[12, 12, 12, 12\] | List of number of repeats of RepVGGBlock | +| n_repeats | List\[int\] | \[12, 12, 12, 12\] | List of number of repeats of RepVGGBlock | | depth_mul | int | 0.33 | Depth multiplier | | width_mul | int | 0.25 | Width multiplier | @@ -182,7 +182,7 @@ Adapted from [here](https://arxiv.org/pdf/2209.02976.pdf). | Key | Type | Default value | Description | | ---------- | ----- | ------------- | -------------------------------------------------- | -| num_heads | bool | 3 | Number of output heads | +| n_heads | bool | 3 | Number of output heads | | conf_thres | float | 0.25 | confidence threshold for nms (used for evaluation) | | iou_thres | float | 0.45 | iou threshold for nms (used for evaluation) | @@ -195,7 +195,7 @@ Adapted from [here](https://arxiv.org/pdf/2207.02696.pdf). | Key | Type | Default value | Description | | ---------------- | --------------------------- | ------------- | ---------------------------------------------------------------------------------------------------------- | | n_keypoints | int \| None | None | Number of keypoints. | -| num_heads | int | 3 | Number of output heads | +| n_heads | int | 3 | Number of output heads | | anchors | List\[List\[int\]\] \| None | None | Anchors used for object detection. If set to `None`, the anchors are computed at runtime from the dataset. 
| | init_coco_biases | bool | True | Whether to use COCO bias and weight initialization | | conf_thres | float | 0.25 | confidence threshold for nms (used for evaluation) | diff --git a/luxonis_train/nodes/activations/__init__.py b/luxonis_train/nodes/activations/__init__.py index 37aea0fc..0d3d1e0b 100644 --- a/luxonis_train/nodes/activations/__init__.py +++ b/luxonis_train/nodes/activations/__init__.py @@ -1,3 +1,3 @@ -from .activations import HSigmoid, HSwish +from .activations import HSigmoid -__all__ = ["HSigmoid", "HSwish"] +__all__ = ["HSigmoid"] diff --git a/luxonis_train/nodes/activations/activations.py b/luxonis_train/nodes/activations/activations.py index f3abedd6..93703a1c 100644 --- a/luxonis_train/nodes/activations/activations.py +++ b/luxonis_train/nodes/activations/activations.py @@ -10,14 +10,3 @@ def __init__(self): def forward(self, x: Tensor) -> Tensor: return self.relu(x + 3) / 6 - - -class HSwish(nn.Module): - def __init__(self): - """H-Swish activation function from U{Searching for MobileNetV3 - }.""" - super().__init__() - self.sigmoid = HSigmoid() - - def forward(self, x: Tensor) -> Tensor: - return x * self.sigmoid(x) diff --git a/luxonis_train/nodes/backbones/contextspatial.py b/luxonis_train/nodes/backbones/contextspatial.py index 2cac4b81..cf98cd4c 100644 --- a/luxonis_train/nodes/backbones/contextspatial.py +++ b/luxonis_train/nodes/backbones/contextspatial.py @@ -1,9 +1,3 @@ -"""Implementation of Context Spatial backbone. - -Source: U{BiseNetV1} -""" - - from torch import Tensor, nn from torch.nn import functional as F @@ -13,21 +7,43 @@ ConvModule, FeatureFusionBlock, ) +from luxonis_train.utils import Kwargs from luxonis_train.utils.registry import NODES class ContextSpatial(BaseNode[Tensor, list[Tensor]]): - def __init__(self, context_backbone: str = "MobileNetV2", **kwargs): - """Context spatial backbone. - TODO: Add more documentation. 
+ def __init__( + self, + context_backbone: str | nn.Module = "MobileNetV2", + backbone_kwargs: Kwargs | None = None, + **kwargs, + ): + """Context Spatial backbone introduced in BiseNetV1. + Source: U{BiseNetV1} + + @see: U{BiseNetv1: Bilateral Segmentation Network for + Real-time Semantic Segmentation + } @type context_backbone: str - @param context_backbone: Backbone used. Defaults to C{MobileNetV2}. + @param context_backbone: Backbone used in the context path. + Can be either a string or a C{torch.nn.Module}. + If a string argument is used, it has to be a name of a module + stored in the L{NODES} registry. Defaults to C{MobileNetV2}. + + @type backbone_kwargs: dict + @param backbone_kwargs: Keyword arguments for the backbone. + Only used when the C{context_backbone} argument is a string. """ super().__init__(**kwargs) - self.context_path = ContextPath(NODES.get(context_backbone)(**kwargs)) + if isinstance(context_backbone, str): + backbone_kwargs = backbone_kwargs or {} + backbone_kwargs |= kwargs + context_backbone = NODES.get(context_backbone)(**backbone_kwargs) + + self.context_path = ContextPath(context_backbone) self.spatial_path = SpatialPath(3, 128) self.ffm = FeatureFusionBlock(256, 256) @@ -35,22 +51,41 @@ def forward(self, inputs: Tensor) -> list[Tensor]: spatial_out = self.spatial_path(inputs) context16, _ = self.context_path(inputs) fm_fuse = self.ffm(spatial_out, context16) - outs = [fm_fuse] - return outs + return [fm_fuse] class SpatialPath(nn.Module): def __init__(self, in_channels: int, out_channels: int): super().__init__() intermediate_channels = 64 - self.conv_7x7 = ConvModule(in_channels, intermediate_channels, 7, 2, 3) + self.conv_7x7 = ConvModule( + in_channels, + intermediate_channels, + kernel_size=7, + stride=2, + padding=3, + ) self.conv_3x3_1 = ConvModule( - intermediate_channels, intermediate_channels, 3, 2, 1 + intermediate_channels, + intermediate_channels, + kernel_size=3, + stride=2, + padding=1, ) self.conv_3x3_2 = ConvModule( 
- intermediate_channels, intermediate_channels, 3, 2, 1 + intermediate_channels, + intermediate_channels, + kernel_size=3, + stride=2, + padding=1, + ) + self.conv_1x1 = ConvModule( + intermediate_channels, + out_channels, + kernel_size=1, + stride=1, + padding=0, ) - self.conv_1x1 = ConvModule(intermediate_channels, out_channels, 1, 1, 0) def forward(self, x: Tensor) -> Tensor: x = self.conv_7x7(x) @@ -60,25 +95,30 @@ def forward(self, x: Tensor) -> Tensor: class ContextPath(nn.Module): - def __init__(self, backbone: BaseNode): + def __init__(self, backbone: nn.Module): super().__init__() self.backbone = backbone - self.up16 = nn.Upsample(scale_factor=2.0, mode="bilinear", align_corners=True) - self.up32 = nn.Upsample(scale_factor=2.0, mode="bilinear", align_corners=True) + self.up16 = nn.Upsample( + scale_factor=2.0, mode="bilinear", align_corners=True + ) + self.up32 = nn.Upsample( + scale_factor=2.0, mode="bilinear", align_corners=True + ) self.refine16 = ConvModule(128, 128, 3, 1, 1) self.refine32 = ConvModule(128, 128, 3, 1, 1) - def forward(self, x: Tensor) -> list[Tensor]: - *_, down16, down32 = self.backbone.forward(x) + def forward(self, x: Tensor) -> tuple[Tensor, Tensor]: + *_, down16, down32 = self.backbone(x) if not hasattr(self, "arm16"): self.arm16 = AttentionRefinmentBlock(down16.shape[1], 128) self.arm32 = AttentionRefinmentBlock(down32.shape[1], 128) self.global_context = nn.Sequential( - nn.AdaptiveAvgPool2d(1), ConvModule(down32.shape[1], 128, 1, 1, 0) + nn.AdaptiveAvgPool2d(1), + ConvModule(down32.shape[1], 128, 1, 1, 0), ) arm_down16 = self.arm16(down16) @@ -86,15 +126,18 @@ def forward(self, x: Tensor) -> list[Tensor]: global_down32 = self.global_context(down32) global_down32 = F.interpolate( - global_down32, size=down32.size()[2:], mode="bilinear", align_corners=True + global_down32, + size=down32.shape[2:], + mode="bilinear", + align_corners=True, ) - arm_down32 = arm_down32 + global_down32 + arm_down32 += global_down32 arm_down32 = 
self.up32(arm_down32) arm_down32 = self.refine32(arm_down32) - arm_down16 = arm_down16 + arm_down32 + arm_down16 += arm_down32 arm_down16 = self.up16(arm_down16) arm_down16 = self.refine16(arm_down16) - return [arm_down16, arm_down32] + return arm_down16, arm_down32 diff --git a/luxonis_train/nodes/backbones/efficientnet.py b/luxonis_train/nodes/backbones/efficientnet.py index e560bc5f..7744236a 100644 --- a/luxonis_train/nodes/backbones/efficientnet.py +++ b/luxonis_train/nodes/backbones/efficientnet.py @@ -1,8 +1,4 @@ -"""Implementation of the EfficientNet backbone. - -Source: U{https://github.com/rwightman/gen-efficientnet-pytorch} -@license: U{Apache 2.0} -""" +from typing import Any import torch from torch import Tensor, nn @@ -13,33 +9,49 @@ class EfficientNet(BaseNode[Tensor, list[Tensor]]): attach_index: int = -1 - def __init__(self, download_weights: bool = False, **kwargs): + def __init__( + self, + download_weights: bool = False, + out_indices: list[int] | None = None, + **kwargs: Any, + ): """EfficientNet backbone. + EfficientNet is a convolutional neural network architecture and scaling method that uniformly scales all dimensions of depth/width/resolution using a compound coefficient. Unlike conventional practice that arbitrary scales these factors, the EfficientNet scaling method uniformly scales network width, depth, and resolution with a set of fixed scaling coefficients. + + Source: U{https://github.com/rwightman/gen-efficientnet-pytorch} + + @license: U{Apache License, Version 2.0 + } + + @see: U{https://paperswithcode.com/method/efficientnet} + @see: U{EfficientNet: Rethinking Model Scaling for + Convolutional Neural Networks + } @type download_weights: bool @param download_weights: If C{True} download weights from imagenet. Defaults to C{False}. + @type out_indices: list[int] | None + @param out_indices: Indices of the output layers. Defaults to [0, 1, 2, 4, 6]. 
""" super().__init__(**kwargs) - efficientnet_lite0_model = torch.hub.load( + self.backbone: nn.Module = torch.hub.load( # type: ignore "rwightman/gen-efficientnet-pytorch", "efficientnet_lite0", pretrained=download_weights, ) - efficientnet_lite0_model.classifier = nn.Identity() - self.out_indices = [0, 1, 2, 4, 6] - efficientnet_lite0_model.bn2 = nn.Identity() - efficientnet_lite0_model.conv_head = nn.Identity() - self.backbone = efficientnet_lite0_model - - def forward(self, x: Tensor) -> list[Tensor]: - outs = [] - x = self.backbone.conv_stem(x) + self.out_indices = out_indices or [0, 1, 2, 4, 6] + + def forward(self, inputs: Tensor) -> list[Tensor]: + x = self.backbone.conv_stem(inputs) x = self.backbone.bn1(x) x = self.backbone.act1(x) - for i, m in enumerate(self.backbone.blocks): - x = m(x) + + outs: list[Tensor] = [] + + for i, layer in enumerate(self.backbone.blocks): + x = layer(x) if i in self.out_indices: outs.append(x) diff --git a/luxonis_train/nodes/backbones/efficientrep/__init__.py b/luxonis_train/nodes/backbones/efficientrep/__init__.py new file mode 100644 index 00000000..51ff264a --- /dev/null +++ b/luxonis_train/nodes/backbones/efficientrep/__init__.py @@ -0,0 +1,3 @@ +from .efficientrep import EfficientRep + +__all__ = ["EfficientRep"] diff --git a/luxonis_train/nodes/backbones/efficientrep.py b/luxonis_train/nodes/backbones/efficientrep/efficientrep.py similarity index 53% rename from luxonis_train/nodes/backbones/efficientrep.py rename to luxonis_train/nodes/backbones/efficientrep/efficientrep.py index be558620..0143855c 100644 --- a/luxonis_train/nodes/backbones/efficientrep.py +++ b/luxonis_train/nodes/backbones/efficientrep/efficientrep.py @@ -1,11 +1,5 @@ -"""Implementation of the EfficientRep backbone. - -Adapted from U{YOLOv6: A Single-Stage Object Detection Framework for Industrial -Applications}. 
-""" - import logging -from typing import Literal +from typing import Any from torch import Tensor, nn @@ -15,63 +9,68 @@ RepVGGBlock, SpatialPyramidPoolingBlock, ) -from luxonis_train.utils.general import make_divisible +from luxonis_train.utils import make_divisible + +from .variants import VariantLiteral, get_variant logger = logging.getLogger(__name__) class EfficientRep(BaseNode[Tensor, list[Tensor]]): + in_channels: int + def __init__( self, - variant: Literal["s", "n", "m", "l"] = "n", + variant: VariantLiteral = "nano", channels_list: list[int] | None = None, - num_repeats: list[int] | None = None, - depth_mul: float = 0.33, - width_mul: float = 0.25, - **kwargs, + n_repeats: list[int] | None = None, + depth_mul: float | None = None, + width_mul: float | None = None, + **kwargs: Any, ): - """EfficientRep backbone. - - @type variant: Literal["s", "n", "m", "l"] - @param variant: EfficientRep variant. Defaults to "n". + """Implementation of the EfficientRep backbone. + + Adapted from U{YOLOv6: A Single-Stage Object Detection Framework + for Industrial Applications + }. + + @type variant: Literal["n", "nano", "s", "small", "m", "medium", "l", "large"] + @param variant: EfficientRep variant. Defaults to "nano". + The variant determines the depth and width multipliers. + The depth multiplier determines the number of blocks in each stage and the width multiplier determines the number of channels. + The following variants are available: + - "n" or "nano" (default): depth_multiplier=0.33, width_multiplier=0.25 + - "s" or "small": depth_multiplier=0.33, width_multiplier=0.50 + - "m" or "medium": depth_multiplier=0.60, width_multiplier=0.75 + - "l" or "large": depth_multiplier=1.0, width_multiplier=1.0 @type channels_list: list[int] | None @param channels_list: List of number of channels for each block. If unspecified, defaults to [64, 128, 256, 512, 1024]. - @type num_repeats: list[int] | None - @param num_repeats: List of number of repeats of RepVGGBlock. 
If unspecified, + @type n_repeats: list[int] | None + @param n_repeats: List of number of repeats of RepVGGBlock. If unspecified, defaults to [1, 6, 12, 18, 6]. @type depth_mul: float - @param depth_mul: Depth multiplier. Depending on the variant, defaults to 0.33. + @param depth_mul: Depth multiplier. If provided, overrides the variant value. @type width_mul: float - @param width_mul: Width multiplier. Depending on the variant, defaults to 0.25. - @type kwargs: Any - @param kwargs: Additional arguments to pass to L{BaseNode}. + @param width_mul: Width multiplier. If provided, overrides the variant value. """ super().__init__(**kwargs) - if variant not in EFFICIENTREP_VARIANTS: - raise ValueError( - f"EfficientRep model variant should be in {list(EFFICIENTREP_VARIANTS.keys())}" - ) - - ( - depth_mul, - width_mul, - ) = EFFICIENTREP_VARIANTS[variant] + var = get_variant(variant) + depth_mul = depth_mul or var.depth_multiplier + width_mul = width_mul or var.width_multiplier channels_list = channels_list or [64, 128, 256, 512, 1024] - num_repeats = num_repeats or [1, 6, 12, 18, 6] - channels_list = [make_divisible(i * width_mul, 8) for i in channels_list] - num_repeats = [ - (max(round(i * depth_mul), 1) if i > 1 else i) for i in num_repeats + n_repeats = n_repeats or [1, 6, 12, 18, 6] + channels_list = [ + make_divisible(i * width_mul, 8) for i in channels_list + ] + n_repeats = [ + (max(round(i * depth_mul), 1) if i > 1 else i) for i in n_repeats ] - - in_channels = self.in_channels - if not isinstance(in_channels, int): - raise ValueError("EfficientRep module expects only one input.") self.repvgg_encoder = RepVGGBlock( - in_channels=in_channels, + in_channels=self.in_channels, out_channels=channels_list[0], kernel_size=3, stride=2, @@ -90,7 +89,7 @@ def __init__( block=RepVGGBlock, in_channels=channels_list[i + 1], out_channels=channels_list[i + 1], - num_blocks=num_repeats[i + 1], + n_blocks=n_repeats[i + 1], ), ) self.blocks.append(curr_block) @@ -107,27 +106,20 
@@ def set_export_mode(self, mode: bool = True) -> None: """Reparametrizes instances of L{RepVGGBlock} in the network. @type mode: bool - @param mode: Whether to set the export mode. Defaults to C{True}. + @param mode: Whether to set the export mode. Defaults to + C{True}. """ super().set_export_mode(mode) if self.export: - logger.info("Reparametrizing EfficientRep.") + logger.info("Reparametrizing 'EfficientRep'.") for module in self.modules(): if isinstance(module, RepVGGBlock): module.reparametrize() def forward(self, inputs: Tensor) -> list[Tensor]: - outputs = [] + outputs: list[Tensor] = [] x = self.repvgg_encoder(inputs) for block in self.blocks: x = block(x) outputs.append(x) return outputs - - -EFFICIENTREP_VARIANTS = { - "n": (0.33, 0.25), - "s": (0.33, 0.50), - "m": (0.60, 0.75), - "l": (1.0, 1.0), -} diff --git a/luxonis_train/nodes/backbones/efficientrep/variants.py b/luxonis_train/nodes/backbones/efficientrep/variants.py new file mode 100644 index 00000000..7ced749e --- /dev/null +++ b/luxonis_train/nodes/backbones/efficientrep/variants.py @@ -0,0 +1,44 @@ +from typing import Literal, TypeAlias + +from pydantic import BaseModel + +VariantLiteral: TypeAlias = Literal[ + "n", "nano", "s", "small", "m", "medium", "l", "large" +] + + +class EfficientRepVariant(BaseModel): + depth_multiplier: float + width_multiplier: float + + +def get_variant(variant: VariantLiteral) -> EfficientRepVariant: + variants = { + "n": EfficientRepVariant( + depth_multiplier=0.33, + width_multiplier=0.25, + ), + "s": EfficientRepVariant( + depth_multiplier=0.33, + width_multiplier=0.50, + ), + "m": EfficientRepVariant( + depth_multiplier=0.60, + width_multiplier=0.75, + ), + "l": EfficientRepVariant( + depth_multiplier=1.0, + width_multiplier=1.0, + ), + } + variants["nano"] = variants["n"] + variants["small"] = variants["s"] + variants["medium"] = variants["m"] + variants["large"] = variants["l"] + + if variant not in variants: # pragma: no cover + raise ValueError( + 
f"EfficientRep variant should be one of " + f"{list(variants.keys())}, got '{variant}'." + ) + return variants[variant] diff --git a/luxonis_train/nodes/backbones/micronet.py b/luxonis_train/nodes/backbones/micronet.py deleted file mode 100644 index 074dce2a..00000000 --- a/luxonis_train/nodes/backbones/micronet.py +++ /dev/null @@ -1,842 +0,0 @@ -from typing import Literal - -import torch -from torch import Tensor, nn - -from luxonis_train.nodes.activations import HSigmoid, HSwish -from luxonis_train.nodes.base_node import BaseNode -from luxonis_train.nodes.blocks import ConvModule - - -class MicroNet(BaseNode[Tensor, list[Tensor]]): - """ - - TODO: DOCS - """ - - def __init__(self, variant: Literal["M1", "M2", "M3"] = "M1", **kwargs): - """MicroNet backbone. - - @type variant: Literal["M1", "M2", "M3"] - @param variant: Model variant to use. Defaults to "M1". - """ - super().__init__(**kwargs) - - if variant not in MICRONET_VARIANTS_SETTINGS: - raise ValueError( - f"MicroNet model variant should be in {list(MICRONET_VARIANTS_SETTINGS.keys())}" - ) - - self.inplanes = 64 - ( - in_channels, - stem_groups, - _, - init_a, - init_b, - out_indices, - channels, - cfgs, - ) = MICRONET_VARIANTS_SETTINGS[variant] - self.out_indices = out_indices - self.channels = channels - - self.features = nn.ModuleList([Stem(3, 2, stem_groups)]) - - for ( - stride, - out_channels, - kernel_size, - c1, - c2, - g1, - g2, - _, - g3, - g4, - y1, - y2, - y3, - r, - ) in cfgs: - self.features.append( - MicroBlock( - in_channels, - out_channels, - kernel_size, - stride, - (c1, c2), - (g1, g2), - (g3, g4), - (y1, y2, y3), - r, - init_a, - init_b, - ) - ) - in_channels = out_channels - - def forward(self, x: Tensor) -> list[Tensor]: - outs = [] - for m in self.features: - x = m(x) - outs.append(x) - return outs - - -class MicroBlock(nn.Module): - def __init__( - self, - in_channels: int, - out_channels: int, - kernel_size: int = 3, - stride: int = 1, - t1: tuple[int, int] = (2, 2), - gs1: 
tuple[int, int] = (0, 6), - groups_1x1: tuple[int, int] = (1, 1), - dy: tuple[int, int, int] = (2, 0, 1), - r: int = 1, - init_a: tuple[float, float] = (1.0, 1.0), - init_b: tuple[float, float] = (0.0, 0.0), - ): - super().__init__() - - self.identity = stride == 1 and in_channels == out_channels - y1, y2, y3 = dy - g1, g2 = groups_1x1 - reduction = 8 * r - intermediate_channels = in_channels * t1[0] * t1[1] - - if gs1[0] == 0: - self.layers = nn.Sequential( - DepthSpatialSepConv(in_channels, t1, kernel_size, stride), - DYShiftMax( - intermediate_channels, - intermediate_channels, - init_a, - init_b, - True if y2 == 2 else False, - gs1[1], - reduction, - ) - if y2 > 0 - else nn.ReLU6(True), - ChannelShuffle(gs1[1]), - ChannelShuffle(intermediate_channels // 2) - if y2 != 0 - else nn.Sequential(), - ConvModule( - in_channels=intermediate_channels, - out_channels=out_channels, - kernel_size=1, - groups=g1, - activation=nn.Identity(), - ), - DYShiftMax( - out_channels, - out_channels, - (1.0, 0.0), - (0.0, 0.0), - False, - g2, - reduction // 2, - ) - if y3 > 0 - else nn.Sequential(), - ChannelShuffle(g2), - ChannelShuffle(out_channels // 2) - if out_channels % 2 == 0 and y3 != 0 - else nn.Sequential(), - ) - elif g2 == 0: - self.layers = nn.Sequential( - ConvModule( - in_channels=in_channels, - out_channels=intermediate_channels, - kernel_size=1, - groups=gs1[0], - activation=nn.Identity(), - ), - DYShiftMax( - intermediate_channels, - intermediate_channels, - (1.0, 0.0), - (0.0, 0.0), - False, - gs1[1], - reduction, - ) - if y3 > 0 - else nn.Sequential(), - ) - else: - self.layers = nn.Sequential( - ConvModule( - in_channels=in_channels, - out_channels=intermediate_channels, - kernel_size=1, - groups=gs1[0], - activation=nn.Identity(), - ), - DYShiftMax( - intermediate_channels, - intermediate_channels, - init_a, - init_b, - True if y1 == 2 else False, - gs1[1], - reduction, - ) - if y1 > 0 - else nn.ReLU6(True), - ChannelShuffle(gs1[1]), - 
DepthSpatialSepConv(intermediate_channels, (1, 1), kernel_size, stride), - nn.Sequential(), - DYShiftMax( - intermediate_channels, - intermediate_channels, - init_a, - init_b, - True if y2 == 2 else False, - gs1[1], - reduction, - True, - ) - if y2 > 0 - else nn.ReLU6(True), - ChannelShuffle(intermediate_channels // 4) - if y1 != 0 and y2 != 0 - else nn.Sequential() - if y1 == 0 and y2 == 0 - else ChannelShuffle(intermediate_channels // 2), - ConvModule( - in_channels=intermediate_channels, - out_channels=out_channels, - kernel_size=1, - groups=g1, - activation=nn.Identity(), - ), - DYShiftMax( - out_channels, - out_channels, - (1.0, 0.0), - (0.0, 0.0), - False, - g2, - reduction=reduction // 2 - if out_channels < intermediate_channels - else reduction, - ) - if y3 > 0 - else nn.Sequential(), - ChannelShuffle(g2), - ChannelShuffle(out_channels // 2) if y3 != 0 else nn.Sequential(), - ) - - def forward(self, inputs: Tensor) -> Tensor: - out = self.layers(inputs) - if self.identity: - out += inputs - return out - - -class ChannelShuffle(nn.Module): - def __init__(self, groups: int): - super().__init__() - self.groups = groups - - def forward(self, x: Tensor) -> Tensor: - b, c, h, w = x.size() - channels_per_group = c // self.groups - x = x.view(b, self.groups, channels_per_group, h, w) - x = torch.transpose(x, 1, 2).contiguous() - out = x.view(b, -1, h, w) - return out - - -class DYShiftMax(nn.Module): - def __init__( - self, - in_channels: int, - out_channels: int, - init_a: tuple[float, float] = (0.0, 0.0), - init_b: tuple[float, float] = (0.0, 0.0), - act_relu: bool = True, - g: int = 6, - reduction: int = 4, - expansion: bool = False, - ): - super().__init__() - self.exp: Literal[2, 4] = 4 if act_relu else 2 - self.init_a = init_a - self.init_b = init_b - self.out_channels = out_channels - - self.avg_pool = nn.Sequential(nn.Sequential(), nn.AdaptiveAvgPool2d(1)) - - squeeze = self._make_divisible(in_channels // reduction, 4) - - self.fc = nn.Sequential( - 
nn.Linear(in_channels, squeeze), - nn.ReLU(True), - nn.Linear(squeeze, out_channels * self.exp), - HSigmoid(), - ) - - if g != 1 and expansion: - g = in_channels // g - - gc = in_channels // g - index = Tensor(range(in_channels)).view(1, in_channels, 1, 1) - index = index.view(1, g, gc, 1, 1) - indexgs = torch.split(index, [1, g - 1], dim=1) - indexgs = torch.cat([indexgs[1], indexgs[0]], dim=1) - indexs = torch.split(indexgs, [1, gc - 1], dim=2) - indexs = torch.cat([indexs[1], indexs[0]], dim=2) - self.index = indexs.view(in_channels).long() - - def forward(self, x: Tensor) -> Tensor: - B, C, _, _ = x.shape - x_out = x - - y = self.avg_pool(x).view(B, C) - y = self.fc(y).view(B, -1, 1, 1) - y = (y - 0.5) * 4.0 - - x2 = x_out[:, self.index, :, :] - - if self.exp == 4: - a1, b1, a2, b2 = torch.split(y, self.out_channels, dim=1) - - a1 = a1 + self.init_a[0] - a2 = a2 + self.init_b[1] - b1 = b1 + self.init_b[0] - b2 = b2 + self.init_b[1] - - z1 = x_out * a1 + x2 * b1 - z2 = x_out * a2 + x2 * b2 - - out = torch.max(z1, z2) - - elif self.exp == 2: - a1, b1 = torch.split(y, self.out_channels, dim=1) - a1 = a1 + self.init_a[0] - b1 = b1 + self.init_b[0] - out = x_out * a1 + x2 * b1 - else: - raise RuntimeError("Expansion should be 2 or 4.") - - return out - - def _make_divisible(self, v, divisor, min_value=None): - if min_value is None: - min_value = divisor - new_v = max(min_value, int(v + divisor / 2) // divisor * divisor) - # Make sure that round down does not go down by more than 10%. 
- if new_v < 0.9 * v: - new_v += divisor - return new_v - - -class SwishLinear(nn.Module): - def __init__(self, in_channels: int, out_channels: int): - super().__init__() - self.linear = nn.Sequential( - nn.Linear(in_channels, out_channels), nn.BatchNorm1d(out_channels), HSwish() - ) - - def forward(self, x: Tensor) -> Tensor: - return self.linear(x) - - -class SpatialSepConvSF(nn.Module): - def __init__( - self, in_channels: int, outs: tuple[int, int], kernel_size: int, stride: int - ): - super().__init__() - out_channels1, out_channels2 = outs - self.conv = nn.Sequential( - nn.Conv2d( - in_channels, - out_channels1, - (kernel_size, 1), - (stride, 1), - (kernel_size // 2, 0), - bias=False, - ), - nn.BatchNorm2d(out_channels1), - nn.Conv2d( - out_channels1, - out_channels1 * out_channels2, - (1, kernel_size), - (1, stride), - (0, kernel_size // 2), - groups=out_channels1, - bias=False, - ), - nn.BatchNorm2d(out_channels1 * out_channels2), - ChannelShuffle(out_channels1), - ) - - def forward(self, x: Tensor) -> Tensor: - return self.conv(x) - - -class Stem(nn.Module): - def __init__(self, in_channels: int, stride: int, outs: tuple[int, int] = (4, 4)): - super().__init__() - self.stem = nn.Sequential( - SpatialSepConvSF(in_channels, outs, 3, stride), nn.ReLU6(True) - ) - - def forward(self, x: Tensor) -> Tensor: - return self.stem(x) - - -class DepthSpatialSepConv(nn.Module): - def __init__( - self, in_channels: int, expand: tuple[int, int], kernel_size: int, stride: int - ): - super().__init__() - exp1, exp2 = expand - intermediate_channels = in_channels * exp1 - out_channels = in_channels * exp1 * exp2 - - self.conv = nn.Sequential( - nn.Conv2d( - in_channels, - intermediate_channels, - (kernel_size, 1), - (stride, 1), - (kernel_size // 2, 0), - groups=in_channels, - bias=False, - ), - nn.BatchNorm2d(intermediate_channels), - nn.Conv2d( - intermediate_channels, - out_channels, - (1, kernel_size), - (1, stride), - (0, kernel_size // 2), - 
groups=intermediate_channels, - bias=False, - ), - nn.BatchNorm2d(out_channels), - ) - - def forward(self, x: Tensor) -> Tensor: - return self.conv(x) - - -MICRONET_VARIANTS_SETTINGS = { - "M1": [ - 6, # stem_ch - [3, 2], # stem_groups - 960, # out_ch - [1.0, 1.0], # init_a - [0.0, 0.0], # init_b - [1, 2, 4, 7], # out indices - [8, 16, 32, 576], - [ - # s, c, ks, c1, c2, g1, g2, c3, g3, g4, y1, y2, y3, r - [2, 8, 3, 2, 2, 0, 6, 8, 2, 2, 2, 0, 1, 1], - [2, 16, 3, 2, 2, 0, 8, 16, 4, 4, 2, 2, 1, 1], - [ - 2, - 16, - 5, - 2, - 2, - 0, - 16, - 16, - 4, - 4, - 2, - 2, - 1, - 1, - ], - [ - 1, - 32, - 5, - 1, - 6, - 4, - 4, - 32, - 4, - 4, - 2, - 2, - 1, - 1, - ], - [ - 2, - 64, - 5, - 1, - 6, - 8, - 8, - 64, - 8, - 8, - 2, - 2, - 1, - 1, - ], - [ - 1, - 96, - 3, - 1, - 6, - 8, - 8, - 96, - 8, - 8, - 2, - 2, - 1, - 2, - ], - [1, 576, 3, 1, 6, 12, 12, 0, 0, 0, 2, 2, 1, 2], # 96->96(4,24)->576 - ], - ], - "M2": [ - 8, - [4, 2], - 1024, - [1.0, 1.0], - [0.0, 0.0], - [1, 3, 6, 9], - [12, 24, 64, 768], - [ - # s, c, ks, c1, c2, g1, g2, c3, g3, g4, y1, y2, y3, r - [ - 2, - 12, - 3, - 2, - 2, - 0, - 8, - 12, - 4, - 4, - 2, - 0, - 1, - 1, - ], - [ - 2, - 16, - 3, - 2, - 2, - 0, - 12, - 16, - 4, - 4, - 2, - 2, - 1, - 1, - ], - [ - 1, - 24, - 3, - 2, - 2, - 0, - 16, - 24, - 4, - 4, - 2, - 2, - 1, - 1, - ], - [ - 2, - 32, - 5, - 1, - 6, - 6, - 6, - 32, - 4, - 4, - 2, - 2, - 1, - 1, - ], - [ - 1, - 32, - 5, - 1, - 6, - 8, - 8, - 32, - 4, - 4, - 2, - 2, - 1, - 2, - ], - [ - 1, - 64, - 5, - 1, - 6, - 8, - 8, - 64, - 8, - 8, - 2, - 2, - 1, - 2, - ], - [ - 2, - 96, - 5, - 1, - 6, - 8, - 8, - 96, - 8, - 8, - 2, - 2, - 1, - 2, - ], - [ - 1, - 128, - 3, - 1, - 6, - 12, - 12, - 128, - 8, - 8, - 2, - 2, - 1, - 2, - ], - [1, 768, 3, 1, 6, 16, 16, 0, 0, 0, 2, 2, 1, 2], - ], - ], - "M3": [ - 12, - [4, 3], - 1024, - [1.0, 0.5], - [0.0, 0.5], - [1, 3, 8, 12], - [16, 24, 80, 864], - [ - # s, c, ks, c1, c2, g1, g2, c3, g3, g4, y1, y2, y3, r - [ - 2, - 16, - 3, - 2, - 2, - 0, - 12, - 16, - 4, - 4, - 
0, - 2, - 0, - 1, - ], - [ - 2, - 24, - 3, - 2, - 2, - 0, - 16, - 24, - 4, - 4, - 0, - 2, - 0, - 1, - ], - [ - 1, - 24, - 3, - 2, - 2, - 0, - 24, - 24, - 4, - 4, - 0, - 2, - 0, - 1, - ], - [ - 2, - 32, - 5, - 1, - 6, - 6, - 6, - 32, - 4, - 4, - 0, - 2, - 0, - 1, - ], - [ - 1, - 32, - 5, - 1, - 6, - 8, - 8, - 32, - 4, - 4, - 0, - 2, - 0, - 2, - ], - [ - 1, - 64, - 5, - 1, - 6, - 8, - 8, - 48, - 8, - 8, - 0, - 2, - 0, - 2, - ], - [ - 1, - 80, - 5, - 1, - 6, - 8, - 8, - 80, - 8, - 8, - 0, - 2, - 0, - 2, - ], - [ - 1, - 80, - 5, - 1, - 6, - 10, - 10, - 80, - 8, - 8, - 0, - 2, - 0, - 2, - ], - [ - 2, - 120, - 5, - 1, - 6, - 10, - 10, - 120, - 10, - 10, - 0, - 2, - 0, - 2, - ], - [ - 1, - 120, - 5, - 1, - 6, - 12, - 12, - 120, - 10, - 10, - 0, - 2, - 0, - 2, - ], - [ - 1, - 144, - 3, - 1, - 6, - 12, - 12, - 144, - 12, - 12, - 0, - 2, - 0, - 2, - ], - [1, 864, 3, 1, 6, 12, 12, 0, 0, 0, 0, 2, 0, 2], - ], - ], -} diff --git a/luxonis_train/nodes/backbones/micronet/__init__.py b/luxonis_train/nodes/backbones/micronet/__init__.py new file mode 100644 index 00000000..5b41ece3 --- /dev/null +++ b/luxonis_train/nodes/backbones/micronet/__init__.py @@ -0,0 +1,3 @@ +from .micronet import MicroNet + +__all__ = ["MicroNet"] diff --git a/luxonis_train/nodes/backbones/micronet/blocks.py b/luxonis_train/nodes/backbones/micronet/blocks.py new file mode 100644 index 00000000..3da5e15e --- /dev/null +++ b/luxonis_train/nodes/backbones/micronet/blocks.py @@ -0,0 +1,515 @@ +from typing import Literal + +import torch +from torch import Tensor, nn + +from luxonis_train.nodes.activations import HSigmoid +from luxonis_train.nodes.blocks import ConvModule + + +class MicroBlock(nn.Module): + def __init__( + self, + in_channels: int, + out_channels: int, + kernel_size: int = 3, + stride: int = 1, + expansion_ratios: tuple[int, int] = (2, 2), + groups_1: tuple[int, int] = (0, 6), + groups_2: tuple[int, int] = (1, 1), + use_dynamic_shift: tuple[int, int, int] = (2, 0, 1), + reduction_factor: int = 
1, + init_a: tuple[float, float] = (1.0, 1.0), + init_b: tuple[float, float] = (0.0, 0.0), + ): + """ + MicroBlock: The basic building block of MicroNet. + + This block implements the Micro-Factorized Convolution and Dynamic Shift-Max activation. + It can be configured to use different combinations of these components based on the network design. + + @type in_channels: int + @param in_channels: Number of input channels. + @type out_channels: int + @param out_channels: Number of output channels. + @type kernel_size: int + @param kernel_size: Size of the convolution kernel. Defaults to 3. + @type stride: int + @param stride: Stride of the convolution. Defaults to 1. + @type expansion_ratios: tuple[int, int] + @param expansion_ratios: Expansion ratios for the intermediate channels. Defaults to (2, 2). + @type groups_1: tuple[int, int] + @param groups_1: Groups for the first set of convolutions. Defaults to (0, 6). + @type groups_2: tuple[int, int] + @param groups_2: Groups for the second set of convolutions. Defaults to (1, 1). + @type use_dynamic_shift: tuple[int, int, int] + @param use_dynamic_shift: Flags to use Dynamic Shift-Max in different positions. Defaults to (2, 0, 1). + @type reduction_factor: int + @param reduction_factor: Reduction factor for the squeeze-and-excitation-like operation. Defaults to 1. + @type init_a: tuple[float, float] + @param init_a: Initialization parameters for Dynamic Shift-Max. Defaults to (1.0, 1.0). + @type init_b: tuple[float, float] + @param init_b: Initialization parameters for Dynamic Shift-Max. Defaults to (0.0, 0.0). 
+ """ + super().__init__() + + self.use_residual = stride == 1 and in_channels == out_channels + self.expansion_ratios = expansion_ratios + use_dy1, use_dy2, use_dy3 = use_dynamic_shift + group1, group2 = groups_2 + reduction = 8 * reduction_factor + intermediate_channels = ( + in_channels * expansion_ratios[0] * expansion_ratios[1] + ) + + if groups_1[0] == 0: + self.layers = self._create_lite_block( + in_channels, + out_channels, + intermediate_channels, + kernel_size, + stride, + groups_1[1], + group1, + group2, + use_dy2, + use_dy3, + reduction, + init_a, + init_b, + ) + elif group2 == 0: + self.layers = self._create_transition_block( + in_channels, + intermediate_channels, + groups_1[0], + groups_1[1], + use_dy3, + reduction, + ) + else: + self.layers = self._create_full_block( + in_channels, + out_channels, + intermediate_channels, + kernel_size, + stride, + groups_1, + group1, + group2, + use_dy1, + use_dy2, + use_dy3, + reduction, + init_a, + init_b, + ) + + def _create_lite_block( + self, + in_channels: int, + out_channels: int, + intermediate_channels: int, + kernel_size: int, + stride: int, + group1: int, + group2: int, + group3: int, + use_dy2: int, + use_dy3: int, + reduction: int, + init_a: tuple[float, float], + init_b: tuple[float, float], + ) -> nn.Sequential: + return nn.Sequential( + DepthSpatialSepConv( + in_channels, self.expansion_ratios, kernel_size, stride + ), + DYShiftMax( + intermediate_channels, + intermediate_channels, + init_a, + init_b, + True if use_dy2 == 2 else False, + group1, + reduction, + ) + if use_dy2 > 0 + else nn.ReLU6(True), + ChannelShuffle(group1), + ChannelShuffle(intermediate_channels // 2) + if use_dy2 != 0 + else nn.Sequential(), + ConvModule( + in_channels=intermediate_channels, + out_channels=out_channels, + kernel_size=1, + groups=group2, + activation=nn.Identity(), + ), + DYShiftMax( + out_channels, + out_channels, + (1.0, 0.0), + (0.0, 0.0), + False, + group3, + reduction // 2, + ) + if use_dy3 > 0 + else 
nn.Sequential(), + ChannelShuffle(group3), + ChannelShuffle(out_channels // 2) + if out_channels % 2 == 0 and use_dy3 != 0 + else nn.Sequential(), + ) + + def _create_transition_block( + self, + in_channels: int, + intermediate_channels: int, + group1: int, + group2: int, + use_dy3: int, + reduction: int, + ) -> nn.Sequential: + return nn.Sequential( + ConvModule( + in_channels=in_channels, + out_channels=intermediate_channels, + kernel_size=1, + groups=group1, + activation=nn.Identity(), + ), + DYShiftMax( + intermediate_channels, + intermediate_channels, + (1.0, 0.0), + (0.0, 0.0), + False, + group2, + reduction, + ) + if use_dy3 > 0 + else nn.Sequential(), + ) + + def _create_full_block( + self, + in_channels: int, + out_channels: int, + intermediate_channels: int, + kernel_size: int, + stride: int, + groups_1: tuple[int, int], + group1: int, + group2: int, + use_dy1: int, + use_dy2: int, + use_dy3: int, + reduction: int, + init_a: tuple[float, float], + init_b: tuple[float, float], + ) -> nn.Sequential: + return nn.Sequential( + ConvModule( + in_channels=in_channels, + out_channels=intermediate_channels, + kernel_size=1, + groups=groups_1[0], + activation=nn.Identity(), + ), + DYShiftMax( + intermediate_channels, + intermediate_channels, + init_a, + init_b, + True if use_dy1 == 2 else False, + groups_1[1], + reduction, + ) + if use_dy1 > 0 + else nn.ReLU6(True), + ChannelShuffle(groups_1[1]), + DepthSpatialSepConv( + intermediate_channels, (1, 1), kernel_size, stride + ), + DYShiftMax( + intermediate_channels, + intermediate_channels, + init_a, + init_b, + True if use_dy2 == 2 else False, + groups_1[1], + reduction, + True, + ) + if use_dy2 > 0 + else nn.ReLU6(True), + ChannelShuffle(intermediate_channels // 4) + if use_dy1 != 0 and use_dy2 != 0 + else nn.Sequential() + if use_dy1 == 0 and use_dy2 == 0 + else ChannelShuffle(intermediate_channels // 2), + ConvModule( + in_channels=intermediate_channels, + out_channels=out_channels, + kernel_size=1, + 
groups=group1, + activation=nn.Identity(), + ), + DYShiftMax( + out_channels, + out_channels, + (1.0, 0.0), + (0.0, 0.0), + False, + group2, + reduction=reduction // 2 + if out_channels < intermediate_channels + else reduction, + ) + if use_dy3 > 0 + else nn.Sequential(), + ChannelShuffle(group2), + ChannelShuffle(out_channels // 2) + if use_dy3 != 0 + else nn.Sequential(), + ) + + def forward(self, inputs: Tensor) -> Tensor: + out = self.layers(inputs) + if self.use_residual: + out += inputs + return out + + +class ChannelShuffle(nn.Module): + def __init__(self, groups: int): + """Shuffle the channels of the input tensor. + + This operation is used to mix information between groups after + grouped convolutions. + + @type groups: int + @param groups: Number of groups to divide the channels into + before shuffling. + """ + + super().__init__() + self.groups = groups + + def forward(self, x: Tensor) -> Tensor: + batch_size, channels, height, width = x.size() + channels_per_group = channels // self.groups + x = x.view(batch_size, self.groups, channels_per_group, height, width) + x = torch.transpose(x, 1, 2).contiguous() + out = x.view(batch_size, -1, height, width) + return out + + +class DYShiftMax(nn.Module): + def __init__( + self, + in_channels: int, + out_channels: int, + init_a: tuple[float, float] = (0.0, 0.0), + init_b: tuple[float, float] = (0.0, 0.0), + use_relu: bool = True, + groups: int = 6, + reduction: int = 4, + expansion: bool = False, + ): + """Dynamic Shift-Max activation function. + + This module implements the Dynamic Shift-Max operation, which + adaptively fuses and selects channel information based on the + input. + + @type in_channels: int + @param in_channels: Number of input channels. + @type out_channels: int + @param out_channels: Number of output channels. + @type init_a: tuple[float, float] + @param init_a: Initial values for the 'a' parameters. Defaults + to (0.0, 0.0). 
+ @type init_b: tuple[float, float] + @param init_b: Initial values for the 'b' parameters. Defaults + to (0.0, 0.0). + @type use_relu: bool + @param use_relu: Whether to use ReLU activation. Defaults to + True. + @type groups: int + @param groups: Number of groups for channel shuffling. Defaults + to 6. + @type reduction: int + @param reduction: Reduction factor for the squeeze operation. + Defaults to 4. + @type expansion: bool + @param expansion: Whether to use expansion in grouping. Defaults + to False. + """ + super().__init__() + self.exp: Literal[2, 4] = 4 if use_relu else 2 + self.init_a = init_a + self.init_b = init_b + self.out_channels = out_channels + + self.avg_pool = nn.AdaptiveAvgPool2d(1) + + squeeze_channels = self._make_divisible(in_channels // reduction, 4) + + self.fc = nn.Sequential( + nn.Linear(in_channels, squeeze_channels), + nn.ReLU(True), + nn.Linear(squeeze_channels, out_channels * self.exp), + HSigmoid(), + ) + + if groups != 1 and expansion: + groups = in_channels // groups + + channels_per_group = in_channels // groups + index = torch.arange(in_channels).view(1, in_channels, 1, 1) + index = index.view(1, groups, channels_per_group, 1, 1) + index_groups = torch.split(index, [1, groups - 1], dim=1) + index_groups = torch.cat([index_groups[1], index_groups[0]], dim=1) + index_splits = torch.split( + index_groups, [1, channels_per_group - 1], dim=2 + ) + index_splits = torch.cat([index_splits[1], index_splits[0]], dim=2) + self.index = index_splits.view(in_channels).long() + + def forward(self, x: Tensor) -> Tensor: + batch_size, channels, _, _ = x.shape + x_out = x + + y = self.avg_pool(x).view(batch_size, channels) + y = self.fc(y).view(batch_size, -1, 1, 1) + y = (y - 0.5) * 4.0 + + x2 = x_out[:, self.index, :, :] + + if self.exp == 4: + a1, b1, a2, b2 = torch.split(y, self.out_channels, dim=1) + + a1 = a1 + self.init_a[0] + a2 = a2 + self.init_a[1] + b1 = b1 + self.init_b[0] + b2 = b2 + self.init_b[1] + + z1 = x_out * a1 + x2 * b1 + z2
= x_out * a2 + x2 * b2 + + out = torch.max(z1, z2) + + elif self.exp == 2: + a1, b1 = torch.split(y, self.out_channels, dim=1) + a1 = a1 + self.init_a[0] + b1 = b1 + self.init_b[0] + out = x_out * a1 + x2 * b1 + else: + raise RuntimeError("Expansion should be 2 or 4.") + + return out + + def _make_divisible( + self, value: int, divisor: int, min_value: int | None = None + ) -> int: + if min_value is None: + min_value = divisor + new_v = max(min_value, int(value + divisor / 2) // divisor * divisor) + # Make sure that round down does not go down by more than 10%. + if new_v < 0.9 * value: + new_v += divisor + return new_v + + +class SpatialSepConvSF(nn.Module): + def __init__( + self, + in_channels: int, + outs: tuple[int, int], + kernel_size: int, + stride: int, + ): + super().__init__() + out_channels1, out_channels2 = outs + self.conv = nn.Sequential( + nn.Conv2d( + in_channels, + out_channels1, + kernel_size=(kernel_size, 1), + stride=(stride, 1), + padding=(kernel_size // 2, 0), + bias=False, + ), + nn.BatchNorm2d(out_channels1), + nn.Conv2d( + out_channels1, + out_channels1 * out_channels2, + kernel_size=(1, kernel_size), + stride=(1, stride), + padding=(0, kernel_size // 2), + groups=out_channels1, + bias=False, + ), + nn.BatchNorm2d(out_channels1 * out_channels2), + ChannelShuffle(out_channels1), + ) + + def forward(self, x: Tensor) -> Tensor: + return self.conv(x) + + +class Stem(nn.Module): + def __init__( + self, in_channels: int, stride: int, outs: tuple[int, int] = (4, 4) + ): + super().__init__() + self.stem = nn.Sequential( + SpatialSepConvSF(in_channels, outs, 3, stride), nn.ReLU6(True) + ) + + def forward(self, x: Tensor) -> Tensor: + return self.stem(x) + + +class DepthSpatialSepConv(nn.Module): + def __init__( + self, + in_channels: int, + expand: tuple[int, int], + kernel_size: int, + stride: int, + ): + super().__init__() + exp1, exp2 = expand + intermediate_channels = in_channels * exp1 + out_channels = in_channels * exp1 * exp2 + + self.conv = 
nn.Sequential( + nn.Conv2d( + in_channels, + intermediate_channels, + (kernel_size, 1), + (stride, 1), + padding=(kernel_size // 2, 0), + groups=in_channels, + bias=False, + ), + nn.BatchNorm2d(intermediate_channels), + nn.Conv2d( + intermediate_channels, + out_channels, + (1, kernel_size), + (1, stride), + padding=(0, kernel_size // 2), + groups=intermediate_channels, + bias=False, + ), + nn.BatchNorm2d(out_channels), + ) + + def forward(self, x: Tensor) -> Tensor: + return self.conv(x) diff --git a/luxonis_train/nodes/backbones/micronet/micronet.py b/luxonis_train/nodes/backbones/micronet/micronet.py new file mode 100644 index 00000000..82df5cb3 --- /dev/null +++ b/luxonis_train/nodes/backbones/micronet/micronet.py @@ -0,0 +1,62 @@ +from typing import Any, Literal + +from torch import Tensor, nn + +from luxonis_train.nodes.base_node import BaseNode + +from .blocks import MicroBlock, Stem +from .variants import get_variant + + +class MicroNet(BaseNode[Tensor, list[Tensor]]): + def __init__( + self, + variant: Literal["M1", "M2", "M3"] = "M1", + out_indices: list[int] | None = None, + **kwargs: Any, + ): + """MicroNet backbone. + + This class creates the full MicroNet architecture based on the + specified variant. It consists of a stem layer followed by + multiple MicroBlocks. + + @type variant: Literal["M1", "M2", "M3"] + @param variant: Model variant to use. Defaults to "M1". + @type out_indices: list[int] | None + @param out_indices: Indices of the output layers. If provided, + overrides the variant value. 
+ """ + super().__init__(**kwargs) + + var = get_variant(variant) + self.out_indices = out_indices or var.out_indices + in_channels = var.stem_channels + + self.layers = nn.ModuleList([Stem(3, 2, var.stem_groups)]) + + for bc in var.block_configs: + self.layers.append( + MicroBlock( + in_channels, + bc.out_channels, + bc.kernel_size, + bc.stride, + bc.expand_ratio, + bc.groups_1, + bc.groups_2, + bc.dy_shifts, + bc.reduction_factor, + var.init_a, + var.init_b, + ) + ) + in_channels = bc.out_channels + + def forward(self, inputs: Tensor) -> list[Tensor]: + outs: list[Tensor] = [] + for i, layer in enumerate(self.layers): + inputs = layer(inputs) + if i in self.out_indices: + outs.append(inputs) + return outs diff --git a/luxonis_train/nodes/backbones/micronet/variants.py b/luxonis_train/nodes/backbones/micronet/variants.py new file mode 100644 index 00000000..22a8d552 --- /dev/null +++ b/luxonis_train/nodes/backbones/micronet/variants.py @@ -0,0 +1,344 @@ +from typing import Literal + +from pydantic import BaseModel + + +class MicroBlockConfig(BaseModel): + stride: int + out_channels: int + kernel_size: int + expand_ratio: tuple[int, int] + groups_1: tuple[int, int] + groups_2: tuple[int, int] + dy_shifts: tuple[int, int, int] + reduction_factor: int + + +class MicroNetVariant(BaseModel): + stem_channels: int + stem_groups: tuple[int, int] + init_a: tuple[float, float] + init_b: tuple[float, float] + out_indices: list[int] + block_configs: list[MicroBlockConfig] + + +M1 = MicroNetVariant( + stem_channels=6, + stem_groups=(3, 2), + init_a=(1.0, 1.0), + init_b=(0.0, 0.0), + out_indices=[1, 2, 4, 7], + block_configs=[ + MicroBlockConfig( + stride=2, + out_channels=8, + kernel_size=3, + expand_ratio=(2, 2), + groups_1=(0, 6), + groups_2=(2, 2), + dy_shifts=(2, 0, 1), + reduction_factor=1, + ), + MicroBlockConfig( + stride=2, + out_channels=16, + kernel_size=3, + expand_ratio=(2, 2), + groups_1=(0, 8), + groups_2=(4, 4), + dy_shifts=(2, 2, 1), + reduction_factor=1, + ), 
+ MicroBlockConfig( + stride=2, + out_channels=16, + kernel_size=5, + expand_ratio=(2, 2), + groups_1=(0, 16), + groups_2=(4, 4), + dy_shifts=(2, 2, 1), + reduction_factor=1, + ), + MicroBlockConfig( + stride=1, + out_channels=32, + kernel_size=5, + expand_ratio=(1, 6), + groups_1=(4, 4), + groups_2=(4, 4), + dy_shifts=(2, 2, 1), + reduction_factor=1, + ), + MicroBlockConfig( + stride=2, + out_channels=64, + kernel_size=5, + expand_ratio=(1, 6), + groups_1=(8, 8), + groups_2=(8, 8), + dy_shifts=(2, 2, 1), + reduction_factor=1, + ), + MicroBlockConfig( + stride=1, + out_channels=96, + kernel_size=3, + expand_ratio=(1, 6), + groups_1=(8, 8), + groups_2=(8, 8), + dy_shifts=(2, 2, 1), + reduction_factor=2, + ), + MicroBlockConfig( + stride=1, + out_channels=576, + kernel_size=3, + expand_ratio=(1, 6), + groups_1=(12, 12), + groups_2=(0, 0), + dy_shifts=(2, 2, 1), + reduction_factor=2, + ), + ], +) + +M2 = MicroNetVariant( + stem_channels=8, + stem_groups=(4, 2), + init_a=(1.0, 1.0), + init_b=(0.0, 0.0), + out_indices=[1, 3, 6, 9], + block_configs=[ + MicroBlockConfig( + stride=2, + out_channels=12, + kernel_size=3, + expand_ratio=(2, 2), + groups_1=(0, 8), + groups_2=(4, 4), + dy_shifts=(2, 0, 1), + reduction_factor=1, + ), + MicroBlockConfig( + stride=2, + out_channels=16, + kernel_size=3, + expand_ratio=(2, 2), + groups_1=(0, 12), + groups_2=(4, 4), + dy_shifts=(2, 2, 1), + reduction_factor=1, + ), + MicroBlockConfig( + stride=1, + out_channels=24, + kernel_size=3, + expand_ratio=(2, 2), + groups_1=(0, 16), + groups_2=(4, 4), + dy_shifts=(2, 2, 1), + reduction_factor=1, + ), + MicroBlockConfig( + stride=2, + out_channels=32, + kernel_size=5, + expand_ratio=(1, 6), + groups_1=(6, 6), + groups_2=(4, 4), + dy_shifts=(2, 2, 1), + reduction_factor=1, + ), + MicroBlockConfig( + stride=1, + out_channels=32, + kernel_size=5, + expand_ratio=(1, 6), + groups_1=(8, 8), + groups_2=(4, 4), + dy_shifts=(2, 2, 1), + reduction_factor=2, + ), + MicroBlockConfig( + stride=1, + 
out_channels=64, + kernel_size=5, + expand_ratio=(1, 6), + groups_1=(8, 8), + groups_2=(8, 8), + dy_shifts=(2, 2, 1), + reduction_factor=2, + ), + MicroBlockConfig( + stride=2, + out_channels=96, + kernel_size=5, + expand_ratio=(1, 6), + groups_1=(8, 8), + groups_2=(8, 8), + dy_shifts=(2, 2, 1), + reduction_factor=2, + ), + MicroBlockConfig( + stride=1, + out_channels=128, + kernel_size=3, + expand_ratio=(1, 6), + groups_1=(12, 12), + groups_2=(8, 8), + dy_shifts=(2, 2, 1), + reduction_factor=2, + ), + MicroBlockConfig( + stride=1, + out_channels=768, + kernel_size=3, + expand_ratio=(1, 6), + groups_1=(16, 16), + groups_2=(0, 0), + dy_shifts=(2, 2, 1), + reduction_factor=2, + ), + ], +) + +M3 = MicroNetVariant( + stem_channels=12, + stem_groups=(4, 3), + init_a=(1.0, 0.5), + init_b=(0.0, 0.5), + out_indices=[1, 3, 8, 12], + block_configs=[ + MicroBlockConfig( + stride=2, + out_channels=16, + kernel_size=3, + expand_ratio=(2, 2), + groups_1=(0, 12), + groups_2=(4, 4), + dy_shifts=(0, 2, 0), + reduction_factor=1, + ), + MicroBlockConfig( + stride=2, + out_channels=24, + kernel_size=3, + expand_ratio=(2, 2), + groups_1=(0, 16), + groups_2=(4, 4), + dy_shifts=(0, 2, 0), + reduction_factor=1, + ), + MicroBlockConfig( + stride=1, + out_channels=24, + kernel_size=3, + expand_ratio=(2, 2), + groups_1=(0, 24), + groups_2=(4, 4), + dy_shifts=(0, 2, 0), + reduction_factor=1, + ), + MicroBlockConfig( + stride=2, + out_channels=32, + kernel_size=5, + expand_ratio=(1, 6), + groups_1=(6, 6), + groups_2=(4, 4), + dy_shifts=(0, 2, 0), + reduction_factor=1, + ), + MicroBlockConfig( + stride=1, + out_channels=32, + kernel_size=5, + expand_ratio=(1, 6), + groups_1=(8, 8), + groups_2=(4, 4), + dy_shifts=(0, 2, 0), + reduction_factor=2, + ), + MicroBlockConfig( + stride=1, + out_channels=64, + kernel_size=5, + expand_ratio=(1, 6), + groups_1=(8, 8), + groups_2=(8, 8), + dy_shifts=(0, 2, 0), + reduction_factor=2, + ), + MicroBlockConfig( + stride=1, + out_channels=80, + kernel_size=5, +
expand_ratio=(1, 6), + groups_1=(8, 8), + groups_2=(8, 8), + dy_shifts=(0, 2, 0), + reduction_factor=2, + ), + MicroBlockConfig( + stride=1, + out_channels=80, + kernel_size=5, + expand_ratio=(1, 6), + groups_1=(10, 10), + groups_2=(8, 8), + dy_shifts=(0, 2, 0), + reduction_factor=2, + ), + MicroBlockConfig( + stride=2, + out_channels=120, + kernel_size=5, + expand_ratio=(1, 6), + groups_1=(10, 10), + groups_2=(10, 10), + dy_shifts=(0, 2, 0), + reduction_factor=2, + ), + MicroBlockConfig( + stride=1, + out_channels=120, + kernel_size=5, + expand_ratio=(1, 6), + groups_1=(12, 12), + groups_2=(10, 10), + dy_shifts=(0, 2, 0), + reduction_factor=2, + ), + MicroBlockConfig( + stride=1, + out_channels=144, + kernel_size=3, + expand_ratio=(1, 6), + groups_1=(12, 12), + groups_2=(12, 12), + dy_shifts=(0, 2, 0), + reduction_factor=2, + ), + MicroBlockConfig( + stride=1, + out_channels=864, + kernel_size=3, + expand_ratio=(1, 6), + groups_1=(12, 12), + groups_2=(0, 0), + dy_shifts=(0, 2, 0), + reduction_factor=2, + ), + ], +) + + +def get_variant(variant: Literal["M1", "M2", "M3"]) -> MicroNetVariant: + variants = {"M1": M1, "M2": M2, "M3": M3} + if variant not in variants: # pragma: no cover + raise ValueError( + "MicroNet model variant should be in " + f"{list(variants.keys())}, got {variant}." + ) + return variants[variant] diff --git a/luxonis_train/nodes/backbones/mobilenetv2.py b/luxonis_train/nodes/backbones/mobilenetv2.py index 48161835..8de19854 100644 --- a/luxonis_train/nodes/backbones/mobilenetv2.py +++ b/luxonis_train/nodes/backbones/mobilenetv2.py @@ -1,44 +1,51 @@ -"""MobileNetV2 backbone. - -TODO: source? -""" +from typing import Any import torchvision -from torch import Tensor, nn +from torch import Tensor from luxonis_train.nodes.base_node import BaseNode class MobileNetV2(BaseNode[Tensor, list[Tensor]]): - """Implementation of the MobileNetV2 backbone.
- - TODO: add more info - """ - - def __init__(self, download_weights: bool = False, **kwargs): - """Constructor of the MobileNetV2 backbone. + def __init__( + self, + download_weights: bool = False, + out_indices: list[int] | None = None, + **kwargs: Any, + ): + """MobileNetV2 backbone. + + This class implements the MobileNetV2 model as described in: + U{MobileNetV2: Inverted Residuals and Linear Bottlenecks } by Sandler I{et al.} + + The network consists of an initial fully convolutional layer, followed by + 19 bottleneck residual blocks, and a final 1x1 convolution. It can be used + as a feature extractor for tasks like image classification, object detection, + and semantic segmentation. + + Key features: + - Inverted residual structure with linear bottlenecks + - Depth-wise separable convolutions for efficiency + - Configurable width multiplier and input resolution @type download_weights: bool @param download_weights: If True download weights from imagenet. Defaults to False. - @type kwargs: Any - @param kwargs: Additional arguments to pass to L{BaseNode}. + @type out_indices: list[int] | None + @param out_indices: Indices of the output layers. Defaults to [3, 6, 13, 18]. 
""" super().__init__(**kwargs) - mobilenet_v2 = torchvision.models.mobilenet_v2( + self.backbone = torchvision.models.mobilenet_v2( weights="DEFAULT" if download_weights else None ) - mobilenet_v2.classifier = nn.Identity() - self.out_indices = [3, 6, 13, 18] - self.channels = [24, 32, 96, 1280] - self.backbone = mobilenet_v2 - - def forward(self, x: Tensor) -> list[Tensor]: - outs = [] - for i, module in enumerate(self.backbone.features): - x = module(x) + self.out_indices = out_indices or [3, 6, 13, 18] + + def forward(self, inputs: Tensor) -> list[Tensor]: + outs: list[Tensor] = [] + for i, layer in enumerate(self.backbone.features): + inputs = layer(inputs) if i in self.out_indices: - outs.append(x) + outs.append(inputs) return outs diff --git a/luxonis_train/nodes/backbones/mobileone/__init__.py b/luxonis_train/nodes/backbones/mobileone/__init__.py new file mode 100644 index 00000000..a6e573aa --- /dev/null +++ b/luxonis_train/nodes/backbones/mobileone/__init__.py @@ -0,0 +1,3 @@ +from .mobileone import MobileOne + +__all__ = ["MobileOne"] diff --git a/luxonis_train/nodes/backbones/mobileone.py b/luxonis_train/nodes/backbones/mobileone/blocks.py similarity index 55% rename from luxonis_train/nodes/backbones/mobileone.py rename to luxonis_train/nodes/backbones/mobileone/blocks.py index 2d460fd0..63e19eae 100644 --- a/luxonis_train/nodes/backbones/mobileone.py +++ b/luxonis_train/nodes/backbones/mobileone/blocks.py @@ -4,170 +4,12 @@ @license: U{Apple} """ - -from typing import Literal - import torch from torch import Tensor, nn -from luxonis_train.nodes.base_node import BaseNode from luxonis_train.nodes.blocks import ConvModule, SqueezeExciteBlock -class MobileOne(BaseNode[Tensor, list[Tensor]]): - """Implementation of MobileOne backbone. 
- - TODO: add more details - """ - - in_channels: int - - VARIANTS_SETTINGS: dict[str, dict] = { - "s0": {"width_multipliers": (0.75, 1.0, 1.0, 2.0), "num_conv_branches": 4}, - "s1": {"width_multipliers": (1.5, 1.5, 2.0, 2.5)}, - "s2": {"width_multipliers": (1.5, 2.0, 2.5, 4.0)}, - "s3": {"width_multipliers": (2.0, 2.5, 3.0, 4.0)}, - "s4": {"width_multipliers": (3.0, 3.5, 3.5, 4.0), "use_se": True}, - } - - def __init__(self, variant: Literal["s0", "s1", "s2", "s3", "s4"] = "s0", **kwargs): - """Constructor for the MobileOne module. - - @type variant: Literal["s0", "s1", "s2", "s3", "s4"] - @param variant: Specifies which variant of the MobileOne network to use. For - details, see TODO. Defaults to "s0". - """ - super().__init__(**kwargs) - - if variant not in MobileOne.VARIANTS_SETTINGS.keys(): - raise ValueError( - f"MobileOne model variant should be in {list(MobileOne.VARIANTS_SETTINGS.keys())}" - ) - - variant_params = MobileOne.VARIANTS_SETTINGS[variant] - # TODO: make configurable - self.width_multipliers = variant_params["width_multipliers"] - self.num_conv_branches = variant_params.get("num_conv_branches", 1) - self.num_blocks_per_stage = [2, 8, 10, 1] - self.use_se = variant_params.get("use_se", False) - - self.in_planes = min(64, int(64 * self.width_multipliers[0])) - - self.stage0 = MobileOneBlock( - in_channels=self.in_channels, - out_channels=self.in_planes, - kernel_size=3, - stride=2, - padding=1, - ) - self.cur_layer_idx = 1 - self.stage1 = self._make_stage( - int(64 * self.width_multipliers[0]), - self.num_blocks_per_stage[0], - num_se_blocks=0, - ) - self.stage2 = self._make_stage( - int(128 * self.width_multipliers[1]), - self.num_blocks_per_stage[1], - num_se_blocks=0, - ) - self.stage3 = self._make_stage( - int(256 * self.width_multipliers[2]), - self.num_blocks_per_stage[2], - num_se_blocks=int(self.num_blocks_per_stage[2] // 2) if self.use_se else 0, - ) - self.stage4 = self._make_stage( - int(512 * self.width_multipliers[3]), - 
self.num_blocks_per_stage[3], - num_se_blocks=self.num_blocks_per_stage[3] if self.use_se else 0, - ) - - def forward(self, inputs: Tensor) -> list[Tensor]: - outs = [] - x = self.stage0(inputs) - outs.append(x) - x = self.stage1(x) - outs.append(x) - x = self.stage2(x) - outs.append(x) - x = self.stage3(x) - outs.append(x) - x = self.stage4(x) - outs.append(x) - - return outs - - def export_mode(self, export: bool = True) -> None: - """Sets the module to export mode. - - Reparameterizes the model to obtain a plain CNN-like structure for inference. - TODO: add more details - - @warning: The reparametrization is destructive and cannot be reversed! - - @type export: bool - @param export: Whether to set the export mode to True or False. Defaults to True. - """ - if export: - for module in self.modules(): - if hasattr(module, "reparameterize"): - module.reparameterize() - - def _make_stage(self, planes: int, num_blocks: int, num_se_blocks: int): - """Build a stage of MobileOne model. - - @type planes: int - @param planes: Number of output channels. - @type num_blocks: int - @param num_blocks: Number of blocks in this stage. - @type num_se_blocks: int - @param num_se_blocks: Number of SE blocks in this stage. - @rtype: nn.Sequential - @return: A stage of MobileOne model. - """ - # Get strides for all layers - strides = [2] + [1] * (num_blocks - 1) - blocks = [] - for ix, stride in enumerate(strides): - use_se = False - if num_se_blocks > num_blocks: - raise ValueError( - "Number of SE blocks cannot " "exceed number of layers." 
- ) - if ix >= (num_blocks - num_se_blocks): - use_se = True - - # Depthwise conv - blocks.append( - MobileOneBlock( - in_channels=self.in_planes, - out_channels=self.in_planes, - kernel_size=3, - stride=stride, - padding=1, - groups=self.in_planes, - use_se=use_se, - num_conv_branches=self.num_conv_branches, - ) - ) - # Pointwise conv - blocks.append( - MobileOneBlock( - in_channels=self.in_planes, - out_channels=planes, - kernel_size=1, - stride=1, - padding=0, - groups=1, - use_se=use_se, - num_conv_branches=self.num_conv_branches, - ) - ) - self.in_planes = planes - self.cur_layer_idx += 1 - return nn.Sequential(*blocks) - - class MobileOneBlock(nn.Module): """MobileOne building block. @@ -186,7 +28,7 @@ def __init__( padding: int = 0, groups: int = 1, use_se: bool = False, - num_conv_branches: int = 1, + n_conv_branches: int = 1, ): """Construct a MobileOneBlock module. @@ -205,9 +47,11 @@ def __init__( @type groups: int @param groups: Group number. Defaults to 1. @type use_se: bool - @param use_se: Whether to use SE-ReLU activations. Defaults to False. - @type num_conv_branches: int - @param num_conv_branches: Number of linear conv branches. Defaults to 1. + @param use_se: Whether to use SE-ReLU activations. Defaults to + False. + @type n_conv_branches: int + @param n_conv_branches: Number of linear conv branches. Defaults + to 1. 
""" super().__init__() @@ -216,17 +60,17 @@ def __init__( self.kernel_size = kernel_size self.in_channels = in_channels self.out_channels = out_channels - self.num_conv_branches = num_conv_branches + self.n_conv_branches = n_conv_branches self.inference_mode = False - # Check if SE-ReLU is requested + self.se: nn.Module if use_se: self.se = SqueezeExciteBlock( in_channels=out_channels, intermediate_channels=int(out_channels * 0.0625), ) else: - self.se = nn.Identity() # type: ignore + self.se = nn.Identity() self.activation = nn.ReLU() # Re-parameterizable skip connection @@ -237,8 +81,8 @@ def __init__( ) # Re-parameterizable conv branches - rbr_conv = list() - for _ in range(self.num_conv_branches): + rbr_conv: list[nn.Module] = [] + for _ in range(self.n_conv_branches): rbr_conv.append( ConvModule( in_channels=self.in_channels, @@ -265,9 +109,9 @@ def __init__( activation=nn.Identity(), ) - def forward(self, inputs: Tensor): + def forward(self, inputs: Tensor) -> Tensor: """Apply forward pass.""" - # Inference mode forward pass. + if self.inference_mode: return self.activation(self.se(self.reparam_conv(inputs))) @@ -284,7 +128,7 @@ def forward(self, inputs: Tensor): # Other branches out = scale_out + identity_out - for ix in range(self.num_conv_branches): + for ix in range(self.n_conv_branches): out += self.rbr_conv[ix](inputs) return self.activation(self.se(out)) @@ -315,10 +159,10 @@ def reparameterize(self): # Delete un-used branches for para in self.parameters(): para.detach_() - self.__delattr__("rbr_conv") - self.__delattr__("rbr_scale") + del self.rbr_conv + del self.rbr_scale if hasattr(self, "rbr_skip"): - self.__delattr__("rbr_skip") + del self.rbr_skip self.inference_mode = True @@ -336,18 +180,22 @@ def _get_kernel_bias(self) -> tuple[Tensor, Tensor]: kernel_scale, bias_scale = self._fuse_bn_tensor(self.rbr_scale) # Pad scale branch kernel to match conv branch kernel size. 
pad = self.kernel_size // 2 - kernel_scale = torch.nn.functional.pad(kernel_scale, [pad, pad, pad, pad]) + kernel_scale = torch.nn.functional.pad( + kernel_scale, [pad, pad, pad, pad] + ) # get weights and bias of skip branch kernel_identity = torch.zeros(()) bias_identity = torch.zeros(()) if self.rbr_skip is not None: - kernel_identity, bias_identity = self._fuse_bn_tensor(self.rbr_skip) + kernel_identity, bias_identity = self._fuse_bn_tensor( + self.rbr_skip + ) # get weights and bias of conv branches kernel_conv = torch.zeros(()) bias_conv = torch.zeros(()) - for ix in range(self.num_conv_branches): + for ix in range(self.n_conv_branches): _kernel, _bias = self._fuse_bn_tensor(self.rbr_conv[ix]) kernel_conv = kernel_conv + _kernel bias_conv = bias_conv + _bias @@ -356,7 +204,7 @@ def _get_kernel_bias(self) -> tuple[Tensor, Tensor]: bias_final = bias_conv + bias_scale + bias_identity return kernel_final, bias_final - def _fuse_bn_tensor(self, branch) -> tuple[Tensor, Tensor]: + def _fuse_bn_tensor(self, branch: nn.Module) -> tuple[Tensor, Tensor]: """Method to fuse batchnorm layer with preceeding conv layer. 
Reference: U{https://github.com/DingXiaoH/RepVGG/blob/main/repvgg.py#L95} @@ -374,13 +222,21 @@ def _fuse_bn_tensor(self, branch) -> tuple[Tensor, Tensor]: if not hasattr(self, "id_tensor"): input_dim = self.in_channels // self.groups kernel_value = torch.zeros( - (self.in_channels, input_dim, self.kernel_size, self.kernel_size), + ( + self.in_channels, + input_dim, + self.kernel_size, + self.kernel_size, + ), dtype=branch.weight.dtype, device=branch.weight.device, ) for i in range(self.in_channels): kernel_value[ - i, i % input_dim, self.kernel_size // 2, self.kernel_size // 2 + i, + i % input_dim, + self.kernel_size // 2, + self.kernel_size // 2, ] = 1 self.id_tensor = kernel_value kernel = self.id_tensor diff --git a/luxonis_train/nodes/backbones/mobileone/mobileone.py b/luxonis_train/nodes/backbones/mobileone/mobileone.py new file mode 100644 index 00000000..8180f960 --- /dev/null +++ b/luxonis_train/nodes/backbones/mobileone/mobileone.py @@ -0,0 +1,197 @@ +"""MobileOne backbone. + +Source: U{} +@license: U{Apple} +""" + +import logging +from typing import Any, Literal + +from torch import Tensor, nn + +from luxonis_train.nodes.base_node import BaseNode + +from .blocks import MobileOneBlock +from .variants import get_variant + +logger = logging.getLogger(__name__) + + +class MobileOne(BaseNode[Tensor, list[Tensor]]): + in_channels: int + + def __init__( + self, + variant: Literal["s0", "s1", "s2", "s3", "s4"] = "s0", + width_multipliers: tuple[float, float, float, float] | None = None, + n_conv_branches: int | None = None, + use_se: bool | None = None, + **kwargs: Any, + ): + """MobileOne: An efficient CNN backbone for mobile devices. + + The architecture focuses on reducing memory access costs and improving parallelism + while allowing aggressive parameter scaling for better representation capacity. + Different variants (S0-S4) offer various accuracy-latency tradeoffs. 
+ + Key features: + - Designed for low latency on mobile while maintaining high accuracy + - Uses re-parameterizable branches during training that get folded at inference + - Employs trivial over-parameterization branches for improved accuracy + - Simple feed-forward structure at inference with no branches/skip connections + - Variants achieve <1ms inference time on iPhone 12 with up to 75.9% top-1 ImageNet accuracy + - Outperforms other efficient architectures like MobileNets on image classification, + object detection and semantic segmentation tasks + - Uses only basic operators available across platforms (no custom activations) + + + Reference: U{MobileOne: An Improved One millisecond Mobile Backbone + } + + @type variant: Literal["s0", "s1", "s2", "s3", "s4"] + @param variant: Specifies which variant of the MobileOne network to use. Defaults to "s0". + Each variant specifies a predefined set of values for: + - width multipliers - A tuple of 4 float values specifying the width multipliers for each stage of the network. If the use of SE blocks is disabled, the last two values are ignored. + - number of convolution branches - An integer specifying the number of linear convolution branches in MobileOne block. + - use of SE blocks - A boolean specifying whether to use SE blocks in the network. + + The variants are as follows: + - s0 (default): width_multipliers=(0.75, 1.0, 1.0, 2.0), n_conv_branches=4, use_se=False + - s1: width_multipliers=(1.5, 1.5, 2.0, 2.5), n_conv_branches=1, use_se=False + - s2: width_multipliers=(1.5, 2.0, 2.5, 4.0), n_conv_branches=1, use_se=False + - s3: width_multipliers=(2.0, 2.5, 3.0, 4.0), n_conv_branches=1, use_se=False + - s4: width_multipliers=(3.0, 3.5, 3.5, 4.0), n_conv_branches=1, use_se=True + + @type width_multipliers: tuple[float, float, float, float] | None + @param width_multipliers: Width multipliers for each stage. If provided, overrides the variant values. 
+ @type n_conv_branches: int | None + @param n_conv_branches: Number of linear convolution branches in MobileOne block. If provided, overrides the variant values. + @type use_se: bool | None + @param use_se: Whether to use SE blocks in the network. If provided, overrides the variant value. + """ + super().__init__(**kwargs) + + var = get_variant(variant) + + width_multipliers = width_multipliers or var.width_multipliers + use_se = use_se or var.use_se + self.n_blocks_per_stage = [2, 8, 10, 1] + self.n_conv_branches = n_conv_branches or var.n_conv_branches + + self.in_planes = min(64, int(64 * width_multipliers[0])) + + self.stage0 = MobileOneBlock( + in_channels=self.in_channels, + out_channels=self.in_planes, + kernel_size=3, + stride=2, + padding=1, + ) + self.cur_layer_idx = 1 + self.stage1 = self._make_stage( + int(64 * width_multipliers[0]), + self.n_blocks_per_stage[0], + n_se_blocks=0, + ) + self.stage2 = self._make_stage( + int(128 * width_multipliers[1]), + self.n_blocks_per_stage[1], + n_se_blocks=0, + ) + self.stage3 = self._make_stage( + int(256 * width_multipliers[2]), + self.n_blocks_per_stage[2], + n_se_blocks=self.n_blocks_per_stage[2] // 2 if use_se else 0, + ) + self.stage4 = self._make_stage( + int(512 * width_multipliers[3]), + self.n_blocks_per_stage[3], + n_se_blocks=self.n_blocks_per_stage[3] if use_se else 0, + ) + + def forward(self, inputs: Tensor) -> list[Tensor]: + outs: list[Tensor] = [] + x = self.stage0(inputs) + outs.append(x) + x = self.stage1(x) + outs.append(x) + x = self.stage2(x) + outs.append(x) + x = self.stage3(x) + outs.append(x) + x = self.stage4(x) + outs.append(x) + + return outs + + def set_export_mode(self, mode: bool = True) -> None: + """Sets the module to export mode. + + Reparameterizes the model to obtain a plain CNN-like structure for inference. + TODO: add more details + + @warning: The reparametrization is destructive and cannot be reversed! 
+ + @type mode: bool + @param mode: Whether to set the export mode to True or False. Defaults to True. + """ + super().set_export_mode(mode) + if self.export: + logger.info("Reparametrizing 'MobileOne'.") + for module in self.modules(): + if hasattr(module, "reparameterize"): + module.reparameterize() + + def _make_stage(self, planes: int, n_blocks: int, n_se_blocks: int): + """Build a stage of MobileOne model. + + @type planes: int + @param planes: Number of output channels. + @type n_blocks: int + @param n_blocks: Number of blocks in this stage. + @type n_se_blocks: int + @param n_se_blocks: Number of SE blocks in this stage. + @rtype: nn.Sequential + @return: A stage of MobileOne model. + """ + # Get strides for all layers + strides = [2] + [1] * (n_blocks - 1) + blocks: list[nn.Module] = [] + for ix, stride in enumerate(strides): + use_se = False + if n_se_blocks > n_blocks: + raise ValueError( + "Number of SE blocks cannot " "exceed number of layers." + ) + if ix >= (n_blocks - n_se_blocks): + use_se = True + + # Depthwise conv + blocks.append( + MobileOneBlock( + in_channels=self.in_planes, + out_channels=self.in_planes, + kernel_size=3, + stride=stride, + padding=1, + groups=self.in_planes, + use_se=use_se, + n_conv_branches=self.n_conv_branches, + ) + ) + # Pointwise conv + blocks.append( + MobileOneBlock( + in_channels=self.in_planes, + out_channels=planes, + kernel_size=1, + stride=1, + padding=0, + groups=1, + use_se=use_se, + n_conv_branches=self.n_conv_branches, + ) + ) + self.in_planes = planes + self.cur_layer_idx += 1 + return nn.Sequential(*blocks) diff --git a/luxonis_train/nodes/backbones/mobileone/variants.py b/luxonis_train/nodes/backbones/mobileone/variants.py new file mode 100644 index 00000000..fbb0add3 --- /dev/null +++ b/luxonis_train/nodes/backbones/mobileone/variants.py @@ -0,0 +1,39 @@ +from typing import Literal + +from pydantic import BaseModel + + +class MobileOneVariant(BaseModel): + width_multipliers: tuple[float, float, float, 
float] + n_conv_branches: int = 1 + use_se: bool = False + + +def get_variant( + variant: Literal["s0", "s1", "s2", "s3", "s4"], +) -> MobileOneVariant: + variants = { + "s0": MobileOneVariant( + width_multipliers=(0.75, 1.0, 1.0, 2.0), + n_conv_branches=4, + ), + "s1": MobileOneVariant( + width_multipliers=(1.5, 1.5, 2.0, 2.5), + ), + "s2": MobileOneVariant( + width_multipliers=(1.5, 2.0, 2.5, 4.0), + ), + "s3": MobileOneVariant( + width_multipliers=(2.0, 2.5, 3.0, 4.0), + ), + "s4": MobileOneVariant( + width_multipliers=(3.0, 3.5, 3.5, 4.0), + use_se=True, + ), + } + if variant not in variants: # pragma: no cover + raise ValueError( + "MobileOne model variant should be in " + f"{list(variants.keys())}, got {variant}." + ) + return variants[variant] diff --git a/luxonis_train/nodes/backbones/repvgg.py b/luxonis_train/nodes/backbones/repvgg.py deleted file mode 100644 index c536c78e..00000000 --- a/luxonis_train/nodes/backbones/repvgg.py +++ /dev/null @@ -1,149 +0,0 @@ -import logging -from typing import Literal - -import torch.utils.checkpoint as checkpoint -from torch import Tensor, nn - -from luxonis_train.nodes.blocks import RepVGGBlock - -from ..base_node import BaseNode - -logger = logging.getLogger(__name__) - - -class RepVGG(BaseNode): - """Implementation of RepVGG backbone. - - Source: U{https://github.com/DingXiaoH/RepVGG} - @license: U{MIT}. 
- - @todo: technical documentation - """ - - in_channels: int - attach_index: int = -1 - - VARIANTS_SETTINGS = { - "A0": { - "num_blocks": [2, 4, 14, 1], - "width_multiplier": [0.75, 0.75, 0.75, 2.5], - }, - "A1": { - "num_blocks": [2, 4, 14, 1], - "width_multiplier": [1, 1, 1, 2.5], - }, - "A2": { - "num_blocks": [2, 4, 14, 1], - "width_multiplier": [1.5, 1.5, 1.5, 2.75], - }, - } - - def __init__( - self, - variant: Literal["A0", "A1", "A2"] = "A0", - num_blocks: list[int] | None = None, - width_multiplier: list[float] | None = None, - override_groups_map: dict[int, int] | None = None, - use_se: bool = False, - use_checkpoint: bool = False, - **kwargs, - ): - """Constructor for the RepVGG module. - - @type variant: Literal["A0", "A1", "A2"] - @param variant: RepVGG model variant. Defaults to "A0". - @type override_groups_map: dict[int, int] | None - @param override_groups_map: Dictionary mapping layer index to number of groups. - @type use_se: bool - @param use_se: Whether to use Squeeze-and-Excitation blocks. - @type use_checkpoint: bool - @param use_checkpoint: Whether to use checkpointing. - @type num_blocks: list[int] | None - @param num_blocks: Number of blocks in each stage. - @type width_multiplier: list[float] | None - @param width_multiplier: Width multiplier for each stage. - """ - super().__init__(**kwargs) - if variant not in self.VARIANTS_SETTINGS.keys(): - raise ValueError( - f"RepVGG model variant should be one of " - f"{list(self.VARIANTS_SETTINGS.keys())}." 
- ) - - num_blocks = num_blocks or self.VARIANTS_SETTINGS[variant]["num_blocks"] - width_multiplier = ( - width_multiplier or self.VARIANTS_SETTINGS[variant]["width_multiplier"] - ) - self.override_groups_map = override_groups_map or {} - assert 0 not in self.override_groups_map - self.use_se = use_se - self.use_checkpoint = use_checkpoint - - self.in_planes = min(64, int(64 * width_multiplier[0])) - self.stage0 = RepVGGBlock( - in_channels=self.in_channels, - out_channels=self.in_planes, - kernel_size=3, - stride=2, - padding=1, - use_se=self.use_se, - ) - self.cur_layer_idx = 1 - self.stage1 = self._make_stage( - int(64 * width_multiplier[0]), num_blocks[0], stride=2 - ) - self.stage2 = self._make_stage( - int(128 * width_multiplier[1]), num_blocks[1], stride=2 - ) - self.stage3 = self._make_stage( - int(256 * width_multiplier[2]), num_blocks[2], stride=2 - ) - self.stage4 = self._make_stage( - int(512 * width_multiplier[3]), num_blocks[3], stride=2 - ) - self.gap = nn.AdaptiveAvgPool2d(output_size=1) - - def forward(self, inputs: Tensor) -> list[Tensor]: - outputs = [] - out = self.stage0(inputs) - for stage in (self.stage1, self.stage2, self.stage3, self.stage4): - for block in stage: - if self.use_checkpoint: - out = checkpoint.checkpoint(block, out) - else: - out = block(out) - outputs.append(out) - return outputs - - def _make_stage(self, planes: int, num_blocks: int, stride: int): - strides = [stride] + [1] * (num_blocks - 1) - blocks = [] - for stride in strides: - cur_groups = self.override_groups_map.get(self.cur_layer_idx, 1) - blocks.append( - RepVGGBlock( - in_channels=self.in_planes, - out_channels=planes, - kernel_size=3, - stride=stride, - padding=1, - groups=cur_groups, - use_se=self.use_se, - ) - ) - self.in_planes = planes - self.cur_layer_idx += 1 - return nn.ModuleList(blocks) - - def set_export_mode(self, mode: bool = True) -> None: - """Reparametrizes instances of L{RepVGGBlock} in the network. 
- - @type mode: bool - @param mode: Whether to set the export mode. Defaults to C{True}. - """ - super().set_export_mode(mode) - if self.export: - logger.info("Reparametrizing RepVGG.") - for module in self.modules(): - if isinstance(module, RepVGGBlock): - module.reparametrize() diff --git a/luxonis_train/nodes/backbones/repvgg/__init__.py b/luxonis_train/nodes/backbones/repvgg/__init__.py new file mode 100644 index 00000000..61a5a4fc --- /dev/null +++ b/luxonis_train/nodes/backbones/repvgg/__init__.py @@ -0,0 +1,3 @@ +from .repvgg import RepVGG + +__all__ = ["RepVGG"] diff --git a/luxonis_train/nodes/backbones/repvgg/repvgg.py b/luxonis_train/nodes/backbones/repvgg/repvgg.py new file mode 100644 index 00000000..fd8a5e67 --- /dev/null +++ b/luxonis_train/nodes/backbones/repvgg/repvgg.py @@ -0,0 +1,135 @@ +import logging +from collections import defaultdict +from typing import Any, Literal + +import torch.utils.checkpoint as checkpoint +from torch import Tensor, nn + +from luxonis_train.nodes.base_node import BaseNode +from luxonis_train.nodes.blocks import RepVGGBlock + +from .variants import get_variant + +logger = logging.getLogger(__name__) + + +class RepVGG(BaseNode[Tensor, list[Tensor]]): + in_channels: int + attach_index: int = -1 + + def __init__( + self, + variant: Literal["A0", "A1", "A2"] = "A0", + n_blocks: tuple[int, int, int, int] | None = None, + width_multiplier: tuple[float, float, float, float] | None = None, + override_groups_map: dict[int, int] | None = None, + use_se: bool = False, + use_checkpoint: bool = False, + **kwargs: Any, + ): + """RepVGG backbone. + + RepVGG is a VGG-style convolutional architecture. + + - Simple feed-forward topology without any branching. + - 3x3 convolutions and ReLU activations. + - No automatic search, manual refinement or compound scaling. + + @license: U{MIT + }. 
+ + @see: U{https://github.com/DingXiaoH/RepVGG} + @see: U{https://paperswithcode.com/method/repvgg} + @see: U{RepVGG: Making VGG-style ConvNets Great Again + } + + + @type variant: Literal["A0", "A1", "A2"] + @param variant: RepVGG model variant. Defaults to "A0". + @type override_groups_map: dict[int, int] | None + @param override_groups_map: Dictionary mapping layer index to number of groups. The layers are indexed starting from 0. + @type use_se: bool + @param use_se: Whether to use Squeeze-and-Excitation blocks. + @type use_checkpoint: bool + @param use_checkpoint: Whether to use checkpointing. + @type n_blocks: tuple[int, int, int, int] | None + @param n_blocks: Number of blocks in each stage. + @type width_multiplier: tuple[float, float, float, float] | None + @param width_multiplier: Width multiplier for each stage. + """ + super().__init__(**kwargs) + var = get_variant(variant) + + n_blocks = n_blocks or var.n_blocks + width_multiplier = width_multiplier or var.width_multiplier + override_groups_map = defaultdict(lambda: 1, override_groups_map or {}) + self.use_se = use_se + self.use_checkpoint = use_checkpoint + + self.in_planes = min(64, int(64 * width_multiplier[0])) + self.stage0 = RepVGGBlock( + in_channels=self.in_channels, + out_channels=self.in_planes, + kernel_size=3, + stride=2, + padding=1, + use_se=self.use_se, + ) + self.blocks = nn.ModuleList( + [ + block + for i in range(4) + for block in self._make_stage( + int(2**i * 64 * width_multiplier[i]), + n_blocks[i], + stride=2, + groups=override_groups_map[i], + ) + ] + ) + self.gap = nn.AdaptiveAvgPool2d(output_size=1) + + def forward(self, inputs: Tensor) -> list[Tensor]: + outputs: list[Tensor] = [] + out = self.stage0(inputs) + for block in self.blocks: + if self.use_checkpoint: + out = checkpoint.checkpoint(block, out) + else: + out = block(out) + outputs.append(out) # type: ignore + return outputs + + def _make_stage( + self, channels: int, n_blocks: int, stride: int, groups: int + ) -> 
nn.ModuleList: + strides = [stride] + [1] * (n_blocks - 1) + blocks: list[nn.Module] = [] + for stride in strides: + blocks.append( + RepVGGBlock( + in_channels=self.in_planes, + out_channels=channels, + kernel_size=3, + stride=stride, + padding=1, + groups=groups, + use_se=self.use_se, + ) + ) + self.in_planes = channels + return nn.ModuleList(blocks) + + def set_export_mode(self, mode: bool = True) -> None: + """Reparametrizes instances of L{RepVGGBlock} in the network. + + @type mode: bool + @param mode: Whether to set the export mode. Defaults to + C{True}. + """ + super().set_export_mode(mode) + if self.export: + logger.info("Reparametrizing RepVGG.") + for module in self.modules(): + if isinstance(module, RepVGGBlock): + module.reparametrize() diff --git a/luxonis_train/nodes/backbones/repvgg/variants.py b/luxonis_train/nodes/backbones/repvgg/variants.py new file mode 100644 index 00000000..a5c734b5 --- /dev/null +++ b/luxonis_train/nodes/backbones/repvgg/variants.py @@ -0,0 +1,31 @@ +from typing import Literal + +from pydantic import BaseModel + + +class RepVGGVariant(BaseModel): + n_blocks: tuple[int, int, int, int] + width_multiplier: tuple[float, float, float, float] + + +def get_variant(variant: Literal["A0", "A1", "A2"]) -> RepVGGVariant: + variants = { + "A0": RepVGGVariant( + n_blocks=(2, 4, 14, 1), + width_multiplier=(0.75, 0.75, 0.75, 2.5), + ), + "A1": RepVGGVariant( + n_blocks=(2, 4, 14, 1), + width_multiplier=(1, 1, 1, 2.5), + ), + "A2": RepVGGVariant( + n_blocks=(2, 4, 14, 1), + width_multiplier=(1.5, 1.5, 1.5, 2.75), + ), + } + if variant not in variants: # pragma: no cover + raise ValueError( + f"RepVGG variant should be one of " + f"{list(variants.keys())}, got '{variant}'." 
+ ) + return variants[variant] diff --git a/luxonis_train/nodes/backbones/resnet.py b/luxonis_train/nodes/backbones/resnet.py index e4228410..93a13d4a 100644 --- a/luxonis_train/nodes/backbones/resnet.py +++ b/luxonis_train/nodes/backbones/resnet.py @@ -1,55 +1,98 @@ -"""ResNet backbone. - -Source: U{https://pytorch.org/vision/main/models/resnet.html} -@license: U{PyTorch} -""" -from typing import Literal +from typing import Any, Literal import torchvision -from torch import Tensor, nn +from torch import Tensor +from torchvision.models import ResNet as TorchResNet -from ..base_node import BaseNode +from luxonis_train.nodes.base_node import BaseNode class ResNet(BaseNode[Tensor, list[Tensor]]): def __init__( self, variant: Literal["18", "34", "50", "101", "152"] = "18", - channels_list: list[int] | None = None, download_weights: bool = False, - **kwargs, + zero_init_residual: bool = False, + groups: int = 1, + width_per_group: int = 64, + replace_stride_with_dilation: tuple[bool, bool, bool] = ( + False, + False, + False, + ), + **kwargs: Any, ): - """Implementation of the ResNetX backbone. + """ResNet backbone. + + Implements the backbone of a ResNet (Residual Network) architecture. + + ResNet is designed to address the vanishing gradient problem in deep neural networks + by introducing skip connections. These connections allow the network to learn + residual functions with reference to the layer inputs, enabling training of much + deeper networks. + + This backbone can be used as a feature extractor for various computer vision tasks + such as image classification, object detection, and semantic segmentation. It + provides a robust set of features that can be fine-tuned for specific applications. - TODO: add more info + The architecture consists of stacked residual blocks, each containing convolutional + layers, batch normalization, and ReLU activations. The skip connections can be + either identity mappings or projections, depending on the block type. 
+ Source: U{https://pytorch.org/vision/main/models/resnet.html} + + @license: U{PyTorch} + + @param variant: ResNet variant, determining the depth and structure of the network. Options are: + - "18": 18 layers, uses basic blocks, smaller model suitable for simpler tasks. + - "34": 34 layers, uses basic blocks, good balance of depth and computation. + - "50": 50 layers, introduces bottleneck blocks, deeper feature extraction. + - "101": 101 layers, uses bottleneck blocks, high capacity for complex tasks. + - "152": 152 layers, deepest variant, highest capacity but most computationally intensive. + The number in each variant represents the total number of weighted layers. + Deeper networks generally offer higher accuracy but require more computation. @type variant: Literal["18", "34", "50", "101", "152"] - @param variant: ResNet variant. Defaults to "18". - @type channels_list: list[int] | None - @param channels_list: List of channels to return. - If unset, defaults to [64, 128, 256, 512]. + @default variant: "18" @type download_weights: bool - @param download_weights: If True download weights from imagenet. + @param download_weights: If True download weights trained on imagenet. Defaults to False. + @type zero_init_residual: bool + @param zero_init_residual: Zero-initialize the last BN in each residual branch, + so that the residual branch starts with zeros, and each residual block behaves like an identity. + This improves the model by 0.2~0.3% according to U{Accurate, Large Minibatch SGD: Training ImageNet in 1 Hour }. Defaults to C{False}. + + @type groups: int + @param groups: Number of groups for each block. + Defaults to 1. Can be set to a different value only + for ResNet-50, ResNet-101, and ResNet-152. + The width of the convolutional blocks is computed as + C{int(in_channels * (width_per_group / 64.0)) * groups} + + @type width_per_group: int + @param width_per_group: Number of channels per group. + Defaults to 64. 
Can be set to a different value only + for ResNet-50, ResNet-101, and ResNet-152. + The width of the convolutional blocks is computed as + C{int(in_channels * (width_per_group / 64.0)) * groups} + + @type replace_stride_with_dilation: tuple[bool, bool, bool] + @param replace_stride_with_dilation: Tuple of booleans where each + indicates if the 2x2 strides should be replaced with a dilated convolution instead. + Defaults to (False, False, False). Can be set to a different value only for ResNet-50, ResNet-101, and ResNet-152. """ super().__init__(**kwargs) - - if variant not in RESNET_VARIANTS: - raise ValueError( - f"ResNet model variant should be in {list(RESNET_VARIANTS.keys())}" - ) - - self.backbone = RESNET_VARIANTS[variant]( - weights="DEFAULT" if download_weights else None + self.backbone = self._get_backbone( + variant, + weights="DEFAULT" if download_weights else None, + zero_init_residual=zero_init_residual, + groups=groups, + width_per_group=width_per_group, + replace_stride_with_dilation=replace_stride_with_dilation, ) - self.backbone.fc = nn.Identity() - - self.channels_list = channels_list or [64, 128, 256, 512] - def forward(self, inputs: Tensor) -> list[Tensor]: - outs = [] + outs: list[Tensor] = [] x = self.backbone.conv1(inputs) x = self.backbone.bn1(x) x = self.backbone.relu(x) @@ -66,11 +109,20 @@ def forward(self, inputs: Tensor) -> list[Tensor]: return outs - -RESNET_VARIANTS = { - "18": torchvision.models.resnet18, - "34": torchvision.models.resnet34, - "50": torchvision.models.resnet50, - "101": torchvision.models.resnet101, - "152": torchvision.models.resnet152, -} + @staticmethod + def _get_backbone( + variant: Literal["18", "34", "50", "101", "152"], **kwargs: Any + ) -> TorchResNet: + variants = { + "18": torchvision.models.resnet18, + "34": torchvision.models.resnet34, + "50": torchvision.models.resnet50, + "101": torchvision.models.resnet101, + "152": torchvision.models.resnet152, + } + if variant not in variants: + raise ValueError( + 
"ResNet model variant should be in " + f"{list(variants.keys())}, got {variant}." + ) + return variants[variant](**kwargs) diff --git a/luxonis_train/nodes/backbones/rexnetv1.py b/luxonis_train/nodes/backbones/rexnetv1.py index 6d23857e..6567586a 100644 --- a/luxonis_train/nodes/backbones/rexnetv1.py +++ b/luxonis_train/nodes/backbones/rexnetv1.py @@ -1,15 +1,11 @@ -"""Implementation of the ReXNetV1 backbone. - -Source: U{https://github.com/clovaai/rexnet} -@license: U{MIT} -""" +from typing import Any import torch from torch import Tensor, nn from luxonis_train.nodes.base_node import BaseNode from luxonis_train.nodes.blocks import ConvModule -from luxonis_train.utils.general import make_divisible +from luxonis_train.utils import make_divisible class ReXNetV1_lite(BaseNode[Tensor, list[Tensor]]): @@ -21,10 +17,33 @@ def __init__( final_ch: int = 164, multiplier: float = 1.0, kernel_sizes: int | list[int] = 3, - **kwargs, + out_indices: list[int] | None = None, + **kwargs: Any, ): - """ReXNetV1_lite backbone. + """ReXNetV1 (Rank Expansion Networks) backbone, lite version. + + ReXNet proposes a new approach to designing lightweight CNN architectures by: + + - Studying proper channel dimension expansion at the layer level using rank analysis + - Searching for effective channel configurations across the entire network + - Parameterizing channel dimensions as a linear function of network depth + + Key aspects: + + - Uses inverted bottleneck blocks similar to MobileNetV2 + - Employs a linear parameterization of channel dimensions across blocks + - Replaces ReLU6 with SiLU (Swish-1) activation in certain layers + - Incorporates Squeeze-and-Excitation modules + + ReXNet achieves state-of-the-art performance among lightweight models on ImageNet + classification and transfers well to tasks like object detection and fine-grained classification. + Source: U{https://github.com/clovaai/rexnet} + + @license: U{MIT + } + @copyright: 2021-present NAVER Corp. 
+ @see: U{Rethinking Channel Dimensions for Efficient Model Design } @type fix_head_stem: bool @param fix_head_stem: Whether to multiply head stem. Defaults to False. @type divisible_value: int @@ -37,40 +56,44 @@ def __init__( @param multiplier: Channel dimension multiplier. Defaults to 1.0. @type kernel_sizes: int | list[int] @param kernel_sizes: Kernel size for each block. Defaults to 3. + @type out_indices: list[int] | None + @param out_indices: Indices of the output layers. Defaults to [1, 4, 10, 17]. """ super().__init__(**kwargs) - self.out_indices = [1, 4, 10, 17] - self.channels = [16, 48, 112, 184] layers = [1, 2, 2, 3, 3, 5] strides = [1, 2, 2, 2, 1, 2] + self.n_convblocks = sum(layers) + self.out_indices = out_indices or [1, 4, 10, 17] + kernel_sizes = ( - [kernel_sizes] * 6 if isinstance(kernel_sizes, int) else kernel_sizes + [kernel_sizes] * 6 + if isinstance(kernel_sizes, int) + else kernel_sizes ) - strides = sum( - [ - [element] + [1] * (layers[idx] - 1) - for idx, element in enumerate(strides) - ], - [], - ) + strides = [ + s if i == 0 else 1 + for layer, s in zip(layers, strides) + for i in range(layer) + ] ts = [1] * layers[0] + [6] * sum(layers[1:]) - kernel_sizes = sum( - [[element] * layers[idx] for idx, element in enumerate(kernel_sizes)], [] - ) - self.num_convblocks = sum(layers[:]) + kernel_sizes = [ + ks for ks, layer in zip(kernel_sizes, layers) for _ in range(layer) + ] features: list[nn.Module] = [] inplanes = input_ch / multiplier if multiplier < 1.0 else input_ch - first_channel = 32 / multiplier if multiplier < 1.0 or fix_head_stem else 32 + first_channel = ( + 32 / multiplier if multiplier < 1.0 or fix_head_stem else 32 + ) first_channel = make_divisible( int(round(first_channel * multiplier)), divisible_value ) - in_channels_group = [] - channels_group = [] + in_channels_group: list[int] = [] + channels_group: list[int] = [] features.append( ConvModule( @@ -83,7 +106,7 @@ def __init__( ) ) - for i in range(self.num_convblocks): + 
for i in range(self.n_convblocks): inplanes_divisible = make_divisible( int(round(inplanes * multiplier)), divisible_value ) @@ -92,7 +115,7 @@ def __init__( channels_group.append(inplanes_divisible) else: in_channels_group.append(inplanes_divisible) - inplanes += final_ch / (self.num_convblocks - 1 * 1.0) + inplanes += final_ch / (self.n_convblocks - 1 * 1.0) inplanes_divisible = make_divisible( int(round(inplanes * multiplier)), divisible_value ) @@ -100,7 +123,12 @@ def __init__( assert channels_group for in_c, c, t, k, s in zip( - in_channels_group, channels_group, ts, kernel_sizes, strides, strict=True + in_channels_group, + channels_group, + ts, + kernel_sizes, + strides, + strict=True, ): features.append( LinearBottleneck( @@ -109,7 +137,9 @@ def __init__( ) pen_channels = ( - int(1280 * multiplier) if multiplier > 1 and not fix_head_stem else 1280 + int(1280 * multiplier) + if multiplier > 1 and not fix_head_stem + else 1280 ) features.append( ConvModule( @@ -121,12 +151,12 @@ def __init__( ) self.features = nn.Sequential(*features) - def forward(self, x: Tensor) -> list[Tensor]: - outs = [] + def forward(self, inputs: Tensor) -> list[Tensor]: + outs: list[Tensor] = [] for i, module in enumerate(self.features): - x = module(x) + inputs = module(inputs) if i in self.out_indices: - outs.append(x) + outs.append(inputs) return outs @@ -138,14 +168,12 @@ def __init__( t: int, kernel_size: int = 3, stride: int = 1, - **kwargs, ): - super(LinearBottleneck, self).__init__(**kwargs) - self.conv_shortcut = None + super().__init__() self.use_shortcut = stride == 1 and in_channels <= channels self.in_channels = in_channels self.out_channels = channels - out = [] + out: list[nn.Module] = [] if t != 1: dw_channels = in_channels * t out.append( diff --git a/luxonis_train/nodes/base_node.py b/luxonis_train/nodes/base_node.py index 9db45316..aad0b2f2 100644 --- a/luxonis_train/nodes/base_node.py +++ b/luxonis_train/nodes/base_node.py @@ -1,25 +1,26 @@ import inspect +import 
logging from abc import ABC, abstractmethod +from contextlib import suppress from typing import Generic, TypeVar +from luxonis_ml.data import LabelType from luxonis_ml.utils.registry import AutoRegisterMeta -from pydantic import BaseModel, ValidationError from torch import Size, Tensor, nn +from typeguard import TypeCheckError, check_type -from luxonis_train.utils.general import DatasetMetadata, validate_packet -from luxonis_train.utils.registry import NODES -from luxonis_train.utils.types import ( +from luxonis_train.utils import ( AttachIndexType, - FeaturesProtocol, + DatasetMetadata, IncompatibleException, - LabelType, Packet, ) +from luxonis_train.utils.registry import NODES ForwardOutputT = TypeVar("ForwardOutputT") ForwardInputT = TypeVar("ForwardInputT") -__all__ = ["BaseNode"] +logger = logging.getLogger(__name__) class BaseNode( @@ -41,13 +42,10 @@ class BaseNode( of lists of tensors. Each key in the dictionary represents a different output from the previous node. Input to the node is a list of L{Packet}s, output is a single L{Packet}. - Each node can define a list of L{BaseProtocol}s that the inputs must conform to. - L{BaseProtocol} is a pydantic model that defines the structure of the input. - When the node is called, the inputs are validated against the protocols and - then sent to the L{unwrap} method. The C{unwrap} method should return a valid - input to the L{forward} method. Outputs of the C{forward} method are then - send to L{weap} method, which wraps the output into a C{Packet}, which is the - output of the node. + When the node is called, the inputs are sent to the L{unwrap} method. + The C{unwrap} method should return a valid input to the L{forward} method. + Outputs of the C{forward} method are then sent to the L{wrap} method, + which wraps the output into a C{Packet}. The wrapped C{Packet} is the final output of the node. The L{run} method combines the C{unwrap}, C{forward} and C{wrap} methods together with input validation. 
@@ -55,13 +53,12 @@ class BaseNode( When subclassing, the following methods should be implemented: - L{forward}: Forward pass of the module. - L{unwrap}: Optional. Unwraps the inputs from the input packet. - The default implementation expects a single input with `features` key. + The default implementation expects a single input with `features` key. - L{wrap}: Optional. Wraps the output of the forward pass - into a `Packet[Tensor]`. The default implementation expects wraps the output - of the forward pass into a packet with either "features" or the task name as the key. + into a `Packet[Tensor]`. The default implementation expects wraps the output + of the forward pass into a packet with either "features" or the task name as the key. Additionally, the following class attributes can be defined: - - L{input_protocols}: List of input protocols used to validate inputs to the node. - L{attach_index}: Index of previous output that this node attaches to. - L{tasks}: Dictionary of tasks that the node supports. @@ -94,32 +91,6 @@ def wrap(output: Tensor) -> Packet[Tensor]: # by the attached modules. return {"classification": [output]} - @type input_shapes: list[Packet[Size]] | None - @param input_shapes: List of input shapes for the module. - - @type original_in_shape: Size | None - @param original_in_shape: Original input shape of the model. Some - nodes won't function if not provided. - - @type dataset_metadata: L{DatasetMetadata} | None - @param dataset_metadata: Metadata of the dataset. - Some nodes won't function if not provided. - - @type n_classes: int | None - @param n_classes: Number of classes in the dataset. Provide only - in case `dataset_metadata` is not provided. Defaults to None. - - @type in_sizes: Size | list[Size] | None - @param in_sizes: List of input sizes for the node. - Provide only in case the `input_shapes` were not provided. - - @type _tasks: dict[LabelType, str] | None - @param _tasks: Dictionary of tasks that the node supports. 
Overrides the - class L{tasks} attribute. Shouldn't be provided by the user in most cases. - - @type input_protocols: list[type[BaseModel]] - @ivar input_protocols: List of input protocols used to validate inputs to the node. - Defaults to [L{FeaturesProtocol}]. @type attach_index: AttachIndexType @ivar attach_index: Index of previous output that this node attaches to. @@ -135,7 +106,6 @@ class L{tasks} attribute. Shouldn't be provided by the user in most cases. Only needs to be defined for head nodes. """ - input_protocols: list[type[BaseModel]] = [FeaturesProtocol] attach_index: AttachIndexType tasks: list[LabelType] | dict[LabelType, str] | None = None @@ -148,10 +118,50 @@ def __init__( n_classes: int | None = None, n_keypoints: int | None = None, in_sizes: Size | list[Size] | None = None, + attach_index: AttachIndexType | None = None, _tasks: dict[LabelType, str] | None = None, ): + """Constructor for the BaseNode. + + @type input_shapes: list[Packet[Size]] | None + @param input_shapes: List of input shapes for the module. + + @type original_in_shape: Size | None + @param original_in_shape: Original input shape of the model. Some + nodes won't function if not provided. + + @type dataset_metadata: L{DatasetMetadata} | None + @param dataset_metadata: Metadata of the dataset. + Some nodes won't function if not provided. + + @type n_classes: int | None + @param n_classes: Number of classes in the dataset. Provide only + in case `dataset_metadata` is not provided. Defaults to None. + + @type in_sizes: Size | list[Size] | None + @param in_sizes: List of input sizes for the node. + Provide only in case the `input_shapes` were not provided. + + @type attach_index: AttachIndexType + @param attach_index: Index of previous output that this node attaches to. + Can be a single integer to specify a single output, a tuple of + two or three integers to specify a range of outputs or `"all"` to + specify all outputs. Defaults to "all". Python indexing conventions apply. 
If provided as a constructor argument, overrides the class attribute. + + + @type _tasks: dict[LabelType, str] | None + @param _tasks: Dictionary of tasks that the node supports. Overrides the + class L{tasks} attribute. Shouldn't be provided by the user in most cases. + """ super().__init__() + if attach_index is not None: + logger.warning( + f"Node {self.name} overrides `attach_index` " + f"by setting it to '{attach_index}'. " + "Make sure this is intended." + ) + self.attach_index = attach_index self._tasks = None if _tasks is not None: self._tasks = _tasks @@ -180,15 +190,36 @@ def __init__( self._epoch = 0 self._in_sizes = in_sizes + self._check_type_overrides() + @staticmethod def _process_tasks( tasks: dict[LabelType, str] | list[LabelType], ) -> dict[LabelType, str]: if isinstance(tasks, dict): return tasks - if isinstance(tasks, list): + else: return {task: task.value for task in tasks} + def _check_type_overrides(self) -> None: + properties = [] + for name, value in inspect.getmembers(self.__class__): + if isinstance(value, property): + properties.append(name) + for name, typ in self.__annotations__.items(): + if name in properties: + with suppress(RuntimeError): + value = getattr(self, name) + try: + check_type(value, typ) + except TypeCheckError as e: + raise IncompatibleException( + f"Node '{self.name}' specifies the type of the property `{name}` as `{typ}`, " + f"but received `{type(value)}`. " + f"This may indicate that the '{self.name}' node is " + "not compatible with its predecessor." + ) from e + def get_task_name(self, task: LabelType) -> str: """Gets the name of a task for a particular C{LabelType}. @@ -196,14 +227,15 @@ def get_task_name(self, task: LabelType) -> str: @param task: Task to get the name for. @rtype: str @return: Name of the task. + @raises RuntimeError: If the node does not define any tasks. @raises ValueError: If the task is not supported by the node. 
""" if not self._tasks: - raise ValueError(f"Node {self.name} does not have any tasks defined.") + raise RuntimeError(f"Node '{self.name}' does not define any task.") if task not in self._tasks: raise ValueError( - f"Node {self.name} does not support the {task.value} task." + f"Node '{self.name}' does not support the '{task.value}' task." ) return self._tasks[task] @@ -213,14 +245,20 @@ def name(self) -> str: @property def task(self) -> str: - """Getter for the task.""" + """Getter for the task. + + @type: str + @raises RuntimeError: If the node doesn't define any task. + @raises ValueError: If the node defines more than one task. In + that case, use the L{get_task_name} method instead. + """ if not self._tasks: - raise ValueError(f"{self.name} does not have any tasks defined.") + raise RuntimeError(f"{self.name} does not define any task.") if len(self._tasks) > 1: raise ValueError( f"Node {self.name} has multiple tasks defined. " - "Use `get_task_name` method instead." + "Use the `get_task_name` method instead." ) return next(iter(self._tasks.values())) @@ -242,22 +280,27 @@ def get_class_names(self, task: LabelType) -> list[str]: @rtype: list[str] @return: Class names for the task. """ - return self.dataset_metadata.class_names(self.get_task_name(task)) + return self.dataset_metadata.classes(self.get_task_name(task)) @property def n_keypoints(self) -> int: - """Getter for the number of keypoints.""" + """Getter for the number of keypoints. + + @type: int + @raises ValueError: If the node does not support keypoints. + @raises RuntimeError: If the node doesn't define any task. 
+ """ if self._n_keypoints is not None: return self._n_keypoints if self._tasks: if LabelType.KEYPOINTS not in self._tasks: - raise (ValueError(f"{self.name} does not support keypoints.")) + raise ValueError(f"{self.name} does not support keypoints.") return self.dataset_metadata.n_keypoints( self.get_task_name(LabelType.KEYPOINTS) ) - raise ValueError( + raise RuntimeError( f"{self.name} does not have any tasks defined, " "`BaseNode.n_keypoints` property cannot be used. " "Either override the `tasks` class attribute, " @@ -267,12 +310,19 @@ def n_keypoints(self) -> int: @property def n_classes(self) -> int: - """Getter for the number of classes.""" + """Getter for the number of classes. + + @type: int + @raises RuntimeError: If the node doesn't define any task. + @raises ValueError: If the number of classes is different for + different tasks. In that case, use the L{get_n_classes} + method. + """ if self._n_classes is not None: return self._n_classes if not self._tasks: - raise ValueError( + raise RuntimeError( f"{self.name} does not have any tasks defined, " "`BaseNode.n_classes` property cannot be used. " "Either override the `tasks` class attribute, " @@ -296,9 +346,16 @@ def n_classes(self) -> int: @property def class_names(self) -> list[str]: - """Getter for the class names.""" + """Getter for the class names. + + @type: list[str] + @raises RuntimeError: If the node doesn't define any task. + @raises ValueError: If the class names are different for + different tasks. In that case, use the L{get_class_names} + method. + """ if not self._tasks: - raise ValueError( + raise RuntimeError( f"{self.name} does not have any tasks defined, " "`BaseNode.class_names` property cannot be used. " "Either override the `tasks` class attribute, " @@ -306,10 +363,10 @@ def class_names(self) -> list[str]: "the `BaseNode.dataset_metadata.class_names` method manually." 
) elif len(self._tasks) == 1: - return self.dataset_metadata.class_names(self.task) + return self.dataset_metadata.classes(self.task) else: class_names = [ - self.dataset_metadata.class_names(self.get_task_name(task)) + self.dataset_metadata.classes(self.get_task_name(task)) for task in self._tasks ] if all(set(names) == set(class_names[0]) for names in class_names): @@ -322,14 +379,25 @@ def class_names(self) -> list[str]: @property def input_shapes(self) -> list[Packet[Size]]: - """Getter for the input shapes.""" + """Getter for the input shapes. + + @type: list[Packet[Size]] + @raises RuntimeError: If the C{input_shapes} were not set during + initialization. + """ + if self._input_shapes is None: raise self._non_set_error("input_shapes") return self._input_shapes @property def original_in_shape(self) -> Size: - """Getter for the original input shape.""" + """Getter for the original input shape as [N, H, W]. + + @type: Size + @raises RuntimeError: If the C{original_in_shape} were not set + during initialization. + """ if self._original_in_shape is None: raise self._non_set_error("original_in_shape") return self._original_in_shape @@ -339,10 +407,11 @@ def dataset_metadata(self) -> DatasetMetadata: """Getter for the dataset metadata. @type: L{DatasetMetadata} - @raises ValueError: If the C{dataset_metadata} is C{None}. + @raises RuntimeError: If the C{dataset_metadata} were not set + during initialization. """ if self._dataset_metadata is None: - raise ValueError( + raise RuntimeError( f"{self._non_set_error('dataset_metadata')}" "Either provide `dataset_metadata` or `n_classes`." ) @@ -358,7 +427,7 @@ def in_sizes(self) -> Size | list[Size]: In case `in_sizes` were provided during initialization, they are returned directly. 
- Example: + Example:: >>> input_shapes = [{"features": [Size(64, 128, 128), Size(3, 224, 224)]}] >>> attach_index = -1 @@ -369,7 +438,7 @@ def in_sizes(self) -> Size | list[Size]: >>> in_sizes = [Size(64, 128, 128), Size(3, 224, 224)] @type: Size | list[Size] - @raises IncompatibleException: If the C{input_shapes} are too complicated for + @raises RuntimeError: If the C{input_shapes} are too complicated for the default implementation. """ if self._in_sizes is not None: @@ -377,27 +446,25 @@ def in_sizes(self) -> Size | list[Size]: features = self.input_shapes[0].get("features") if features is None: - raise IncompatibleException( + raise RuntimeError( f"Feature field is missing in {self.name}. " "The default implementation of `in_sizes` cannot be used." ) - shapes = self.get_attached(self.input_shapes[0]["features"]) - if isinstance(shapes, list) and len(shapes) == 1: - return shapes[0] - return shapes + return self.get_attached(self.input_shapes[0]["features"]) @property def in_channels(self) -> int | list[int]: """Simplified getter for the number of input channels. - Should work out of the box for most cases where the C{input_shapes} are - sufficiently simple. Otherwise the C{input_shapes} should be used directly. If - C{attach_index} is set to "all" or is a slice, returns a list of input channels, + Should work out of the box for most cases where the + C{input_shapes} are sufficiently simple. Otherwise the + C{input_shapes} should be used directly. If C{attach_index} is + set to "all" or is a slice, returns a list of input channels, otherwise returns a single value. @type: int | list[int] - @raises IncompatibleException: If the C{input_shapes} are too complicated for - the default implementation. + @raises RuntimeError: If the C{input_shapes} are too complicated + for the default implementation of C{in_sizes}. """ return self._get_nth_size(-3) @@ -409,8 +476,8 @@ def in_height(self) -> int | list[int]: sufficiently simple. 
Otherwise the `input_shapes` should be used directly. @type: int | list[int] - @raises IncompatibleException: If the C{input_shapes} are too complicated for - the default implementation. + @raises RuntimeError: If the C{input_shapes} are too complicated for + the default implementation of C{in_sizes}. """ return self._get_nth_size(-2) @@ -422,8 +489,8 @@ def in_width(self) -> int | list[int]: sufficiently simple. Otherwise the `input_shapes` should be used directly. @type: int | list[int] - @raises IncompatibleException: If the C{input_shapes} are too complicated for - the default implementation. + @raises RuntimeError: If the C{input_shapes} are too complicated for + the default implementation of C{in_sizes}. """ return self._get_nth_size(-1) @@ -443,23 +510,26 @@ def set_export_mode(self, mode: bool = True) -> None: def unwrap(self, inputs: list[Packet[Tensor]]) -> ForwardInputT: """Prepares inputs for the forward pass. - Unwraps the inputs from the C{list[Packet[Tensor]]} input so they can be passed - to the forward call. The default implementation expects a single input with - C{features} key and returns the tensor or tensors at the C{attach_index} - position. + Unwraps the inputs from the C{list[Packet[Tensor]]} input so + they can be passed to the forward call. The default + implementation expects a single input with C{features} key and + returns the tensor or tensors at the C{attach_index} position. - For most cases the default implementation should be sufficient. Exceptions are - modules with multiple inputs or producing more complex outputs. This is - typically the case for output nodes. + For most cases the default implementation should be sufficient. + Exceptions are modules with multiple inputs or producing more + complex outputs. This is typically the case for output nodes. @type inputs: list[Packet[Tensor]] @param inputs: Inputs to the node. @rtype: ForwardInputT - @return: Prepared inputs, ready to be passed to the L{forward} method. 
+ @return: Prepared inputs, ready to be passed to the L{forward} + method. + @raises ValueError: If the number of inputs is not equal to 1. + In such cases the method has to be overridden. """ if len(inputs) > 1: - raise IncompatibleException( - f"Node {self.name} expects a single input, but got {len(inputs)} inputs instead." + raise ValueError( + f"Node {self.name} expects a single input, but got {len(inputs)} inputs instead. " "If the node expects multiple inputs, the `unwrap` method should be overridden." ) return self.get_attached(inputs[0]["features"]) # type: ignore @@ -468,9 +538,9 @@ def unwrap(self, inputs: list[Packet[Tensor]]) -> ForwardInputT: def forward(self, inputs: ForwardInputT) -> ForwardOutputT: """Forward pass of the module. - @type inputs: ForwardInputT + @type inputs: L{ForwardInputT} @param inputs: Inputs to the module. - @rtype: ForwardOutputT + @rtype: L{ForwardOutputT} @return: Result of the forward pass. """ ... @@ -502,27 +572,30 @@ def wrap(self, output: ForwardOutputT) -> Packet[Tensor]: @rtype: L{Packet}[Tensor] @return: Wrapped output. + + @raises ValueError: If the C{output} argument is not a tensor or a list of tensors. + In such cases the L{wrap} method should be overridden. """ - match output: - case Tensor() as out: - outputs = [out] - case list(tensors) if all(isinstance(t, Tensor) for t in tensors): - outputs = tensors - case _: - raise IncompatibleException( - "Default `wrap` expects a single tensor or a list of tensors." - ) + if isinstance(output, Tensor): + outputs = [output] + elif isinstance(output, (list, tuple)) and all( + isinstance(t, Tensor) for t in output + ): + outputs = list(output) + else: + raise ValueError( + "Default `wrap` expects a single tensor or a list of tensors." 
+ ) try: task = self.task - except ValueError: + except RuntimeError: task = "features" return {task: outputs} def run(self, inputs: list[Packet[Tensor]]) -> Packet[Tensor]: - """Combines the forward pass with the wrapping and unwrapping of the inputs. - - Additionally validates the inputs against `input_protocols`. + """Combines the forward pass with the wrapping and unwrapping of + the inputs. @type inputs: list[Packet[Tensor]] @param inputs: Inputs to the module. @@ -531,9 +604,9 @@ def run(self, inputs: list[Packet[Tensor]]) -> Packet[Tensor]: @return: Outputs of the module as a dictionary of list of tensors: `{"features": [Tensor, ...], "segmentation": [Tensor]}` - @raises IncompatibleException: If the inputs are not compatible with the node. + @raises RuntimeError: If default L{wrap} or L{unwrap} methods are not sufficient. """ - unwrapped = self.unwrap(self.validate(inputs)) + unwrapped = self.unwrap(inputs) outputs = self(unwrapped) wrapped = self.wrap(outputs) str_tasks = [task.value for task in self._tasks] if self._tasks else [] @@ -543,38 +616,21 @@ def run(self, inputs: list[Packet[Tensor]]) -> Packet[Tensor]: wrapped[self.get_task_name(LabelType(key))] = value return wrapped - def validate(self, data: list[Packet[Tensor]]) -> list[Packet[Tensor]]: - """Validates the inputs against `input_protocols`.""" - if len(data) != len(self.input_protocols): - raise IncompatibleException( - f"Node {self.name} expects {len(self.input_protocols)} inputs, " - f"but got {len(data)} inputs instead." - ) - try: - return [ - validate_packet(d, protocol) - for d, protocol in zip(data, self.input_protocols) - ] - except ValidationError as e: - raise IncompatibleException.from_validation_error(e, self.name) from e - T = TypeVar("T", Tensor, Size) def get_attached(self, lst: list[T]) -> list[T] | T: """Gets the attached elements from a list. - This method is used to get the attached elements from a list based on - the `attach_index` attribute. 
+ This method is used to get the attached elements from a list + based on the C{attach_index} attribute. @type lst: list[T] - @param lst: List to get the attached elements from. Can be either - a list of tensors or a list of sizes. - + @param lst: List to get the attached elements from. Can be + either a list of tensors or a list of sizes. @rtype: list[T] | T - @return: Attached elements. If `attach_index` is set to `"all"` or is a slice, - returns a list of attached elements. - - @raises ValueError: If the `attach_index` is invalid. + @return: Attached elements. If C{attach_index} is set to + C{"all"} or is a slice, returns a list of attached elements. + @raises ValueError: If the C{attach_index} is invalid. """ def _normalize_index(index: int) -> int: @@ -608,7 +664,9 @@ def _normalize_slice(i: int, j: int) -> slice: case (int(i), int(j), int(k)): return lst[i:j:k] case _: - raise ValueError(f"Invalid attach index: `{self.attach_index}`") + raise ValueError( + f"Invalid attach index: `{self.attach_index}`" + ) def _get_nth_size(self, idx: int) -> int | list[int]: match self.in_sizes: @@ -617,8 +675,8 @@ def _get_nth_size(self, idx: int) -> int | list[int]: case list(sizes): return [size[idx] for size in sizes] - def _non_set_error(self, name: str) -> ValueError: - return ValueError( - f"{self.name} is trying to access `{name}`, " + def _non_set_error(self, name: str) -> RuntimeError: + return RuntimeError( + f"'{self.name}' node is trying to access `{name}`, " "but it was not set during initialization. " ) diff --git a/luxonis_train/nodes/blocks/blocks.py b/luxonis_train/nodes/blocks/blocks.py index 0e0a4ad2..9231ea85 100644 --- a/luxonis_train/nodes/blocks/blocks.py +++ b/luxonis_train/nodes/blocks/blocks.py @@ -1,6 +1,3 @@ -# TODO: cleanup, document -# Check if some blocks could be merged togetner. 
- import math from typing import TypeVar @@ -13,7 +10,8 @@ class EfficientDecoupledBlock(nn.Module): def __init__(self, n_classes: int, in_channels: int): - """Efficient Decoupled block used for class and regression predictions. + """Efficient Decoupled block used for class and regression + predictions. @type n_classes: int @param n_classes: Number of classes. @@ -39,7 +37,9 @@ def __init__(self, n_classes: int, in_channels: int): padding=1, activation=nn.SiLU(), ), - nn.Conv2d(in_channels=in_channels, out_channels=n_classes, kernel_size=1), + nn.Conv2d( + in_channels=in_channels, out_channels=n_classes, kernel_size=1 + ), ) self.regression_branch = nn.Sequential( ConvModule( @@ -152,7 +152,10 @@ def __init__( super().__init__( nn.ConvTranspose2d( - in_channels, out_channels, kernel_size=kernel_size, stride=stride + in_channels, + out_channels, + kernel_size=kernel_size, + stride=stride, ), ConvModule(out_channels, out_channels, kernel_size=3, padding=1), ) @@ -299,7 +302,9 @@ def forward(self, x: Tensor) -> Tensor: else: id_out = self.rbr_identity(x) - return self.nonlinearity(self.se(self.rbr_dense(x) + self.rbr_1x1(x) + id_out)) + return self.nonlinearity( + self.se(self.rbr_dense(x) + self.rbr_1x1(x) + id_out) + ) def reparametrize(self) -> None: if hasattr(self, "rbr_reparam"): @@ -318,15 +323,16 @@ def reparametrize(self) -> None: ) self.rbr_reparam.weight.data = kernel # type: ignore self.rbr_reparam.bias.data = bias # type: ignore - self.__delattr__("rbr_dense") - self.__delattr__("rbr_1x1") + del self.rbr_dense + del self.rbr_1x1 if hasattr(self, "rbr_identity"): - self.__delattr__("rbr_identity") + del self.rbr_identity if hasattr(self, "id_tensor"): - self.__delattr__("id_tensor") + del self.id_tensor def _get_equivalent_kernel_bias(self) -> tuple[Tensor, Tensor]: - """Derives the equivalent kernel and bias in a DIFFERENTIABLE way.""" + """Derives the equivalent kernel and bias in a DIFFERENTIABLE + way.""" kernel3x3, bias3x3 = 
self._fuse_bn_tensor(self.rbr_dense) kernel1x1, bias1x1 = self._fuse_bn_tensor(self.rbr_1x1) kernelid, biasid = self._fuse_bn_tensor(self.rbr_identity) @@ -343,7 +349,9 @@ def _pad_1x1_to_3x3_tensor(self, kernel1x1: Tensor | None) -> Tensor: else: return torch.nn.functional.pad(kernel1x1, [1, 1, 1, 1]) - def _fuse_bn_tensor(self, branch: nn.Module | None) -> tuple[Tensor, Tensor]: + def _fuse_bn_tensor( + self, branch: nn.Module | None + ) -> tuple[Tensor, Tensor]: if branch is None: return torch.tensor(0), torch.tensor(0) if isinstance(branch, nn.Sequential): @@ -381,11 +389,11 @@ def __init__( block: type[nn.Module], in_channels: int, out_channels: int, - num_blocks: int = 1, + n_blocks: int = 1, ): - """Module which repeats the block n times. First block accepts in_channels and - outputs out_channels while subsequent blocks accept out_channels and output - out_channels. + """Module which repeats the block n times. First block accepts + in_channels and outputs out_channels while subsequent blocks + accept out_channels and output out_channels. @type block: L{nn.Module} @param block: Block to repeat. @@ -393,14 +401,14 @@ def __init__( @param in_channels: Number of input channels. @type out_channels: int @param out_channels: Number of output channels. - @type num_blocks: int - @param num_blocks: Number of blocks to repeat. Defaults to C{1}. + @type n_blocks: int + @param n_blocks: Number of blocks to repeat. Defaults to C{1}. """ super().__init__() in_channels = in_channels self.blocks = nn.ModuleList() - for _ in range(num_blocks): + for _ in range(n_blocks): self.blocks.append( block(in_channels=in_channels, out_channels=out_channels) ) @@ -413,8 +421,11 @@ def forward(self, x: Tensor) -> Tensor: class SpatialPyramidPoolingBlock(nn.Module): - def __init__(self, in_channels: int, out_channels: int, kernel_size: int = 5): - """Spatial Pyramid Pooling block with ReLU activation on three different scales. 
+ def __init__( + self, in_channels: int, out_channels: int, kernel_size: int = 5 + ): + """Spatial Pyramid Pooling block with ReLU activation on three + different scales. @type in_channels: int @param in_channels: Number of input channels. @@ -476,7 +487,9 @@ def forward(self, x: Tensor) -> Tensor: class FeatureFusionBlock(nn.Module): - def __init__(self, in_channels: int, out_channels: int, reduction: int = 1): + def __init__( + self, in_channels: int, out_channels: int, reduction: int = 1 + ): """Feature Fusion block adapted from: U{https://github.com/taveraantonio/BiseNetv1}. @type in_channels: int @@ -600,19 +613,19 @@ def __init__( in_channels: int, in_channels_next: int, out_channels: int, - num_repeats: int, + n_repeats: int, ): """UpBlock used in RepPAN neck. @type in_channels: int @param in_channels: Number of input channels. @type in_channels_next: int - @param in_channels_next: Number of input channels of next input which is used in - concat. + @param in_channels_next: Number of input channels of next input + which is used in concat. @type out_channels: int @param out_channels: Number of output channels. - @type num_repeats: int - @param num_repeats: Number of RepVGGBlock repeats. + @type n_repeats: int + @param n_repeats: Number of RepVGGBlock repeats. """ super().__init__() @@ -634,7 +647,7 @@ def __init__( block=RepVGGBlock, in_channels=in_channels_next + out_channels, out_channels=out_channels, - num_blocks=num_repeats, + n_blocks=n_repeats, ) def forward(self, x0: Tensor, x1: Tensor) -> tuple[Tensor, Tensor]: @@ -652,21 +665,22 @@ def __init__( downsample_out_channels: int, in_channels_next: int, out_channels: int, - num_repeats: int, + n_repeats: int, ): """DownBlock used in RepPAN neck. @type in_channels: int @param in_channels: Number of input channels. @type downsample_out_channels: int - @param downsample_out_channels: Number of output channels after downsample. + @param downsample_out_channels: Number of output channels after + downsample. 
@type in_channels_next: int - @param in_channels_next: Number of input channels of next input which is used in - concat. + @param in_channels_next: Number of input channels of next input + which is used in concat. @type out_channels: int @param out_channels: Number of output channels. - @type num_repeats: int - @param num_repeats: Number of RepVGGBlock repeats. + @type n_repeats: int + @param n_repeats: Number of RepVGGBlock repeats. """ super().__init__() @@ -681,7 +695,7 @@ def __init__( block=RepVGGBlock, in_channels=downsample_out_channels + in_channels_next, out_channels=out_channels, - num_blocks=num_repeats, + n_blocks=n_repeats, ) def forward(self, x0: Tensor, x1: Tensor) -> Tensor: diff --git a/luxonis_train/nodes/heads/bisenet_head.py b/luxonis_train/nodes/heads/bisenet_head.py index 3fef7584..dd6e6333 100644 --- a/luxonis_train/nodes/heads/bisenet_head.py +++ b/luxonis_train/nodes/heads/bisenet_head.py @@ -1,31 +1,28 @@ -"""BiSeNet segmentation head. - -Adapted from U{https://github.com/taveraantonio/BiseNetv1}. -License: NOT SPECIFIED. -""" - +from typing import Any +from luxonis_ml.data import LabelType from torch import Tensor, nn from luxonis_train.nodes.base_node import BaseNode from luxonis_train.nodes.blocks import ConvModule -from luxonis_train.utils.general import infer_upscale_factor -from luxonis_train.utils.types import LabelType, Packet +from luxonis_train.utils import infer_upscale_factor class BiSeNetHead(BaseNode[Tensor, Tensor]): in_height: int + in_width: int in_channels: int tasks: list[LabelType] = [LabelType.SEGMENTATION] - def __init__( - self, - intermediate_channels: int = 64, - **kwargs, - ): + def __init__(self, intermediate_channels: int = 64, **kwargs: Any): """BiSeNet segmentation head. - TODO: Add more documentation. + + Source: U{BiseNetV1} + @license: NOT SPECIFIED. 
+ @see: U{BiseNetv1: Bilateral Segmentation Network for + Real-time Semantic Segmentation + } @type intermediate_channels: int @param intermediate_channels: How many intermediate channels to use. @@ -33,17 +30,28 @@ def __init__( """ super().__init__(**kwargs) - original_height = self.original_in_shape[1] - upscale_factor = 2 ** infer_upscale_factor(self.in_height, original_height) + h, w = self.original_in_shape[1:] + upscale_factor = 2 ** infer_upscale_factor( + (self.in_height, self.in_width), (h, w) + ) out_channels = self.n_classes * upscale_factor * upscale_factor - self.conv_3x3 = ConvModule(self.in_channels, intermediate_channels, 3, 1, 1) - self.conv_1x1 = nn.Conv2d(intermediate_channels, out_channels, 1, 1, 0) + self.conv_3x3 = ConvModule( + self.in_channels, + intermediate_channels, + kernel_size=3, + stride=1, + padding=1, + ) + self.conv_1x1 = nn.Conv2d( + intermediate_channels, + out_channels, + kernel_size=1, + stride=1, + padding=0, + ) self.upscale = nn.PixelShuffle(upscale_factor) - def wrap(self, output: Tensor) -> Packet[Tensor]: - return {"segmentation": [output]} - def forward(self, inputs: Tensor) -> Tensor: x = self.conv_3x3(inputs) x = self.conv_1x1(x) diff --git a/luxonis_train/nodes/heads/classification_head.py b/luxonis_train/nodes/heads/classification_head.py index 07b3d72b..5961c853 100644 --- a/luxonis_train/nodes/heads/classification_head.py +++ b/luxonis_train/nodes/heads/classification_head.py @@ -1,3 +1,5 @@ +from typing import Any + from torch import Tensor, nn from luxonis_train.nodes.base_node import BaseNode @@ -8,16 +10,15 @@ class ClassificationHead(BaseNode[Tensor, Tensor]): in_channels: int tasks: list[LabelType] = [LabelType.CLASSIFICATION] - def __init__( - self, - dropout_rate: float = 0.2, - **kwargs, - ): + def __init__(self, dropout_rate: float = 0.2, **kwargs: Any): """Simple classification head. + Consists of a global average pooling layer followed by a dropout + layer and a single linear layer. 
+ @type dropout_rate: float - @param dropout_rate: Dropout rate before last layer, range C{[0, 1]}. Defaults - to C{0.2}. + @param dropout_rate: Dropout rate before last layer, range C{[0, + 1]}. Defaults to C{0.2}. """ super().__init__(**kwargs) diff --git a/luxonis_train/nodes/heads/efficient_bbox_head.py b/luxonis_train/nodes/heads/efficient_bbox_head.py index 5607a2a8..6f0e01e7 100644 --- a/luxonis_train/nodes/heads/efficient_bbox_head.py +++ b/luxonis_train/nodes/heads/efficient_bbox_head.py @@ -1,22 +1,20 @@ -"""Head for object detection. - -Adapted from U{YOLOv6: A Single-Stage Object Detection Framework for Industrial -Applications}. -""" - -from typing import Literal +import logging +from typing import Any, Literal import torch +from luxonis_ml.data import LabelType from torch import Tensor, nn from luxonis_train.nodes.base_node import BaseNode from luxonis_train.nodes.blocks import EfficientDecoupledBlock -from luxonis_train.utils.boxutils import ( +from luxonis_train.utils import ( + Packet, anchors_for_fpn_features, dist2bbox, non_max_suppression, ) -from luxonis_train.utils.types import LabelType, Packet + +logger = logging.getLogger(__name__) class EfficientBBoxHead( @@ -31,24 +29,24 @@ def __init__( conf_thres: float = 0.25, iou_thres: float = 0.45, max_det: int = 300, - **kwargs, + **kwargs: Any, ): """Head for object detection. - TODO: add more documentation - + Adapted from U{YOLOv6: A Single-Stage Object Detection Framework + for Industrial Applications + }. @type n_heads: Literal[2,3,4] - @param n_heads: Number of output heads. Defaults to 3. - ***Note:*** Should be same also on neck in most cases. - + @param n_heads: Number of output heads. Defaults to 3. B{Note:} + Should be same also on neck in most cases. @type conf_thres: float - @param conf_thres: Threshold for confidence. Defaults to C{0.25}. - + @param conf_thres: Threshold for confidence. Defaults to + C{0.25}. @type iou_thres: float @param iou_thres: Threshold for IoU. 
Defaults to C{0.45}. - @type max_det: int - @param max_det: Maximum number of detections retained after NMS. Defaults to C{300}. + @param max_det: Maximum number of detections retained after NMS. + Defaults to C{300}. """ super().__init__(**kwargs) @@ -58,11 +56,18 @@ def __init__( self.iou_thres = iou_thres self.max_det = max_det - self.stride = self._fit_stride_to_num_heads() + self.stride = self._fit_stride_to_n_heads() self.grid_cell_offset = 0.5 self.grid_cell_size = 5.0 self.heads = nn.ModuleList() + if len(self.in_channels) < self.n_heads: + logger.warning( + f"Head '{self.name}' was set to use {self.n_heads} heads, " + f"but received only {len(self.in_channels)} inputs. " + f"Changing number of heads to {len(self.in_channels)}." + ) + self.n_heads = len(self.in_channels) for i in range(self.n_heads): curr_head = EfficientDecoupledBlock( n_classes=self.n_classes, @@ -92,18 +97,25 @@ def wrap( features, cls_score_list, reg_distri_list = output if self.export: - outputs = [] - for out_cls, out_reg in zip(cls_score_list, reg_distri_list, strict=True): + outputs: list[Tensor] = [] + for out_cls, out_reg in zip( + cls_score_list, reg_distri_list, strict=True + ): conf, _ = out_cls.max(1, keepdim=True) out = torch.cat([out_reg, conf, out_cls], dim=1) outputs.append(out) return {self.task: outputs} cls_tensor = torch.cat( - [cls_score_list[i].flatten(2) for i in range(len(cls_score_list))], dim=2 + [cls_score_list[i].flatten(2) for i in range(len(cls_score_list))], + dim=2, ).permute(0, 2, 1) reg_tensor = torch.cat( - [reg_distri_list[i].flatten(2) for i in range(len(reg_distri_list))], dim=2 + [ + reg_distri_list[i].flatten(2) + for i in range(len(reg_distri_list)) + ], + dim=2, ).permute(0, 2, 1) if self.training: @@ -122,8 +134,9 @@ def wrap( "distributions": [reg_tensor], } - def _fit_stride_to_num_heads(self): - """Returns correct stride for number of heads and attach index.""" + def _fit_stride_to_n_heads(self): + """Returns correct stride for number of heads 
and attach + index.""" stride = torch.tensor( [ self.original_in_shape[1] / x[2] # type: ignore @@ -136,7 +149,8 @@ def _fit_stride_to_num_heads(self): def _process_to_bbox( self, output: tuple[list[Tensor], Tensor, Tensor] ) -> list[Tensor]: - """Performs post-processing of the output and returns bboxs after NMS.""" + """Performs post-processing of the output and returns bboxs + after NMS.""" features, cls_score_list, reg_dist_list = output _, anchor_points, _, stride_tensor = anchors_for_fpn_features( features, @@ -146,7 +160,9 @@ def _process_to_bbox( multiply_with_stride=False, ) - pred_bboxes = dist2bbox(reg_dist_list, anchor_points, out_format="xyxy") + pred_bboxes = dist2bbox( + reg_dist_list, anchor_points, out_format="xyxy" + ) pred_bboxes *= stride_tensor output_merged = torch.cat( diff --git a/luxonis_train/nodes/heads/efficient_keypoint_bbox_head.py b/luxonis_train/nodes/heads/efficient_keypoint_bbox_head.py index 03d29296..51b8b704 100644 --- a/luxonis_train/nodes/heads/efficient_keypoint_bbox_head.py +++ b/luxonis_train/nodes/heads/efficient_keypoint_bbox_head.py @@ -1,15 +1,16 @@ -from typing import Literal +from typing import Any, Literal import torch +from luxonis_ml.data import LabelType from torch import Tensor, nn from luxonis_train.nodes.blocks import ConvModule -from luxonis_train.utils.boxutils import ( +from luxonis_train.utils import ( + Packet, anchors_for_fpn_features, dist2bbox, non_max_suppression, ) -from luxonis_train.utils.types import LabelType, Packet from .efficient_bbox_head import EfficientBBoxHead @@ -23,7 +24,7 @@ def __init__( conf_thres: float = 0.25, iou_thres: float = 0.45, max_det: int = 300, - **kwargs, + **kwargs: Any, ): """Head for object and keypoint detection. 
@@ -68,7 +69,12 @@ def forward( ) -> tuple[list[Tensor], list[Tensor], list[Tensor], list[Tensor]]: features, cls_score_list, reg_distri_list = super().forward(inputs) - _, self.anchor_points, _, self.stride_tensor = anchors_for_fpn_features( + ( + _, + self.anchor_points, + _, + self.stride_tensor, + ) = anchors_for_fpn_features( features, self.stride, self.grid_cell_size, @@ -84,17 +90,18 @@ def forward( return features, cls_score_list, reg_distri_list, kpt_list def wrap( - self, output: tuple[list[Tensor], list[Tensor], list[Tensor], list[Tensor]] + self, + output: tuple[list[Tensor], list[Tensor], list[Tensor], list[Tensor]], ) -> Packet[Tensor]: features, cls_score_list, reg_distri_list, kpt_list = output bs = features[0].shape[0] if self.export: - outputs = [] + outputs: list[Tensor] = [] for out_cls, out_reg, out_kpts in zip( cls_score_list, reg_distri_list, kpt_list, strict=True ): - chunks = out_kpts.split(3, dim=1) - modified_chunks = [] + chunks = torch.split(out_kpts, 3, dim=1) + modified_chunks: list[Tensor] = [] for chunk in chunks: x = chunk[:, 0:1, :, :] y = chunk[:, 1:2, :, :] @@ -105,11 +112,17 @@ def wrap( out = torch.cat([out_reg, out_cls, out_kpts_modified], dim=1) outputs.append(out) return {"outputs": outputs} + cls_tensor = torch.cat( - [cls_score_list[i].flatten(2) for i in range(len(cls_score_list))], dim=2 + [cls_score_list[i].flatten(2) for i in range(len(cls_score_list))], + dim=2, ).permute(0, 2, 1) reg_tensor = torch.cat( - [reg_distri_list[i].flatten(2) for i in range(len(reg_distri_list))], dim=2 + [ + reg_distri_list[i].flatten(2) + for i in range(len(reg_distri_list)) + ], + dim=2, ).permute(0, 2, 1) kpt_tensor = torch.cat( [ @@ -143,7 +156,7 @@ def wrap( "keypoints_raw": [kpt_tensor], } - def _dist2kpts(self, kpts): + def _dist2kpts(self, kpts: Tensor) -> Tensor: """Decodes keypoints.""" y = kpts.clone() @@ -154,8 +167,12 @@ def _dist2kpts(self, kpts): anchor_points_x = anchor_points_transposed[0].view(1, -1, 1) anchor_points_y = 
anchor_points_transposed[1].view(1, -1, 1) - y[:, :, 0::3] = (y[:, :, 0::3] * 2.0 + (anchor_points_x - 0.5)) * stride_tensor - y[:, :, 1::3] = (y[:, :, 1::3] * 2.0 + (anchor_points_y - 0.5)) * stride_tensor + y[:, :, 0::3] = ( + y[:, :, 0::3] * 2.0 + (anchor_points_x - 0.5) + ) * stride_tensor + y[:, :, 1::3] = ( + y[:, :, 1::3] * 2.0 + (anchor_points_y - 0.5) + ) * stride_tensor y[:, :, 2::3] = y[:, :, 2::3].sigmoid() return y @@ -163,10 +180,13 @@ def _dist2kpts(self, kpts): def _process_to_bbox_and_kps( self, output: tuple[list[Tensor], Tensor, Tensor, Tensor] ) -> list[Tensor]: - """Performs post-processing of the output and returns bboxs after NMS.""" + """Performs post-processing of the output and returns bboxs + after NMS.""" features, cls_score_list, reg_dist_list, keypoints = output - pred_bboxes = dist2bbox(reg_dist_list, self.anchor_points, out_format="xyxy") + pred_bboxes = dist2bbox( + reg_dist_list, self.anchor_points, out_format="xyxy" + ) pred_bboxes *= self.stride_tensor output_merged = torch.cat( diff --git a/luxonis_train/nodes/heads/implicit_keypoint_bbox_head.py b/luxonis_train/nodes/heads/implicit_keypoint_bbox_head.py index 0ca995c5..5de88650 100644 --- a/luxonis_train/nodes/heads/implicit_keypoint_bbox_head.py +++ b/luxonis_train/nodes/heads/implicit_keypoint_bbox_head.py @@ -1,34 +1,38 @@ import logging import math -from typing import cast +from typing import Any, cast import torch +from luxonis_ml.data import LabelType from torch import Tensor, nn from luxonis_train.nodes.base_node import BaseNode from luxonis_train.nodes.blocks import KeypointBlock, LearnableMulAddConv -from luxonis_train.utils.boxutils import ( +from luxonis_train.utils import ( + Packet, non_max_suppression, process_bbox_predictions, process_keypoints_predictions, ) -from luxonis_train.utils.types import LabelType, Packet logger = logging.getLogger(__name__) -class ImplicitKeypointBBoxHead(BaseNode): - tasks: list[LabelType] = [LabelType.KEYPOINTS, 
LabelType.BOUNDINGBOX] +class ImplicitKeypointBBoxHead( + BaseNode[list[Tensor], tuple[list[Tensor], Tensor]] +): + tasks = [LabelType.KEYPOINTS, LabelType.BOUNDINGBOX] + in_channels: list[int] def __init__( self, - num_heads: int = 3, + n_heads: int = 3, anchors: list[list[float]] | None = None, init_coco_biases: bool = True, conf_thres: float = 0.25, iou_thres: float = 0.45, max_det: int = 300, - **kwargs, + **kwargs: Any, ): """Head for object and keypoint detection. @@ -37,8 +41,8 @@ def __init__( TODO: more technical documentation - @type num_heads: int - @param num_heads: Number of output heads. Defaults to C{3}. + @type n_heads: int + @param n_heads: Number of output heads. Defaults to C{3}. B{Note:} Should be same also on neck in most cases. @type anchors: list[list[float]] | None @param anchors: Anchors used for object detection. @@ -53,16 +57,27 @@ def __init__( """ super().__init__(**kwargs) - if anchors is None: - logger.info("No anchors provided, generating them automatically.") - anchors, recall = self.dataset_metadata.autogenerate_anchors(num_heads) - logger.info(f"Anchors generated. Best possible recall: {recall:.2f}") - self.conf_thres = conf_thres self.iou_thres = iou_thres self.max_det = max_det - self.num_heads = num_heads + self.n_heads = n_heads + if len(self.in_channels) < self.n_heads: + logger.warning( + f"Head '{self.name}' was set to use {self.n_heads} heads, " + f"but received only {len(self.in_channels)} inputs. " + f"Changing number of heads to {len(self.in_channels)}." + ) + self.n_heads = len(self.in_channels) + + if anchors is None: + logger.info("No anchors provided, generating them automatically.") + anchors, recall = self.dataset_metadata.autogenerate_anchors( + self.n_heads + ) + logger.info( + f"Anchors generated. 
Best possible recall: {recall:.2f}" + ) self.box_offset = 5 self.n_det_out = self.n_classes + self.box_offset @@ -71,13 +86,13 @@ def __init__( self.n_anchors = len(anchors[0]) // 2 self.grid: list[Tensor] = [] - self.anchors = torch.tensor(anchors).float().view(self.num_heads, -1, 2) - self.anchor_grid = self.anchors.clone().view(self.num_heads, 1, -1, 1, 1, 2) - - self.channel_list, self.stride = self._fit_to_num_heads( - cast(list[int], self.in_channels) + self.anchors = torch.tensor(anchors).float().view(self.n_heads, -1, 2) + self.anchor_grid = self.anchors.clone().view( + self.n_heads, 1, -1, 1, 1, 2 ) + self.channel_list, self.stride = self._fit_to_n_heads(self.in_channels) + self.learnable_mul_add_conv = nn.ModuleList( LearnableMulAddConv( add_channel=in_channels, @@ -108,7 +123,7 @@ def forward(self, inputs: list[Tensor]) -> tuple[list[Tensor], Tensor]: self.anchor_grid = self.anchor_grid.to(inputs[0].device) - for i in range(self.num_heads): + for i in range(self.n_heads): feat = cast( Tensor, torch.cat( @@ -123,11 +138,17 @@ def forward(self, inputs: list[Tensor]) -> tuple[list[Tensor], Tensor]: batch_size, _, feature_height, feature_width = feat.shape if i >= len(self.grid): self.grid.append( - self._construct_grid(feature_width, feature_height).to(feat.device) + self._construct_grid(feature_width, feature_height).to( + feat.device + ) ) feat = feat.reshape( - batch_size, self.n_anchors, self.n_out, feature_height, feature_width + batch_size, + self.n_anchors, + self.n_out, + feature_height, + feature_width, ).permute(0, 1, 3, 4, 2) features.append(feat) @@ -139,8 +160,8 @@ def forward(self, inputs: list[Tensor]) -> tuple[list[Tensor], Tensor]: return features, torch.cat(predictions, dim=1) - def wrap(self, outputs: tuple[list[Tensor], Tensor]) -> Packet[Tensor]: - features, predictions = outputs + def wrap(self, output: tuple[list[Tensor], Tensor]) -> Packet[Tensor]: + features, predictions = output if self.export: return {"boxes_and_keypoints": 
[predictions]} @@ -160,7 +181,8 @@ def wrap(self, outputs: tuple[list[Tensor], Tensor]) -> Packet[Tensor]: return { "boundingbox": [detection[:, :6] for detection in nms], "keypoints": [ - detection[:, 6:].reshape(-1, self.n_keypoints, 3) for detection in nms + detection[:, 6:].reshape(-1, self.n_keypoints, 3) + for detection in nms ], "features": features, } @@ -169,10 +191,12 @@ def _build_predictions( self, feat: Tensor, anchor_grid: Tensor, grid: Tensor, stride: Tensor ) -> Tensor: batch_size = feat.shape[0] - x_bbox = feat[..., : self.box_offset + self.n_classes] - x_keypoints = feat[..., self.box_offset + self.n_classes :] + bbox = feat[..., : self.box_offset + self.n_classes] + keypoints = feat[..., self.box_offset + self.n_classes :] - box_cxcy, box_wh, box_tail = process_bbox_predictions(x_bbox, anchor_grid) + box_cxcy, box_wh, box_tail = process_bbox_predictions( + bbox, anchor_grid + ) grid = grid.to(box_cxcy.device) stride = stride.to(box_cxcy.device) box_cxcy = (box_cxcy + grid) * stride @@ -180,7 +204,7 @@ def _build_predictions( grid_x = grid[..., 0:1] grid_y = grid[..., 1:2] - kpt_x, kpt_y, kpt_vis = process_keypoints_predictions(x_keypoints) + kpt_x, kpt_y, kpt_vis = process_keypoints_predictions(keypoints) kpt_x = (kpt_x + grid_x) * stride kpt_y = (kpt_y + grid_y) * stride kpt_vis_sig = kpt_vis.sigmoid() @@ -200,12 +224,14 @@ def _infer_bbox( ) return torch.cat((out_bbox_xy, out_bbox_wh, out_bbox[..., 4:]), dim=-1) - def _fit_to_num_heads(self, channel_list: list): - out_channel_list = channel_list[: self.num_heads] + def _fit_to_n_heads( + self, channel_list: list[int] + ) -> tuple[list[int], Tensor]: + out_channel_list = channel_list[: self.n_heads] stride = torch.tensor( [ self.original_in_shape[1] / h - for h in cast(list[int], self.in_height)[: self.num_heads] + for h in cast(list[int], self.in_height)[: self.n_heads] ], dtype=torch.int, ) @@ -214,11 +240,15 @@ def _fit_to_num_heads(self, channel_list: list): def 
_initialize_weights_and_biases(self, class_freq: Tensor | None = None): for m in self.modules(): if isinstance(m, nn.Conv2d): - nn.init.kaiming_normal_(m.weight, mode="fan_out", nonlinearity="relu") + nn.init.kaiming_normal_( + m.weight, mode="fan_out", nonlinearity="relu" + ) elif isinstance(m, nn.BatchNorm2d): m.eps = 1e-3 m.momentum = 0.03 - elif isinstance(m, (nn.Hardswish, nn.LeakyReLU, nn.ReLU, nn.ReLU6)): + elif isinstance( + m, (nn.Hardswish, nn.LeakyReLU, nn.ReLU, nn.ReLU6) + ): m.inplace = True for mi, s in zip(self.learnable_mul_add_conv, self.stride): @@ -233,7 +263,8 @@ def _initialize_weights_and_biases(self, class_freq: Tensor | None = None): def _construct_grid(self, feature_width: int, feature_height: int): grid_y, grid_x = torch.meshgrid( - [torch.arange(feature_height), torch.arange(feature_width)], indexing="ij" + [torch.arange(feature_height), torch.arange(feature_width)], + indexing="ij", ) return ( torch.stack((grid_x, grid_y), 2) diff --git a/luxonis_train/nodes/heads/segmentation_head.py b/luxonis_train/nodes/heads/segmentation_head.py index 1b29df7b..240b956c 100644 --- a/luxonis_train/nodes/heads/segmentation_head.py +++ b/luxonis_train/nodes/heads/segmentation_head.py @@ -1,39 +1,33 @@ -"""Implementation of a basic segmentation head. 
+from typing import Any -Adapted from: U{https://github.com/pytorch/vision/blob/main/torchvision/models/segmentation/fcn.py} -@license: U{BSD-3 } -""" - -import torch.nn as nn -from torch import Tensor +from luxonis_ml.data import LabelType +from torch import Tensor, nn from luxonis_train.nodes.base_node import BaseNode from luxonis_train.nodes.blocks import UpBlock -from luxonis_train.utils.general import infer_upscale_factor -from luxonis_train.utils.types import LabelType +from luxonis_train.utils import infer_upscale_factor class SegmentationHead(BaseNode[Tensor, Tensor]): in_height: int + in_width: int in_channels: int + tasks: list[LabelType] = [LabelType.SEGMENTATION] - def __init__(self, **kwargs): + def __init__(self, **kwargs: Any): """Basic segmentation FCN head. - Note that it doesn't ensure that ouptut is same size as input. - - @type kwargs: Any - @param kwargs: Additional arguments to pass to L{BaseNode}. + Adapted from: U{https://github.com/pytorch/vision/blob/main/torchvision/models/segmentation/fcn.py} + @license: U{BSD-3 } """ super().__init__(**kwargs) + h, w = self.original_in_shape[1:] + n_up = infer_upscale_factor((self.in_height, self.in_width), (h, w)) - original_height = self.original_in_shape[1] - num_up = infer_upscale_factor(self.in_height, original_height, strict=False) - - modules = [] + modules: list[nn.Module] = [] in_channels = self.in_channels - for _ in range(int(num_up)): + for _ in range(int(n_up)): modules.append( UpBlock(in_channels=in_channels, out_channels=in_channels // 2) ) diff --git a/luxonis_train/nodes/necks/reppan_neck.py b/luxonis_train/nodes/necks/reppan_neck.py index bd05f083..107151a6 100644 --- a/luxonis_train/nodes/necks/reppan_neck.py +++ b/luxonis_train/nodes/necks/reppan_neck.py @@ -1,141 +1,147 @@ -"""Implementation of the RepPANNeck module. - -Adapted from U{YOLOv6: A Single-Stage Object Detection Framework for Industrial -Applications}. 
-It has the balance of feature fusion ability and hardware efficiency. -""" - - -from typing import Literal, cast +from typing import Any, Literal from torch import Tensor, nn from luxonis_train.nodes.base_node import BaseNode from luxonis_train.nodes.blocks import RepDownBlock, RepUpBlock -from luxonis_train.utils.general import make_divisible +from luxonis_train.utils import make_divisible class RepPANNeck(BaseNode[list[Tensor], list[Tensor]]): + in_channels: list[int] + def __init__( self, - num_heads: Literal[2, 3, 4] = 3, + n_heads: Literal[2, 3, 4] = 3, channels_list: list[int] | None = None, - num_repeats: list[int] | None = None, + n_repeats: list[int] | None = None, depth_mul: float = 0.33, width_mul: float = 0.25, - **kwargs, + **kwargs: Any, ): - """Constructor for the RepPANNeck module. + """Implementation of the RepPANNeck module. + + Adapted from U{YOLOv6: A Single-Stage Object Detection Framework + for Industrial Applications}. + It has the balance of feature fusion ability and hardware efficiency. - @type num_heads: Literal[2,3,4] - @param num_heads: Number of output heads. Defaults to 3. ***Note: Should be same - also on head in most cases.*** + @type n_heads: Literal[2,3,4] + @param n_heads: Number of output heads. Defaults to 3. B{Note: Should be same + also on head in most cases.} @type channels_list: list[int] | None - @param channels_list: List of number of channels for each block. Defaults to - C{[256, 128, 128, 256, 256, 512]}. - @type num_repeats: list[int] | None - @param num_repeats: List of number of repeats of RepVGGBlock. Defaults to C{[12, - 12, 12, 12]}. + @param channels_list: List of number of channels for each block. + Defaults to C{[256, 128, 128, 256, 256, 512]}. + @type n_repeats: list[int] | None + @param n_repeats: List of number of repeats of RepVGGBlock. + Defaults to C{[12, 12, 12, 12]}. @type depth_mul: float - @param depth_mul: Depth multiplier. Defaults to 0.33. + @param depth_mul: Depth multiplier. Defaults to C{0.33}. 
@type width_mul: float - @param width_mul: Width multiplier. Defaults to 0.25. + @param width_mul: Width multiplier. Defaults to C{0.25}. """ super().__init__(**kwargs) - num_repeats = num_repeats or [12, 12, 12, 12] - channels_list = channels_list or [256, 128, 128, 256, 256, 512] + self.n_heads = n_heads - self.num_heads = num_heads + n_repeats = n_repeats or [12, 12, 12, 12] + channels_list = channels_list or [256, 128, 128, 256, 256, 512] - channels_list = [make_divisible(ch * width_mul, 8) for ch in channels_list] - num_repeats = [ - (max(round(i * depth_mul), 1) if i > 1 else i) for i in num_repeats + channels_list = [ + make_divisible(ch * width_mul, 8) for ch in channels_list ] - channels_list, num_repeats = self._fit_to_num_heads(channels_list, num_repeats) + n_repeats = [ + (max(round(i * depth_mul), 1) if i > 1 else i) for i in n_repeats + ] + channels_list, n_repeats = self._fit_to_n_heads( + channels_list, n_repeats + ) self.up_blocks = nn.ModuleList() - in_channels = cast(list[int], self.in_channels)[-1] + in_channels = self.in_channels[-1] out_channels = channels_list[0] - in_channels_next = cast(list[int], self.in_channels)[-2] - curr_num_repeats = num_repeats[0] + in_channels_next = self.in_channels[-2] + curr_n_repeats = n_repeats[0] up_out_channel_list = [in_channels] # used in DownBlocks - for i in range(1, num_heads): + for i in range(1, n_heads): curr_up_block = RepUpBlock( in_channels=in_channels, in_channels_next=in_channels_next, out_channels=out_channels, - num_repeats=curr_num_repeats, + n_repeats=curr_n_repeats, ) up_out_channel_list.append(out_channels) self.up_blocks.append(curr_up_block) - if len(self.up_blocks) == (num_heads - 1): + if len(self.up_blocks) == (n_heads - 1): up_out_channel_list.reverse() break in_channels = out_channels out_channels = channels_list[i] - in_channels_next = cast(list[int], self.in_channels)[-1 - (i + 1)] - curr_num_repeats = num_repeats[i] + in_channels_next = self.in_channels[-1 - (i + 1)] + 
curr_n_repeats = n_repeats[i] self.down_blocks = nn.ModuleList() - channels_list_down_blocks = channels_list[(num_heads - 1) :] - num_repeats_down_blocks = num_repeats[(num_heads - 1) :] + channels_list_down_blocks = channels_list[(n_heads - 1) :] + n_repeats_down_blocks = n_repeats[(n_heads - 1) :] in_channels = out_channels downsample_out_channels = channels_list_down_blocks[0] in_channels_next = up_out_channel_list[0] out_channels = channels_list_down_blocks[1] - curr_num_repeats = num_repeats_down_blocks[0] + curr_n_repeats = n_repeats_down_blocks[0] - for i in range(1, num_heads): + for i in range(1, n_heads): curr_down_block = RepDownBlock( in_channels=in_channels, downsample_out_channels=downsample_out_channels, in_channels_next=in_channels_next, out_channels=out_channels, - num_repeats=curr_num_repeats, + n_repeats=curr_n_repeats, ) self.down_blocks.append(curr_down_block) - if len(self.down_blocks) == (num_heads - 1): + if len(self.down_blocks) == (n_heads - 1): break in_channels = out_channels downsample_out_channels = channels_list_down_blocks[2 * i] in_channels_next = up_out_channel_list[i] out_channels = channels_list_down_blocks[2 * i + 1] - curr_num_repeats = num_repeats_down_blocks[i] + curr_n_repeats = n_repeats_down_blocks[i] def forward(self, inputs: list[Tensor]) -> list[Tensor]: - x0 = inputs[-1] - up_block_outs = [] - for i, up_block in enumerate(self.up_blocks): - conv_out, x0 = up_block(x0, inputs[-1 - (i + 1)]) + x = inputs[-1] + up_block_outs: list[Tensor] = [] + for up_block, input_ in zip( + self.up_blocks, inputs[-2::-1], strict=False + ): + conv_out, x = up_block(x, input_) up_block_outs.append(conv_out) - up_block_outs.reverse() - outs = [x0] - for i, down_block in enumerate(self.down_blocks): - x0 = down_block(x0, up_block_outs[i]) - outs.append(x0) + outs = [x] + for down_block, up_out in zip( + self.down_blocks, reversed(up_block_outs) + ): + x = down_block(x, up_out) + outs.append(x) return outs - def _fit_to_num_heads( - self, 
channels_list: list[int], num_repeats: list[int] + def _fit_to_n_heads( + self, channels_list: list[int], n_repeats: list[int] ) -> tuple[list[int], list[int]]: - """Fits channels_list and num_repeats to num_heads by removing or adding items. + """Fits channels_list and n_repeats to n_heads by removing or + adding items. Also scales the numbers based on offset """ - if self.num_heads == 3: - ... - elif self.num_heads == 2: - channels_list = [channels_list[0], channels_list[4], channels_list[5]] - num_repeats = [num_repeats[0], num_repeats[3]] - elif self.num_heads == 4: + if self.n_heads == 2: + channels_list = [channels_list[i] for i in [0, 4, 5]] + n_repeats = [n_repeats[0], n_repeats[3]] + elif self.n_heads == 3: + return channels_list, n_repeats + elif self.n_heads == 4: channels_list = [ channels_list[0], channels_list[1], @@ -147,17 +153,11 @@ def _fit_to_num_heads( channels_list[4], channels_list[5], ] - num_repeats = [ - num_repeats[0], - num_repeats[1], - num_repeats[1], - num_repeats[2], - num_repeats[2], - num_repeats[3], - ] + n_repeats = [n_repeats[i] for i in [0, 1, 1, 2, 2, 3]] else: raise ValueError( - f"Specified number of heads ({self.num_heads}) not supported." + f"Specified number of heads ({self.n_heads}) not supported." + "The number of heads should be 2, 3 or 4." 
) - return channels_list, num_repeats + return channels_list, n_repeats diff --git a/luxonis_train/optimizers/__init__.py b/luxonis_train/optimizers/__init__.py new file mode 100644 index 00000000..acd73792 --- /dev/null +++ b/luxonis_train/optimizers/__init__.py @@ -0,0 +1 @@ +from .optimizers import * diff --git a/luxonis_train/utils/optimizers.py b/luxonis_train/optimizers/optimizers.py similarity index 92% rename from luxonis_train/utils/optimizers.py rename to luxonis_train/optimizers/optimizers.py index 7583cef9..c2a4bf12 100644 --- a/luxonis_train/utils/optimizers.py +++ b/luxonis_train/optimizers/optimizers.py @@ -1,4 +1,4 @@ -from torch import optim +import torch.optim as optim from luxonis_train.utils.registry import OPTIMIZERS diff --git a/luxonis_train/schedulers/__init__.py b/luxonis_train/schedulers/__init__.py new file mode 100644 index 00000000..99bcd9d9 --- /dev/null +++ b/luxonis_train/schedulers/__init__.py @@ -0,0 +1 @@ +from .schedulers import * diff --git a/luxonis_train/utils/schedulers.py b/luxonis_train/schedulers/schedulers.py similarity index 100% rename from luxonis_train/utils/schedulers.py rename to luxonis_train/schedulers/schedulers.py diff --git a/luxonis_train/utils/__init__.py b/luxonis_train/utils/__init__.py index 609304c3..c47d3d33 100644 --- a/luxonis_train/utils/__init__.py +++ b/luxonis_train/utils/__init__.py @@ -1,5 +1,52 @@ -from .assigners import * -from .config import * -from .loaders import * -from .optimizers import * -from .schedulers import * +from .boundingbox import ( + anchors_for_fpn_features, + anchors_from_dataset, + bbox2dist, + bbox_iou, + compute_iou_loss, + dist2bbox, + match_to_anchor, + non_max_suppression, + process_bbox_predictions, +) +from .config import Config +from .dataset_metadata import DatasetMetadata +from .exceptions import IncompatibleException +from .general import ( + get_with_default, + infer_upscale_factor, + make_divisible, + to_shape_packet, +) +from .graph import is_acyclic, 
traverse_graph +from .keypoints import get_sigmas, process_keypoints_predictions +from .tracker import LuxonisTrackerPL +from .types import AttachIndexType, Kwargs, Labels, Packet + +__all__ = [ + "Config", + "AttachIndexType", + "Kwargs", + "Labels", + "Packet", + "IncompatibleException", + "DatasetMetadata", + "make_divisible", + "infer_upscale_factor", + "to_shape_packet", + "get_with_default", + "LuxonisTrackerPL", + "match_to_anchor", + "dist2bbox", + "bbox2dist", + "bbox_iou", + "non_max_suppression", + "anchors_from_dataset", + "anchors_for_fpn_features", + "process_bbox_predictions", + "compute_iou_loss", + "process_keypoints_predictions", + "get_sigmas", + "is_acyclic", + "traverse_graph", +] diff --git a/luxonis_train/utils/boxutils.py b/luxonis_train/utils/boundingbox.py similarity index 87% rename from luxonis_train/utils/boxutils.py rename to luxonis_train/utils/boundingbox.py index 3a206c75..9b97bfe6 100644 --- a/luxonis_train/utils/boxutils.py +++ b/luxonis_train/utils/boundingbox.py @@ -1,12 +1,10 @@ -"""This module contains various utility functions for working with bounding boxes.""" - import math from typing import Literal, TypeAlias import torch +from luxonis_ml.data import LabelType from scipy.cluster.vq import kmeans from torch import Tensor -from torch.utils.data import DataLoader from torchvision.ops import ( batched_nms, box_convert, @@ -15,24 +13,11 @@ generalized_box_iou, ) -from luxonis_train.utils.types import LabelType +from luxonis_train.loaders import BaseLoaderTorch IoUType: TypeAlias = Literal["none", "giou", "diou", "ciou", "siou"] BBoxFormatType: TypeAlias = Literal["xyxy", "xywh", "cxcywh"] -__all__ = [ - "anchors_for_fpn_features", - "anchors_from_dataset", - "bbox2dist", - "bbox_iou", - "compute_iou_loss", - "dist2bbox", - "match_to_anchor", - "non_max_suppression", - "process_bbox_predictions", - "process_keypoints_predictions", -] - def match_to_anchor( targets: Tensor, @@ -178,8 +163,21 @@ def bbox_iou( @param bbox2: Second 
set of bboxes [M, 4]. @type bbox_format: BBoxFormatType @param bbox_format: Input bbox format. Defaults to "xyxy". - @type iou_type: IoUType + @type iou_type: Literal["none", "giou", "diou", "ciou", "siou"] @param iou_type: IoU type. Defaults to "none". + Possible values are: + - "none": standard IoU + - "giou": Generalized IoU + - "diou": Distance IoU + - "ciou": Complete IoU. Introduced in U{ + Enhancing Geometric Factors in Model Learning and + Inference for Object Detection and Instance + Segmentation}. + Implementation adapted from torchvision C{complete_box_iou} + with improved stability. + - "siou": Soft IoU. Introduced in U{ + SIoU Loss: More Powerful Learning for Bounding Box + Regression}. @type element_wise: bool @param element_wise: If True returns element wise IoUs. Defaults to False. @rtype: Tensor @@ -197,9 +195,6 @@ def bbox_iou( elif iou_type == "diou": iou = distance_box_iou(bbox1, bbox2) elif iou_type == "ciou": - # CIoU from `Enhancing Geometric Factors in Model Learning and Inference for - # Object Detection and Instance Segmentation`, https://arxiv.org/pdf/2005.03572.pdf. 
- # Implementation adapted from torchvision complete_box_iou with added eps for stability eps = 1e-7 iou = bbox_iou(bbox1, bbox2, iou_type="none") @@ -218,9 +213,6 @@ def bbox_iou( iou = diou - alpha * v elif iou_type == "siou": - # SIoU from `SIoU Loss: More Powerful Learning for Bounding Box Regression`, - # https://arxiv.org/pdf/2205.12740.pdf - eps = 1e-7 bbox1_xywh = box_convert(bbox1, in_fmt="xyxy", out_fmt="xywh") w1, h1 = bbox1_xywh[:, 2], bbox1_xywh[:, 3] @@ -247,7 +239,9 @@ def bbox_iou( sin_alpha_1 = torch.abs(s_cw) / sigma sin_alpha_2 = torch.abs(s_ch) / sigma threshold = pow(2, 0.5) / 2 - sin_alpha = torch.where(sin_alpha_1 > threshold, sin_alpha_2, sin_alpha_1) + sin_alpha = torch.where( + sin_alpha_1 > threshold, sin_alpha_2, sin_alpha_1 + ) angle_cost = torch.cos(torch.arcsin(sin_alpha) * 2 - math.pi / 2) # distance cost @@ -287,7 +281,8 @@ def non_max_suppression( max_det: int = 300, predicts_objectness: bool = True, ) -> list[Tensor]: - """Non-maximum suppression on model's predictions to keep only best instances. + """Non-maximum suppression on model's predictions to keep only best + instances. @type preds: Tensor @param preds: Model's prediction tensor of shape [bs, N, M]. 
@@ -340,7 +335,9 @@ def non_max_suppression( torch.max(preds[..., 5 : 5 + n_classes], dim=-1)[0] > conf_thres, ) - output = [torch.zeros((0, preds.size(-1)), device=preds.device)] * preds.size(0) + output = [ + torch.zeros((0, preds.size(-1)), device=preds.device) + ] * preds.size(0) for i, x in enumerate(preds): curr_out = x[candidate_mask[i]] @@ -363,7 +360,9 @@ def non_max_suppression( if multi_label: box_idx, class_idx = ( - (curr_out[:, 5 : 5 + n_classes] > conf_thres).nonzero(as_tuple=False).T + (curr_out[:, 5 : 5 + n_classes] > conf_thres) + .nonzero(as_tuple=False) + .T ) keep_mask[box_idx] = True curr_out = torch.cat( @@ -375,9 +374,13 @@ def non_max_suppression( 1, ) else: - conf, class_idx = curr_out[:, 5 : 5 + n_classes].max(1, keepdim=True) + conf, class_idx = curr_out[:, 5 : 5 + n_classes].max( + 1, keepdim=True + ) keep_mask[conf.view(-1) > conf_thres] = True - curr_out = torch.cat((bboxes, conf, class_idx.float()), 1)[keep_mask] + curr_out = torch.cat((bboxes, conf, class_idx.float()), 1)[ + keep_mask + ] if has_additional: curr_out = torch.hstack( @@ -409,41 +412,37 @@ def non_max_suppression( def anchors_from_dataset( - loader: DataLoader, + loader: BaseLoaderTorch, n_anchors: int = 9, n_generations: int = 1000, ratio_threshold: float = 4.0, ) -> tuple[Tensor, float]: - """Generates anchors based on bounding box annotations present in provided data - loader. It uses K-Means for initial proposals which are then refined with genetic - algorithm. + """Generates anchors based on bounding box annotations present in + provided data loader. It uses K-Means for initial proposals which + are then refined with genetic algorithm. @type loader: L{torch.utils.data.DataLoader} @param loader: Data loader. @type n_anchors: int - @param n_anchors: Number of anchors, this is normally num_heads * 3 which generates - 3 anchors per layer. Defaults to 9. + @param n_anchors: Number of anchors, this is normally n_heads * 3 + which generates 3 anchors per layer. 
Defaults to 9. @type n_generations: int - @param n_generations: Number of iterations for anchor improvement with genetic - algorithm. Defaults to 1000. + @param n_generations: Number of iterations for anchor improvement + with genetic algorithm. Defaults to 1000. @type ratio_threshold: float - @param ratio_threshold: Minimum threshold for ratio. Defaults to 4.0. + @param ratio_threshold: Minimum threshold for ratio. Defaults to + 4.0. @rtype: tuple[Tensor, float] @return: Proposed anchors and the best possible recall. """ - widths = [] - inputs = None - for inp, labels in loader: + widths: list[Tensor] = [] + for _, labels in loader: for tensor, label_type in labels.values(): if label_type == LabelType.BOUNDINGBOX: curr_wh = tensor[:, 4:] widths.append(curr_wh) - inputs = inp - assert inputs is not None, "No inputs found in data loader" - _, _, h, w = inputs[ - loader.dataset.image_source # type: ignore - ].shape # assuming all images are same size + _, h, w = loader.input_shape img_size = torch.tensor([w, h]) wh = torch.vstack(widths) * img_size @@ -463,7 +462,8 @@ def anchors_from_dataset( except Exception: print("Fallback to random anchor init") proposed_anchors = ( - torch.sort(torch.rand(n_anchors * 2))[0].reshape(n_anchors, 2) * img_size + torch.sort(torch.rand(n_anchors * 2))[0].reshape(n_anchors, 2) + * img_size ) proposed_anchors = proposed_anchors[ @@ -471,7 +471,8 @@ def anchors_from_dataset( ] # sort small to large def calc_best_anchor_ratio(anchors: Tensor, wh: Tensor) -> Tensor: - """Calculate how well most suitable anchor box matches each target bbox.""" + """Calculate how well most suitable anchor box matches each + target bbox.""" symmetric_size_ratios = torch.min( wh[:, None] / anchors[None], anchors[None] / wh[:, None] ) @@ -480,17 +481,20 @@ def calc_best_anchor_ratio(anchors: Tensor, wh: Tensor) -> Tensor: return best_anchor_ratio def calc_best_possible_recall(anchors: Tensor, wh: Tensor) -> Tensor: - """Calculate best possible recall if every 
bbox is matched to an appropriate - anchor.""" + """Calculate best possible recall if every bbox is matched to an + appropriate anchor.""" best_anchor_ratio = calc_best_anchor_ratio(anchors, wh) - best_possible_recall = (best_anchor_ratio > 1 / ratio_threshold).float().mean() + best_possible_recall = ( + (best_anchor_ratio > 1 / ratio_threshold).float().mean() + ) return best_possible_recall def anchor_fitness(anchors: Tensor, wh: Tensor) -> Tensor: """Fitness function used for anchor evolve.""" best_anchor_ratio = calc_best_anchor_ratio(anchors, wh) return ( - best_anchor_ratio * (best_anchor_ratio > 1 / ratio_threshold).float() + best_anchor_ratio + * (best_anchor_ratio > 1 / ratio_threshold).float() ).mean() # Genetic algorithm @@ -508,7 +512,9 @@ def anchor_fitness(anchors: Tensor, wh: Tensor) -> Tensor: + mutation_noise_mean ).clip(0.3, 3.0) - mutated_anchors = (proposed_anchors.clone() * anchor_mutation).clip(min=2.0) + mutated_anchors = (proposed_anchors.clone() * anchor_mutation).clip( + min=2.0 + ) mutated_fitness = anchor_fitness(mutated_anchors, wh) if mutated_fitness > best_fitness: best_fitness = mutated_fitness @@ -529,20 +535,22 @@ def anchors_for_fpn_features( grid_cell_offset: float = 0.5, multiply_with_stride: bool = False, ) -> tuple[Tensor, Tensor, list[int], Tensor]: - """Generates anchor boxes, points and strides based on FPN feature shapes and - strides. + """Generates anchor boxes, points and strides based on FPN feature + shapes and strides. @type features: list[Tensor] @param features: List of FPN features. @type strides: Tensor @param strides: Strides of FPN features. @type grid_cell_size: float - @param grid_cell_size: Cell size in respect to input image size. Defaults to 5.0. + @param grid_cell_size: Cell size in respect to input image size. + Defaults to 5.0. @type grid_cell_offset: float - @param grid_cell_offset: Percent grid cell center's offset. Defaults to 0.5. + @param grid_cell_offset: Percent grid cell center's offset. 
Defaults + to 0.5. @type multiply_with_stride: bool - @param multiply_with_stride: Whether to multiply per FPN values with its stride. - Defaults to False. + @param multiply_with_stride: Whether to multiply per FPN values with + its stride. Defaults to False. @rtype: tuple[Tensor, Tensor, list[int], Tensor] @return: BBox anchors, center anchors, number of anchors, strides """ @@ -576,7 +584,9 @@ def anchors_for_fpn_features( anchors.append(anchor) anchor_point = ( - torch.stack([shift_x, shift_y], dim=-1).reshape(-1, 2).to(feature.dtype) + torch.stack([shift_x, shift_y], dim=-1) + .reshape(-1, 2) + .to(feature.dtype) ) anchor_points.append(anchor_point) @@ -595,26 +605,6 @@ def anchors_for_fpn_features( ) -def process_keypoints_predictions(keypoints: Tensor) -> tuple[Tensor, Tensor, Tensor]: - """Extracts x, y and visibility from keypoints predictions. - - @type keypoints: Tensor - @param keypoints: Keypoints predictions. The last dimension must be divisible by 3 - and is expected to be in format [x1, y1, v1, x2, y2, v2, ...]. - - @rtype: tuple[Tensor, Tensor, Tensor] - @return: x, y and visibility tensors. - """ - x = keypoints[..., ::3] * 2.0 - 0.5 - y = keypoints[..., 1::3] * 2.0 - 0.5 - visibility = keypoints[..., 2::3] - return ( - x, - y, - visibility, - ) - - def process_bbox_predictions( bbox: Tensor, anchor: Tensor ) -> tuple[Tensor, Tensor, Tensor]: @@ -625,7 +615,8 @@ def process_bbox_predictions( @type anchor: Tensor @param anchor: Anchor boxes @rtype: tuple[Tensor, Tensor, Tensor] - @return: xy and wh predictions and tail. The tail is anything after xywh. + @return: xy and wh predictions and tail. The tail is anything after + xywh. 
""" out_bbox = bbox.sigmoid() out_bbox_xy = out_bbox[..., 0:2] * 2.0 - 0.5 @@ -681,10 +672,12 @@ def compute_iou_loss( else: bbox_mask = torch.ones_like(pred_bboxes, dtype=torch.bool) - pred_bboxes_pos = torch.masked_select(pred_bboxes, bbox_mask).reshape([-1, 4]) - target_bboxes_pos = torch.masked_select(target_bboxes, bbox_mask).reshape( + pred_bboxes_pos = torch.masked_select(pred_bboxes, bbox_mask).reshape( [-1, 4] ) + target_bboxes_pos = torch.masked_select( + target_bboxes, bbox_mask + ).reshape([-1, 4]) iou = bbox_iou( pred_bboxes_pos, diff --git a/luxonis_train/utils/config.py b/luxonis_train/utils/config.py index 31e4fe5b..b94f08a5 100644 --- a/luxonis_train/utils/config.py +++ b/luxonis_train/utils/config.py @@ -10,8 +10,13 @@ LuxonisConfig, LuxonisFileSystem, ) -from pydantic import Field, field_validator, model_validator -from pydantic.types import FilePath, NonNegativeFloat, NonNegativeInt, PositiveInt +from pydantic import AliasChoices, Field, field_validator, model_validator +from pydantic.types import ( + FilePath, + NonNegativeFloat, + NonNegativeInt, + PositiveInt, +) from typing_extensions import Self logger = logging.getLogger(__name__) @@ -82,7 +87,9 @@ def check_predefined_model(self) -> Self: from luxonis_train.utils.registry import MODELS if self.predefined_model: - logger.info(f"Using predefined model: `{self.predefined_model.name}`") + logger.info( + f"Using predefined model: `{self.predefined_model.name}`" + ) model = MODELS.get(self.predefined_model.name)( **self.predefined_model.params ) @@ -122,14 +129,16 @@ def check_main_metric(self) -> Self: @model_validator(mode="after") def check_graph(self) -> Self: - from luxonis_train.utils.general import is_acyclic + from luxonis_train.utils import is_acyclic graph = {node.alias or node.name: node.inputs for node in self.nodes} if not is_acyclic(graph): raise ValueError("Model graph is not acyclic.") if not self.outputs: outputs: list[str] = [] # nodes which are not inputs to any nodes - 
inputs = set(node_name for node in self.nodes for node_name in node.inputs) + inputs = set( + node_name for node in self.nodes for node_name in node.inputs + ) for node in self.nodes: name = node.alias or node.name if name not in inputs: @@ -147,7 +156,7 @@ def check_unique_names(self) -> Self: ("metrics", self.metrics), ("visualizers", self.visualizers), ]: - names = set() + names: set[str] = set() for obj in objects: obj: AttachedModuleConfig name = obj.alias or obj.name @@ -232,7 +241,9 @@ class PreprocessingConfig(BaseModelExtraForbid): def check_normalize(self) -> Self: if self.normalize.active: self.augmentations.append( - AugmentationConfig(name="Normalize", params=self.normalize.params) + AugmentationConfig( + name="Normalize", params=self.normalize.params + ) ) return self @@ -268,20 +279,34 @@ class TrainerConfig(BaseModelExtraForbid): accelerator: Literal["auto", "cpu", "gpu", "tpu"] = "auto" devices: int | list[int] | str = "auto" strategy: Literal["auto", "ddp"] = "auto" - num_sanity_val_steps: int = 2 + n_sanity_val_steps: Annotated[ + int, + Field( + validation_alias=AliasChoices( + "n_sanity_val_steps", "num_sanity_val_steps" + ) + ), + ] = 2 profiler: Literal["simple", "advanced"] | None = None matmul_precision: Literal["medium", "high", "highest"] | None = None verbose: bool = True seed: int | None = None + deterministic: bool | Literal["warn"] | None = None batch_size: PositiveInt = 32 accumulate_grad_batches: PositiveInt = 1 use_weighted_sampler: bool = False epochs: PositiveInt = 100 - num_workers: NonNegativeInt = 4 + n_workers: Annotated[ + NonNegativeInt, + Field(validation_alias=AliasChoices("n_workers", "num_workers")), + ] = 4 train_metrics_interval: Literal[-1] | PositiveInt = -1 validation_interval: Literal[-1] | PositiveInt = 5 - num_log_images: NonNegativeInt = 4 + n_log_images: Annotated[ + NonNegativeInt, + Field(validation_alias=AliasChoices("n_log_images", "num_log_images")), + ] = 4 skip_last_batch: bool = True pin_memory: bool = 
True log_sub_losses: bool = True @@ -293,13 +318,24 @@ class TrainerConfig(BaseModelExtraForbid): scheduler: SchedulerConfig = SchedulerConfig() @model_validator(mode="after") - def check_num_workes_platform(self) -> Self: + def validate_deterministic(self) -> Self: + if self.seed is not None and self.deterministic is None: + logger.warning( + "Setting `trainer.deterministic` to True because `trainer.seed` is set. " + "This can cause certain layers to fail. " + "In such cases, set `trainer.deterministic` to `'warn'`." + ) + self.deterministic = True + return self + + @model_validator(mode="after") + def check_n_workes_platform(self) -> Self: if ( sys.platform == "win32" or sys.platform == "darwin" - ) and self.num_workers != 0: - self.num_workers = 0 + ) and self.n_workers != 0: + self.n_workers = 0 logger.warning( - "Setting `num_workers` to 0 because of platform compatibility." + "Setting `n_workers` to 0 because of platform compatibility." ) return self @@ -321,7 +357,9 @@ class OnnxExportConfig(BaseModelExtraForbid): class BlobconverterExportConfig(BaseModelExtraForbid): active: bool = False shaves: int = 6 - version: Literal["2021.2", "2021.3", "2021.4", "2022.1", "2022.3_RVC3"] = "2022.1" + version: Literal["2021.2", "2021.3", "2021.4", "2022.1", "2022.3_RVC3"] = ( + "2022.1" + ) class ArchiveConfig(BaseModelExtraForbid): @@ -403,7 +441,9 @@ def get_config( return instance fs = LuxonisFileSystem(cfg) if fs.is_mlflow: - logger.info("Setting `project_id` and `run_id` to config's MLFlow run") + logger.info( + "Setting `project_id` and `run_id` to config's MLFlow run" + ) instance.tracker.project_id = fs.experiment_id instance.tracker.run_id = fs.run_id return instance diff --git a/luxonis_train/utils/dataset_metadata.py b/luxonis_train/utils/dataset_metadata.py new file mode 100644 index 00000000..35ebbef8 --- /dev/null +++ b/luxonis_train/utils/dataset_metadata.py @@ -0,0 +1,154 @@ +from luxonis_train.loaders import BaseLoaderTorch +from luxonis_train.utils
import anchors_from_dataset + + +class DatasetMetadata: + """Metadata about the dataset.""" + + def __init__( + self, + *, + classes: dict[str, list[str]] | None = None, + n_keypoints: dict[str, int] | None = None, + loader: BaseLoaderTorch | None = None, + ): + """An object containing metadata about the dataset. Used to + infer the number of classes, number of keypoints, I{etc.} + instead of passing them as arguments to the model. + + @type classes: dict[str, list[str]] | None + @param classes: Dictionary mapping tasks to lists of class + names. + @type n_keypoints: dict[str, int] | None + @param n_keypoints: Dictionary mapping tasks to the number of + keypoints. + @type loader: BaseLoaderTorch | None + @param loader: Dataset loader. + """ + self._classes = classes or {} + self._n_keypoints = n_keypoints or {} + self._loader = loader + + def n_classes(self, task: str | None = None) -> int: + """Gets the number of classes for the specified task. + + @type task: str | None + @param task: Task to get the number of classes for. + @rtype: int + @return: Number of classes for the specified label type. + @raises ValueError: If the C{task} is not present in the + dataset. + @raises RuntimeError: If the C{task} was not provided and the + dataset contains different number of classes for different + label types. + """ + if task is not None: + if task not in self._classes: + raise ValueError( + f"Task '{task}' is not present in the dataset." + ) + return len(self._classes[task]) + n_classes = len(list(self._classes.values())[0]) + for classes in self._classes.values(): + if len(classes) != n_classes: + raise RuntimeError( + "The dataset contains different number of classes for different tasks. " + "Please specify the 'task' argument to get the number of classes." + ) + return n_classes + + def n_keypoints(self, task: str | None = None) -> int: + """Gets the number of keypoints for the specified task. + + @type task: str | None + @param task: Task to get the number of keypoints for.
+ @rtype: int + @return: Number of keypoints for the specified label type. + @raises ValueError: If the C{task} is not present in the + dataset. + @raises RuntimeError: If the C{task} was not provided and the + dataset contains different number of keypoints for different + label types. + """ + if task is not None: + if task not in self._n_keypoints: + raise ValueError( + f"Task '{task}' is not present in the dataset." + ) + return self._n_keypoints[task] + n_keypoints = next(iter(self._n_keypoints.values())) + for n in self._n_keypoints.values(): + if n != n_keypoints: + raise RuntimeError( + "The dataset contains different number of keypoints for different tasks. " + "Please specify the 'task' argument to get the number of keypoints." + ) + return n_keypoints + + def classes(self, task: str | None = None) -> list[str]: + """Gets the class names for the specified task. + + @type task: str | None + @param task: Task to get the class names for. + @rtype: list[str] + @return: List of class names for the specified label type. + @raises ValueError: If the C{task} is not present in the + dataset. + @raises RuntimeError: If the C{task} was not provided and the + dataset contains different class names for different label + types. + """ + if task is not None: + if task not in self._classes: + raise ValueError( + f"Task type {task} is not present in the dataset." + ) + return self._classes[task] + class_names = list(self._classes.values())[0] + for classes in self._classes.values(): + if classes != class_names: + raise RuntimeError( + "The dataset contains different class names for different tasks." + ) + return class_names + + def autogenerate_anchors( + self, n_heads: int + ) -> tuple[list[list[float]], float]: + """Automatically generates anchors for the provided dataset. + + @type n_heads: int + @param n_heads: Number of heads to generate anchors for. + @rtype: tuple[list[list[float]], float] + @return: List of anchors in [-1,6] format and recall of the + anchors.
+ @raises RuntimeError: If the dataset loader was not provided + during initialization. + """ + if self._loader is None: + raise RuntimeError( + "Cannot generate anchors without a dataset loader. " + "Please provide a dataset loader to the constructor " + "or create the metadata with `from_loader`." + ) + + proposed_anchors, recall = anchors_from_dataset( + self._loader, n_anchors=n_heads * 3 + ) + return proposed_anchors.reshape(-1, 6).tolist(), recall + + @classmethod + def from_loader(cls, loader: BaseLoaderTorch) -> "DatasetMetadata": + """Creates a L{DatasetMetadata} object from a L{BaseLoaderTorch}. + + @type loader: BaseLoaderTorch + @param loader: Loader to create the metadata from. + @rtype: DatasetMetadata + @return: Instance of L{DatasetMetadata} created from the + provided loader. + """ + classes = loader.get_classes() + n_keypoints = loader.get_n_keypoints() + + instance = cls(classes=classes, n_keypoints=n_keypoints, loader=loader) + return instance diff --git a/luxonis_train/utils/exceptions.py b/luxonis_train/utils/exceptions.py new file mode 100644 index 00000000..bab8c1aa --- /dev/null +++ b/luxonis_train/utils/exceptions.py @@ -0,0 +1,12 @@ +class IncompatibleException(Exception): + """Raised when two parts of the model are incompatible with each + other.""" + + @classmethod + def from_missing_task( + cls, task: str, present_tasks: list[str], class_name: str + ): + return cls( + f"{class_name} requires '{task}' label, but it was not found in " + f"the label dictionary. Available labels: {present_tasks}."
+ ) diff --git a/luxonis_train/utils/general.py b/luxonis_train/utils/general.py index 5ae3b43f..45013807 100644 --- a/luxonis_train/utils/general.py +++ b/luxonis_train/utils/general.py @@ -1,272 +1,141 @@ import logging import math -from copy import deepcopy -from typing import Generator, TypeVar +from typing import TypeVar -from pydantic import BaseModel from torch import Size, Tensor -from torch.utils.data import DataLoader -from luxonis_train.utils.boxutils import anchors_from_dataset -from luxonis_train.utils.loaders import BaseLoaderTorch from luxonis_train.utils.types import Packet +logger = logging.getLogger(__name__) -class DatasetMetadata: - """Metadata about the dataset.""" - def __init__( - self, - *, - classes: dict[str, list[str]] | None = None, - n_keypoints: dict[str, int] | None = None, - loader: DataLoader | None = None, - ): - """An object containing metadata about the dataset. Used to infer the number of - classes, number of keypoints, I{etc.} instead of passing them as arguments to - the model. - - @type classes: dict[str, list[str]] | None - @param classes: Dictionary mapping tasks to lists of class names. - @type n_keypoints: dict[str, int] | None - @param n_keypoints: Dictionary mapping tasks to the number of keypoints. - @type loader: DataLoader | None - @param loader: Dataset loader. - """ - self._classes = classes or {} - self._n_keypoints = n_keypoints or {} - self._loader = loader +def make_divisible(x: int | float, divisor: int) -> int: + """Upward revision the value x to make it evenly divisible by the + divisor. - @property - def classes(self) -> dict[str, list[str]]: - """Dictionary mapping label types to lists of class names. + Equivalent to M{ceil(x / divisor) * divisor}. - @type: dict[str, list[str]] - @raises ValueError: If classes were not provided during initialization. - """ - if self._classes is None: - raise ValueError( - "Trying to access `classes`, byt they were not" - "provided during initialization." 
- ) - return self._classes + @type x: int | float + @param x: Value to be revised. + @type divisor: int + @param divisor: Divisor. + @rtype: int + @return: Revised value. + """ + return math.ceil(x / divisor) * divisor - def n_classes(self, task: str | None) -> int: - """Gets the number of classes for the specified task. - @type task: str | None - @param task: Task to get the number of classes for. - @rtype: int - @return: Number of classes for the specified label type. - @raises ValueError: If the dataset loader was not provided during - initialization. - @raises ValueError: If the dataset contains different number of classes for - different label types. - """ - if task is not None: - if task not in self.classes: - raise ValueError(f"Task '{task}' is not present in the dataset.") - return len(self.classes[task]) - n_classes = len(list(self.classes.values())[0]) - for classes in self.classes.values(): - if len(classes) != n_classes: - raise ValueError( - "The dataset contains different number of classes for different tasks." - ) - return n_classes +def infer_upscale_factor( + in_size: tuple[int, int] | int, orig_size: tuple[int, int] | int +) -> int: + """Infer the upscale factor from the input shape and the original + shape. + + @type in_size: tuple[int, int] | int + @param in_size: Input shape as a tuple of (height, width) or just + one of them. + @type orig_size: tuple[int, int] | int + @param orig_size: Original shape as a tuple of (height, width) or + just one of them. + @rtype: int + @return: Upscale factor. + @raise ValueError: If the C{in_size} cannot be upscaled to the + C{orig_size}. This can happen if the upscale factors are not + integers or are different. 
+ """ - def n_keypoints(self, task: str | None) -> int: - if task is not None: - if task not in self._n_keypoints: - raise ValueError(f"Task '{task}' is not present in the dataset.") - return self._n_keypoints[task] - if len(self._n_keypoints) > 1: + def _infer_upscale_factor(in_size: int, orig_size: int) -> int | float: + factor = math.log2(orig_size) - math.log2(in_size) + if abs(round(factor) - factor) < 1e-6: + return int(round(factor)) + return factor + + if isinstance(in_size, int): + in_size = (in_size, in_size) + if isinstance(orig_size, int): + orig_size = (orig_size, orig_size) + in_height, in_width = in_size + orig_height, orig_width = orig_size + + width_factor = _infer_upscale_factor(in_width, orig_width) + height_factor = _infer_upscale_factor(in_height, orig_height) + + match (width_factor, height_factor): + case (int(wf), int(hf)) if wf == hf: + return wf + case (int(wf), int(hf)): raise ValueError( - "The dataset specifies multiple keypoint tasks, " - "please specify the 'task' argument to get the number of keypoints." + f"Width and height upscale factors are different. " + f"Width: {wf}, height: {hf}." ) - return next(iter(self._n_keypoints.values())) - - def class_names(self, task: str | None) -> list[str]: - """Gets the class names for the specified task. - - @type task: str | None - @param task: Task to get the class names for. - @rtype: list[str] - @return: List of class names for the specified label type. - @raises ValueError: If the dataset loader was not provided during - initialization. - @raises ValueError: If the dataset contains different class names for different - label types. - """ - if task is not None: - if task not in self.classes: - raise ValueError(f"Task type {task} is not present in the dataset.") - return self.classes[task] - class_names = list(self.classes.values())[0] - for classes in self.classes.values(): - if classes != class_names: - raise ValueError( - "The dataset contains different class names for different tasks." 
- ) - return class_names - - def autogenerate_anchors(self, n_heads: int) -> tuple[list[list[float]], float]: - """Automatically generates anchors for the provided dataset. - - @type n_heads: int - @param n_heads: Number of heads to generate anchors for. - @rtype: tuple[list[list[float]], float] - @return: List of anchors in [-1,6] format and recall of the anchors. - @raises ValueError: If the dataset loader was not provided during - initialization. - """ - if self.loader is None: + case (int(wf), float(hf)): raise ValueError( - "Cannot generate anchors without a dataset loader. " - "Please provide a dataset loader to the constructor " - "or call `set_loader` method." + f"Width upscale factor is an integer, but height upscale factor is not. " + f"Width: {wf}, height: {hf}." ) - - proposed_anchors, recall = anchors_from_dataset( - self.loader, n_anchors=n_heads * 3 - ) - return proposed_anchors.reshape(-1, 6).tolist(), recall - - def set_loader(self, loader: DataLoader) -> None: - """Sets the dataset loader. - - @type loader: DataLoader - @param loader: Dataset loader. - """ - self.loader = loader - - @classmethod - def from_loader(cls, loader: BaseLoaderTorch) -> "DatasetMetadata": - """Creates a L{DatasetMetadata} object from a L{LuxonisDataset}. - - @type dataset: LuxonisDataset - @param dataset: Dataset to create the metadata from. - @rtype: DatasetMetadata - @return: Instance of L{DatasetMetadata} created from the provided dataset. 
- """ - classes = loader.get_classes() - n_keypoints = loader.get_n_keypoints() - - return cls(classes=classes, n_keypoints=n_keypoints) - - -def make_divisible(x: int | float, divisor: int) -> int: - """Upward revision the value x to make it evenly divisible by the divisor.""" - return math.ceil(x / divisor) * divisor - - -def infer_upscale_factor( - in_height: int, orig_height: int, strict: bool = True, warn: bool = True -) -> int: - """Infer the upscale factor from the input height and original height.""" - num_up = math.log2(orig_height) - math.log2(in_height) - if abs(round(num_up) - num_up) < 1e-6: - return int(round(num_up)) - elif not strict: - if warn: - logging.getLogger(__name__).warning( - f"Upscale factor is not an integer: {num_up}. " - "Output shape will not be the same as input shape." + case (float(wf), int(hf)): + raise ValueError( + f"Height upscale factor is an integer, but width upscale factor is not. " + f"Width: {wf}, height: {hf}." + ) + case (float(wf), float(hf)): + raise ValueError( + "Width and height upscale factors are not integers. " + f"Width: {wf}, height: {hf}." ) - return round(num_up) - else: - raise ValueError( - f"Upscale factor is not an integer: {num_up}. " - "Output shape will not be the same as input shape." - ) + + raise NotImplementedError( + f"Unexpected case: {width_factor}, {height_factor}" + ) def to_shape_packet(packet: Packet[Tensor]) -> Packet[Size]: + """Converts a packet of tensors to a packet of shapes. Used for + debugging purposes. + + @type packet: Packet[Tensor] + @param packet: Packet of tensors. + @rtype: Packet[Size] + @return: Packet of shapes. + """ shape_packet: Packet[Size] = {} for name, value in packet.items(): shape_packet[name] = [x.shape for x in value] return shape_packet -def is_acyclic(graph: dict[str, list[str]]) -> bool: - """Tests if graph is acyclic. - - @type graph: dict[str, list[str]] - @param graph: Graph in a format of a dictionary of predecessors. 
Keys are node - names, values are inputs to the node (list of node names). - @rtype: bool - @return: True if graph is acyclic, False otherwise. - """ - graph = graph.copy() - - def dfs(node: str, visited: set[str], recursion_stack: set[str]): - visited.add(node) - recursion_stack.add(node) - - for predecessor in graph.get(node, []): - if predecessor in recursion_stack: - return True - if predecessor not in visited: - if dfs(predecessor, visited, recursion_stack): - return True - - recursion_stack.remove(node) - return False - - visited: set[str] = set() - recursion_stack: set[str] = set() - - for node in graph.keys(): - if node not in visited: - if dfs(node, visited, recursion_stack): - return False - - return True - - -def validate_packet(data: Packet[Tensor], protocol: type[BaseModel]) -> Packet[Tensor]: - return protocol(**data).model_dump() - - T = TypeVar("T") -# TEST: -def traverse_graph( - graph: dict[str, list[str]], nodes: dict[str, T] -) -> Generator[tuple[str, T, list[str], list[str]], None, None]: - """Traverses the graph in topological order. - - @type graph: dict[str, list[str]] - @param graph: Graph in a format of a dictionary of predecessors. Keys are node - names, values are inputs to the node (list of node names). - @type nodes: dict[str, T] - @param nodes: Dictionary mapping node names to node objects. - @rtype: Generator[tuple[str, T, list[str], list[str]], None, None] - @return: Generator of tuples containing node name, node object, node dependencies - and unprocessed nodes. - @raises RuntimeError: If the graph is malformed. +def get_with_default( + value: T | None, + action_name: str, + caller_name: str | None = None, + *, + default: T, +) -> T: + """Returns value if it is not C{None}, otherwise returns the default + value and log an info. + + @type value: T | None + @param value: Value to return. + @type action_name: str + @param action_name: Name of the action for which the default value + is being used. Used for logging. 
+ @type caller_name: str | None + @param caller_name: Name of the caller function. Used for logging. + @type default: T + @param default: Default value to return if C{value} is C{None}. + @rtype: T + @return: C{value} if it is not C{None}, otherwise C{default}. """ - unprocessed_nodes = sorted( - set(nodes.keys()) - ) # sort the set to allow reproducibility - processed: set[str] = set() + if value is not None: + return value - graph = deepcopy(graph) - while unprocessed_nodes: - unprocessed_nodes_copy = unprocessed_nodes.copy() - for node_name in unprocessed_nodes_copy: - node_dependencies = graph[node_name] - if not node_dependencies or all( - dependency in processed for dependency in node_dependencies - ): - yield node_name, nodes[node_name], node_dependencies, unprocessed_nodes - processed.add(node_name) - unprocessed_nodes.remove(node_name) + msg = f"Default value of {default} is being used for {action_name}." - if unprocessed_nodes_copy == unprocessed_nodes: - raise RuntimeError( - "Malformed graph. " - "Please check that all nodes are connected in a directed acyclic graph." - ) + if caller_name: + msg = f"[{caller_name}] {msg}" + + logger.info(msg, stacklevel=2) + return default diff --git a/luxonis_train/utils/graph.py b/luxonis_train/utils/graph.py new file mode 100644 index 00000000..a2b72832 --- /dev/null +++ b/luxonis_train/utils/graph.py @@ -0,0 +1,92 @@ +from copy import deepcopy +from typing import Iterator, TypeAlias, TypeVar + +Graph: TypeAlias = dict[str, list[str]] +"""Graph in a format of a dictionary of predecessors. + +Keys are node names, values are inputs to the node (list of node names). +""" + + +def is_acyclic(graph: Graph) -> bool: + """Tests if graph is acyclic. + + @type graph: dict[str, list[str]] + @param graph: Graph in a format of a dictionary of predecessors. + Keys are node names, values are inputs to the node (list of node + names). + @rtype: bool + @return: True if graph is acyclic, False otherwise.
def traverse_graph(
    graph: Graph, nodes: dict[str, T]
) -> Iterator[tuple[str, T, list[str], list[str]]]:
    """Traverses the graph in topological order.

    A node is emitted once all of its dependencies have been emitted.
    Nodes are visited in sorted name order within each pass, so the
    resulting order is reproducible.

    @type graph: dict[str, list[str]]
    @param graph: Graph in a format of a dictionary of predecessors.
        Keys are node names, values are inputs to the node (list of node
        names).
    @type nodes: dict[str, T]
    @param nodes: Dictionary mapping node names to node objects.
    @rtype: Iterator[tuple[str, T, list[str], list[str]]]
    @return: Iterator of tuples containing node name, node object, node
        dependencies and a snapshot of the nodes that are still
        unprocessed after the yielded one.
    @raises RuntimeError: If the graph is malformed, i.e. a full pass
        over the remaining nodes could not process any of them.
    """
    # Iterating a dict yields its (unique) keys; sort them to allow
    # reproducibility.
    unprocessed_nodes = sorted(nodes)
    processed: set[str] = set()

    # Work on a private copy so the dependency lists handed to the
    # consumer are not the caller's own list objects.
    graph = deepcopy(graph)
    while unprocessed_nodes:
        progressed = False
        # Iterate over a snapshot; the live list shrinks as nodes are
        # emitted.
        for node_name in list(unprocessed_nodes):
            node_dependencies = graph[node_name]
            if not node_dependencies or all(
                dependency in processed for dependency in node_dependencies
            ):
                unprocessed_nodes.remove(node_name)
                yield (
                    node_name,
                    nodes[node_name],
                    node_dependencies,
                    unprocessed_nodes.copy(),
                )
                # Marked as processed only after the consumer resumes the
                # generator, same as before.
                processed.add(node_name)
                progressed = True

        # A pass that emitted nothing means the remaining nodes depend on
        # each other (a cycle) or on a node that is not in ``nodes``.
        if not progressed:
            raise RuntimeError(
                "Malformed graph. "
                "Please check that all nodes are connected in a directed acyclic graph."
            )
+ if caller_name: + msg = f"[{caller_name}] {msg}" + logger.warning(msg) + return torch.tensor( + [ + 0.026, + 0.025, + 0.025, + 0.035, + 0.035, + 0.079, + 0.079, + 0.072, + 0.072, + 0.062, + 0.062, + 0.107, + 0.107, + 0.087, + 0.087, + 0.089, + 0.089, + ], + dtype=torch.float32, + ) + else: + msg = "Default sigma of 0.04 is being used for each keypoint." + if caller_name: + msg = f"[{caller_name}] {msg}" + logger.info(msg) + return torch.tensor([0.04] * n_keypoints, dtype=torch.float32) diff --git a/luxonis_train/utils/registry.py b/luxonis_train/utils/registry.py index 2222ecbd..02532d32 100644 --- a/luxonis_train/utils/registry.py +++ b/luxonis_train/utils/registry.py @@ -1,46 +1,46 @@ -"""This module implements a metaclass for automatic registration of classes.""" +"""This module implements a metaclass for automatic registration of +classes.""" import lightning.pytorch as pl -import torch from luxonis_ml.utils.registry import Registry +from torch.optim.lr_scheduler import _LRScheduler +from torch.optim.optimizer import Optimizer -import luxonis_train +import luxonis_train as lt CALLBACKS: Registry[type[pl.Callback]] = Registry(name="callbacks") """Registry for all callbacks.""" -LOADERS: Registry[type["luxonis_train.utils.loaders.BaseLoaderTorch"]] = Registry( +LOADERS: Registry[type["lt.loaders.BaseLoaderTorch"]] = Registry( name="loaders" ) """Registry for all loaders.""" -LOSSES: Registry[type["luxonis_train.attached_modules.BaseLoss"]] = Registry( +LOSSES: Registry[type["lt.attached_modules.BaseLoss"]] = Registry( name="losses" ) """Registry for all losses.""" -METRICS: Registry[type["luxonis_train.attached_modules.BaseMetric"]] = Registry( +METRICS: Registry[type["lt.attached_modules.BaseMetric"]] = Registry( name="metrics" ) """Registry for all metrics.""" -MODELS: Registry[type["luxonis_train.models.BasePredefinedModel"]] = Registry( +MODELS: Registry[type["lt.models.BasePredefinedModel"]] = Registry( name="models" ) """Registry for all models.""" 
-NODES: Registry[type["luxonis_train.nodes.BaseNode"]] = Registry(name="nodes") +NODES: Registry[type["lt.nodes.BaseNode"]] = Registry(name="nodes") """Registry for all nodes.""" -OPTIMIZERS: Registry[type[torch.optim.Optimizer]] = Registry(name="optimizers") +OPTIMIZERS: Registry[type[Optimizer]] = Registry(name="optimizers") """Registry for all optimizers.""" -SCHEDULERS: Registry[type[torch.optim.lr_scheduler._LRScheduler]] = Registry( - name="schedulers" -) +SCHEDULERS: Registry[type[_LRScheduler]] = Registry(name="schedulers") """Registry for all schedulers.""" -VISUALIZERS: Registry[type["luxonis_train.visualizers.BaseVisualizer"]] = Registry( +VISUALIZERS: Registry[type["lt.visualizers.BaseVisualizer"]] = Registry( "visualizers" ) """Registry for all visualizers.""" diff --git a/luxonis_train/utils/tracker.py b/luxonis_train/utils/tracker.py index 4df76edd..35d7af70 100644 --- a/luxonis_train/utils/tracker.py +++ b/luxonis_train/utils/tracker.py @@ -1,12 +1,15 @@ +from typing import Any + from lightning.pytorch.loggers.logger import Logger from lightning.pytorch.utilities import rank_zero_only # type: ignore from luxonis_ml.tracker import LuxonisTracker class LuxonisTrackerPL(LuxonisTracker, Logger): - """Implementation of LuxonisTracker that is compatible with PytorchLightning.""" + """Implementation of LuxonisTracker that is compatible with + PytorchLightning.""" - def __init__(self, *, _auto_finalize: bool = True, **kwargs): + def __init__(self, *, _auto_finalize: bool = True, **kwargs: Any): """ @type _auto_finalize: bool @param _auto_finalize: If True, the run will be finalized automatically when the training ends. 
@@ -21,7 +24,7 @@ def __init__(self, *, _auto_finalize: bool = True, **kwargs): self.finalize = self._finalize @rank_zero_only - def _finalize(self, status: str = "success") -> None: + def _finalize(self, status: str = "success") -> None: # pragma: no cover """Finalizes current run.""" if self.is_tensorboard: self.experiment["tensorboard"].flush() diff --git a/luxonis_train/utils/types.py b/luxonis_train/utils/types.py index 84b8e019..3a7ca7f4 100644 --- a/luxonis_train/utils/types.py +++ b/luxonis_train/utils/types.py @@ -1,19 +1,21 @@ -from typing import Annotated, Any, Literal, TypeVar +from typing import Any, Literal, TypeVar from luxonis_ml.data import LabelType -from pydantic import BaseModel, Field, ValidationError from torch import Size, Tensor Kwargs = dict[str, Any] -OutputTypes = Literal["boundingbox", "class", "keypoints", "segmentation", "features"] +"""Kwargs is a dictionary containing keyword arguments.""" + Labels = dict[str, tuple[Tensor, LabelType]] +"""Labels is a dictionary containing a tuple of tensors and their +corresponding label type.""" AttachIndexType = Literal["all"] | int | tuple[int, int] | tuple[int, int, int] -"""AttachIndexType is used to specify to which output of the prevoius node does the -current node attach to. +"""AttachIndexType is used to specify to which output of the prevoius +node does the current node attach to. -It can be either "all" (all outputs), an index of the output or a tuple of indices of -the output (specifying a range of outputs). +It can be either "all" (all outputs), an index of the output or a tuple +of indices of the output (specifying a range of outputs). """ T = TypeVar("T", Tensor, Size) @@ -22,31 +24,3 @@ It is used to pass data between different nodes of the network graph. 
""" - - -class IncompatibleException(Exception): - """Raised when two parts of the model are incompatible with each other.""" - - @classmethod - def from_validation_error(cls, val_error: ValidationError, class_name: str): - return cls( - f"{class_name} received an input not conforming to the protocol. " - f"Validation error: {val_error.errors(include_input=False, include_url=False)}." - ) - - @classmethod - def from_missing_task(cls, task: str, present_tasks: list[str], class_name: str): - return cls( - f"{class_name} requires '{task}' label, but it was not found in " - f"the label dictionary. Available labels: {present_tasks}." - ) - - -class BaseProtocol(BaseModel): - class Config: - arbitrary_types_allowed = True - extra = "forbid" - - -class FeaturesProtocol(BaseProtocol): - features: Annotated[list[Tensor], Field(min_length=1)] diff --git a/media/coverage_badge.svg b/media/coverage_badge.svg index 8e21255a..34387324 100644 --- a/media/coverage_badge.svg +++ b/media/coverage_badge.svg @@ -9,13 +9,13 @@ - + coverage coverage - 84% - 84% + 97% + 97% diff --git a/pyproject.toml b/pyproject.toml index 2093e25b..d65978d4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,5 @@ [project] name = "luxonis-train" -version = "0.0.1" description = "Luxonis training framework for seamless training of various neural networks." 
readme = "README.md" requires-python = ">=3.10" @@ -8,7 +7,7 @@ license = { file = "LICENSE" } authors = [{ name = "Luxonis", email = "support@luxonis.com" }] maintainers = [{ name = "Luxonis", email = "support@luxonis.com" }] keywords = ["ml", "training", "luxonis", "oak"] -dynamic = ["dependencies", "optional-dependencies"] +dynamic = ["dependencies", "optional-dependencies", "version"] classifiers = [ "License :: OSI Approved :: Apache Software License", "Development Status :: 3 - Alpha", @@ -35,10 +34,11 @@ where = ["."] [tool.setuptools.dynamic] dependencies = { file = ["requirements.txt"] } optional-dependencies = { dev = { file = ["requirements-dev.txt"] } } +version = {attr = "luxonis_train.__version__"} [tool.ruff] target-version = "py310" -line-length = 88 +line-length = 79 indent-width = 4 [tool.ruff.lint] @@ -47,10 +47,44 @@ select = ["E4", "E7", "E9", "F", "W", "B", "I"] [tool.docformatter] black = true - -[tool.mypy] -python_version = "3.10" -ignore_missing_imports = true +wrap-summaries = 72 +wrap-descriptions = 72 [tool.pyright] typeCheckingMode = "basic" +reportMissingTypeStubs = "none" +reportPrivateImportUsage = "none" +reportPrivateUsage = "none" +reportIncompatibleVariableOverride = "none" +reportIncompatibleMethodOverride = "none" +reportUnnecessaryIsInstance = "none" + +[tool.pytest.ini_options] +testpaths = ["tests"] +addopts = "--disable-warnings" +markers = [ + "unit: mark a test as a unit test", + "integration: mark a test as an integration test", +] + +[tool.coverage.run] +omit = [ + "**/__main__.py", + "**/gpu_stats_monitor.py" +] + +[tool.coverage.report] +exclude_also = [ + "def __repr__", + "def __rich_repr__", + "def __str__", + "assert", + "raise NotImplementedError", + "except ImportError", + "@abstractmethod", + "@overload", + "exit\\(\\)", + "cv2\\.imshow", + "cv2\\.waitKey", + "logger\\.", +] diff --git a/requirements-dev.txt b/requirements-dev.txt index 7f915575..e4dbd194 100644 --- a/requirements-dev.txt +++ 
b/requirements-dev.txt @@ -4,3 +4,5 @@ pre-commit>=3.2.1 opencv-stubs>=0.0.8 pytest-cov>=4.1.0 pytest-subtests>=0.12.1 +pytest-md>=0.2.0 +pytest-order>=1.3.0 diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/configs/archive_config.yaml b/tests/configs/archive_config.yaml new file mode 100644 index 00000000..71589f4d --- /dev/null +++ b/tests/configs/archive_config.yaml @@ -0,0 +1,43 @@ + +model: + name: archive_test + nodes: + - name: EfficientRep + + - name: EfficientBBoxHead + inputs: + - EfficientRep + + - name: EfficientKeypointBBoxHead + inputs: + - EfficientRep + + - name: ImplicitKeypointBBoxHead + inputs: + - EfficientRep + + - name: SegmentationHead + inputs: + - EfficientRep + + - name: BiSeNetHead + inputs: + - EfficientRep + + - name: ClassificationHead + inputs: + - EfficientRep + +exporter: + output_names: + - seg0 + - class0 + - bbox0 + - bbox1 + - bbox2 + - effkpt0 + - effkpt1 + - effkpt2 + - impl + - seg1 + diff --git a/tests/configs/parking_lot_config.yaml b/tests/configs/parking_lot_config.yaml index ae9f8069..bb15ac37 100644 --- a/tests/configs/parking_lot_config.yaml +++ b/tests/configs/parking_lot_config.yaml @@ -3,54 +3,26 @@ model: name: parking_lot_model nodes: - - name: ReXNetV1_lite - alias: rexnet-detection-backbone - - name: EfficientRep - alias: efficient-detection-backbone - params: - channels_list: [64, 128, 256, 512, 1024] - num_repeats: [1, 6, 12, 18, 6] - depth_mul: 0.33 - width_mul: 0.33 + alias: backbone - name: RepPANNeck - alias: efficient-detection-neck + alias: neck inputs: - - efficient-detection-backbone - params: - channels_list: [256, 128, 128, 256, 256, 512] - num_repeats: [12, 12, 12, 12] - depth_mul: 0.33 - width_mul: 0.33 - - - name: MicroNet - alias: color-segmentation-backbone - - - name: MobileOne - alias: brand-segmentation-backbone - - - name: MobileNetV2 - alias: vehicle-type-segmentation-backbone - - - name: ContextSpatial - alias: 
context-brand-segmentation-backbone + - backbone - name: EfficientBBoxHead alias: bbox-head inputs: - - efficient-detection-neck + - neck - name: ImplicitKeypointBBoxHead alias: car-detection-head inputs: - - rexnet-detection-backbone + - neck task: keypoints: car-keypoints boundingbox: car-boundingbox - params: - conf_thres: 0.25 - iou_thres: 0.45 - name: EfficientKeypointBBoxHead alias: motorbike-detection-head @@ -58,40 +30,31 @@ model: keypoints: motorbike-keypoints boundingbox: motorbike-boundingbox inputs: - - efficient-detection-neck - params: - conf_thres: 0.25 - iou_thres: 0.45 - - - name: BiSeNetHead - alias: context-brand-segmentation-head - task: brand_segmentation - inputs: - - context-brand-segmentation-backbone + - neck - name: SegmentationHead alias: color-segmentation-head - task: color_segmentation + task: color-segmentation inputs: - - color-segmentation-backbone + - neck - name: SegmentationHead alias: any-vehicle-segmentation-head - task: vehicle_segmentation + task: vehicle-segmentation inputs: - - vehicle-type-segmentation-backbone + - neck - name: BiSeNetHead alias: brand-segmentation-head - task: brand_segmentation + task: brand-segmentation inputs: - - brand-segmentation-backbone + - neck - name: BiSeNetHead alias: vehicle-type-segmentation-head - task: vehicle_type_segmentation + task: vehicle_type-segmentation inputs: - - vehicle-type-segmentation-backbone + - neck losses: - name: AdaptiveDetectionLoss @@ -100,12 +63,8 @@ model: attached_to: any-vehicle-segmentation-head - name: CrossEntropyLoss attached_to: vehicle-type-segmentation-head - - name: CrossEntropyLoss - attached_to: context-brand-segmentation-head - name: CrossEntropyLoss attached_to: color-segmentation-head - - name: SoftmaxFocalLoss - attached_to: brand-segmentation-head - name: ImplicitKeypointBBoxLoss attached_to: car-detection-head - name: EfficientKeypointBBoxLoss @@ -127,8 +86,6 @@ model: attached_to: vehicle-type-segmentation-head - name: Precision attached_to: 
def pytest_collection_modifyitems(items):
    """Marks collected tests by the directory they were collected from.

    Tests under a C{unittests} directory get the C{unit} marker and an
    C{order(0)} marker; tests under an C{integration} directory get the
    C{integration} marker. Other tests are left untouched.

    @param items: Collected test items passed in by pytest.
    """
    for item in items:
        # Normalise separators first so the directory checks below also
        # match Windows-style paths, not only POSIX ones.
        test_path = str(item.fspath).replace("\\", "/")
        if "/unittests/" in test_path:
            item.add_marker(pytest.mark.unit)
            # ensure unittests run before integration tests
            item.add_marker(pytest.mark.order(0))
        elif "/integration/" in test_path:
            item.add_marker(pytest.mark.integration)
"markers", "integration: mark test as an integration test" + ) diff --git a/tests/integration/__init__.py b/tests/integration/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/integration/conftest.py b/tests/integration/conftest.py index 9b24271b..ef5a2142 100644 --- a/tests/integration/conftest.py +++ b/tests/integration/conftest.py @@ -1,9 +1,14 @@ import json +import multiprocessing as mp +import os +import shutil from collections import defaultdict from pathlib import Path +from typing import Any import cv2 import gdown +import numpy as np import pytest import torchvision from luxonis_ml.data import LuxonisDataset @@ -12,15 +17,34 @@ from luxonis_ml.utils import LuxonisFileSystem, environ WORK_DIR = Path("tests", "data") -WORK_DIR.mkdir(parents=True, exist_ok=True) -environ.LUXONISML_BASE_PATH = WORK_DIR / "luxonisml" + +@pytest.fixture(scope="session") +def test_output_dir() -> Path: + return Path("tests/integration/save-directory") + + +@pytest.fixture(scope="session", autouse=True) +def setup(test_output_dir: Path): + WORK_DIR.mkdir(parents=True, exist_ok=True) + shutil.rmtree(WORK_DIR / "luxonisml", ignore_errors=True) + shutil.rmtree(test_output_dir, ignore_errors=True) + environ.LUXONISML_BASE_PATH = WORK_DIR / "luxonisml" + test_output_dir.mkdir(exist_ok=True) @pytest.fixture +def train_overfit() -> bool: + return bool(os.getenv("LUXONIS_TRAIN_OVERFIT")) + + +@pytest.fixture(scope="session") def parking_lot_dataset() -> LuxonisDataset: url = "gs://luxonis-test-bucket/luxonis-ml-test-data/D1_ParkingSlotTest" - base_path = LuxonisFileSystem.download(url, WORK_DIR) + base_path = WORK_DIR / "D1_ParkingSlotTest" + if not base_path.exists(): + base_path = LuxonisFileSystem.download(url, WORK_DIR) + mask_brand_path = base_path / "mask_brand" mask_color_path = base_path / "mask_color" kpt_mask_path = base_path / "keypoints_mask_vehicle" @@ -28,7 +52,7 @@ def parking_lot_dataset() -> LuxonisDataset: def generator(): filenames: dict[int, 
Path] = {} for base_path in [kpt_mask_path, mask_brand_path, mask_color_path]: - for sequence_path in list(sorted(base_path.glob("sequence.*"))): + for sequence_path in sorted(list(base_path.glob("sequence.*"))): frame_data = sequence_path / "step0.frame_data.json" with open(frame_data) as f: data = json.load(f)["captures"][0] @@ -52,7 +76,9 @@ def generator(): for bbox_annotation in annotations.get( "BoundingBox2DAnnotation", defaultdict(list) )["values"]: - class_ = bbox_annotation["labelName"].split("-")[-1].lower() + class_ = ( + bbox_annotation["labelName"].split("-")[-1].lower() + ) if class_ == "motorbiek": class_ = "motorbike" x, y = bbox_annotation["origin"] @@ -113,7 +139,10 @@ def generator(): ] mask = cv2.cvtColor( cv2.imread( - str(sequence_path / vehicle_type_segmentation["filename"]) + str( + sequence_path + / vehicle_type_segmentation["filename"] + ) ), cv2.COLOR_BGR2RGB, ) @@ -122,11 +151,11 @@ def generator(): for inst in vehicle_type_segmentation["instances"] } if base_path == kpt_mask_path: - task = "vehicle_type_segmentation" + task = "vehicle_type-segmentation" elif base_path == mask_brand_path: - task = "brand_segmentation" + task = "brand-segmentation" else: - task = "color_segmentation" + task = "color-segmentation" for class_, mask_ in rgb_to_bool_masks( mask, classes, add_background_class=True ): @@ -145,36 +174,40 @@ def generator(): "annotation": { "type": "mask", "class": "vehicle", - "task": "vehicle_segmentation", + "task": "vehicle-segmentation", "mask": mask.astype(bool)[..., 0] | mask.astype(bool)[..., 1] | mask.astype(bool)[..., 2], }, } - dataset = LuxonisDataset("__D1ParkingSLot-test", delete_existing=True) + dataset = LuxonisDataset("_ParkingLot", delete_existing=True) dataset.add(generator()) + np.random.seed(42) dataset.make_splits() return dataset -@pytest.fixture(scope="session", autouse=True) -def create_coco_dataset(): +@pytest.fixture(scope="session") +def coco_dataset() -> LuxonisDataset: dataset_name = "coco_test" url 
= "https://drive.google.com/uc?id=1XlvFK7aRmt8op6-hHkWVKIJQeDtOwoRT" output_zip = WORK_DIR / "COCO_people_subset.zip" - if not output_zip.exists() and not (WORK_DIR / "COCO_people_subset").exists(): + if ( + not output_zip.exists() + and not (WORK_DIR / "COCO_people_subset").exists() + ): gdown.download(url, str(output_zip), quiet=False) parser = LuxonisParser( str(output_zip), dataset_name=dataset_name, delete_existing=True ) - parser.parse(random_split=True) + return parser.parse(random_split=True) -@pytest.fixture(scope="session", autouse=True) -def create_cifar10_dataset(): +@pytest.fixture(scope="session") +def cifar10_dataset() -> LuxonisDataset: dataset = LuxonisDataset("cifar10_test", delete_existing=True) output_folder = WORK_DIR / "cifar10" output_folder.mkdir(parents=True, exist_ok=True) @@ -210,3 +243,40 @@ def CIFAR10_subset_generator(): dataset.add(CIFAR10_subset_generator()) dataset.make_splits() + return dataset + + +@pytest.fixture +def config(train_overfit: bool) -> dict[str, Any]: + if train_overfit: + epochs = 100 + else: + epochs = 1 + + return { + "tracker": { + "save_directory": "tests/integration/save-directory", + }, + "loader": { + "train_view": "val", + "params": { + "dataset_name": "_ParkingLot", + }, + }, + "trainer": { + "batch_size": 4, + "epochs": epochs, + "n_workers": mp.cpu_count(), + "validation_interval": epochs, + "save_top_k": 0, + "preprocessing": { + "train_image_size": [256, 320], + "keep_aspect_ratio": False, + "normalize": {"active": True}, + }, + "callbacks": [ + {"name": "ExportOnTrainEnd"}, + ], + "matmul_precision": "medium", + }, + } diff --git a/tests/integration/multi_input_modules.py b/tests/integration/multi_input_modules.py index dbc5a449..e6fd0476 100644 --- a/tests/integration/multi_input_modules.py +++ b/tests/integration/multi_input_modules.py @@ -1,9 +1,10 @@ import torch +from luxonis_ml.data import LabelType from torch import Tensor, nn +from luxonis_train.loaders import BaseLoaderTorch from 
luxonis_train.nodes import BaseNode -from luxonis_train.utils.loaders import BaseLoaderTorch -from luxonis_train.utils.types import FeaturesProtocol, LabelType, Packet +from luxonis_train.utils import Packet class CustomMultiInputLoader(BaseLoaderTorch): @@ -60,29 +61,23 @@ def unwrap(self, inputs: list[dict[str, list[Tensor]]]): return [item for inp in inputs for key in inp for item in inp[key]] -class FullBackbone(MultiInputTestBaseNode): - input_protocols = [FeaturesProtocol] * 4 +class FullBackbone(MultiInputTestBaseNode): ... -class RGBDBackbone(MultiInputTestBaseNode): - input_protocols = [FeaturesProtocol] * 3 +class RGBDBackbone(MultiInputTestBaseNode): ... -class PointcloudBackbone(MultiInputTestBaseNode): - input_protocols = [FeaturesProtocol] +class PointcloudBackbone(MultiInputTestBaseNode): ... -class FusionNeck(MultiInputTestBaseNode): - input_protocols = [FeaturesProtocol] * 3 +class FusionNeck(MultiInputTestBaseNode): ... -class FusionNeck2(MultiInputTestBaseNode): - input_protocols = [FeaturesProtocol] * 3 +class FusionNeck2(MultiInputTestBaseNode): ... 
class CustomSegHead1(MultiInputTestBaseNode): tasks = {LabelType.SEGMENTATION: "segmentation"} - input_protocols = [FeaturesProtocol] def __init__(self, **kwargs): super().__init__(**kwargs) @@ -98,7 +93,6 @@ def forward(self, inputs: Tensor): class CustomSegHead2(MultiInputTestBaseNode): tasks = {LabelType.SEGMENTATION: "segmentation"} - input_protocols = [FeaturesProtocol] * 3 def __init__(self, **kwargs): super().__init__(**kwargs) diff --git a/tests/integration/parking_lot.json b/tests/integration/parking_lot.json index d9599642..0059241e 100644 --- a/tests/integration/parking_lot.json +++ b/tests/integration/parking_lot.json @@ -36,7 +36,7 @@ ], "outputs": [ { - "name": "any-vehicle-segmentation-head/vehicle_segmentation/0", + "name": "any-vehicle-segmentation-head/vehicle-segmentation/0", "dtype": "float32", "shape": [ 1, @@ -80,7 +80,7 @@ "layout": "NCHW" }, { - "name": "brand-segmentation-head/brand_segmentation/0", + "name": "brand-segmentation-head/brand-segmentation/0", "dtype": "float32", "shape": [ 1, @@ -95,13 +95,13 @@ "dtype": "float32", "shape": [ 1, - 66240, + 5040, 24 ], "layout": "NCD" }, { - "name": "color-segmentation-head/color_segmentation/0", + "name": "color-segmentation-head/color-segmentation/0", "dtype": "float32", "shape": [ 1, @@ -111,17 +111,6 @@ ], "layout": "NCHW" }, - { - "name": "context-brand-segmentation-head/brand_segmentation/0", - "dtype": "float32", - "shape": [ - 1, - 23, - 256, - 320 - ], - "layout": "NCHW" - }, { "name": "motorbike-detection-head/outputs/0", "dtype": "float32", @@ -156,7 +145,7 @@ "layout": "NCDE" }, { - "name": "vehicle-type-segmentation-head/vehicle_type_segmentation/0", + "name": "vehicle-type-segmentation-head/vehicle_type-segmentation/0", "dtype": "float32", "shape": [ 1, @@ -227,42 +216,6 @@ "motorbike-detection-head/outputs/2" ] }, - { - "parser": "SegmentationParser", - "metadata": { - "postprocessor_path": null, - "classes": [ - "background", - "chrysler", - "bmw", - "ducati", - "dodge", - 
"ferrari", - "infiniti", - "land-rover", - "roll-royce", - "saab", - "Kawasaki", - "moto", - "truimph", - "alfa-romeo", - "harley", - "honda", - "jeep", - "aprilia", - "piaggio", - "yamaha", - "buick", - "pontiac", - "isuzu" - ], - "n_classes": 23, - "is_softmax": false - }, - "outputs": [ - "context-brand-segmentation-head/brand_segmentation/0" - ] - }, { "parser": "SegmentationParser", "metadata": { @@ -277,7 +230,7 @@ "is_softmax": false }, "outputs": [ - "color-segmentation-head/color_segmentation/0" + "color-segmentation-head/color-segmentation/0" ] }, { @@ -291,7 +244,7 @@ "is_softmax": false }, "outputs": [ - "any-vehicle-segmentation-head/vehicle_segmentation/0" + "any-vehicle-segmentation-head/vehicle-segmentation/0" ] }, { @@ -327,7 +280,7 @@ "is_softmax": false }, "outputs": [ - "brand-segmentation-head/brand_segmentation/0" + "brand-segmentation-head/brand-segmentation/0" ] }, { @@ -343,7 +296,7 @@ "is_softmax": false }, "outputs": [ - "vehicle-type-segmentation-head/vehicle_type_segmentation/0" + "vehicle-type-segmentation-head/vehicle_type-segmentation/0" ] } ] diff --git a/tests/integration/test_detection.py b/tests/integration/test_detection.py new file mode 100644 index 00000000..fb184b6f --- /dev/null +++ b/tests/integration/test_detection.py @@ -0,0 +1,95 @@ +from typing import Any + +import pytest +from luxonis_ml.data import LuxonisDataset + +from luxonis_train.core import LuxonisModel +from luxonis_train.nodes.backbones import __all__ as BACKBONES + + +def get_opts(backbone: str) -> dict[str, Any]: + return { + "model": { + "nodes": [ + { + "name": backbone, + }, + { + "name": "EfficientBBoxHead", + "inputs": [backbone], + }, + { + "name": "EfficientKeypointBBoxHead", + "task": { + "keypoints": "car-keypoints", + "boundingbox": "car-boundingbox", + }, + "inputs": [backbone], + }, + { + "name": "ImplicitKeypointBBoxHead", + "task": { + "keypoints": "car-keypoints", + "boundingbox": "car-boundingbox", + }, + "inputs": [backbone], + }, + ], + 
@pytest.mark.parametrize("backbone", BACKBONES)
def test_backbones(
    backbone: str,
    config: dict[str, Any],
    parking_lot_dataset: LuxonisDataset,
    train_overfit: bool,
):
    """Trains and tests the detection heads on top of each backbone.

    @type backbone: str
    @param backbone: Name of the backbone node under test.
    @type config: dict[str, Any]
    @param config: Base configuration supplied by the C{config} fixture.
    @type parking_lot_dataset: LuxonisDataset
    @param parking_lot_dataset: Dataset fixture; its identifier is
        written into the loader parameters.
    @type train_overfit: bool
    @param train_overfit: Value of the C{train_overfit} fixture. It is
        forwarded so that the metric thresholds in C{train_and_test}
        are actually checked on overfit runs.
    """
    opts = get_opts(backbone)
    opts["loader.params.dataset_name"] = parking_lot_dataset.identifier
    # Without forwarding the flag, train_and_test falls back to its
    # default of False and the "> 0.8" assertions are never evaluated.
    train_and_test(config, opts, train_overfit)
Path("study_local.db") - -OPTS = { - "trainer.epochs": 1, - "trainer.batch_size": 1, - "trainer.validation_interval": 1, - "trainer.callbacks": "[]", - "tracker.save_directory": str(TEST_OUTPUT), - "tuner.n_trials": 4, -} - - -@pytest.fixture(scope="session", autouse=True) -def manage_out_dir(): - shutil.rmtree(TEST_OUTPUT, ignore_errors=True) - TEST_OUTPUT.mkdir(exist_ok=True) - - -@pytest.fixture(scope="function", autouse=True) -def clear_files(): - yield - STUDY_PATH.unlink(missing_ok=True) - ONNX_PATH.unlink(missing_ok=True) - shutil.rmtree(INFER_PATH, ignore_errors=True) - - -@pytest.mark.parametrize( - "config_file", - [ - "classification_model", - "segmentation_model", - "detection_model", - "keypoint_bbox_model", - "resnet_model", - "coco_model", - "efficient_coco_model", - ], -) -def test_simple_models(config_file: str): - config_file = f"configs/{config_file}.yaml" - model = LuxonisModel(config_file, opts=OPTS) - model.train() - model.test() - model.export() - assert ( - Path(model.run_save_dir, "export", model.cfg.model.name) - .with_suffix(".onnx") - .exists() - ) - model.archive() - assert ( - Path( - model.run_save_dir, - "archive", - model.cfg.archiver.name or model.cfg.model.name, - ) - .with_suffix(".onnx.tar.xz") - .exists() - ) - del model - - -def test_multi_input(): - config_file = "configs/example_multi_input.yaml" - model = LuxonisModel(config_file, opts=OPTS) - model.train() - model.test(view="val") - - assert not ONNX_PATH.exists() - model.export(str(ONNX_PATH)) - assert ONNX_PATH.exists() - - assert not INFER_PATH.exists() - model.infer(view="val", save_dir=INFER_PATH) - assert INFER_PATH.exists() - del model - - -def test_custom_tasks(parking_lot_dataset: LuxonisDataset, subtests): - config_file = "tests/configs/parking_lot_config.yaml" - opts = deepcopy(OPTS) | { - "loader.params.dataset_name": parking_lot_dataset.dataset_name, - "trainer.batch_size": 2, - } - del opts["trainer.callbacks"] - model = LuxonisModel(config_file, opts=opts) - 
model.train() - archive_path = Path( - model.run_save_dir, "archive", model.cfg.model.name - ).with_suffix(".onnx.tar.xz") - correct_archive_config = json.loads( - Path("tests/integration/parking_lot.json").read_text() - ) - - with subtests.test("test_archive"): - assert archive_path.exists() - with tarfile.open(archive_path) as tar: - extracted_cfg = tar.extractfile("config.json") - - assert extracted_cfg is not None, "Config JSON not found in the archive." - generated_config = json.loads(extracted_cfg.read().decode()) - - del generated_config["model"]["heads"][1]["metadata"]["anchors"] - assert generated_config == correct_archive_config - - del model - - -def test_parsing_loader(): - model = LuxonisModel("tests/configs/segmentation_parse_loader.yaml") - model.train() - del model - - -@pytest.mark.skipif(sys.platform == "win32", reason="Tuning not supported on Windows") -def test_tuner(): - model = LuxonisModel("configs/example_tuning.yaml", opts=OPTS) - model.tune() - assert STUDY_PATH.exists() - del model diff --git a/tests/integration/test_segmentation.py b/tests/integration/test_segmentation.py new file mode 100644 index 00000000..c24e6fb9 --- /dev/null +++ b/tests/integration/test_segmentation.py @@ -0,0 +1,134 @@ +from typing import Any + +import pytest +from luxonis_ml.data import LuxonisDataset + +from luxonis_train.core import LuxonisModel +from luxonis_train.nodes.backbones import __all__ as BACKBONES + + +def get_opts(backbone: str) -> dict[str, Any]: + opts = { + "model": { + "nodes": [ + { + "name": backbone, + }, + { + "name": "SegmentationHead", + "alias": "seg-color-segmentation", + "task": "color-segmentation", + "inputs": [backbone], + }, + { + "name": "BiSeNetHead", + "alias": "bi-color-segmentation", + "task": "color-segmentation", + "inputs": [backbone], + }, + { + "name": "SegmentationHead", + "alias": "seg-vehicle-segmentation", + "task": "vehicle-segmentation", + "inputs": [backbone], + }, + { + "name": "BiSeNetHead", + "alias": 
"bi-vehicle-segmentation", + "task": "vehicle-segmentation", + "inputs": [backbone], + }, + { + "name": "SegmentationHead", + "alias": "seg-vehicle-segmentation-2", + "task": "vehicle-segmentation", + "inputs": [backbone], + }, + { + "name": "SegmentationHead", + "alias": "seg-vehicle-segmentation-3", + "task": "vehicle-segmentation", + "inputs": [backbone], + }, + ], + "losses": [ + { + "name": "CrossEntropyLoss", + "attached_to": "seg-color-segmentation", + }, + { + "name": "CrossEntropyLoss", + "attached_to": "bi-color-segmentation", + }, + { + "name": "BCEWithLogitsLoss", + "attached_to": "seg-vehicle-segmentation", + }, + { + "name": "SigmoidFocalLoss", + "attached_to": "bi-vehicle-segmentation", + "params": {"alpha": 0.5, "gamma": 1.0}, + }, + { + "name": "SoftmaxFocalLoss", + "attached_to": "seg-vehicle-segmentation-2", + "params": {"alpha": 0.5, "gamma": 1.0}, + }, + { + "name": "SmoothBCEWithLogitsLoss", + "attached_to": "seg-vehicle-segmentation-3", + "params": {"label_smoothing": 0.1}, + }, + ], + "metrics": [], + "visualizers": [], + } + } + aliases = [head["alias"] for head in opts["model"]["nodes"][1:]] + for alias in aliases: + opts["model"]["metrics"].extend( + [ + { + "name": "JaccardIndex", + "alias": f"JaccardIndex_{alias}", + "attached_to": alias, + }, + { + "name": "F1Score", + "alias": f"F1Score_{alias}", + "attached_to": alias, + }, + ] + ) + opts["model"]["visualizers"].append( + { + "name": "SegmentationVisualizer", + "attached_to": alias, + } + ) + return opts + + +def train_and_test( + config: dict[str, Any], + opts: dict[str, Any], + train_overfit: bool = False, +): + model = LuxonisModel(config, opts) + model.train() + results = model.test(view="val") + if train_overfit: + for name, value in results.items(): + if "metric" in name: + assert value > 0.8, f"{name} = {value} (expected > 0.8)" + + +@pytest.mark.parametrize("backbone", BACKBONES) +def test_backbones( + backbone: str, + config: dict[str, Any], + parking_lot_dataset: 
LuxonisDataset, +): + opts = get_opts(backbone) + opts["loader.params.dataset_name"] = parking_lot_dataset.identifier + train_and_test(config, opts) diff --git a/tests/integration/test_simple.py b/tests/integration/test_simple.py new file mode 100644 index 00000000..784db01a --- /dev/null +++ b/tests/integration/test_simple.py @@ -0,0 +1,215 @@ +import json +import shutil +import sys +import tarfile +from copy import deepcopy +from pathlib import Path +from typing import Any + +import pytest +from luxonis_ml.data import LuxonisDataset +from luxonis_ml.utils import environ + +from luxonis_train.core import LuxonisModel + +from .multi_input_modules import * + +INFER_PATH = Path("tests/integration/infer-save-directory") +ONNX_PATH = Path("tests/integration/_model.onnx") +STUDY_PATH = Path("study_local.db") + + +@pytest.fixture +def opts(test_output_dir: Path) -> dict[str, Any]: + return { + "trainer.epochs": 1, + "trainer.batch_size": 1, + "trainer.validation_interval": 1, + "trainer.callbacks": "[]", + "tracker.save_directory": str(test_output_dir), + "tuner.n_trials": 4, + } + + +@pytest.fixture(scope="function", autouse=True) +def clear_files(): + # todo + yield + STUDY_PATH.unlink(missing_ok=True) + ONNX_PATH.unlink(missing_ok=True) + shutil.rmtree(INFER_PATH, ignore_errors=True) + + +@pytest.mark.parametrize( + "config_file", + [ + "classification_model", + "segmentation_model", + "detection_model", + "keypoint_bbox_model", + ], +) +def test_predefined_models( + opts: dict[str, Any], + config_file: str, + coco_dataset: LuxonisDataset, + cifar10_dataset: LuxonisDataset, +): + config_file = f"configs/{config_file}.yaml" + opts |= { + "loader.params.dataset_name": cifar10_dataset.dataset_name + if "classification_model" in config_file + else coco_dataset.dataset_name, + } + model = LuxonisModel(config_file, opts) + model.train() + model.test() + + +def test_multi_input(opts: dict[str, Any]): + config_file = "configs/example_multi_input.yaml" + model = 
LuxonisModel(config_file, opts) + model.train() + model.test(view="val") + + assert not ONNX_PATH.exists() + model.export(str(ONNX_PATH)) + assert ONNX_PATH.exists() + + assert not INFER_PATH.exists() + model.infer(view="val", save_dir=INFER_PATH) + assert INFER_PATH.exists() + + +def test_custom_tasks( + opts: dict[str, Any], parking_lot_dataset: LuxonisDataset, subtests +): + config_file = "tests/configs/parking_lot_config.yaml" + opts |= { + "loader.params.dataset_name": parking_lot_dataset.dataset_name, + "trainer.batch_size": 2, + } + del opts["trainer.callbacks"] + model = LuxonisModel(config_file, opts) + model.train() + archive_path = Path( + model.run_save_dir, "archive", model.cfg.model.name + ).with_suffix(".onnx.tar.xz") + correct_archive_config = json.loads( + Path("tests/integration/parking_lot.json").read_text() + ) + + with subtests.test("test_archive"): + assert archive_path.exists() + with tarfile.open(archive_path) as tar: + extracted_cfg = tar.extractfile("config.json") + + assert ( + extracted_cfg is not None + ), "Config JSON not found in the archive." 
+ generated_config = json.loads(extracted_cfg.read().decode()) + + del generated_config["model"]["heads"][1]["metadata"]["anchors"] + assert generated_config == correct_archive_config + + +@pytest.mark.skipif( + environ.GOOGLE_APPLICATION_CREDENTIALS is None, + reason="GCP credentials not set", +) +def test_parsing_loader(): + model = LuxonisModel("tests/configs/segmentation_parse_loader.yaml") + model.train() + + +@pytest.mark.skipif( + sys.platform == "win32", + reason="Tuning not supported on Windows", +) +def test_tune(opts: dict[str, Any], coco_dataset: LuxonisDataset): + opts["tuner.params"] = { + "trainer.optimizer.name_categorical": ["Adam", "SGD"], + "trainer.optimizer.params.lr_float": [0.0001, 0.001], + "trainer.batch_size_int": [4, 16, 4], + "trainer.preprocessing.augmentations_subset": [ + ["Defocus", "Sharpen", "Flip", "Normalize", "invalid"], + 2, + ], + "model.losses.0.weight_uniform": [0.1, 0.9], + "model.nodes.0.freezing.unfreeze_after_loguniform": [0.1, 0.9], + } + opts["loader.params.dataset_name"] = coco_dataset.identifier + model = LuxonisModel("configs/example_tuning.yaml", opts) + model.tune() + assert STUDY_PATH.exists() + + +def test_archive(test_output_dir: Path, coco_dataset: LuxonisDataset): + opts = { + "tracker.save_directory": str(test_output_dir), + "loader.params.dataset_name": coco_dataset.identifier, + } + model = LuxonisModel("tests/configs/archive_config.yaml", opts) + model.archive() + assert ( + Path( + model.run_save_dir, + "archive", + model.cfg.archiver.name or model.cfg.model.name, + ) + .with_suffix(".onnx.tar.xz") + .exists() + ) + + +def test_callbacks(opts: dict[str, Any], parking_lot_dataset: LuxonisDataset): + config_file = "tests/configs/parking_lot_config.yaml" + opts = deepcopy(opts) + del opts["trainer.callbacks"] + opts |= { + "trainer.use_rich_progress_bar": False, + "trainer.seed": 42, + "trainer.deterministic": "warn", + "trainer.callbacks": [ + { + "name": "MetadataLogger", + "params": { + "hyperparams": 
["trainer.epochs", "trainer.batch_size"], + }, + }, + {"name": "TestOnTrainEnd"}, + {"name": "UploadCheckpoint"}, + { + "name": "ExportOnTrainEnd", + }, + { + "name": "ArchiveOnTrainEnd", + "params": {"preferred_checkpoint": "loss"}, + }, + ], + "exporter.scale_values": [0.5, 0.5, 0.5], + "exporter.mean_values": [0.5, 0.5, 0.5], + "exporter.blobconverter.active": True, + } + opts["loader.params.dataset_name"] = parking_lot_dataset.identifier + model = LuxonisModel(config_file, opts) + model.train() + + +def test_freezing(opts: dict[str, Any], coco_dataset: LuxonisDataset): + config_file = "configs/segmentation_model.yaml" + opts = deepcopy(opts) + opts |= { + "model.predefined_model.params": { + "head_params": { + "freezing": { + "active": True, + "unfreeze_after": 2, + }, + } + } + } + opts["trainer.epochs"] = 3 + opts["loader.params.dataset_name"] = coco_dataset.identifier + model = LuxonisModel(config_file, opts) + model.train() diff --git a/tests/unittests/__init__.py b/tests/unittests/__init__.py index f9269fdf..e69de29b 100644 --- a/tests/unittests/__init__.py +++ b/tests/unittests/__init__.py @@ -1,2 +0,0 @@ -# import warnings -# warnings.filterwarnings("module", category=DeprecationWarning) diff --git a/tests/unittests/test_assigners/__init__.py b/tests/unittests/test_assigners/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/unittests/test_utils/test_assigners/test_atts_assigner.py b/tests/unittests/test_assigners/test_atts_assigner.py similarity index 88% rename from tests/unittests/test_utils/test_assigners/test_atts_assigner.py rename to tests/unittests/test_assigners/test_atts_assigner.py index a3801ebb..4ab6f939 100644 --- a/tests/unittests/test_utils/test_assigners/test_atts_assigner.py +++ b/tests/unittests/test_assigners/test_atts_assigner.py @@ -1,6 +1,6 @@ import torch -from luxonis_train.utils.assigners.atts_assigner import ATSSAssigner +from luxonis_train.assigners import ATSSAssigner def test_init(): @@ -25,7 +25,12 
@@ def test_forward(): pred_bboxes = torch.rand(bs, n_anchors, 4) labels, bboxes, scores, mask, assigned_gt_idx = assigner.forward( - anchor_bboxes, n_level_bboxes, gt_labels, gt_bboxes, mask_gt, pred_bboxes + anchor_bboxes, + n_level_bboxes, + gt_labels, + gt_bboxes, + mask_gt, + pred_bboxes, ) assert labels.shape == (bs, n_anchors) @@ -59,7 +64,11 @@ def test_select_topk_candidates(): ) assert is_in_topk.shape == (batch_size, n_max_boxes, n_anchors) - assert topk_idxs.shape == (batch_size, n_max_boxes, topk * len(n_level_bboxes)) + assert topk_idxs.shape == ( + batch_size, + n_max_boxes, + topk * len(n_level_bboxes), + ) def test_get_positive_samples(): @@ -97,7 +106,11 @@ def test_get_final_assignments(): assigned_gt_idx = torch.randint(0, n_max_boxes, (batch_size, n_anchors)) mask_pos_sum = torch.randint(0, 2, (batch_size, n_anchors)) - assigned_labels, assigned_bboxes, assigned_scores = assigner._get_final_assignments( + ( + assigned_labels, + assigned_bboxes, + assigned_scores, + ) = assigner._get_final_assignments( gt_labels, gt_bboxes, assigned_gt_idx, mask_pos_sum ) diff --git a/tests/unittests/test_assigners/test_tal_assigner.py b/tests/unittests/test_assigners/test_tal_assigner.py new file mode 100644 index 00000000..cb94b62d --- /dev/null +++ b/tests/unittests/test_assigners/test_tal_assigner.py @@ -0,0 +1,135 @@ +import torch + +from luxonis_train.assigners import TaskAlignedAssigner + + +def test_init(): + assigner = TaskAlignedAssigner( + n_classes=80, topk=13, alpha=1.0, beta=6.0, eps=1e-9 + ) + assert assigner.n_classes == 80 + assert assigner.topk == 13 + assert assigner.alpha == 1.0 + assert assigner.beta == 6.0 + assert assigner.eps == 1e-9 + + +def test_forward(): + batch_size = 10 + n_anchors = 100 + n_max_boxes = 5 + n_classes = 80 + + assigner = TaskAlignedAssigner(n_classes=n_classes, topk=13) + + # Create mock inputs + pred_scores = torch.rand(batch_size, n_anchors, 1) + pred_bboxes = torch.rand(batch_size, n_anchors, 4) + anchor_points = 
torch.rand(n_anchors, 2) + gt_labels = torch.rand(batch_size, n_max_boxes, 1) + gt_bboxes = torch.zeros(batch_size, n_max_boxes, 4) # no gt bboxes + mask_gt = torch.rand(batch_size, n_max_boxes, 1) + + labels, bboxes, scores, mask, assigned_gt_idx = assigner.forward( + pred_scores, pred_bboxes, anchor_points, gt_labels, gt_bboxes, mask_gt + ) + + assert labels.shape == (batch_size, n_anchors) + assert bboxes.shape == (batch_size, n_anchors, 4) + assert scores.shape == ( + batch_size, + n_anchors, + n_classes, + ) + assert mask.shape == (batch_size, n_anchors) + assert assigned_gt_idx.shape == (batch_size, n_anchors) + + # Labels should be `n_classes` as there are no GT boxes + assert labels.unique().tolist() == [n_classes] + + # All results should be zero as there are no GT boxes + assert torch.equal(bboxes, torch.zeros_like(bboxes)) + assert torch.equal(scores, torch.zeros_like(scores)) + assert torch.equal(mask, torch.zeros_like(mask)) + assert torch.equal(assigned_gt_idx, torch.zeros_like(assigned_gt_idx)) + + +def test_get_alignment_metric(): + batch_size = 2 + n_anchors = 5 + n_max_boxes = 3 + n_classes = 80 + + pred_scores = torch.rand(batch_size, n_anchors, n_classes) + pred_bboxes = torch.rand(batch_size, n_anchors, 4) + gt_labels = torch.randint(0, n_classes, (batch_size, n_max_boxes, 1)) + gt_bboxes = torch.rand(batch_size, n_max_boxes, 4) + + assigner = TaskAlignedAssigner( + n_classes=n_classes, topk=13, alpha=1.0, beta=6.0, eps=1e-9 + ) + assigner.bs = pred_scores.size(0) + assigner.n_max_boxes = gt_bboxes.size(1) + + align_metric, overlaps = assigner._get_alignment_metric( + pred_scores, pred_bboxes, gt_labels, gt_bboxes + ) + + assert align_metric.shape == (batch_size, n_max_boxes, n_anchors) + assert overlaps.shape == (batch_size, n_max_boxes, n_anchors) + assert align_metric.dtype == torch.float32 + assert overlaps.dtype == torch.float32 + assert align_metric.min() >= 0 and align_metric.max() <= 1 + assert overlaps.min() >= 0 and overlaps.max() <= 
1 + + +def test_select_topk_candidates(): + batch_size = 2 + n_max_boxes = 3 + n_anchors = 5 + topk = 2 + + metrics = torch.rand(batch_size, n_max_boxes, n_anchors) + mask_gt = torch.rand(batch_size, n_max_boxes, 1) + + assigner = TaskAlignedAssigner(n_classes=80, topk=topk) + + is_in_topk = assigner._select_topk_candidates(metrics) + topk_mask = mask_gt.repeat([1, 1, topk]).bool() + assert torch.equal( + assigner._select_topk_candidates(metrics), + assigner._select_topk_candidates(metrics, topk_mask=topk_mask), + ) + assert is_in_topk.shape == (batch_size, n_max_boxes, n_anchors) + assert is_in_topk.dtype == torch.float32 + + assert is_in_topk.sum(dim=-1).max() <= topk + + +def test_get_final_assignments(): + batch_size = 2 + n_max_boxes = 3 + n_anchors = 5 + n_classes = 80 + + gt_labels = torch.randint(0, n_classes, (batch_size, n_max_boxes, 1)) + gt_bboxes = torch.rand(batch_size, n_max_boxes, 4) + assigned_gt_idx = torch.randint(0, n_max_boxes, (batch_size, n_anchors)) + mask_pos_sum = torch.randint(0, 2, (batch_size, n_anchors)) + + assigner = TaskAlignedAssigner(n_classes=n_classes, topk=13) + assigner.bs = batch_size # Set batch size + assigner.n_max_boxes = gt_bboxes.size(1) + + ( + assigned_labels, + assigned_bboxes, + assigned_scores, + ) = assigner._get_final_assignments( + gt_labels, gt_bboxes, assigned_gt_idx, mask_pos_sum + ) + + assert assigned_labels.shape == (batch_size, n_anchors) + assert assigned_bboxes.shape == (batch_size, n_anchors, 4) + assert assigned_scores.shape == (batch_size, n_anchors, n_classes) + assert assigned_labels.min() >= 0 and assigned_labels.max() <= n_classes diff --git a/tests/unittests/test_utils/test_assigners/test_utils.py b/tests/unittests/test_assigners/test_utils.py similarity index 96% rename from tests/unittests/test_utils/test_assigners/test_utils.py rename to tests/unittests/test_assigners/test_utils.py index bf849e25..d10e1d47 100644 --- a/tests/unittests/test_utils/test_assigners/test_utils.py +++ 
b/tests/unittests/test_assigners/test_utils.py @@ -1,6 +1,6 @@ import torch -from luxonis_train.utils.assigners.utils import ( +from luxonis_train.assigners.utils import ( batch_iou, candidates_in_gt, fix_collisions, diff --git a/tests/unittests/test_base_attached_module.py b/tests/unittests/test_base_attached_module.py new file mode 100644 index 00000000..c6ffdd48 --- /dev/null +++ b/tests/unittests/test_base_attached_module.py @@ -0,0 +1,153 @@ +import pytest +from luxonis_ml.data import LabelType + +from luxonis_train import BaseLoss, BaseNode +from luxonis_train.utils.exceptions import IncompatibleException + + +class DummyBackbone(BaseNode): + def forward(self, _): ... + + +class DummySegmentationHead(BaseNode): + tasks = [LabelType.SEGMENTATION] + + def forward(self, _): ... + + +class DummyBBoxHead(BaseNode): + tasks = [LabelType.BOUNDINGBOX] + + def forward(self, _): ... + + +class DummyDetectionHead(BaseNode): + tasks = [LabelType.BOUNDINGBOX, LabelType.KEYPOINTS] + + def forward(self, _): ... + + +class DummyLoss(BaseLoss): + supported_labels = [ + LabelType.SEGMENTATION, + (LabelType.KEYPOINTS, LabelType.BOUNDINGBOX), + ] + + def forward(self, _): ... + + +class NoLabelLoss(BaseLoss): + def forward(self, _): ... 
+ + +@pytest.fixture +def labels(): + return { + "segmentation": ("segmentation", LabelType.SEGMENTATION), + "keypoints": ("keypoints", LabelType.KEYPOINTS), + "boundingbox": ("boundingbox", LabelType.BOUNDINGBOX), + "classification": ("classification", LabelType.CLASSIFICATION), + } + + +@pytest.fixture +def inputs(): + return { + "features": ["features"], + "segmentation": ["segmentation"], + } + + +def test_valid_properties(): + head = DummySegmentationHead() + loss = DummyLoss(node=head) + no_labels_loss = NoLabelLoss(node=head) + assert loss.node == head + assert loss.node_tasks == {LabelType.SEGMENTATION: "segmentation"} + assert loss.required_labels == [LabelType.SEGMENTATION] + assert no_labels_loss.node == head + assert no_labels_loss.node_tasks == { + LabelType.SEGMENTATION: "segmentation" + } + assert no_labels_loss.required_labels == [] + + +def test_invalid_properties(): + backbone = DummyBackbone() + with pytest.raises(IncompatibleException): + DummyLoss(node=backbone) + with pytest.raises(IncompatibleException): + DummyLoss(node=DummyBBoxHead()) + with pytest.raises(RuntimeError): + _ = DummyLoss().node + with pytest.raises(RuntimeError): + _ = NoLabelLoss(node=backbone).node_tasks + + +def test_get_label(labels): + seg_head = DummySegmentationHead() + det_head = DummyDetectionHead() + seg_loss = DummyLoss(node=seg_head) + assert seg_loss.get_label(labels) == "segmentation" + assert seg_loss.get_label(labels, LabelType.SEGMENTATION) == "segmentation" + + del labels["segmentation"] + labels["segmentation-task"] = ("segmentation", LabelType.SEGMENTATION) + + with pytest.raises(IncompatibleException): + seg_loss.get_label(labels) + + det_loss = DummyLoss(node=det_head) + assert det_loss.get_label(labels, LabelType.KEYPOINTS) == "keypoints" + assert det_loss.get_label(labels, LabelType.BOUNDINGBOX) == "boundingbox" + + with pytest.raises(ValueError): + det_loss.get_label(labels) + + with pytest.raises(ValueError): + det_loss.get_label(labels, 
LabelType.SEGMENTATION) + + +def test_input_tensors(inputs): + seg_head = DummySegmentationHead() + seg_loss = DummyLoss(node=seg_head) + assert seg_loss.get_input_tensors(inputs) == ["segmentation"] + assert seg_loss.get_input_tensors(inputs, "segmentation") == [ + "segmentation" + ] + assert seg_loss.get_input_tensors(inputs, LabelType.SEGMENTATION) == [ + "segmentation" + ] + + with pytest.raises(IncompatibleException): + seg_loss.get_input_tensors(inputs, LabelType.KEYPOINTS) + with pytest.raises(IncompatibleException): + seg_loss.get_input_tensors(inputs, "keypoints") + + det_head = DummyDetectionHead() + det_loss = DummyLoss(node=det_head) + with pytest.raises(ValueError): + det_loss.get_input_tensors(inputs) + + +def test_prepare(inputs, labels): + backbone = DummyBackbone() + seg_head = DummySegmentationHead() + seg_loss = DummyLoss(node=seg_head) + det_head = DummyDetectionHead() + + assert seg_loss.prepare(inputs, labels) == ("segmentation", "segmentation") + inputs["segmentation"].append("segmentation2") + assert seg_loss.prepare(inputs, labels) == ( + "segmentation2", + "segmentation", + ) + + with pytest.raises(RuntimeError): + NoLabelLoss(node=backbone).prepare(inputs, labels) + + with pytest.raises(RuntimeError): + NoLabelLoss(node=seg_head).prepare(inputs, labels) + + with pytest.raises(RuntimeError): + DummyLoss(node=det_head).prepare(inputs, labels) diff --git a/tests/unittests/test_base_node.py b/tests/unittests/test_base_node.py new file mode 100644 index 00000000..68386f73 --- /dev/null +++ b/tests/unittests/test_base_node.py @@ -0,0 +1,160 @@ +import pytest +import torch +from luxonis_ml.data import LabelType +from torch import Size, Tensor + +from luxonis_train.nodes import AttachIndexType, BaseNode +from luxonis_train.utils import DatasetMetadata, Packet +from luxonis_train.utils.exceptions import IncompatibleException + + +class DummyNode(BaseNode, register=False): + def forward(self, _): ... 
+ + +@pytest.fixture +def packet() -> Packet[Tensor]: + return { + "features": [torch.rand(3, 224, 224)], + } + + +@pytest.mark.parametrize( + ("attach_index", "expected"), + [ + (-1, 5), + (0, 1), + ("all", [1, 2, 3, 4, 5]), + ((0, 2), [1, 2]), + ((0, 4, 2), [1, 3]), + ((-1, -3, -1), [5, 4]), + ((4, 2), [5, 4]), + ((-1, -3), [5, 4]), + ((-4, 4), [2, 3, 4]), + ((1, -1), [2, 3, 4]), + ], +) +def test_attach_index( + attach_index: AttachIndexType, expected: list[int] | int +): + lst = [1, 2, 3, 4, 5] + + class DummyBaseNode: + attach_index: AttachIndexType + + DummyBaseNode.attach_index = attach_index + + assert BaseNode.get_attached(DummyBaseNode, lst) == expected # type: ignore + + +def test_attach_index_error(): + lst = [1, 2, 3, 4, 5] + + class DummyNode(BaseNode, register=False): + attach_index: AttachIndexType + + with pytest.raises(ValueError): + DummyNode.attach_index = 10 + BaseNode.get_attached(DummyNode, lst) # type: ignore + + with pytest.raises(ValueError): + DummyNode.attach_index = "none" # type: ignore + BaseNode.get_attached(DummyNode, lst) # type: ignore + + +def test_invalid(packet: Packet[Tensor]): + node = DummyNode() + with pytest.raises(RuntimeError): + _ = node.input_shapes + with pytest.raises(RuntimeError): + _ = node.original_in_shape + with pytest.raises(RuntimeError): + _ = node.dataset_metadata + with pytest.raises(ValueError): + node.unwrap([packet, packet]) + with pytest.raises(ValueError): + node.wrap({"inp": torch.rand(3, 224, 224)}) + + +def test_in_sizes(): + node = DummyNode( + input_shapes=[{"features": [Size((3, 224, 224)) for _ in range(3)]}] + ) + assert node.in_sizes == [Size((3, 224, 224)) for _ in range(3)] + node = DummyNode(in_sizes=Size((3, 224, 224))) + assert node.in_sizes == Size((3, 224, 224)) + with pytest.raises(RuntimeError): + node = DummyNode(input_shapes=[{"feats": [Size((3, 224, 224))]}]) + _ = node.in_sizes + + +def test_check_type_override(): + class DummyNode(BaseNode, register=False): + in_channels: int +
+ def forward(self, _): ... + + with pytest.raises(IncompatibleException): + DummyNode( + input_shapes=[ + {"features": [Size((3, 224, 224)) for _ in range(3)]} + ] + ) + + +def test_tasks(): + class DummyHead(DummyNode): + tasks = [LabelType.CLASSIFICATION] + + class DummyMultiHead(DummyNode): + tasks = [LabelType.CLASSIFICATION, LabelType.SEGMENTATION] + + dummy_head = DummyHead() + dummy_node = DummyNode() + dummy_multi_head = DummyMultiHead(n_keypoints=4) + assert ( + dummy_head.get_task_name(LabelType.CLASSIFICATION) == "classification" + ) + assert dummy_head.task == "classification" + with pytest.raises(ValueError): + dummy_head.get_task_name(LabelType.SEGMENTATION) + + with pytest.raises(RuntimeError): + dummy_node.get_task_name(LabelType.SEGMENTATION) + + with pytest.raises(RuntimeError): + _ = dummy_node.task + + with pytest.raises(ValueError): + _ = dummy_multi_head.task + + metadata = DatasetMetadata( + classes={ + "segmentation": ["car", "person", "dog"], + "classification": ["car-class", "person-class"], + }, + n_keypoints={"color-segmentation": 0, "detection": 0}, + ) + + dummy_multi_head._dataset_metadata = metadata + assert dummy_multi_head.get_class_names(LabelType.SEGMENTATION) == [ + "car", + "person", + "dog", + ] + assert dummy_multi_head.get_class_names(LabelType.CLASSIFICATION) == [ + "car-class", + "person-class", + ] + assert dummy_multi_head.get_n_classes(LabelType.SEGMENTATION) == 3 + assert dummy_multi_head.get_n_classes(LabelType.CLASSIFICATION) == 2 + assert dummy_multi_head.n_keypoints == 4 + with pytest.raises(ValueError): + _ = dummy_head.n_keypoints + with pytest.raises(RuntimeError): + _ = dummy_node.n_keypoints + + dummy_head = DummyHead(n_classes=5) + assert dummy_head.n_classes == 5 + with pytest.raises(ValueError): + _ = dummy_multi_head.n_classes diff --git a/tests/unittests/test_blocks.py b/tests/unittests/test_blocks.py new file mode 100644 index 00000000..8b6110d4 --- /dev/null +++ b/tests/unittests/test_blocks.py @@ -0,0 
+1,15 @@ +import torch + +from luxonis_train.nodes.blocks import SqueezeExciteBlock, autopad + + +def test_autopad(): + assert autopad(1, 2) == 2 + assert autopad(2) == 1 + assert autopad((2, 4)) == (1, 2) + + +def test_squeeze_excite_block(): + se_block = SqueezeExciteBlock(64, 32) + x = torch.rand(1, 64, 224, 224) + assert se_block(x).shape == (1, 64, 224, 224) diff --git a/tests/unittests/test_callbacks/__init__.py b/tests/unittests/test_callbacks/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/unittests/test_callbacks/test_needs_checkpoint.py b/tests/unittests/test_callbacks/test_needs_checkpoint.py new file mode 100644 index 00000000..bd296dea --- /dev/null +++ b/tests/unittests/test_callbacks/test_needs_checkpoint.py @@ -0,0 +1,6 @@ +from luxonis_train.callbacks.needs_checkpoint import NeedsCheckpoint + + +def test_other_type(): + assert NeedsCheckpoint._get_other_type("loss") == "metric" + assert NeedsCheckpoint._get_other_type("metric") == "loss" diff --git a/tests/unittests/test_loaders/__init__.py b/tests/unittests/test_loaders/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/unittests/test_loaders/test_base_loader.py b/tests/unittests/test_loaders/test_base_loader.py new file mode 100644 index 00000000..dee1ecef --- /dev/null +++ b/tests/unittests/test_loaders/test_base_loader.py @@ -0,0 +1,94 @@ +import pytest +import torch +from luxonis_ml.data import LabelType +from torch import Size + +from luxonis_train.loaders import collate_fn + + +@pytest.mark.parametrize( + "input_names_and_shapes", + [ + [("features", Size([3, 224, 224]))], + [ + ("features", Size([3, 224, 224])), + ("segmentation", Size([1, 224, 224])), + ], + [ + ("features", Size([3, 224, 224])), + ("segmentation", Size([1, 224, 224])), + ("disparity", Size([1, 224, 224])), + ], + [ + ("features", Size([3, 224, 224])), + ("pointcloud", Size([1000, 3])), + ], + [ + ("features", Size([3, 224, 224])), + ("pointcloud", Size([1000, 3])), + 
("foobar", Size([2, 3, 4, 5, 6])), + ], + ], +) +@pytest.mark.parametrize("batch_size", [1, 2]) +def test_collate_fn( + input_names_and_shapes: list[tuple[str, Size]], batch_size: int, subtests +): + def build_batch_element(): + inputs = {} + for name, shape in input_names_and_shapes: + inputs[name] = torch.rand(shape, dtype=torch.float32) + + labels = { + "classification": ( + torch.randint(0, 2, (2,), dtype=torch.int64), + LabelType.CLASSIFICATION, + ), + "segmentation": ( + torch.randint(0, 2, (1, 224, 224), dtype=torch.int64), + LabelType.SEGMENTATION, + ), + "keypoints": ( + torch.rand(1, 52, dtype=torch.float32), + LabelType.KEYPOINTS, + ), + "boundingbox": ( + torch.rand(1, 5, dtype=torch.float32), + LabelType.BOUNDINGBOX, + ), + } + + return inputs, labels + + batch = [build_batch_element() for _ in range(batch_size)] + + inputs, annotations = collate_fn(batch) # type: ignore + + with subtests.test("inputs"): + assert inputs["features"].shape == (batch_size, 3, 224, 224) + assert inputs["features"].dtype == torch.float32 + + with subtests.test("classification"): + assert "classification" in annotations + assert annotations["classification"][0].shape == (batch_size, 2) + assert annotations["classification"][0].dtype == torch.int64 + + with subtests.test("segmentation"): + assert "segmentation" in annotations + assert annotations["segmentation"][0].shape == ( + batch_size, + 1, + 224, + 224, + ) + assert annotations["segmentation"][0].dtype == torch.int64 + + with subtests.test("keypoints"): + assert "keypoints" in annotations + assert annotations["keypoints"][0].shape == (batch_size, 53) + assert annotations["keypoints"][0].dtype == torch.float32 + + with subtests.test("boundingbox"): + assert "boundingbox" in annotations + assert annotations["boundingbox"][0].shape == (batch_size, 6) + assert annotations["boundingbox"][0].dtype == torch.float32 diff --git a/tests/unittests/test_losses/test_bce_with_logits_loss.py 
b/tests/unittests/test_losses/test_bce_with_logits_loss.py index 27871019..f94b5cb1 100644 --- a/tests/unittests/test_losses/test_bce_with_logits_loss.py +++ b/tests/unittests/test_losses/test_bce_with_logits_loss.py @@ -16,7 +16,9 @@ def test_forward_pass(): predictions = torch.full([bs, n_cl], 1.5) # logit loss_fn = BCEWithLogitsLoss() - loss = loss_fn.forward(predictions, targets) # -log(sigmoid(1.5)) = 0.2014 + loss = loss_fn.forward( + predictions, targets + ) # -log(sigmoid(1.5)) = 0.2014 assert isinstance(loss, torch.Tensor) assert loss.shape == torch.Size([]) @@ -57,5 +59,7 @@ def test_weights(): assert loss_weight != loss_no_weight -if __name__ == "__main__": - pytest.main() +def test_invalid(): + loss_fn = BCEWithLogitsLoss() + with pytest.raises(RuntimeError): + loss_fn.forward(torch.rand(10, 10), torch.rand(15, 15)) diff --git a/tests/unittests/test_metrics/test_torchmetrics.py b/tests/unittests/test_metrics/test_torchmetrics.py new file mode 100644 index 00000000..141a3785 --- /dev/null +++ b/tests/unittests/test_metrics/test_torchmetrics.py @@ -0,0 +1,52 @@ +import pytest +import torchmetrics +from luxonis_ml.data import LabelType + +from luxonis_train.attached_modules.metrics.torchmetrics import ( + TorchMetricWrapper, +) +from luxonis_train.nodes import BaseNode + + +def test_torchmetrics(): + class DummyNode(BaseNode): + tasks = [LabelType.CLASSIFICATION, LabelType.SEGMENTATION] + + def forward(self, _): ... 
+ + class DummyMetric(TorchMetricWrapper): + supported_labels = [LabelType.CLASSIFICATION, LabelType.SEGMENTATION] + Metric = torchmetrics.Accuracy + + node_1_class = DummyNode(n_classes=1) + node_2_classes = DummyNode(n_classes=2) + node = DummyNode() + assert DummyMetric(node=node_1_class)._task == "binary" + assert DummyMetric(node=node_2_classes)._task == "multiclass" + assert DummyMetric(node=node_2_classes, task="multilabel") + assert DummyMetric(num_classes=1)._task == "binary" + assert DummyMetric(num_classes=2)._task == "multiclass" + assert DummyMetric(num_labels=2)._task == "multilabel" + + assert DummyMetric(task="binary") + + with pytest.raises(ValueError): + DummyMetric() + + with pytest.raises(ValueError): + DummyMetric(task="multiclass") + + with pytest.raises(ValueError): + DummyMetric(task="invalid") + + with pytest.raises(ValueError): + DummyMetric(task="binary", node=node_2_classes) + + with pytest.raises(ValueError): + DummyMetric(task="multiclass", node=node_1_class) + + with pytest.raises(ValueError): + DummyMetric(task="multiclass", node=node) + + with pytest.raises(ValueError): + DummyMetric(task="multilabel", node=node) diff --git a/tests/unittests/test_utils/test_assigners/test_tal_assigner.py b/tests/unittests/test_utils/test_assigners/test_tal_assigner.py deleted file mode 100644 index 8f291615..00000000 --- a/tests/unittests/test_utils/test_assigners/test_tal_assigner.py +++ /dev/null @@ -1,165 +0,0 @@ -import torch - -from luxonis_train.utils.assigners.tal_assigner import TaskAlignedAssigner - - -def test_init(): - assigner = TaskAlignedAssigner(n_classes=80, topk=13, alpha=1.0, beta=6.0, eps=1e-9) - assert assigner.n_classes == 80 - assert assigner.topk == 13 - assert assigner.alpha == 1.0 - assert assigner.beta == 6.0 - assert assigner.eps == 1e-9 - - -def test_forward(): - # Constants for clarity - batch_size = 10 - num_anchors = 100 - num_max_boxes = 5 - num_classes = 80 - - # Initialize the TaskAlignedAssigner - assigner = 
TaskAlignedAssigner(n_classes=num_classes, topk=13) - - # Create mock inputs - pred_scores = torch.rand(batch_size, num_anchors, 1) - pred_bboxes = torch.rand(batch_size, num_anchors, 4) - anchor_points = torch.rand(num_anchors, 2) - gt_labels = torch.rand(batch_size, num_max_boxes, 1) - gt_bboxes = torch.zeros(batch_size, num_max_boxes, 4) # no gt bboxes - mask_gt = torch.rand(batch_size, num_max_boxes, 1) - - # Call the forward method - labels, bboxes, scores, mask, assigned_gt_idx = assigner.forward( - pred_scores, pred_bboxes, anchor_points, gt_labels, gt_bboxes, mask_gt - ) - - # Assert the expected outcomes - assert labels.shape == (batch_size, num_anchors) - assert labels.unique().tolist() == [ - num_classes - ] # All labels should be num_classes as there are no GT boxes - assert bboxes.shape == (batch_size, num_anchors, 4) - assert torch.equal( - bboxes, torch.zeros_like(bboxes) - ) # All bboxes should be zero as there are no GT boxes - assert ( - scores.shape - == ( - batch_size, - num_anchors, - num_classes, - ) - ) # TODO: We have this in doc string: Returns: ... assigned scores of shape [bs, n_anchors, 1], - # it returns tensor of shape [bs, n_anchors, n_classes] instead - assert torch.equal( - scores, torch.zeros_like(scores) - ) # All scores should be zero as there are no GT boxes - assert mask.shape == (batch_size, num_anchors) - assert torch.equal( - mask, torch.zeros_like(mask) - ) # All mask values should be zero as there are no GT boxes - assert assigned_gt_idx.shape == (batch_size, num_anchors) - assert torch.equal( - assigned_gt_idx, torch.zeros_like(assigned_gt_idx) - ) # All assigned_gt_idx values should be zero as there are no GT boxes - - -def test_get_alignment_metric(): - # Create mock inputs - bs = 2 # batch size - n_anchors = 5 - n_max_boxes = 3 - n_classes = 80 - - pred_scores = torch.rand( - bs, n_anchors, n_classes - ) # TODO: Same issue: works with n_classes instead of 1, change it in the doc string in the method itself!!! 
- pred_bboxes = torch.rand(bs, n_anchors, 4) - gt_labels = torch.randint(0, n_classes, (bs, n_max_boxes, 1)) - gt_bboxes = torch.rand(bs, n_max_boxes, 4) - - # Initialize the TaskAlignedAssigner - assigner = TaskAlignedAssigner( - n_classes=n_classes, topk=13, alpha=1.0, beta=6.0, eps=1e-9 - ) - assigner.bs = pred_scores.size(0) - assigner.n_max_boxes = gt_bboxes.size(1) - - # Call the method - align_metric, overlaps = assigner._get_alignment_metric( - pred_scores, pred_bboxes, gt_labels, gt_bboxes - ) - - # Assert the expected outcomes - assert align_metric.shape == (bs, n_max_boxes, n_anchors) - assert overlaps.shape == (bs, n_max_boxes, n_anchors) - assert align_metric.dtype == torch.float32 - assert overlaps.dtype == torch.float32 - assert (align_metric >= 0).all() and ( - align_metric <= 1 - ).all() # Alignment metric should be in the range [0, 1] - assert (overlaps >= 0).all() and ( - overlaps <= 1 - ).all() # IoU should be in the range [0, 1] - - -def test_select_topk_candidates(): - # Constants for the test - batch_size = 2 - num_max_boxes = 3 - num_anchors = 5 - topk = 2 - - metrics = torch.rand(batch_size, num_max_boxes, num_anchors) - mask_gt = torch.rand(batch_size, num_max_boxes, 1) - - # Initialize the TaskAlignedAssigner - assigner = TaskAlignedAssigner(n_classes=80, topk=topk) - - # Call the method - is_in_topk = assigner._select_topk_candidates( - metrics, - ) - topk_mask = mask_gt.repeat([1, 1, topk]).bool() - assert torch.equal( - assigner._select_topk_candidates(metrics), - assigner._select_topk_candidates(metrics, topk_mask=topk_mask), - ) - # Assert the expected outcomes - assert is_in_topk.shape == (batch_size, num_max_boxes, num_anchors) - assert is_in_topk.dtype == torch.float32 - - # Check that each ground truth has at most 'topk' anchors selected - assert (is_in_topk.sum(dim=-1) <= topk).all() - - -def test_get_final_assignments(): - # Constants for the test - batch_size = 2 - num_max_boxes = 3 - num_anchors = 5 - num_classes = 80 - - # 
Mock inputs - gt_labels = torch.randint(0, num_classes, (batch_size, num_max_boxes, 1)) - gt_bboxes = torch.rand(batch_size, num_max_boxes, 4) - assigned_gt_idx = torch.randint(0, num_max_boxes, (batch_size, num_anchors)) - mask_pos_sum = torch.randint(0, 2, (batch_size, num_anchors)) - - # Initialize the TaskAlignedAssigner - assigner = TaskAlignedAssigner(n_classes=num_classes, topk=13) - assigner.bs = batch_size # Set batch size - assigner.n_max_boxes = gt_bboxes.size(1) - - # Call the method - assigned_labels, assigned_bboxes, assigned_scores = assigner._get_final_assignments( - gt_labels, gt_bboxes, assigned_gt_idx, mask_pos_sum - ) - - # Assert the expected outcomes - assert assigned_labels.shape == (batch_size, num_anchors) - assert assigned_bboxes.shape == (batch_size, num_anchors, 4) - assert assigned_scores.shape == (batch_size, num_anchors, num_classes) - assert (assigned_labels >= 0).all() and (assigned_labels <= num_classes).all() diff --git a/tests/unittests/test_utils/test_boxutils.py b/tests/unittests/test_utils/test_boxutils.py index 2cb3df24..2b05a428 100644 --- a/tests/unittests/test_utils/test_boxutils.py +++ b/tests/unittests/test_utils/test_boxutils.py @@ -1,39 +1,42 @@ +import pytest import torch -from luxonis_train.utils.boxutils import ( +from luxonis_train.utils.boundingbox import ( + IoUType, anchors_for_fpn_features, bbox2dist, bbox_iou, compute_iou_loss, dist2bbox, process_bbox_predictions, - process_keypoints_predictions, ) -def generate_random_bboxes(num_bboxes, max_width, max_height, format="xyxy"): - # Generate top-left corners (x1, y1) - x1y1 = torch.rand(num_bboxes, 2) * torch.tensor([max_width - 1, max_height - 1]) +def generate_random_bboxes( + n_bboxes: int, max_width: int, max_height: int, format: str = "xyxy" +): + x1y1 = torch.rand(n_bboxes, 2) * torch.tensor( + [max_width - 1, max_height - 1] + ) - # Generate widths and heights ensuring x2 > x1 and y2 > y1 wh = ( - torch.rand(num_bboxes, 2) * (torch.tensor([max_width, 
max_height]) - 1 - x1y1) + torch.rand(n_bboxes, 2) + * (torch.tensor([max_width, max_height]) - 1 - x1y1) + 1 ) if format == "xyxy": - # Calculate bottom-right corners (x2, y2) for xyxy format x2y2 = x1y1 + wh bboxes = torch.cat((x1y1, x2y2), dim=1) elif format == "xywh": - # Use x1y1 as top-left corner and wh as width and height for xywh format bboxes = torch.cat((x1y1, wh), dim=1) elif format == "cxcywh": - # Calculate center coordinates and use wh as width and height for cxcywh format cxcy = x1y1 + wh / 2 bboxes = torch.cat((cxcy, wh), dim=1) else: - raise ValueError("Unsupported format. Choose from 'xyxy', 'xywh', 'cxcywh'.") + raise ValueError( + "Unsupported format. Choose from 'xyxy', 'xywh', 'cxcywh'." + ) return bboxes @@ -44,6 +47,8 @@ def test_dist2bbox(): bbox = dist2bbox(distance, anchor_points) assert bbox.shape == distance.shape + with pytest.raises(ValueError): + dist2bbox(distance, anchor_points, out_format="invalid") # type: ignore def test_bbox2dist(): @@ -56,22 +61,41 @@ def test_bbox2dist(): assert distance.shape == bbox.shape -def test_bbox_iou(): +@pytest.mark.parametrize("iou_type", ["none", "giou", "diou", "ciou", "siou"]) +def test_bbox_iou(iou_type: IoUType): for format in ["xyxy", "cxcywh", "xywh"]: bbox1 = generate_random_bboxes(5, 640, 640, format) - bbox2 = generate_random_bboxes(8, 640, 640, format) - - iou = bbox_iou(bbox1, bbox2) - - assert iou.shape == (5, 8) - assert iou.min() >= 0 and iou.max() <= 1 + if iou_type == "siou": + bbox2 = generate_random_bboxes(5, 640, 640, format) + else: + bbox2 = generate_random_bboxes(8, 640, 640, format) + + iou = bbox_iou( + bbox1, + bbox2, + bbox_format=format, # type: ignore + iou_type=iou_type, + ) + + assert iou.shape == (bbox1.shape[0], bbox2.shape[0]) + if iou_type == "none": + min_iou = 0 + else: + min_iou = -1.5 + assert iou.min() >= min_iou and iou.max() <= 1 + + if iou_type == "none": + with pytest.raises(ValueError): + bbox_iou(bbox1, bbox2, iou_type="invalid") # type: ignore def 
test_compute_iou_loss(): pred_bboxes = generate_random_bboxes(8, 640, 640, "xyxy") target_bboxes = generate_random_bboxes(8, 640, 640, "xyxy") - loss_iou, iou = compute_iou_loss(pred_bboxes, target_bboxes, iou_type="giou") + loss_iou, iou = compute_iou_loss( + pred_bboxes, target_bboxes, iou_type="giou" + ) assert isinstance(loss_iou, torch.Tensor) assert isinstance(iou, torch.Tensor) @@ -93,21 +117,16 @@ def test_process_bbox_predictions(): assert out_bbox_tail.shape == (10, 4) -def test_process_keypoints_predictions(): - keypoints = torch.rand(10, 15) # 5 keypoints * 3 (x, y, visibility) - - x, y, visibility = process_keypoints_predictions(keypoints) - - assert x.shape == y.shape == visibility.shape == (10, 5) - - def test_anchors_for_fpn_features(): features = [torch.rand(1, 256, 14, 14), torch.rand(1, 256, 28, 28)] strides = torch.tensor([8, 16]) - anchors, anchor_points, n_anchors_list, stride_tensor = anchors_for_fpn_features( - features, strides - ) + ( + anchors, + anchor_points, + n_anchors_list, + stride_tensor, + ) = anchors_for_fpn_features(features, strides) assert isinstance(anchors, torch.Tensor) assert isinstance(anchor_points, torch.Tensor) diff --git a/tests/unittests/test_utils/test_dataset_metadata.py b/tests/unittests/test_utils/test_dataset_metadata.py new file mode 100644 index 00000000..8dba11a8 --- /dev/null +++ b/tests/unittests/test_utils/test_dataset_metadata.py @@ -0,0 +1,53 @@ +import pytest + +from luxonis_train.utils import DatasetMetadata + + +@pytest.fixture +def metadata(): + return DatasetMetadata( + classes={ + "color-segmentation": ["car", "person"], + "detection": ["car", "person"], + }, + n_keypoints={"color-segmentation": 0, "detection": 0}, + ) + + +def test_n_classes(metadata): + assert metadata.n_classes("color-segmentation") == 2 + assert metadata.n_classes("detection") == 2 + assert metadata.n_classes() == 2 + with pytest.raises(ValueError): + metadata.n_classes("segmentation") + metadata._classes["segmentation"] = 
["car", "person", "tree"] + with pytest.raises(RuntimeError): + metadata.n_classes() + + +def test_n_keypoints(metadata): + assert metadata.n_keypoints("color-segmentation") == 0 + assert metadata.n_keypoints("detection") == 0 + assert metadata.n_keypoints() == 0 + with pytest.raises(ValueError): + metadata.n_keypoints("segmentation") + metadata._n_keypoints["segmentation"] = 1 + with pytest.raises(RuntimeError): + metadata.n_keypoints() + + +def test_class_names(metadata): + assert metadata.classes("color-segmentation") == ["car", "person"] + assert metadata.classes("detection") == ["car", "person"] + assert metadata.classes() == ["car", "person"] + with pytest.raises(ValueError): + metadata.classes("segmentation") + metadata._classes["segmentation"] = ["car", "person", "tree"] + with pytest.raises(RuntimeError): + metadata.classes() + + +def test_no_loader(): + metadata = DatasetMetadata() + with pytest.raises(RuntimeError): + metadata.autogenerate_anchors(3) diff --git a/tests/unittests/test_utils/test_general.py b/tests/unittests/test_utils/test_general.py new file mode 100644 index 00000000..7f13f796 --- /dev/null +++ b/tests/unittests/test_utils/test_general.py @@ -0,0 +1,44 @@ +import pytest + +from luxonis_train.utils.general import infer_upscale_factor + + +@pytest.mark.parametrize( + ("in_size", "orig_size", "expected"), + [ + ((1, 1), (1, 1), 0), + ((1, 1), (2, 2), 1), + ((2, 2), (1, 1), -1), + ((2, 2), (4, 4), 1), + ((4, 4), (2, 2), -1), + ((4, 4), (8, 8), 1), + ((8, 8), (4, 4), -1), + ((2, 2), (16, 16), 3), + ((16, 16), (4, 4), -2), + (4, 8, 1), + ], +) +def test_infer_upscale_factor( + in_size: tuple[int, int] | int, + orig_size: tuple[int, int] | int, + expected: int, +): + assert infer_upscale_factor(in_size, orig_size) == expected + + +@pytest.mark.parametrize( + ("in_size", "orig_size"), + [ + ((1, 1), (2, 1)), + ((1, 1), (1, 2)), + ((2, 3), (16, 16)), + ((3, 2), (16, 16)), + ((3, 3), (16, 16)), + ], +) +def test_infer_upscale_factor_fail( + 
in_size: tuple[int, int] | int, + orig_size: tuple[int, int] | int, +): + with pytest.raises(ValueError): + infer_upscale_factor(in_size, orig_size) diff --git a/tests/unittests/test_utils/test_graph.py b/tests/unittests/test_utils/test_graph.py new file mode 100644 index 00000000..c63e4b72 --- /dev/null +++ b/tests/unittests/test_utils/test_graph.py @@ -0,0 +1,79 @@ +import pytest + +from luxonis_train.utils.graph import Graph, is_acyclic, traverse_graph + + +@pytest.mark.parametrize( + ("graph", "acyclic"), + [ + ({}, True), + ({"a": []}, True), + ({"a": ["b"], "b": ["a"]}, False), + ({"a": ["b"], "b": []}, True), + ({"a": ["b"], "b": ["c"], "c": ["a"]}, False), + ({"a": ["b"], "b": ["c"], "c": []}, True), + ({"a": ["b", "c"], "b": ["d"], "c": ["d"], "d": []}, True), + ({"a": ["b", "c"], "b": ["d"], "c": ["d"], "d": ["a"]}, False), + ], +) +def test_acyclic(graph: Graph, acyclic: bool): + assert is_acyclic(graph) == acyclic + + +@pytest.mark.parametrize( + ("graph", "nodes", "expected"), + [ + ({}, {}, []), + ( + {"a": []}, + {"a": 1}, + [("a", 1, [], [])], + ), + ( + {"a": ["b"], "b": []}, + {"a": 1, "b": 2}, + [("b", 2, [], ["a"]), ("a", 1, ["b"], [])], + ), + ( + {"a": ["b"], "b": ["c"], "c": []}, + {"a": 1, "b": 2, "c": 3}, + [ + ("c", 3, [], ["a", "b"]), + ("b", 2, ["c"], ["a"]), + ("a", 1, ["b"], []), + ], + ), + ( + {"a": ["b", "c"], "b": ["d"], "c": ["d"], "d": []}, + {"a": 1, "b": 2, "c": 3, "d": 4}, + [ + ("d", 4, [], ["a", "b", "c"]), + ("b", 2, ["d"], ["a", "c"]), + ("c", 3, ["d"], ["a"]), + ("a", 1, ["b", "c"], []), + ], + ), + ], +) +def test_traverse( + graph: Graph, + nodes: dict[str, int], + expected: list[tuple[str, int, list[str], list[str]]], +): + result = list(traverse_graph(graph, nodes)) + assert result == expected + + +@pytest.mark.parametrize( + ("graph", "nodes"), + [ + ({"a": ["b"], "b": ["a"]}, {"a": 1, "b": 2}), + ( + {"a": ["b", "c"], "b": ["d"], "c": ["d"], "d": ["a"]}, + {"a": 1, "b": 2, "c": 3, "d": 4}, + ), + ], +) +def 
test_traverse_fail(graph: Graph, nodes: dict[str, int]): + with pytest.raises(RuntimeError): + list(traverse_graph(graph, nodes)) diff --git a/tests/unittests/test_utils/test_keypoints.py b/tests/unittests/test_utils/test_keypoints.py new file mode 100644 index 00000000..3d20dae6 --- /dev/null +++ b/tests/unittests/test_utils/test_keypoints.py @@ -0,0 +1,24 @@ +import pytest +import torch + +from luxonis_train.utils.keypoints import ( + get_sigmas, + process_keypoints_predictions, +) + + +def test_get_sigmas(): + sigmas = [0.1, 0.2, 0.3] + assert get_sigmas(sigmas, 3).tolist() == pytest.approx(sigmas) + with pytest.raises(ValueError): + get_sigmas(sigmas, 2) + assert len(get_sigmas(None, 17)) == 17 + assert len(get_sigmas(None, 5)) == 5 + + +def test_process_keypoints_predictions(): + keypoints = torch.tensor([[0.1, 0.2, 1.0, 0.4, 0.5, 0.0]]) + x, y, visibility = process_keypoints_predictions(keypoints) + assert x[0].tolist() == pytest.approx([0.1, 0.4]) # NOTE(review): never enforced before; confirm expected values + assert y[0].tolist() == pytest.approx([0.2, 0.5]) + assert visibility[0].tolist() == pytest.approx([1.0, 0.0]) diff --git a/tests/unittests/test_utils/test_loaders/test_base_loader.py b/tests/unittests/test_utils/test_loaders/test_base_loader.py deleted file mode 100644 index 0209c192..00000000 --- a/tests/unittests/test_utils/test_loaders/test_base_loader.py +++ /dev/null @@ -1,69 +0,0 @@ -import pytest -import torch - -from luxonis_train.utils.loaders import collate_fn -from luxonis_train.utils.types import LabelType - - -@pytest.mark.parametrize( - "input_names_and_shapes", - [ - [("features", torch.Size([3, 224, 224]))], - [ - ("features", torch.Size([3, 224, 224])), - ("segmentation", torch.Size([1, 224, 224])), - ], - [ - ("features", torch.Size([3, 224, 224])), - ("segmentation", torch.Size([1, 224, 224])), - ("disparity", torch.Size([1, 224, 224])), - ], - [ - ("features", torch.Size([3, 224, 224])), - ("pointcloud", torch.Size([1000, 3])), - ], - [ - ("features", torch.Size([3, 224, 224])), - ("pointcloud", torch.Size([1000, 3])), - 
("foobar", torch.Size([2, 3, 4, 5, 6])), - ], - ], -) -@pytest.mark.parametrize("batch_size", [1, 2]) -def test_collate_fn(input_names_and_shapes, batch_size): - # Mock batch data - - def build_batch_element(): - inputs = {} - for name, shape in input_names_and_shapes: - inputs[name] = torch.rand(shape, dtype=torch.float32) - - labels = { - "classification": ( - torch.randint(0, 2, (2,), dtype=torch.int64), - LabelType.CLASSIFICATION, - ) - } - - return inputs, labels - - batch = [build_batch_element() for _ in range(batch_size)] - - # Call collate_fn - inputs, annotations = collate_fn(batch) # type: ignore - - # Check images tensor - assert inputs["features"].shape == (batch_size, 3, 224, 224) - assert inputs["features"].dtype == torch.float32 - - # Check annotations - assert "classification" in annotations - assert annotations["classification"][0].shape == (batch_size, 2) - assert annotations["classification"][0].dtype == torch.int64 - - -# TODO: test also segmentation, boundingbox and keypoint - - -if __name__ == "__main__": - pytest.main()