luxonis · klemen1999 · Sep 30, 2024 · Sep 22, 2024 · Sep 22, 2024 · Sep 23, 2024
diff --git a/luxonis_train/nodes/README.md b/luxonis_train/nodes/README.md
@@ -74,13 +74,15 @@ Adapted from [here](https://arxiv.org/pdf/2209.02976.pdf).
 
 **Params**
 
-| Key           | Type        | Default value               | Description                                         |
-| ------------- | ----------- | --------------------------- | --------------------------------------------------- |
-| channels_list | List\[int\] | \[64, 128, 256, 512, 1024\] | List of number of channels for each block           |
-| n_repeats     | List\[int\] | \[1, 6, 12, 18, 6\]         | List of number of repeats of RepVGGBlock            |
-| in_channels   | int         | 3                           | Number of input channels, should be 3 in most cases |
-| depth_mul     | int         | 0.33                        | Depth multiplier                                    |
-| width_mul     | int         | 0.25                        | Width multiplier                                    |
+| Key           | Type                                                              | Default value               | Description                                                     |
+| ------------- | ----------------------------------------------------------------- | --------------------------- | --------------------------------------------------------------- |
+| variant       | Literal\["n", "nano", "s", "small", "m", "medium", "l", "large"\] | "nano"                      | Variant of the network                                          |
+| channels_list | List\[int\]                                                       | \[64, 128, 256, 512, 1024\] | List of number of channels for each block                       |
+| n_repeats     | List\[int\]                                                       | \[1, 6, 12, 18, 6\]         | List of number of repeats of RepVGGBlock                        |
+| depth_mul     | float                                                             | 0.33                        | Depth multiplier                                                |
+| width_mul     | float                                                             | 0.25                        | Width multiplier                                                |
+| block         | Literal\["RepBlock", "CSPStackRepBlock"\]                         | "RepBlock"                  | Base block used                                                 |
+| csp_e         | float                                                             | 0.5                         | Factor for intermediate channels when block=="CSPStackRepBlock" |
 
 ## RexNetV1_lite
 
@@ -143,13 +145,16 @@ Adapted from [here](https://arxiv.org/pdf/2209.02976.pdf).
 
 **Params**
 
-| Key           | Type             | Default value                                           | Description                               |
-| ------------- | ---------------- | ------------------------------------------------------- | ----------------------------------------- |
-| n_heads       | Literal\[2,3,4\] | 3 ***Note:** Should be same also on head in most cases* | Number of output heads                    |
-| channels_list | List\[int\]      | \[256, 128, 128, 256, 256, 512\]                        | List of number of channels for each block |
-| n_repeats     | List\[int\]      | \[12, 12, 12, 12\]                                      | List of number of repeats of RepVGGBlock  |
-| depth_mul     | int              | 0.33                                                    | Depth multiplier                          |
-| width_mul     | int              | 0.25                                                    | Width multiplier                          |
+| Key           | Type                                                              | Default value                                           | Description                                                     |
+| ------------- | ----------------------------------------------------------------- | ------------------------------------------------------- | --------------------------------------------------------------- |
+| variant       | Literal\["n", "nano", "s", "small", "m", "medium", "l", "large"\] | "nano"                                                  | Variant of the network                                          |
+| n_heads       | Literal\[2,3,4\]                                                  | 3 ***Note:** Should be same also on head in most cases* | Number of output heads                                          |
+| channels_list | List\[int\]                                                       | \[256, 128, 128, 256, 256, 512\]                        | List of number of channels for each block                       |
+| n_repeats     | List\[int\]                                                       | \[12, 12, 12, 12\]                                      | List of number of repeats of RepVGGBlock                        |
+| depth_mul     | float                                                             | 0.33                                                    | Depth multiplier                                                |
+| width_mul     | float                                                             | 0.25                                                    | Width multiplier                                                |
+| block         | Literal\["RepBlock", "CSPStackRepBlock"\]                         | "RepBlock"                                              | Base block used                                                 |
+| csp_e         | float                                                             | 0.5                                                     | Factor for intermediate channels when block=="CSPStackRepBlock" |
 
 ## ClassificationHead
 

diff --git a/luxonis_train/nodes/backbones/efficientrep/efficientrep.py b/luxonis_train/nodes/backbones/efficientrep/efficientrep.py
@@ -1,11 +1,12 @@
 import logging
-from typing import Any
+from typing import Any, Literal
 
 from torch import Tensor, nn
 
 from luxonis_train.nodes.base_node import BaseNode
 from luxonis_train.nodes.blocks import (
     BlockRepeater,
+    CSPStackRepBlock,
     RepVGGBlock,
     SpatialPyramidPoolingBlock,
 )
@@ -26,23 +27,26 @@ def __init__(
         n_repeats: list[int] | None = None,
         depth_mul: float | None = None,
         width_mul: float | None = None,
+        block: Literal["RepBlock", "CSPStackRepBlock"] | None = None,
+        csp_e: float | None = None,
         **kwargs: Any,
     ):
-        """Implementation of the EfficientRep backbone.
+        """Implementation of the EfficientRep backbone. Supports the
+        version with RepBlock and CSPStackRepBlock (for larger networks).
 
         Adapted from U{YOLOv6: A Single-Stage Object Detection Framework
         for Industrial Applications
         <https://arxiv.org/pdf/2209.02976.pdf>}.
 
         @type variant: Literal["n", "nano", "s", "small", "m", "medium", "l", "large"]
         @param variant: EfficientRep variant. Defaults to "nano".
-            The variant determines the depth and width multipliers.
+            The variant determines the depth and width multipliers, the block used, and the intermediate channel scaling factor (csp_e).
             The depth multiplier determines the number of blocks in each stage and the width multiplier determines the number of channels.
             The following variants are available:
-                - "n" or "nano" (default): depth_multiplier=0.33, width_multiplier=0.25
-                - "s" or "small": depth_multiplier=0.33, width_multiplier=0.50
-                - "m" or "medium": depth_multiplier=0.60, width_multiplier=0.75
-                - "l" or "large": depth_multiplier=1.0, width_multiplier=1.0
+                - "n" or "nano" (default): depth_multiplier=0.33, width_multiplier=0.25, block=RepBlock, csp_e=None
+                - "s" or "small": depth_multiplier=0.33, width_multiplier=0.50, block=RepBlock, csp_e=None
+                - "m" or "medium": depth_multiplier=0.60, width_multiplier=0.75, block=CSPStackRepBlock, csp_e=2/3
+                - "l" or "large": depth_multiplier=1.0, width_multiplier=1.0, block=CSPStackRepBlock, csp_e=1/2
         @type channels_list: list[int] | None
         @param channels_list: List of number of channels for each block. If unspecified,
             defaults to [64, 128, 256, 512, 1024].
@@ -53,12 +57,19 @@ def __init__(
         @param depth_mul: Depth multiplier. If provided, overrides the variant value.
         @type width_mul: float
         @param width_mul: Width multiplier. If provided, overrides the variant value.
+        @type block: Literal["RepBlock", "CSPStackRepBlock"] | None
+        @param block: Base block used when building the backbone. If provided, overrides the variant value.
+        @type csp_e: float | None
+        @param csp_e: Factor that controls the number of intermediate channels if block="CSPStackRepBlock". If provided,
+            overrides the variant value.
         """
         super().__init__(**kwargs)
 
         var = get_variant(variant)
         depth_mul = depth_mul or var.depth_multiplier
         width_mul = width_mul or var.width_multiplier
+        block = block or var.block
+        csp_e = csp_e or var.csp_e or 0.5
 
         channels_list = channels_list or [64, 128, 256, 512, 1024]
         n_repeats = n_repeats or [1, 6, 12, 18, 6]
@@ -85,11 +96,20 @@ def __init__(
                     kernel_size=3,
                     stride=2,
                 ),
-                BlockRepeater(
-                    block=RepVGGBlock,
-                    in_channels=channels_list[i + 1],
-                    out_channels=channels_list[i + 1],
-                    n_blocks=n_repeats[i + 1],
+                (
+                    BlockRepeater(
+                        block=RepVGGBlock,
+                        in_channels=channels_list[i + 1],
+                        out_channels=channels_list[i + 1],
+                        n_blocks=n_repeats[i + 1],
+                    )
+                    if block == "RepBlock"
+                    else CSPStackRepBlock(
+                        in_channels=channels_list[i + 1],
+                        out_channels=channels_list[i + 1],
+                        n_blocks=n_repeats[i + 1],
+                        e=csp_e,
+                    )
                 ),
             )
             self.blocks.append(curr_block)

diff --git a/luxonis_train/nodes/backbones/efficientrep/variants.py b/luxonis_train/nodes/backbones/efficientrep/variants.py
@@ -10,25 +10,35 @@
 class EfficientRepVariant(BaseModel):
     depth_multiplier: float
     width_multiplier: float
+    block: Literal["RepBlock", "CSPStackRepBlock"]
+    csp_e: float | None
 
 
 def get_variant(variant: VariantLiteral) -> EfficientRepVariant:
     variants = {
         "n": EfficientRepVariant(
             depth_multiplier=0.33,
             width_multiplier=0.25,
+            block="RepBlock",
+            csp_e=None,
         ),
         "s": EfficientRepVariant(
             depth_multiplier=0.33,
             width_multiplier=0.50,
+            block="RepBlock",
+            csp_e=None,
         ),
         "m": EfficientRepVariant(
             depth_multiplier=0.60,
             width_multiplier=0.75,
+            block="CSPStackRepBlock",
+            csp_e=2 / 3,
         ),
         "l": EfficientRepVariant(
             depth_multiplier=1.0,
             width_multiplier=1.0,
+            block="CSPStackRepBlock",
+            csp_e=1 / 2,
         ),
     }
     variants["nano"] = variants["n"]

diff --git a/luxonis_train/nodes/blocks/__init__.py b/luxonis_train/nodes/blocks/__init__.py
@@ -4,15 +4,14 @@
     BlockRepeater,
     Bottleneck,
     ConvModule,
+    CSPStackRepBlock,
     DropPath,
     EfficientDecoupledBlock,
     FeatureFusionBlock,
     KeypointBlock,
     LearnableAdd,
     LearnableMulAddConv,
     LearnableMultiply,
-    RepDownBlock,
-    RepUpBlock,
     RepVGGBlock,
     SpatialPyramidPoolingBlock,
     SqueezeExciteBlock,
@@ -26,18 +25,17 @@
     "EfficientDecoupledBlock",
     "ConvModule",
     "UpBlock",
-    "RepDownBlock",
     "SqueezeExciteBlock",
     "RepVGGBlock",
     "BlockRepeater",
+    "CSPStackRepBlock",
     "AttentionRefinmentBlock",
     "SpatialPyramidPoolingBlock",
     "FeatureFusionBlock",
     "LearnableAdd",
     "LearnableMultiply",
     "LearnableMulAddConv",
     "KeypointBlock",
-    "RepUpBlock",
     "BasicResNetBlock",
     "Bottleneck",
     "UpscaleOnline",