Update RVC2 and RVC4 benchmark scripts to work with the dai Benchmark Nodes #64

Merged · 31 commits · Jan 31, 2025

Changes from 1 commit

Commits
c424740
Update benchmark scirpt for RVC2 using daiv3
ptoupas Jan 8, 2025
6cb5a2c
Add dai based benchmark execution for RVC4 device
ptoupas Jan 9, 2025
c4b1a5d
Ignore latency measurements on dai based benchmark reports
ptoupas Jan 9, 2025
e97a453
Update is_hubai_available to work with hubAI API calls
ptoupas Jan 10, 2025
7295d96
Update is_hubai_available to work with various teams from HubAI
ptoupas Jan 10, 2025
82a7044
Remove removeprefix to work with python version 3.8 [skip ci]
ptoupas Jan 10, 2025
a34b9ed
Fix test_modifier test error with EfficientVIT model and change the A…
ptoupas Jan 10, 2025
44a097b
Update .pre-commit-config.yaml
ptoupas Jan 10, 2025
7d4d223
Fix model path and HubAI model slug parsing [ci skip]
ptoupas Jan 13, 2025
57b8982
Add HUBAI_API_KEY to getModelFromZoo calls [ci skip]
ptoupas Jan 13, 2025
d6e5da1
Update Benchmarking Section of README file [ci skip]
ptoupas Jan 13, 2025
4d3bc5b
Update .pre-commit-config.yaml [ci skip]
ptoupas Jan 13, 2025
e8bc974
Fix dlc parsing on Benchmark __init__
ptoupas Jan 14, 2025
e2a7ed7
Update the way modify_onnx optimisation runs are conducted in the ONN…
ptoupas Jan 14, 2025
cd2b088
Fix SNPE benchmark on RVC4 and added support for benchmark over model…
ptoupas Jan 14, 2025
addc5f1
Updated ONNX version (#56)
kozlov721 Jan 15, 2025
f0149cd
Update the RVC4 benchmark to take into account the data type for each…
ptoupas Jan 16, 2025
2753987
Merge remote-tracking branch 'origin' into fix/update-benchmarks-scri…
ptoupas Jan 16, 2025
8dfdb84
Update .pre-commit-config.yaml [ci skip]
ptoupas Jan 16, 2025
b58782c
Fix issue when extracting the model from NNArchive in snpe benchmark …
ptoupas Jan 27, 2025
9b2a602
Add bool tensor type during evaluation of onnx models on ONNXModifier…
ptoupas Jan 27, 2025
e081181
Add a try except block on onnx optimisation and validation.
ptoupas Jan 27, 2025
9cd7158
Merge remote-tracking branch 'origin' into fix/update-benchmarks-scri…
ptoupas Jan 28, 2025
565ae6e
add disable_onnx_optimisation flag on the example defaults.yaml file
ptoupas Jan 28, 2025
d37ec5e
Update dai requirement to version 3.0.0a12 [ci skip]
ptoupas Jan 29, 2025
0548541
Add botocore requirement
ptoupas Jan 29, 2025
c2f91f2
Remove the extra-index-url from the requirements-bench.txt file
ptoupas Jan 29, 2025
8fd09f1
Update the README file regarding the depthai v3 installation.
ptoupas Jan 29, 2025
d409c6c
Update .pre-commit-config.yaml [ci skip]
ptoupas Jan 29, 2025
6f3e950
Update README.md [ci skip]
ptoupas Jan 29, 2025
1d08ae7
Merge branch 'main' into fix/update-benchmarks-scripts-with-daiv3
ptoupas Jan 31, 2025
Update benchmark scirpt for RVC2 using daiv3
ptoupas committed Jan 8, 2025

Verified: signed with the committer’s verified signature (Petros Toupas).
commit c424740b0f921b67fd8e28c020fe122d7780d9f9
2 changes: 2 additions & 0 deletions .pre-commit-config.yaml
@@ -1,3 +1,5 @@
+default_language_version:
+  python: python3
 repos:
   - repo: https://github.com/astral-sh/ruff-pre-commit
     rev: v0.1.2
4 changes: 3 additions & 1 deletion modelconverter/__main__.py
@@ -175,10 +175,12 @@ def benchmark(

 **RVC2**

-- `--repetitions`: The number of repetitions to perform. Default: `1`
+- `--repetitions`: The number of repetitions to perform. Default: `10`

 - `--num-threads`: The number of threads to use for inference. Default: `2`

+- `--num-messages`: The number of messages to measure for each report. Default: `50`
+
 ---

 **RVC3**
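Taken together, the RVC2 options in the hunk above would be supplied on the command line roughly as follows. This is a sketch only: the three flag names come from the docstring, while the `modelconverter benchmark rvc2` subcommand shape and the `<model>` argument are assumptions not verified against the repository.

```bash
# Hypothetical invocation; only the flag names are taken from the docstring above.
modelconverter benchmark rvc2 <model> --repetitions 10 --num-threads 2 --num-messages 50
```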
11 changes: 8 additions & 3 deletions modelconverter/packages/base_benchmark.py
@@ -7,7 +7,7 @@
 import pandas as pd
 from typing_extensions import TypeAlias

-from modelconverter.utils import resolve_path
+from modelconverter.utils import is_hubai_available, resolve_path

 logger = getLogger(__name__)

@@ -28,9 +28,14 @@ def __init__(
         model_path: str,
         dataset_path: Optional[Path] = None,
     ):
-        self.model_path = resolve_path(model_path, Path.cwd())
+        if not is_hubai_available(model_path):
+            self.model_path = resolve_path(model_path, Path.cwd())
+            self.model_name = self.model_path.stem
+        else:
+            self.model_path = model_path
+            self.model_name = self.model_path.split("/", 1)[-1]
         self.dataset_path = dataset_path
-        self.model_name = self.model_path.stem

         self.header = [
             *self.default_configuration.keys(),
             "fps",
216 changes: 87 additions & 129 deletions modelconverter/packages/rvc2/benchmark.py
@@ -1,11 +1,9 @@
import logging
import time
from pathlib import Path
from typing import Dict, List, cast
from typing import List

import depthai as dai
import numpy as np
from depthai import NNData
from rich.progress import Progress

from ..base_benchmark import Benchmark, BenchmarkResult, Configuration
@@ -20,150 +18,110 @@ def default_configuration(self) -> Configuration:
repetitions: The number of repetitions to perform.
num_threads: The number of threads to use for inference.
"""
return {"repetitions": 1, "num_threads": 2}
return {"repetitions": 10, "num_messages": 50, "num_threads": 2}

@property
def all_configurations(self) -> List[Configuration]:
return [
{"repetitions": 5, "num_threads": 1},
{"repetitions": 5, "num_threads": 2},
{"repetitions": 5, "num_threads": 3},
{"repetitions": 10, "num_messages": 50, "num_threads": 1},
{"repetitions": 10, "num_messages": 50, "num_threads": 2},
{"repetitions": 10, "num_messages": 50, "num_threads": 3},
]

def benchmark(self, configuration: Configuration) -> BenchmarkResult:
return self._benchmark(self.model_path, **configuration)

@staticmethod
def _benchmark(
model_path: Path, repetitions: int, num_threads: int
model_path: Path | str,
repetitions: int,
num_messages: int,
num_threads: int,
) -> BenchmarkResult:
model = dai.OpenVINO.Blob(model_path)
input_name_shape: Dict[str, List[int]] = {}
input_name_type = {}
for i in list(model.networkInputs):
input_name_shape[i] = model.networkInputs[i].dims
input_name_type[i] = model.networkInputs[i].dataType.name

output_name_shape = {}
output_name_type = {}
for i in list(model.networkOutputs):
output_name_shape[i] = model.networkOutputs[i].dims
output_name_type[i] = model.networkOutputs[i].dataType.name

pipeline = dai.Pipeline()

detection_nn = pipeline.createNeuralNetwork()
detection_nn.setBlobPath(model_path)
detection_nn.setNumInferenceThreads(num_threads)
detection_nn.input.setBlocking(True)
detection_nn.input.setQueueSize(1)

nn_in = pipeline.createXLinkIn()
nn_in.setMaxDataSize(6291456)
nn_in.setStreamName("in_nn")
nn_in.out.link(detection_nn.input)

xout_nn = pipeline.createXLinkOut()
xout_nn.setStreamName("nn")
xout_nn.input.setQueueSize(1)
xout_nn.input.setBlocking(True)
detection_nn.out.link(xout_nn.input)

xlink_buffer_max_size = 5 * 1024 * 1024
product_sum = sum(
map(lambda x: np.product(np.array(x)), output_name_shape.values())
)

xlink_buffer_count = int(xlink_buffer_max_size / product_sum)

logger.info(f"XLink buffer count: {xlink_buffer_count}")
if xlink_buffer_count > 1000:
logger.warning(
"XLink buffer count is too high! "
"The benchmarking will take more time and "
"the results may be overestimated."
device = dai.Device()

if isinstance(model_path, str):
modelPath = dai.getModelFromZoo(
dai.NNModelDescription(
model_path,
platform=device.getPlatformAsString(),
)
)
elif str(model_path).endswith(".tar.xz"):
modelPath = str(model_path)
elif str(model_path).endswith(".blob"):
modelPath = model_path
else:
raise ValueError(
"Unsupported model format. Supported formats: .tar.xz, .blob, or HubAI model slug."
)

with dai.Device(pipeline) as device, Progress() as progress:
device = cast(dai.Device, device)
detection_in_count = 100 + xlink_buffer_count
detection_in = device.getInputQueue(
"in_nn", maxSize=detection_in_count, blocking=True
inputSizes = []
inputNames = []
if isinstance(model_path, str) or str(model_path).endswith(".tar.xz"):
modelArhive = dai.NNArchive(modelPath)
for input in modelArhive.getConfig().model.inputs:
inputSizes.append(input.shape[::-1])
inputNames.append(input.name)
elif str(model_path).endswith(".blob"):
blob_model = dai.OpenVINO.Blob(modelPath)
for input in blob_model.networkInputs:
inputSizes.append(blob_model.networkInputs[input].dims)
inputNames.append(input)

inputData = dai.NNData()
for name, inputSize in zip(inputNames, inputSizes):
img = np.random.randint(
0, 255, (inputSize[1], inputSize[0], 3), np.uint8
)
q_nn = device.getOutputQueue(name="nn", maxSize=1, blocking=True)
inputData.addTensor(name, img)

fps_storage = []
diffs = []
time.sleep(1)
with dai.Pipeline(device) as pipeline, Progress() as progress:
repet_task = progress.add_task(
"[magenta]Repetition", total=repetitions
)
infer_task = progress.add_task(
"[magenta]Inference", total=300 + 2 * xlink_buffer_count
)
for _ in range(repetitions):
progress.reset(infer_task, total=300 + 2 * xlink_buffer_count)
for _ in range(100 + xlink_buffer_count):
nn_data = dai.NNData()
for inp_name in input_name_shape:
if input_name_type[inp_name] in ["FLOAT16", "FLOAT32"]:
frame = cast(
np.ndarray,
np.random.rand(*input_name_shape[inp_name]),
)
frame = frame.astype(
"float16"
if input_name_type[inp_name] == "FLOAT16"
else "float32"
)
elif input_name_type[inp_name] in ["INT", "I8", "U8F"]:
frame = np.random.randint(
256,
size=input_name_shape[inp_name],
dtype=(
np.int32
if input_name_type[inp_name] == "INT"
else (
np.uint8
if input_name_type[inp_name] == "U8F"
else np.int8
)
),
)
else:
raise RuntimeError(
f"Unknown input type detected: {input_name_type[inp_name]}!"
)

nn_data.setLayer(inp_name, frame)

if len(input_name_shape) == 0:
raise RuntimeError(
"Failed to create input data: missing required information for one or more input layers."
)
detection_in.send(nn_data)
progress.update(infer_task, advance=1)

for _ in range(100):
progress.update(infer_task, advance=1)
time.sleep(3 / 100)

for _ in range(40 + xlink_buffer_count):
cast(NNData, q_nn.get()).getFirstLayerFp16()
progress.update(infer_task, advance=1)

start = time.time()
for _ in range(50):
cast(NNData, q_nn.get()).getFirstLayerFp16()
progress.update(infer_task, advance=1)
diff = time.time() - start
diffs.append(diff / 50)
fps_storage.append(50 / diff)

for _ in range(10):
cast(NNData, q_nn.get()).getFirstLayerFp16()
progress.update(infer_task, advance=1)

benchmarkOut = pipeline.create(dai.node.BenchmarkOut)
benchmarkOut.setRunOnHost(False)
benchmarkOut.setFps(-1)

neuralNetwork = pipeline.create(dai.node.NeuralNetwork)
if isinstance(model_path, str) or str(model_path).endswith(
".tar.xz"
):
neuralNetwork.setNNArchive(modelArhive)
elif str(model_path).endswith(".blob"):
neuralNetwork.setBlobPath(modelPath)
neuralNetwork.setNumInferenceThreads(num_threads)

benchmarkIn = pipeline.create(dai.node.BenchmarkIn)
benchmarkIn.setRunOnHost(False)
benchmarkIn.sendReportEveryNMessages(num_messages)
benchmarkIn.logReportsAsWarnings(False)

benchmarkOut.out.link(neuralNetwork.input)
neuralNetwork.out.link(benchmarkIn.input)

outputQueue = benchmarkIn.report.createOutputQueue()
inputQueue = benchmarkOut.input.createInputQueue()

pipeline.start()
inputQueue.send(inputData)

rep = 0
fps_list = []
avg_latency_list = []
while pipeline.isRunning() and rep < repetitions:
benchmarkReport = outputQueue.get()
assert isinstance(benchmarkReport, dai.BenchmarkReport)
fps = benchmarkReport.fps
avg_latency = benchmarkReport.averageLatency

fps_list.append(fps)
avg_latency_list.append(avg_latency)
progress.update(repet_task, advance=1)
rep += 1

diffs = np.array(diffs) * 1000
return BenchmarkResult(np.mean(fps_storage), np.mean(diffs))
return BenchmarkResult(
np.mean(fps_list), np.mean(avg_latency_list)
)
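Stripped of the model-format handling, the new measurement path reduces to the sketch below. All DepthAI v3 calls are the ones used in the diff above; the archive filename, the input tensor name, and the input shape are hypothetical placeholders, and the loop simply collects ten reports instead of the configurable `repetitions`.

```python
import depthai as dai
import numpy as np

device = dai.Device()
# Hypothetical NNArchive path; the diff also accepts .blob files and HubAI slugs.
archive = dai.NNArchive("model.rvc2.tar.xz")

with dai.Pipeline(device) as pipeline:
    benchmark_out = pipeline.create(dai.node.BenchmarkOut)
    benchmark_out.setRunOnHost(False)   # run on the device, as in the diff
    benchmark_out.setFps(-1)            # -1 as in the diff (no artificial FPS cap assumed)

    nn = pipeline.create(dai.node.NeuralNetwork)
    nn.setNNArchive(archive)
    nn.setNumInferenceThreads(2)

    benchmark_in = pipeline.create(dai.node.BenchmarkIn)
    benchmark_in.setRunOnHost(False)
    benchmark_in.sendReportEveryNMessages(50)
    benchmark_in.logReportsAsWarnings(False)

    benchmark_out.out.link(nn.input)
    nn.out.link(benchmark_in.input)

    report_queue = benchmark_in.report.createOutputQueue()
    input_queue = benchmark_out.input.createInputQueue()

    pipeline.start()

    # One random frame is enough; BenchmarkOut keeps replaying it to the network.
    frame = dai.NNData()
    frame.addTensor("input", np.random.randint(0, 255, (288, 512, 3), np.uint8))
    input_queue.send(frame)

    reports = []
    while pipeline.isRunning() and len(reports) < 10:
        report = report_queue.get()
        assert isinstance(report, dai.BenchmarkReport)
        reports.append((report.fps, report.averageLatency))
```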
2 changes: 2 additions & 0 deletions modelconverter/utils/__init__.py
@@ -19,6 +19,7 @@
     resolve_path,
     upload_file_to_remote,
 )
+from .hubai_utils import is_hubai_available
 from .image import read_calib_dir, read_image
 from .layout import guess_new_layout, make_default_layout
 from .metadata import Metadata, get_metadata
@@ -45,6 +46,7 @@
"subprocess_run",
"download_from_remote",
"upload_file_to_remote",
"is_hubai_available",
"get_protocol",
"process_nn_archive",
"modelconverter_config_to_nn",
36 changes: 36 additions & 0 deletions modelconverter/utils/hubai_utils.py
@@ -0,0 +1,36 @@
import requests


def is_hubai_available(model_slug: str) -> bool:
url = "https://easyml.cloud.luxonis.com/models/api/v1/models?is_public=true&limit=1000"
response = requests.get(url)
if response.status_code != 200:
raise ValueError(
f"Failed to get models. Status code: {response.status_code}"
)
hub_ai_models = response.json()
for model in hub_ai_models:
slug = f"{model['team_slug']}/{model['slug']}"
if (
slug in model_slug
or slug.removeprefix(f"{model['team_slug']}/") in model_slug
):
model_id = model["id"]

url = f"https://easyml.cloud.luxonis.com/models/api/v1/modelVersions?model_id={model_id}&is_public=true"
response = requests.get(url)
if response.status_code != 200:
raise ValueError(
f"Failed to get model versions. Status code: {response.status_code}"
)
model_versions = response.json()
for version in model_versions:
if (
f"{slug}:{version['variant_slug']}" == model_slug
or f"{slug}:{version['variant_slug']}".removeprefix(
f"{model['team_slug']}/"
)
== model_slug
):
return True
return False
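For reference, a small, hypothetical usage sketch of the helper as `Benchmark.__init__` consumes it; the slugs below are illustrative only, and whether they return True depends on the public HubAI catalogue at call time.

```python
from modelconverter.utils import is_hubai_available

# Both fully qualified "team/model:variant" slugs and team-less
# "model:variant" slugs are matched by the helper above.
for candidate in (
    "luxonis/yolov6-nano:coco-416x416",  # hypothetical full slug
    "yolov6-nano:coco-416x416",          # hypothetical team-less slug
    "out/model.blob",                    # not a slug -> falls back to local-path handling
):
    print(candidate, "->", is_hubai_available(candidate))
```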