Update RVC2 and RVC4 benchmark scripts to work with the dai Benchmark Nodes #64

Merged · 31 commits · Jan 31, 2025

Changes from 1 commit

Commits
c424740
Update benchmark scirpt for RVC2 using daiv3
ptoupas Jan 8, 2025
6cb5a2c
Add dai based benchmark execution for RVC4 device
ptoupas Jan 9, 2025
c4b1a5d
Ignore latency measurements on dai based benchmark reports
ptoupas Jan 9, 2025
e97a453
Update is_hubai_available to work with hubAI API calls
ptoupas Jan 10, 2025
7295d96
Update is_hubai_available to work with various teams from HubAI
ptoupas Jan 10, 2025
82a7044
Remove removeprefix to work with python version 3.8 [skip ci]
ptoupas Jan 10, 2025
a34b9ed
Fix test_modifier test error with EfficientVIT model and change the A…
ptoupas Jan 10, 2025
44a097b
Update .pre-commit-config.yaml
ptoupas Jan 10, 2025
7d4d223
Fix model path and HubAI model slug parsing [ci skip]
ptoupas Jan 13, 2025
57b8982
Add HUBAI_API_KEY to getModelFromZoo calls [ci skip]
ptoupas Jan 13, 2025
d6e5da1
Update Benchmarking Section of README file [ci skip]
ptoupas Jan 13, 2025
4d3bc5b
Update .pre-commit-config.yaml [ci skip]
ptoupas Jan 13, 2025
e8bc974
Fix dlc parsing on Benchmark __init__
ptoupas Jan 14, 2025
e2a7ed7
Update the way modify_onnx optimisation runs are conducted in the ONN…
ptoupas Jan 14, 2025
cd2b088
Fix SNPE benchmark on RVC4 and added support for benchmark over model…
ptoupas Jan 14, 2025
addc5f1
Updated ONNX version (#56)
kozlov721 Jan 15, 2025
f0149cd
Update the RVC4 benchmark to take into account the data type for each…
ptoupas Jan 16, 2025
2753987
Merge remote-tracking branch 'origin' into fix/update-benchmarks-scri…
ptoupas Jan 16, 2025
8dfdb84
Update .pre-commit-config.yaml [ci skip]
ptoupas Jan 16, 2025
b58782c
Fix issue when extracting the model from NNArchive in snpe benchmark …
ptoupas Jan 27, 2025
9b2a602
Add bool tensor type during evaluation of onnx models on ONNXModifier…
ptoupas Jan 27, 2025
e081181
Add a try except block on onnx optimisation and validation.
ptoupas Jan 27, 2025
9cd7158
Merge remote-tracking branch 'origin' into fix/update-benchmarks-scri…
ptoupas Jan 28, 2025
565ae6e
add disable_onnx_optimisation flag on the example defaults.yaml file
ptoupas Jan 28, 2025
d37ec5e
Update dai requirement to version 3.0.0a12 [ci skip]
ptoupas Jan 29, 2025
0548541
Add botocore requirement
ptoupas Jan 29, 2025
c2f91f2
Remove the extra-index-url from the requirements-bench.txt file
ptoupas Jan 29, 2025
8fd09f1
Update the README file regarding the depthai v3 installation.
ptoupas Jan 29, 2025
d409c6c
Update .pre-commit-config.yaml [ci skip]
ptoupas Jan 29, 2025
6f3e950
Update README.md [ci skip]
ptoupas Jan 29, 2025
1d08ae7
Merge branch 'main' into fix/update-benchmarks-scripts-with-daiv3
ptoupas Jan 31, 2025
Update benchmark scirpt for RVC2 using daiv3
ptoupas committed Jan 8, 2025

Verified: signed with the committer’s verified signature (Petros Toupas).
commit c424740b0f921b67fd8e28c020fe122d7780d9f9
2 changes: 2 additions & 0 deletions .pre-commit-config.yaml
@@ -1,3 +1,5 @@
+default_language_version:
+  python: python3
 repos:
   - repo: https://github.com/astral-sh/ruff-pre-commit
     rev: v0.1.2
4 changes: 3 additions & 1 deletion modelconverter/__main__.py
@@ -175,10 +175,12 @@ def benchmark(

 **RVC2**

-- `--repetitions`: The number of repetitions to perform. Default: `1`
+- `--repetitions`: The number of repetitions to perform. Default: `10`

 - `--num-threads`: The number of threads to use for inference. Default: `2`

+- `--num-messages`: The number of messages to measure for each report. Default: `50`
+
 ---

 **RVC3**
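Taken together, the RVC2 options in the hunk above would be supplied on the command line roughly as follows. This is a sketch only: the three flag names come from the docstring, while the `modelconverter benchmark rvc2` subcommand shape and the `<model>` argument are assumptions not verified against the repository.

```bash
# Hypothetical invocation; only the flag names are taken from the docstring above.
modelconverter benchmark rvc2 <model> --repetitions 10 --num-threads 2 --num-messages 50
```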
11 changes: 8 additions & 3 deletions modelconverter/packages/base_benchmark.py
@@ -7,7 +7,7 @@
 import pandas as pd
 from typing_extensions import TypeAlias

-from modelconverter.utils import resolve_path
+from modelconverter.utils import is_hubai_available, resolve_path

 logger = getLogger(__name__)

@@ -28,9 +28,14 @@ def __init__(
         model_path: str,
         dataset_path: Optional[Path] = None,
     ):
-        self.model_path = resolve_path(model_path, Path.cwd())
+        if not is_hubai_available(model_path):
+            self.model_path = resolve_path(model_path, Path.cwd())
+            self.model_name = self.model_path.stem
+        else:
+            self.model_path = model_path
+            self.model_name = self.model_path.split("/", 1)[-1]
         self.dataset_path = dataset_path
-        self.model_name = self.model_path.stem

         self.header = [
             *self.default_configuration.keys(),
             "fps",
216 changes: 87 additions & 129 deletions modelconverter/packages/rvc2/benchmark.py
@@ -1,11 +1,9 @@
import logging
import time
from pathlib import Path
from typing import Dict, List, cast
from typing import List

import depthai as dai
import numpy as np
from depthai import NNData
from rich.progress import Progress

from ..base_benchmark import Benchmark, BenchmarkResult, Configuration
@@ -20,150 +18,110 @@ def default_configuration(self) -> Configuration:
repetitions: The number of repetitions to perform.
num_threads: The number of threads to use for inference.
"""
return {"repetitions": 1, "num_threads": 2}
return {"repetitions": 10, "num_messages": 50, "num_threads": 2}

@property
def all_configurations(self) -> List[Configuration]:
return [
{"repetitions": 5, "num_threads": 1},
{"repetitions": 5, "num_threads": 2},
{"repetitions": 5, "num_threads": 3},
{"repetitions": 10, "num_messages": 50, "num_threads": 1},
{"repetitions": 10, "num_messages": 50, "num_threads": 2},
{"repetitions": 10, "num_messages": 50, "num_threads": 3},
]

def benchmark(self, configuration: Configuration) -> BenchmarkResult:
return self._benchmark(self.model_path, **configuration)

@staticmethod
def _benchmark(
model_path: Path, repetitions: int, num_threads: int
model_path: Path | str,
repetitions: int,
num_messages: int,
num_threads: int,
) -> BenchmarkResult:
model = dai.OpenVINO.Blob(model_path)
input_name_shape: Dict[str, List[int]] = {}
input_name_type = {}
for i in list(model.networkInputs):
input_name_shape[i] = model.networkInputs[i].dims
input_name_type[i] = model.networkInputs[i].dataType.name

output_name_shape = {}
output_name_type = {}
for i in list(model.networkOutputs):
output_name_shape[i] = model.networkOutputs[i].dims
output_name_type[i] = model.networkOutputs[i].dataType.name

pipeline = dai.Pipeline()

detection_nn = pipeline.createNeuralNetwork()
detection_nn.setBlobPath(model_path)
detection_nn.setNumInferenceThreads(num_threads)
detection_nn.input.setBlocking(True)
detection_nn.input.setQueueSize(1)

nn_in = pipeline.createXLinkIn()
nn_in.setMaxDataSize(6291456)
nn_in.setStreamName("in_nn")
nn_in.out.link(detection_nn.input)

xout_nn = pipeline.createXLinkOut()
xout_nn.setStreamName("nn")
xout_nn.input.setQueueSize(1)
xout_nn.input.setBlocking(True)
detection_nn.out.link(xout_nn.input)

xlink_buffer_max_size = 5 * 1024 * 1024
product_sum = sum(
map(lambda x: np.product(np.array(x)), output_name_shape.values())
)

xlink_buffer_count = int(xlink_buffer_max_size / product_sum)

logger.info(f"XLink buffer count: {xlink_buffer_count}")
if xlink_buffer_count > 1000:
logger.warning(
"XLink buffer count is too high! "
"The benchmarking will take more time and "
"the results may be overestimated."
device = dai.Device()

if isinstance(model_path, str):
modelPath = dai.getModelFromZoo(
dai.NNModelDescription(
model_path,
platform=device.getPlatformAsString(),
)
)
elif str(model_path).endswith(".tar.xz"):
modelPath = str(model_path)
elif str(model_path).endswith(".blob"):
modelPath = model_path
else:
raise ValueError(
"Unsupported model format. Supported formats: .tar.xz, .blob, or HubAI model slug."
)

with dai.Device(pipeline) as device, Progress() as progress:
device = cast(dai.Device, device)
detection_in_count = 100 + xlink_buffer_count
detection_in = device.getInputQueue(
"in_nn", maxSize=detection_in_count, blocking=True
inputSizes = []
inputNames = []
if isinstance(model_path, str) or str(model_path).endswith(".tar.xz"):
modelArhive = dai.NNArchive(modelPath)
for input in modelArhive.getConfig().model.inputs:
inputSizes.append(input.shape[::-1])
inputNames.append(input.name)
elif str(model_path).endswith(".blob"):
blob_model = dai.OpenVINO.Blob(modelPath)
for input in blob_model.networkInputs:
inputSizes.append(blob_model.networkInputs[input].dims)
inputNames.append(input)

inputData = dai.NNData()
for name, inputSize in zip(inputNames, inputSizes):
img = np.random.randint(
0, 255, (inputSize[1], inputSize[0], 3), np.uint8
)
q_nn = device.getOutputQueue(name="nn", maxSize=1, blocking=True)
inputData.addTensor(name, img)

fps_storage = []
diffs = []
time.sleep(1)
with dai.Pipeline(device) as pipeline, Progress() as progress:
repet_task = progress.add_task(
"[magenta]Repetition", total=repetitions
)
infer_task = progress.add_task(
"[magenta]Inference", total=300 + 2 * xlink_buffer_count
)
for _ in range(repetitions):
progress.reset(infer_task, total=300 + 2 * xlink_buffer_count)
for _ in range(100 + xlink_buffer_count):
nn_data = dai.NNData()
for inp_name in input_name_shape:
if input_name_type[inp_name] in ["FLOAT16", "FLOAT32"]:
frame = cast(
np.ndarray,
np.random.rand(*input_name_shape[inp_name]),
)
frame = frame.astype(
"float16"
if input_name_type[inp_name] == "FLOAT16"
else "float32"
)
elif input_name_type[inp_name] in ["INT", "I8", "U8F"]:
frame = np.random.randint(
256,
size=input_name_shape[inp_name],
dtype=(
np.int32
if input_name_type[inp_name] == "INT"
else (
np.uint8
if input_name_type[inp_name] == "U8F"
else np.int8
)
),
)
else:
raise RuntimeError(
f"Unknown input type detected: {input_name_type[inp_name]}!"
)

nn_data.setLayer(inp_name, frame)

if len(input_name_shape) == 0:
raise RuntimeError(
"Failed to create input data: missing required information for one or more input layers."
)
detection_in.send(nn_data)
progress.update(infer_task, advance=1)

for _ in range(100):
progress.update(infer_task, advance=1)
time.sleep(3 / 100)

for _ in range(40 + xlink_buffer_count):
cast(NNData, q_nn.get()).getFirstLayerFp16()
progress.update(infer_task, advance=1)

start = time.time()
for _ in range(50):
cast(NNData, q_nn.get()).getFirstLayerFp16()
progress.update(infer_task, advance=1)
diff = time.time() - start
diffs.append(diff / 50)
fps_storage.append(50 / diff)

for _ in range(10):
cast(NNData, q_nn.get()).getFirstLayerFp16()
progress.update(infer_task, advance=1)

benchmarkOut = pipeline.create(dai.node.BenchmarkOut)
benchmarkOut.setRunOnHost(False)
benchmarkOut.setFps(-1)

neuralNetwork = pipeline.create(dai.node.NeuralNetwork)
if isinstance(model_path, str) or str(model_path).endswith(
".tar.xz"
):
neuralNetwork.setNNArchive(modelArhive)
elif str(model_path).endswith(".blob"):
neuralNetwork.setBlobPath(modelPath)
neuralNetwork.setNumInferenceThreads(num_threads)

benchmarkIn = pipeline.create(dai.node.BenchmarkIn)
benchmarkIn.setRunOnHost(False)
benchmarkIn.sendReportEveryNMessages(num_messages)
benchmarkIn.logReportsAsWarnings(False)

benchmarkOut.out.link(neuralNetwork.input)
neuralNetwork.out.link(benchmarkIn.input)

outputQueue = benchmarkIn.report.createOutputQueue()
inputQueue = benchmarkOut.input.createInputQueue()

pipeline.start()
inputQueue.send(inputData)

rep = 0
fps_list = []
avg_latency_list = []
while pipeline.isRunning() and rep < repetitions:
benchmarkReport = outputQueue.get()
assert isinstance(benchmarkReport, dai.BenchmarkReport)
fps = benchmarkReport.fps
avg_latency = benchmarkReport.averageLatency

fps_list.append(fps)
avg_latency_list.append(avg_latency)
progress.update(repet_task, advance=1)
rep += 1

diffs = np.array(diffs) * 1000
return BenchmarkResult(np.mean(fps_storage), np.mean(diffs))
return BenchmarkResult(
np.mean(fps_list), np.mean(avg_latency_list)
)
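Stripped of the model-format handling, the new measurement path reduces to the sketch below. All DepthAI v3 calls are the ones used in the diff above; the archive filename, the input tensor name, and the input shape are hypothetical placeholders, and the loop simply collects ten reports instead of the configurable `repetitions`.

```python
import depthai as dai
import numpy as np

device = dai.Device()
# Hypothetical NNArchive path; the diff also accepts .blob files and HubAI slugs.
archive = dai.NNArchive("model.rvc2.tar.xz")

with dai.Pipeline(device) as pipeline:
    benchmark_out = pipeline.create(dai.node.BenchmarkOut)
    benchmark_out.setRunOnHost(False)   # run on the device, as in the diff
    benchmark_out.setFps(-1)            # -1 as in the diff (no artificial FPS cap assumed)

    nn = pipeline.create(dai.node.NeuralNetwork)
    nn.setNNArchive(archive)
    nn.setNumInferenceThreads(2)

    benchmark_in = pipeline.create(dai.node.BenchmarkIn)
    benchmark_in.setRunOnHost(False)
    benchmark_in.sendReportEveryNMessages(50)
    benchmark_in.logReportsAsWarnings(False)

    benchmark_out.out.link(nn.input)
    nn.out.link(benchmark_in.input)

    report_queue = benchmark_in.report.createOutputQueue()
    input_queue = benchmark_out.input.createInputQueue()

    pipeline.start()

    # One random frame is enough; BenchmarkOut keeps replaying it to the network.
    frame = dai.NNData()
    frame.addTensor("input", np.random.randint(0, 255, (288, 512, 3), np.uint8))
    input_queue.send(frame)

    reports = []
    while pipeline.isRunning() and len(reports) < 10:
        report = report_queue.get()
        assert isinstance(report, dai.BenchmarkReport)
        reports.append((report.fps, report.averageLatency))
```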
2 changes: 2 additions & 0 deletions modelconverter/utils/__init__.py
@@ -19,6 +19,7 @@
     resolve_path,
     upload_file_to_remote,
 )
+from .hubai_utils import is_hubai_available
 from .image import read_calib_dir, read_image
 from .layout import guess_new_layout, make_default_layout
 from .metadata import Metadata, get_metadata
@@ -45,6 +46,7 @@
"subprocess_run",
"download_from_remote",
"upload_file_to_remote",
"is_hubai_available",
"get_protocol",
"process_nn_archive",
"modelconverter_config_to_nn",
36 changes: 36 additions & 0 deletions modelconverter/utils/hubai_utils.py
@@ -0,0 +1,36 @@
import requests


def is_hubai_available(model_slug: str) -> bool:
url = "https://easyml.cloud.luxonis.com/models/api/v1/models?is_public=true&limit=1000"
response = requests.get(url)
if response.status_code != 200:
raise ValueError(
f"Failed to get models. Status code: {response.status_code}"
)
hub_ai_models = response.json()
for model in hub_ai_models:
slug = f"{model['team_slug']}/{model['slug']}"
if (
slug in model_slug
or slug.removeprefix(f"{model['team_slug']}/") in model_slug
):
model_id = model["id"]

url = f"https://easyml.cloud.luxonis.com/models/api/v1/modelVersions?model_id={model_id}&is_public=true"
response = requests.get(url)
if response.status_code != 200:
raise ValueError(
f"Failed to get model versions. Status code: {response.status_code}"
)
model_versions = response.json()
for version in model_versions:
if (
f"{slug}:{version['variant_slug']}" == model_slug
or f"{slug}:{version['variant_slug']}".removeprefix(
f"{model['team_slug']}/"
)
== model_slug
):
return True
return False
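For reference, a small, hypothetical usage sketch of the helper as `Benchmark.__init__` consumes it; the slugs below are illustrative only, and whether they return True depends on the public HubAI catalogue at call time.

```python
from modelconverter.utils import is_hubai_available

# Both fully qualified "team/model:variant" slugs and team-less
# "model:variant" slugs are matched by the helper above.
for candidate in (
    "luxonis/yolov6-nano:coco-416x416",  # hypothetical full slug
    "yolov6-nano:coco-416x416",          # hypothetical team-less slug
    "out/model.blob",                    # not a slug -> falls back to local-path handling
):
    print(candidate, "->", is_hubai_available(candidate))
```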