From 513dcf1bcf0b60fe8dde48da9ef6f78292674db7 Mon Sep 17 00:00:00 2001 From: Anh Date: Fri, 6 Sep 2024 17:06:10 +0700 Subject: [PATCH 01/18] Enable stable_diffusion tests --- flutter/integration_test/first_test.dart | 3 +- .../tflite_settings_android.pbtxt | 61 +++++++++---------- .../tflite_settings_apple_main.pbtxt | 37 ++++++----- 3 files changed, 49 insertions(+), 52 deletions(-) diff --git a/flutter/integration_test/first_test.dart b/flutter/integration_test/first_test.dart index 77c9ac3b0..23bc1a48d 100644 --- a/flutter/integration_test/first_test.dart +++ b/flutter/integration_test/first_test.dart @@ -47,8 +47,7 @@ void main() { testWidgets('check results', (WidgetTester tester) async { final extendedResult = await obtainResult(); printResults(extendedResult); - // TODO (anhappdev) uncomment when stable_diffusion is ready - // checkTaskCount(extendedResult); + checkTaskCount(extendedResult); checkTasks(extendedResult); }); diff --git a/mobile_back_tflite/cpp/backend_tflite/backend_settings/tflite_settings_android.pbtxt b/mobile_back_tflite/cpp/backend_tflite/backend_settings/tflite_settings_android.pbtxt index 2c0b67ac7..0c18e5805 100644 --- a/mobile_back_tflite/cpp/backend_tflite/backend_settings/tflite_settings_android.pbtxt +++ b/mobile_back_tflite/cpp/backend_tflite/backend_settings/tflite_settings_android.pbtxt @@ -207,34 +207,33 @@ benchmark_setting { delegate_selected: "NNAPI" } -# TODO (anhappdev) uncomment when stable_diffusion is ready -#benchmark_setting { -# benchmark_id: "stable_diffusion" -# framework: "TFLite" -# delegate_choice: { -# delegate_name: "NNAPI" -# accelerator_name: "npu" -# accelerator_desc: "NPU" -# model_file: { -# model_path: "local:///mlperf_models/stable-diffusion/decoder.tflite" -# model_checksum: "491385ad873880ba1876e1d097fcc0e3" -# } -# model_file: { -# model_path: "local:///mlperf_models/stable-diffusion/text_encoder.tflite" -# model_checksum: "8985768b09fe31b805e66b6048da9125" -# } -# model_file: { -# model_path: 
"local:///mlperf_models/stable-diffusion/first_model.tflite" -# model_checksum: "f0d6f45a2d702456a234c0a9b192816a" -# } -# model_file: { -# model_path: "local:///mlperf_models/stable-diffusion/second_model.tflite" -# model_checksum: "cea07208776347a8a5334106a09444fe" -# } -# } -# delegate_selected: "NNAPI" -# custom_setting { -# id: "pipeline" -# value: "StableDiffusionPipeline" -# } -#} +benchmark_setting { + benchmark_id: "stable_diffusion" + framework: "TFLite" + delegate_choice: { + delegate_name: "NNAPI" + accelerator_name: "npu" + accelerator_desc: "NPU" + model_file: { + model_path: "local:///mlperf_models/stable-diffusion/decoder.tflite" + model_checksum: "491385ad873880ba1876e1d097fcc0e3" + } + model_file: { + model_path: "local:///mlperf_models/stable-diffusion/text_encoder.tflite" + model_checksum: "8985768b09fe31b805e66b6048da9125" + } + model_file: { + model_path: "local:///mlperf_models/stable-diffusion/first_model.tflite" + model_checksum: "f0d6f45a2d702456a234c0a9b192816a" + } + model_file: { + model_path: "local:///mlperf_models/stable-diffusion/second_model.tflite" + model_checksum: "cea07208776347a8a5334106a09444fe" + } + } + delegate_selected: "NNAPI" + custom_setting { + id: "pipeline" + value: "StableDiffusionPipeline" + } +} diff --git a/mobile_back_tflite/cpp/backend_tflite/backend_settings/tflite_settings_apple_main.pbtxt b/mobile_back_tflite/cpp/backend_tflite/backend_settings/tflite_settings_apple_main.pbtxt index 980929761..ff3a29f07 100644 --- a/mobile_back_tflite/cpp/backend_tflite/backend_settings/tflite_settings_apple_main.pbtxt +++ b/mobile_back_tflite/cpp/backend_tflite/backend_settings/tflite_settings_apple_main.pbtxt @@ -146,22 +146,21 @@ benchmark_setting { delegate_selected: "Core ML" } -# TODO (anhappdev) uncomment when stable_diffusion is ready -#benchmark_setting { -# benchmark_id: "stable_diffusion" -# framework: "TFLite" -# delegate_choice: { -# delegate_name: "Metal" -# accelerator_name: "gpu" -# accelerator_desc: "GPU" 
-# model_file: { -# model_path: "local:///path/to/stable_diffusion.tflite" -# model_checksum: "" -# } -# } -# delegate_selected: "Metal" -# custom_setting { -# id: "pipeline" -# value: "StableDiffusionPipeline" -# } -#} +benchmark_setting { + benchmark_id: "stable_diffusion" + framework: "TFLite" + delegate_choice: { + delegate_name: "Metal" + accelerator_name: "gpu" + accelerator_desc: "GPU" + model_file: { + model_path: "local:///path/to/stable_diffusion.tflite" + model_checksum: "" + } + } + delegate_selected: "Metal" + custom_setting { + id: "pipeline" + value: "StableDiffusionPipeline" + } +} From 723d323d67a9c73500c672a3a6d8c79a39417337 Mon Sep 17 00:00:00 2001 From: Anh Date: Wed, 18 Sep 2024 06:44:28 +0700 Subject: [PATCH 02/18] Add a caption_id to coco_gen dataset (#918) --- flutter/assets/tasks.pbtxt | 4 ++-- flutter/cpp/datasets/coco_gen.cc | 21 +++++++++++-------- flutter/cpp/datasets/coco_gen.h | 3 ++- .../coco_gen_utils/CLIP_Model_to_TFLite.ipynb | 2 +- .../coco_gen_utils/generate_tfrecords.py | 5 ++++- flutter/cpp/datasets/coco_gen_utils/types.h | 16 ++++++++++---- mobile_back_apple/dev-utils/Makefile | 4 ++-- 7 files changed, 35 insertions(+), 20 deletions(-) diff --git a/flutter/assets/tasks.pbtxt b/flutter/assets/tasks.pbtxt index d9c05491a..dadff26a7 100644 --- a/flutter/assets/tasks.pbtxt +++ b/flutter/assets/tasks.pbtxt @@ -229,12 +229,12 @@ task { type: COCOGEN full { name: "COCO validation set for Stable Diffusion" - input_path: "https://github.com/anhappdev/tmp/releases/download/5/coco_gen_test.tfrecord" + input_path: "https://github.com/anhappdev/tmp/releases/download/6/coco_gen_test.tfrecord" groundtruth_path: "local:///mlperf_models/stable-diffusion/clip_model_512x512.tflite" } lite { name: "COCO validation set for Stable Diffusion" - input_path: "https://github.com/anhappdev/tmp/releases/download/5/coco_gen_full.tfrecord" + input_path: "https://github.com/anhappdev/tmp/releases/download/6/coco_gen_full.tfrecord" groundtruth_path: "" } 
tiny { diff --git a/flutter/cpp/datasets/coco_gen.cc b/flutter/cpp/datasets/coco_gen.cc index 1b7a75ef8..7d624d952 100644 --- a/flutter/cpp/datasets/coco_gen.cc +++ b/flutter/cpp/datasets/coco_gen.cc @@ -100,18 +100,20 @@ std::vector CocoGen::ProcessOutput(const int sample_idx, } } - std::string raw_output_filename = - raw_output_dir_ + "/output_" + std::to_string(sample_idx) + ".rgb8"; - dump_output_pixels(output_pixels, raw_output_filename); - if (!output_pixels.empty()) { sample_ids_.insert(sample_idx); CaptionRecord* record = samples_.at(sample_idx).get(); - LOG(INFO) << "caption: " << record->get_caption(); - caption_map[sample_idx] = record->get_caption(); + LOG(INFO) << "caption_id: " << record->get_caption_id() + << " caption_text: " << record->get_caption_text(); + caption_id_map[sample_idx] = record->get_caption_id(); + caption_text_map[sample_idx] = record->get_caption_text(); output_pixels_map[sample_idx] = output_pixels; attention_mask_map[sample_idx] = record->get_attention_mask_vector(); input_ids_map[sample_idx] = record->get_input_ids_vector(); + std::string raw_output_filename = raw_output_dir_ + "/caption_id_" + + std::to_string(record->get_caption_id()) + + ".rgb8"; + dump_output_pixels(output_pixels, raw_output_filename); return output_pixels; } else { return std::vector(); @@ -124,7 +126,8 @@ float CocoGen::ComputeAccuracy() { float total_score = 0.0f; float total_samples = static_cast(sample_ids_.size()); for (int sample_idx : sample_ids_) { - std::string caption = caption_map[sample_idx]; + int caption_id = caption_id_map[sample_idx]; + std::string caption_text = caption_text_map[sample_idx]; std::vector input_ids = input_ids_map[sample_idx]; std::vector attention_mask = attention_mask_map[sample_idx]; std::vector output_pixels = output_pixels_map[sample_idx]; @@ -134,8 +137,8 @@ float CocoGen::ComputeAccuracy() { } float score = score_predictor_.predict(attention_mask, input_ids, pixel_values); - LOG(INFO) << "sample_idx: " << sample_idx << 
" caption: " << caption - << " score: " << score; + LOG(INFO) << "sample_idx: " << sample_idx << " caption_id: " << caption_id + << " caption_text: " << caption_text << " score: " << score; total_score += score; } float avg_score = total_score / total_samples; diff --git a/flutter/cpp/datasets/coco_gen.h b/flutter/cpp/datasets/coco_gen.h index f89016dcd..e99f6f25a 100644 --- a/flutter/cpp/datasets/coco_gen.h +++ b/flutter/cpp/datasets/coco_gen.h @@ -83,7 +83,8 @@ class CocoGen : public Dataset { std::set sample_ids_; bool isModelFound; std::string raw_output_dir_; - std::unordered_map caption_map; + std::unordered_map caption_id_map; + std::unordered_map caption_text_map; std::unordered_map> output_pixels_map; std::unordered_map> attention_mask_map; std::unordered_map> input_ids_map; diff --git a/flutter/cpp/datasets/coco_gen_utils/CLIP_Model_to_TFLite.ipynb b/flutter/cpp/datasets/coco_gen_utils/CLIP_Model_to_TFLite.ipynb index af0baaee9..b0ab574e0 100644 --- a/flutter/cpp/datasets/coco_gen_utils/CLIP_Model_to_TFLite.ipynb +++ b/flutter/cpp/datasets/coco_gen_utils/CLIP_Model_to_TFLite.ipynb @@ -46,7 +46,7 @@ "source": [ "SAVED_MODEL_DIR = './clip_model'\n", "TFLITE_MODEL_PATH = './clip_model.tflite'\n", - "MODEL_NAME = \"openai/clip-vit-base-patch32\"" + "MODEL_NAME = \"openai/clip-vit-large-patch14\"" ], "metadata": { "id": "eOxB3zL_33tq" diff --git a/flutter/cpp/datasets/coco_gen_utils/generate_tfrecords.py b/flutter/cpp/datasets/coco_gen_utils/generate_tfrecords.py index 45070b061..8c3dcbe15 100644 --- a/flutter/cpp/datasets/coco_gen_utils/generate_tfrecords.py +++ b/flutter/cpp/datasets/coco_gen_utils/generate_tfrecords.py @@ -56,9 +56,10 @@ def download_image(url, file_path): print(f"Downloaded image to {file_path}") -def serialize_example(caption, input_ids, attention_mask, file_name, clip_score): +def serialize_example(caption_id, caption, input_ids, attention_mask, file_name, clip_score): """Creates a tf.train.Example message ready to be written to a 
file.""" feature = { + 'caption_id': tf.train.Feature(int64_list=tf.train.Int64List(value=caption_id)), 'caption': tf.train.Feature(bytes_list=tf.train.BytesList(value=[caption.encode()])), 'input_ids': tf.train.Feature(int64_list=tf.train.Int64List(value=input_ids)), 'attention_mask': tf.train.Feature(int64_list=tf.train.Int64List(value=attention_mask)), @@ -87,6 +88,7 @@ def main(): with tf.io.TFRecordWriter(args.output_tfrecord, options='ZLIB') as writer: total = len(df) for idx, row in df.iterrows(): + caption_id = row['id'] caption = row['caption'] file_name = row['file_name'] coco_url = row['coco_url'] @@ -104,6 +106,7 @@ def main(): clip_score = outputs.logits_per_image.numpy().flatten().tolist() example = serialize_example( + caption_id=[int(caption_id)], caption=caption, input_ids=input_ids, attention_mask=attention_mask, diff --git a/flutter/cpp/datasets/coco_gen_utils/types.h b/flutter/cpp/datasets/coco_gen_utils/types.h index 082c5cf5c..0945500aa 100644 --- a/flutter/cpp/datasets/coco_gen_utils/types.h +++ b/flutter/cpp/datasets/coco_gen_utils/types.h @@ -29,9 +29,14 @@ struct CaptionRecord { tensorflow::Example example; example.ParseFromString(record); + auto caption_id_list = + tensorflow::GetFeatureValues("caption_id", example); + caption_id = + std::vector(caption_id_list.begin(), caption_id_list.end())[0]; + auto caption_list = tensorflow::GetFeatureValues("caption", example); - caption = + caption_text = std::vector(caption_list.begin(), caption_list.end()); auto input_id_list = @@ -57,7 +62,8 @@ struct CaptionRecord { void dump() { std::cout << "CaptionRecord:\n"; - std::cout << " caption: " << get_caption() << "\n"; + std::cout << " caption_id: " << get_caption_id() << "\n"; + std::cout << " caption_text: " << get_caption_text() << "\n"; std::cout << " input_ids: "; for (size_t i = 0; i < input_ids.size(); ++i) { std::cout << input_ids[i]; @@ -80,7 +86,8 @@ struct CaptionRecord { std::cout << " clip_score: " << clip_score << "\n"; } - 
std::string get_caption() const { return caption[0]; } + int get_caption_id() const { return caption_id; } + std::string get_caption_text() const { return caption_text[0]; } std::string get_filename() const { return filename[0]; } int32_t* get_input_ids() { return input_ids.data(); } int32_t* get_attention_mask() { return attention_mask.data(); } @@ -88,7 +95,8 @@ struct CaptionRecord { std::vector get_attention_mask_vector() { return attention_mask; } private: - std::vector caption; + int caption_id; + std::vector caption_text; std::vector input_ids; std::vector attention_mask; std::vector filename; diff --git a/mobile_back_apple/dev-utils/Makefile b/mobile_back_apple/dev-utils/Makefile index 7ca5c27ca..76fc0fd19 100644 --- a/mobile_back_apple/dev-utils/Makefile +++ b/mobile_back_apple/dev-utils/Makefile @@ -131,9 +131,9 @@ tflite-run-sd: bazel-bin/flutter/cpp/binary/main EXTERNAL stable_diffusion \ --mode=PerformanceOnly \ --output_dir="${REPO_ROOT_DIR}/output" \ - --model_file="${REPO_ROOT_DIR}/mobile_back_apple/dev-resources/stable_diffusion/sd-models" \ + --model_file="${REPO_ROOT_DIR}/mobile_back_apple/dev-resources/stable_diffusion/dynamic-sd-models" \ --lib_path="bazel-bin/mobile_back_tflite/cpp/backend_tflite/libtflitebackend.so" \ - --input_tfrecord="${REPO_ROOT_DIR}/mobile_back_apple/dev-resources/stable_diffusion/coco_gen_full.tfrecord" \ + --input_tfrecord="${REPO_ROOT_DIR}/mobile_back_apple/dev-resources/stable_diffusion/coco_gen_test.tfrecord" \ --input_clip_model="${REPO_ROOT_DIR}/mobile_back_apple/dev-resources/stable_diffusion/clip_model_512x512.tflite" \ --min_query_count=5 From eea4595a4df0c6b4930e0b4e5f3db48681df85da Mon Sep 17 00:00:00 2001 From: Mohit Mundhra Date: Fri, 20 Sep 2024 18:16:30 +0530 Subject: [PATCH 03/18] Final Submission for code for Qualcomm co-author: Aswin B co-author: Utkarsh Mishra --- .gitignore | 6 +- WORKSPACE | 7 + flutter/android/android-docker.mk | 6 +- flutter/android/docker/Dockerfile | 133 ++++ 
flutter/assets/tasks.pbtxt | 2 +- mobile_back_qti/BUILD | 44 +- mobile_back_qti/DLC/Makefile | 686 +++++++----------- mobile_back_qti/DLC/README.md | 2 +- .../DLC/util/StableDiffusion/AIMET/Dockerfile | 6 + .../DLC/util/StableDiffusion/AIMET/Makefile | 122 ++++ .../DLC/util/StableDiffusion/AIMET/aimet.py | 193 +++++ .../AIMET/graph_config_text_encoder.json | 17 + .../AIMET/graph_config_unet.json | 17 + .../AIMET/graph_config_vae.json | 17 + .../StableDiffusion/AIMET/mcp_config.json | 5 + .../DLC/util/StableDiffusion/README.md | 61 ++ .../DLC/util/StableDiffusion/flatten.py | 294 ++++++++ .../DLC/util/StableDiffusion/readme.txt | 0 mobile_back_qti/README.md | 45 +- mobile_back_qti/cpp/backend_qti/BUILD | 55 +- .../backend_qti/StableDiffusionShared/BUILD | 49 ++ .../StableDiffusionShared/variables.bzl | 31 + .../cpp/backend_qti/mlperf_helper.h | 14 +- .../cpp/backend_qti/qti_backend_helper.cc | 207 ++++-- .../cpp/backend_qti/qti_backend_helper.h | 46 +- mobile_back_qti/cpp/backend_qti/qti_c.cc | 107 ++- .../cpp/backend_qti/qti_settings.h | 2 + mobile_back_qti/cpp/backend_qti/rpcmem.cc | 2 +- .../settings/qti_settings_default_cpu.pbtxt | 78 +- .../settings/qti_settings_default_dsp.pbtxt | 81 +-- .../settings/qti_settings_default_gpu.pbtxt | 124 +--- .../settings/qti_settings_gpufp16.pbtxt | 74 +- .../settings/qti_settings_sd7cxg3.pbtxt | 96 +-- .../settings/qti_settings_sd7g1.pbtxt | 93 +-- .../settings/qti_settings_sd7pg2.pbtxt | 92 +-- .../settings/qti_settings_sd8cxg3.pbtxt | 96 +-- .../settings/qti_settings_sd8g1.pbtxt | 94 +-- .../settings/qti_settings_sd8g2.pbtxt | 98 +-- .../settings/qti_settings_sd8g3.pbtxt | 118 +-- .../settings/qti_settings_sd8pg1.pbtxt | 100 +-- .../settings/qti_settings_sdm778.pbtxt | 102 +-- .../settings/qti_settings_sdm888.pbtxt | 96 +-- .../settings/qti_settings_sm4450.pbtxt | 95 +-- .../settings/qti_settings_sm7550.pbtxt | 106 +-- .../settings/qti_settings_sm8635.pbtxt | 102 +-- .../qti_settings_stablediffusion.pbtxt | 46 ++ 
.../cpp/backend_qti/soc_utility.cc | 47 +- mobile_back_qti/cpp/backend_qti/soc_utility.h | 17 +- .../docker/mlperf_dlc_prepare/Dockerfile | 42 +- mobile_back_qti/make/qti_backend.mk | 35 +- mobile_back_qti/run_mlperf_tests.bat | 77 +- mobile_back_qti/run_mlperf_tests.sh | 74 +- mobile_back_qti/variables.bzl | 18 +- 53 files changed, 2136 insertions(+), 2041 deletions(-) create mode 100644 mobile_back_qti/DLC/util/StableDiffusion/AIMET/Dockerfile create mode 100644 mobile_back_qti/DLC/util/StableDiffusion/AIMET/Makefile create mode 100644 mobile_back_qti/DLC/util/StableDiffusion/AIMET/aimet.py create mode 100644 mobile_back_qti/DLC/util/StableDiffusion/AIMET/graph_config_text_encoder.json create mode 100644 mobile_back_qti/DLC/util/StableDiffusion/AIMET/graph_config_unet.json create mode 100644 mobile_back_qti/DLC/util/StableDiffusion/AIMET/graph_config_vae.json create mode 100644 mobile_back_qti/DLC/util/StableDiffusion/AIMET/mcp_config.json create mode 100644 mobile_back_qti/DLC/util/StableDiffusion/README.md create mode 100644 mobile_back_qti/DLC/util/StableDiffusion/flatten.py create mode 100644 mobile_back_qti/DLC/util/StableDiffusion/readme.txt create mode 100644 mobile_back_qti/cpp/backend_qti/StableDiffusionShared/BUILD create mode 100644 mobile_back_qti/cpp/backend_qti/StableDiffusionShared/variables.bzl create mode 100644 mobile_back_qti/cpp/backend_qti/settings/qti_settings_stablediffusion.pbtxt diff --git a/.gitignore b/.gitignore index 17e98e56c..1c154391b 100644 --- a/.gitignore +++ b/.gitignore @@ -10,7 +10,7 @@ /datasets/output /datasets/downloads /mobile_back_qti/snpe-* -/mobile_back_qti/qaisw-* +/mobile_back_qti/qairt *.so * .apk * .tflite @@ -22,3 +22,7 @@ __pycache__/ *.log *.iml *.env + +*.g.dart +*.gen.dart +*.gen.h diff --git a/WORKSPACE b/WORKSPACE index 9c0bb08ea..34db3f4b6 100644 --- a/WORKSPACE +++ b/WORKSPACE @@ -107,3 +107,10 @@ snpe_version_loader( name = "snpe_version_loader", workspace_dir = __workspace_dir__, ) + 
+load("//mobile_back_qti/cpp/backend_qti/StableDiffusionShared:variables.bzl", "stable_diffusion_external_deps_shared") + +stable_diffusion_external_deps_shared( + name = "stable_diffusion_external_deps_shared", + workspace_dir = __workspace_dir__, +) \ No newline at end of file diff --git a/flutter/android/android-docker.mk b/flutter/android/android-docker.mk index 21b03d649..675ac2475 100644 --- a/flutter/android/android-docker.mk +++ b/flutter/android/android-docker.mk @@ -1,4 +1,4 @@ -# Copyright 2020-2022 The MLPerf Authors. All Rights Reserved. +# Copyright 2020-2024 The MLPerf Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -19,6 +19,9 @@ user_id=$(shell id -u) .PHONY: flutter/android/docker/image flutter/android/docker/image: output/docker/mlperf_mobile_flutter_android_${user_id}.stamp output/docker/mlperf_mobile_flutter_android_${user_id}.stamp: flutter/android/docker/Dockerfile + ## TODO: change if clause according to make file + rm -f ./mobile_back_qti/cpp/backend_qti/StableDiffusionShared/include/opencv + ln -s /opt/opencv-3.4.7_android/sdk/native ./mobile_back_qti/cpp/backend_qti/StableDiffusionShared/include/opencv docker image build -t ${DOCKER_IMAGE_TAG} flutter/android/docker mkdir -p output/docker touch $@ @@ -42,6 +45,7 @@ flutter_common_docker_flags= \ --env WITH_PIXEL=${WITH_PIXEL} \ --env WITH_MEDIATEK=${WITH_MEDIATEK} \ --env proxy_bazel_args=${proxy_bazel_args} \ + --env BAZEL_OUTPUT_ROOT_ARG="--output_user_root=/image-workdir/cache/bazel" \ --env OFFICIAL_BUILD=${OFFICIAL_BUILD} \ --env FIREBASE_CRASHLYTICS_ENABLED=${FIREBASE_CRASHLYTICS_ENABLED} \ --env FLUTTER_BUILD_NUMBER=${FLUTTER_BUILD_NUMBER} \ diff --git a/flutter/android/docker/Dockerfile b/flutter/android/docker/Dockerfile index 04ad769b3..fdad27713 100644 --- a/flutter/android/docker/Dockerfile +++ b/flutter/android/docker/Dockerfile @@ -73,3 +73,136 @@ RUN mkdir 
$ANDROID_SDK_HOME && \ # Git repo will be mounted at '/image-workdir/project' RUN git config --global --add safe.directory /image-workdir/project + +# Add 32-bit support since the adb in our sdk's are 32-bit binaries +RUN dpkg --add-architecture i386 +ARG DEBIAN_FRONTEND=noninteractive +ARG APT_KEY_DONT_WARN_ON_DANGEROUS_USAGE=DontWarn + +# build-essential: for rest-kit(rest-kit needed for crad-repo) +# file : used by ndk to determine if host is 32 or 64 bit +# libtbb-dev: used for parallelization of postprocessing for pose-estimation +# libcanberra-gtk-module, libgtk2.0-dev: required by opencv runtime +# libstdc++6:i386: to add support for 32 bit binaries. +# libxtst6 : required to run ide(e.g intellij) inside docker +RUN apt-get update && apt-get install --no-install-recommends -y build-essential \ + cmake \ + curl \ + git \ + emacs \ + file \ + less \ + libcanberra-gtk-module \ + libgtk2.0-dev \ + libtbb-dev \ + libstdc++6:i386 \ + libxtst6 + +# pkg-config: required by opencv runtime +# software-properties-common: for curl +# xterm : to make resize available for avoiding line wrapping +# zip : required when sourcing snpe envsetup +RUN apt-get update && apt-get install --no-install-recommends -y mc \ + meld \ + pkg-config \ + python3-dev \ + software-properties-common \ + sudo \ + tmux \ + tree \ + unzip \ + vim \ + wget \ + xterm \ + zip \ + ffmpeg \ + libjpeg-dev \ + zlib1g-dev && \ + apt-get clean autoclean + +# Flatbuffers before installing Python3 +# RUN apt-add-repository ppa:hnakamur/flatbuffers +RUN apt update && apt install -y 'flatbuffers-compiler' && apt install -y 'libflatbuffers-dev' +ENV PATH "/usr/bin/flatc:${PATH}" + +# OpenJDK-11 +# NOTE: JDK is required for Android SDK which is required for openCV 3.4.7 +RUN apt-get update && \ + apt-get install -y openjdk-17-jdk && \ + apt-get install -y ant && \ + apt-get clean +# Fix certificate issues +RUN apt-get update && \ + apt-get install ca-certificates-java && \ + apt-get clean && \ + 
update-ca-certificates -f +# Setup JAVA_HOME +ENV JAVA_HOME /usr/lib/jvm/java-17-openjdk-amd64 + +# Get and install android command line tool which will be used to install android-sdk +RUN cd /opt && mkdir -p ./Android/cmdline-tools && \ + wget -q https://dl.google.com/android/repository/commandlinetools-linux-8512546_latest.zip && \ + unzip commandlinetools-linux-8512546_latest.zip -d ./Android/cmdline-tools/ && \ + mv ./Android/cmdline-tools/cmdline-tools ./Android/cmdline-tools/tools && \ + rm -rf commandlinetools-linux-8512546_latest.zip + +# Android SDK +RUN cd /opt/Android/cmdline-tools/tools/bin && \ + yes | ./sdkmanager "platforms;android-33" "build-tools;30.0.2" "sources;android-33" \ + "extras;google;m2repository" "extras;android;m2repository" && \ + yes | ./sdkmanager --licenses && \ + yes | ./sdkmanager --update +# Setup ANDROID_HOME +ENV ANDROID_HOME /opt/Android + +# Android NDK +RUN cd /opt/Android/cmdline-tools/tools/bin && \ + yes | ./sdkmanager --install "ndk;25.2.9519653" +# Setup ANDROID_NDK and add into PATH +ENV ANDROID_NDK /opt/Android/ndk/25.2.9519653 +ENV PATH "${ANDROID_NDK}:${PATH}" + +# Get cmake-3.19.3 +# apt has 3.10.2 as latest version, so remove it +RUN apt-get remove --no-install-recommends -y cmake +RUN wget -q https://github.com/Kitware/CMake/releases/download/v3.19.3/cmake-3.19.3-Linux-x86_64.sh -O /opt/cmake-3.19.3-Linux-x86_64.sh && \ + chmod +x /opt/cmake-3.19.3-Linux-x86_64.sh && mkdir -p /opt/cmake && \ + bash /opt/cmake-3.19.3-Linux-x86_64.sh --skip-license --prefix=/opt/cmake && \ + rm -rf /opt/cmake-3.19.3-Linux-x86_64.sh +# Add CMAKE into PATH +ENV PATH "/opt/cmake/bin:${PATH}" + +# OpenCV +ENV CMAKE_TOOLCHAIN_FILE "${ANDROID_NDK}/build/cmake/android.toolchain.cmake" +ENV ANDROID_ABI "arm64-v8a" +ENV API_LEVEL "31" +ENV ANDROID_TOOLCHAIN_NAME "aarch64-linux-android-4.9" +ARG COMMON_CMAKE_OPTIONS="-DBUILD_SHARED_LIBS=ON -DCMAKE_BUILD_TYPE=release \ + -DBUILD_ZLIB=ON -DWITH_FFMPEG=ON -DBUILD_TESTS=OFF \ + -DWITH_TBB=ON 
-DBUILD_PERF_TESTS=OFF -DWITH_IPP=OFF \ + -DWITH_OPENEXR=ON -DWITH_JASPER=ON -WITH_PNG=ON -DBUILD_FAT_JAVA_LIB=OFF WITH_IMGCODEC=ON" +# Ref: https://docs.opencv.org/3.4.0/d7/d9f/tutorial_linux_install.html +RUN wget -q https://github.com/opencv/opencv/archive/3.4.7/opencv-3.4.7.tar.gz -O /tmp/3.4.7.tar.gz && \ + tar -C /tmp -xvf /tmp/3.4.7.tar.gz && \ + # First build for arm-android + cd /tmp/opencv-3.4.7 && mkdir -p /opt/opencv-3.4.7_android/ release_android && cd release_android && \ + cmake -DCMAKE_TOOLCHAIN_FILE="${CMAKE_TOOLCHAIN_FILE}" \ + -DANDROID_NDK="${ANDROID_NDK}" \ + -DANDROID_HOME="${ANDROID_HOME}" \ + -DANDROID_STL=c++_shared \ + -DBUILD_ANDROID_PROJECTS=OFF \ + -DANDROID_NATIVE_API_LEVEL="${API_LEVEL}" \ + -DANDROID_ABI="${ANDROID_ABI}" \ + -DWITH_CUDA=ON -DWITH_OPENCL=ON \ + $COMMON_CMAKE_OPTIONS \ + -D WITH_ITT=OFF \ + -DCMAKE_INSTALL_PREFIX=/opt/opencv-3.4.7_android/ .. && \ + make -j16 && \ + sudo make -j16 install && cp -rf ./3rdparty/ /opt/opencv-3.4.7_android/ + +# Set the variables to be used for actual app development/build +ENV ANDROID_SYSROOT "${ANDROID_NDK}/toolchains/llvm/prebuilt/linux-x86_64/sysroot" +ENV ANDROID_PLATFORM "${API_LEVEL}" +ENV ANDROID_PLATFORM_TOOLS "${ANDROID_HOME}/platform-tools" +ENV PATH "${ANDROID_PLATFORM_TOOLS}:${PATH}" +ENV LD_LIBRARY_PATH "/usr/local/lib/:${LD_LIBRARY_PATH}" \ No newline at end of file diff --git a/flutter/assets/tasks.pbtxt b/flutter/assets/tasks.pbtxt index dadff26a7..db8020d0e 100644 --- a/flutter/assets/tasks.pbtxt +++ b/flutter/assets/tasks.pbtxt @@ -240,7 +240,7 @@ task { tiny { name: "COCO validation set for Stable Diffusion" input_path: "https://github.com/anhappdev/tmp/releases/download/2/coco_gen.tfrecord" - groundtruth_path: "local:///mlperf_models/stable-diffusion/clip_model_512x512.tflite" + groundtruth_path: "local:///mlperf_models/stable_diffusion/clip_model_512x512.tflite" } } model { diff --git a/mobile_back_qti/BUILD b/mobile_back_qti/BUILD index 7ae9fec89..b34f03292 100644 
--- a/mobile_back_qti/BUILD +++ b/mobile_back_qti/BUILD @@ -28,11 +28,11 @@ config_setting( cc_import( name = "snpewindowslib", hdrs = glob([ - SNPE_VERSION + "/include/SNPE/**/*.hpp", - SNPE_VERSION + "/include/SNPE/**/*.h", + "qairt/" + SNPE_VERSION + "/include/SNPE/**/*.hpp", + "qairt/" + SNPE_VERSION + "/include/SNPE/**/*.h", ]), - interface_library = SNPE_VERSION + "/lib/aarch64-windows-msvc/SNPE.lib", - shared_library = SNPE_VERSION + "/lib/aarch64-windows-msvc/SNPE.dll", + interface_library = "qairt/" + SNPE_VERSION + "/lib/aarch64-windows-msvc/SNPE.lib", + shared_library = "qairt/" + SNPE_VERSION + "/lib/aarch64-windows-msvc/SNPE.dll", ) cc_library( @@ -49,30 +49,36 @@ cc_library( cc_library( name = "snpe", srcs = [ - SNPE_VERSION + "/lib/aarch64-android/libSNPE.so", + "qairt/" + SNPE_VERSION + "/lib/aarch64-android/libSNPE.so" ], hdrs = glob([ - SNPE_VERSION + "/include/SNPE/**/*.hpp", - SNPE_VERSION + "/include/SNPE/**/*.h", + "qairt/" + SNPE_VERSION + "/include/SNPE/**/*.hpp", + "qairt/" + SNPE_VERSION + "/include/SNPE/**/*.h", + "qairt/" + SNPE_VERSION + "/include/QNN/**/*.h", + "qairt/" + SNPE_VERSION + "/include/QNN/*.h" ]), - copts = ["-I" + SNPE_VERSION + "/include/SNPE"], + copts = ["-I" + "qairt/" + SNPE_VERSION + "/include/SNPE", "-I" + SNPE_VERSION + "/include/QNN"], visibility = ["//visibility:public"], ) cc_library( name = "snpe_deps", srcs = [ - SNPE_VERSION + "/lib/aarch64-android/libhta.so", - SNPE_VERSION + "/lib/aarch64-android/libSnpeHtpV75Stub.so", - SNPE_VERSION + "/lib/aarch64-android/libSnpeHtpV73Stub.so", - SNPE_VERSION + "/lib/aarch64-android/libSnpeHtpV69Stub.so", - SNPE_VERSION + "/lib/aarch64-android/libSnpeHtpV68Stub.so", - SNPE_VERSION + "/lib/aarch64-android/libSNPE.so", - SNPE_VERSION + "/lib/aarch64-android/libSnpeHtpPrepare.so", - SNPE_VERSION + "/lib/hexagon-v68/unsigned/libSnpeHtpV68Skel.so", - SNPE_VERSION + "/lib/hexagon-v69/unsigned/libSnpeHtpV69Skel.so", - SNPE_VERSION + 
"/lib/hexagon-v73/unsigned/libSnpeHtpV73Skel.so", - SNPE_VERSION + "/lib/hexagon-v75/unsigned/libSnpeHtpV75Skel.so", + "qairt/" + SNPE_VERSION + "/lib/aarch64-android/libhta.so", + "qairt/" + SNPE_VERSION + "/lib/aarch64-android/libSnpeHtpV75Stub.so", + "qairt/" + SNPE_VERSION + "/lib/aarch64-android/libSnpeHtpV73Stub.so", + "qairt/" + SNPE_VERSION + "/lib/aarch64-android/libSnpeHtpV69Stub.so", + "qairt/" + SNPE_VERSION + "/lib/aarch64-android/libSnpeHtpV68Stub.so", + "qairt/" + SNPE_VERSION + "/lib/aarch64-android/libSNPE.so", + "qairt/" + SNPE_VERSION + "/lib/aarch64-android/libSnpeHtpPrepare.so", + "qairt/" + SNPE_VERSION + "/lib/hexagon-v68/unsigned/libSnpeHtpV68Skel.so", + "qairt/" + SNPE_VERSION + "/lib/hexagon-v69/unsigned/libSnpeHtpV69Skel.so", + "qairt/" + SNPE_VERSION + "/lib/hexagon-v73/unsigned/libSnpeHtpV73Skel.so", + "qairt/" + SNPE_VERSION + "/lib/hexagon-v75/unsigned/libSnpeHtpV75Skel.so", + "qairt/" + SNPE_VERSION + "/lib/aarch64-android/libQnnHtpV73Stub.so", + "qairt/" + SNPE_VERSION + "/lib/aarch64-android/libQnnHtp.so", + "qairt/" + SNPE_VERSION + "/lib/aarch64-android/libQnnHtpPrepare.so", + "qairt/" + SNPE_VERSION + "/lib/hexagon-v73/unsigned/libQnnHtpV73Skel.so", ], ) diff --git a/mobile_back_qti/DLC/Makefile b/mobile_back_qti/DLC/Makefile index 73b0de98b..a70bcf602 100644 --- a/mobile_back_qti/DLC/Makefile +++ b/mobile_back_qti/DLC/Makefile @@ -32,11 +32,13 @@ ${DATASETS_OUT}/state/calibration.stamp: DLCBUILDDIR=${BUILDDIR}/DLC MODEL_BASE_PATH=${DLCBUILDDIR}/mobile -MOBILENETEDGETPU_MODEL_PATH=${MODEL_BASE_PATH}/vision/mobilenet/models_and_code/checkpoints/float MOBILENETV4_MODEL_PATH=${MODEL_BASE_PATH}/vision/mobilenetV4/MobileNetV4-Conv-Large-saved-model MOBILEBERT_MODEL_PATH=${MODEL_BASE_PATH}/language/bert/models_and_code/checkpoints/quant/ MOBILEMOSAIC_MODEL_PATH=${MODEL_BASE_PATH}/vision/mosaic/models_and_checkpoints/R4/ SNUSR_MODEL_PATH = ${MODEL_BASE_PATH}/vision/edsr/models_and_checkpoints/checkpoints/f32b5/ckpt_qat/ 
+TEXTENCODER_MODEL_PATH = ${DLCBUILDDIR}/stable_diffusion/text_encoder/ +VAEDECODER_MODEL_PATH = ${DLCBUILDDIR}/stable_diffusion/vae_decoder/ +UNET_MODEL_PATH = ${DLCBUILDDIR}/stable_diffusion/unet/ SNUSR_CALIBRATION_PATH=${MODEL_BASE_PATH}/calibration/OpenImages MLPERF_MODELS_PATH = ${DLCBUILDDIR}/mlperf_models/ MOBILEDETSSDQAT_MODEL_PATH = ${DLCBUILDDIR}/mobile/vision/mobiledet/uint8/export_inference_graph/ @@ -52,9 +54,7 @@ $(info "Using normal docker") include load_normal_docker.mk endif -htp-dlc: mobilenet_edgetpu \ - mobilenet_edgetpu_O2 \ - mobilenet_v4 \ +htp-dlc: mobilenet_v4 \ mobilenet_v4_O2 \ mobiledet \ mobiledet_O2 \ @@ -64,48 +64,30 @@ htp-dlc: mobilenet_edgetpu \ mobilebert_O2 \ snusr \ snusr_O2 \ - mobilenet_edgetpu_batched \ - mobilenet_edgetpu_batched_O2 \ mobilenet_v4_batched \ mobilenet_v4_batched_O2 -ifeq ($(MAKECMDGOALS),$(filter $(MAKECMDGOALS),generate-apirec mobilenet_edgetpu_apirec mobilenet_edgetpu_batched_apirec \ + +ifeq ($(MAKECMDGOALS),$(filter $(MAKECMDGOALS),generate-apirec \ mobilenet_v4_apirec mobilenet_v4_batched_apirec mosaic_apirec mobilebert_apirec mobiledet_apirec snusr_apirec)) include ../make/apirec.mk endif -generate-apirec: mobilenet_edgetpu_apirec \ - mobilenet_edgetpu_batched_apirec \ - mobilenet_v4_apirec \ +generate-apirec: mobilenet_v4_apirec \ mobilenet_v4_batched_apirec \ mosaic_apirec \ mobiledet_apirec \ mobilebert_apirec \ snusr_apirec -mobilenet_edgetpu_batched: \ - ${DLCBUILDDIR}/mobilenet_edgetpu_224_1.0_htp_batched_3.stamp \ - ${DLCBUILDDIR}/mobilenet_edgetpu_224_1.0_htp_batched_4.stamp \ - ${DLCBUILDDIR}/mobilenet_edgetpu_224_1.0_htp_batched_8.stamp - -mobilenet_edgetpu_batched_O2: \ - ${DLCBUILDDIR}/mobilenet_edgetpu_224_1.0_htp_batched_3_O2.stamp \ - ${DLCBUILDDIR}/mobilenet_edgetpu_224_1.0_htp_batched_4_O2.stamp \ - ${DLCBUILDDIR}/mobilenet_edgetpu_224_1.0_htp_batched_8_O2.stamp - -mobilenet_edgetpu_batched_apirec: \ - ${DLCBUILDDIR}/generate_mobilenet_edgetpu_batched_4_apirec \ - 
${DLCBUILDDIR}/generate_mobilenet_edgetpu_batched_3_apirec \ - ${DLCBUILDDIR}/generate_mobilenet_edgetpu_batched_8_apirec - mobilenet_v4_batched: \ - ${DLCBUILDDIR}/mobilenet_v4_htp_batched_4.stamp \ + ${DLCBUILDDIR}/mobilenet_v4_htp_batched_4.stamp mobilenet_v4_batched_O2: \ - ${DLCBUILDDIR}/mobilenet_v4_htp_batched_4_O2.stamp \ + ${DLCBUILDDIR}/mobilenet_v4_htp_batched_4_O2.stamp mobilenet_v4_batched_apirec: \ - ${DLCBUILDDIR}/generate_mobilenet_v4_batched_4_apirec \ + ${DLCBUILDDIR}/generate_mobilenet_v4_batched_4_apirec mosaic: \ ${DLCBUILDDIR}/mobile_mosaic_htp.stamp @@ -116,15 +98,6 @@ mosaic_O2: \ mosaic_apirec: \ ${DLCBUILDDIR}/generate_mosaic_apirec -mobilenet_edgetpu: \ - ${DLCBUILDDIR}/mobilenet_edgetpu_224_1.0_htp.stamp - -mobilenet_edgetpu_O2: \ - ${DLCBUILDDIR}/mobilenet_edgetpu_224_1.0_htp_O2.stamp - -mobilenet_edgetpu_apirec: \ - ${DLCBUILDDIR}/generate_mobilenet_edgetpu_apirec \ - mobilenet_v4: \ ${DLCBUILDDIR}/mobilenet_v4_htp.stamp @@ -132,7 +105,7 @@ mobilenet_v4_O2: \ ${DLCBUILDDIR}/mobilenet_v4_htp_O2.stamp mobilenet_v4_apirec: \ - ${DLCBUILDDIR}/generate_mobilenet_v4_apirec \ + ${DLCBUILDDIR}/generate_mobilenet_v4_apirec mobiledet: \ ${DLCBUILDDIR}/ssd_mobiledet_qat_htp.stamp @@ -161,6 +134,21 @@ snusr_O2 : \ snusr_apirec: \ ${DLCBUILDDIR}/generate_snusr_apirec +stable_diffusion_qnn: \ + ${DLCBUILDDIR}/sd_precompute_data.tar \ + ${DLCBUILDDIR}/text_encoder_qnn_context_binary_generator.stamp \ + ${DLCBUILDDIR}/vae_decoder_qnn_context_binary_generator.stamp \ + ${DLCBUILDDIR}/unet_qnn_context_binary_generator.stamp + +text_encoder: \ + ${DLCBUILDDIR}/text_encoder_qnn_context_binary_generator.stamp + +vae_decoder: \ + ${DLCBUILDDIR}/vae_decoder_qnn_context_binary_generator.stamp + +unet: \ + ${DLCBUILDDIR}/unet_qnn_context_binary_generator.stamp + mlperf_models: \ ${DLCBUILDDIR}/mlperf_models.stamp @@ -183,94 +171,6 @@ ${DLCBUILDDIR}/mlperf_models.stamp: (mkdir -p ${MLPERF_MODELS_PATH}) touch $@ 
-${DLCBUILDDIR}/mobilenet_edgetpu_224_1.0_float.dlc: \ - ${DLCBUILDDIR}/mlperf_dlc_prepare_docker.stamp \ - ${DLCBUILDDIR}/mobile/.stamp \ - ${DLCBUILDDIR}/mlperf_models.stamp - # Mobilenetedge TPU model conversion .... - # Float model - mkdir -p ${DLCBUILDDIR} - docker run \ - -e PYTHONPATH=/snpe_sdk/lib/python \ - -v ${SNPE_SDK}:/snpe_sdk \ - -v ${MOBILENETEDGETPU_MODEL_PATH}:/models \ - -v ${DLCBUILDDIR}:/output \ - -u ${USERID}:${GROUPID} \ - mlperf_dlc_prepare \ - /snpe_sdk/bin/x86_64-linux-clang/snpe-tensorflow-to-dlc \ - -i /models/frozen_graph_tf1x_transform.pb \ - -d input "1,224,224,3" --out_node "MobilenetEdgeTPU/Predictions/Softmax" \ - -o /output/mobilenet_edgetpu_224_1.0_float.dlc - -${DLCBUILDDIR}/imagenet/imagenet_image_224_list.txt: ${DLCBUILDDIR}/imagenet/state/quantdata_224.stamp - cat ${TOPDIR}/datasets/downloads/imagenet/cal_image_list_option_1.txt | sed "s!^!quantdata_224/!" | sed "s!JPEG!raw!" > $@ - -${DLCBUILDDIR}/mobilenet_edgetpu_224_1.0_quant.stamp: \ - ${DLCBUILDDIR}/mlperf_dlc_prepare_docker.stamp \ - ${DLCBUILDDIR}/mobilenet_edgetpu_224_1.0_float.dlc \ - ${DLCBUILDDIR}/imagenet/imagenet_image_224_list.txt - # Quantization of MobilenetEdgeTPU DLC for HTP - docker run \ - -e PYTHONPATH=/snpe_sdk/lib/python \ - -e LD_LIBRARY_PATH=/snpe_sdk/lib/x86_64-linux-clang \ - -v ${SNPE_SDK}:/snpe_sdk \ - -v ${DLCBUILDDIR}:/output \ - -v ${DLCBUILDDIR}/imagenet:/imagenet \ - -w /imagenet \ - -u ${USERID}:${GROUPID} \ - mlperf_dlc_prepare \ - /snpe_sdk/bin/x86_64-linux-clang/snpe-dlc-quant \ - --input_dlc=/output/mobilenet_edgetpu_224_1.0_float.dlc \ - --input_list=imagenet_image_224_list.txt \ - --output_dlc=/output/mobilenet_edgetpu_224_1.0_quant.dlc \ - # Mobilenetedge TPU model conversion completed - -${DLCBUILDDIR}/mobilenet_edgetpu_224_1.0_htp.stamp: \ - ${DLCBUILDDIR}/mlperf_dlc_prepare_docker.stamp \ - ${DLCBUILDDIR}/mobilenet_edgetpu_224_1.0_quant.stamp \ - # Offline prepare of MobilenetEdgeTPU DLC for HTP - docker run \ - -e 
PYTHONPATH=/snpe_sdk/lib/python \ - -e LD_LIBRARY_PATH=/snpe_sdk/lib/x86_64-linux-clang \ - -v ${SNPE_SDK}:/snpe_sdk \ - -v ${DLCBUILDDIR}:/output \ - -v ${DLCBUILDDIR}/imagenet:/imagenet \ - -w /imagenet \ - -u ${USERID}:${GROUPID} \ - mlperf_dlc_prepare \ - /snpe_sdk/bin/x86_64-linux-clang/snpe-dlc-graph-prepare \ - --input_dlc=/output/mobilenet_edgetpu_224_1.0_quant.dlc \ - --output_dlc=/output/mobilenet_edgetpu_224_1.0_htp.dlc \ - --optimization_level 3 \ - --htp_socs=sm8650,sm8635,sm8350,sm7325,sm8450,sm8550,sm7475,sm7550,sc8380xp,sc8280x \ - --memorymapped_buffer_hint=true - # Mobilenetedge TPU model conversion completed - cp ${DLCBUILDDIR}/mobilenet_edgetpu_224_1.0_htp.dlc ${MLPERF_MODELS_PATH} - touch $@ - -${DLCBUILDDIR}/mobilenet_edgetpu_224_1.0_htp_O2.stamp: \ - ${DLCBUILDDIR}/mlperf_dlc_prepare_docker.stamp \ - ${DLCBUILDDIR}/mobilenet_edgetpu_224_1.0_quant.stamp \ - # Offline prepare of MobilenetEdgeTPU DLC for HTP - docker run \ - -e PYTHONPATH=/snpe_sdk/lib/python \ - -e LD_LIBRARY_PATH=/snpe_sdk/lib/x86_64-linux-clang \ - -v ${SNPE_SDK}:/snpe_sdk \ - -v ${DLCBUILDDIR}:/output \ - -v ${DLCBUILDDIR}/imagenet:/imagenet \ - -w /imagenet \ - -u ${USERID}:${GROUPID} \ - mlperf_dlc_prepare \ - /snpe_sdk/bin/x86_64-linux-clang/snpe-dlc-graph-prepare \ - --input_dlc=/output/mobilenet_edgetpu_224_1.0_quant.dlc \ - --output_dlc=/output/mobilenet_edgetpu_224_1.0_htp_O2.dlc \ - --optimization_level 2 \ - --htp_socs=sm8650,sm8635,sm8350,sm7325,sm8450,sm8550,sm7475,sm7550,sc8380xp,sc8280x \ - --memorymapped_buffer_hint=true - # Mobilenetedge TPU model conversion completed - cp ${DLCBUILDDIR}/mobilenet_edgetpu_224_1.0_htp_O2.dlc ${MLPERF_MODELS_PATH} - touch $@ - ${DLCBUILDDIR}/mobilenet_v4_float.dlc: \ ${DLCBUILDDIR}/mlperf_dlc_prepare_docker.stamp \ ${DLCBUILDDIR}/mobile/.stamp \ @@ -327,6 +227,7 @@ ${DLCBUILDDIR}/mobilenet_v4_htp.stamp: \ -u ${USERID}:${GROUPID} \ mlperf_dlc_prepare \ /snpe_sdk/bin/x86_64-linux-clang/snpe-dlc-graph-prepare \ + --htp_dlbc=true \ 
--input_dlc=/output/mobilenet_v4_quant.dlc \ --output_dlc=/output/mobilenet_v4_htp.dlc \ --optimization_level 3 \ @@ -350,6 +251,7 @@ ${DLCBUILDDIR}/mobilenet_v4_htp_O2.stamp: \ -u ${USERID}:${GROUPID} \ mlperf_dlc_prepare \ /snpe_sdk/bin/x86_64-linux-clang/snpe-dlc-graph-prepare \ + --htp_dlbc=true \ --input_dlc=/output/mobilenet_v4_quant.dlc \ --output_dlc=/output/mobilenet_v4_htp_O2.dlc \ --optimization_level 2 \ @@ -359,249 +261,6 @@ ${DLCBUILDDIR}/mobilenet_v4_htp_O2.stamp: \ cp ${DLCBUILDDIR}/mobilenet_v4_htp_O2.dlc ${MLPERF_MODELS_PATH} touch $@ -${DLCBUILDDIR}/mobilenet_edgetpu_224_1.0_float_batched_3.dlc: \ - ${DLCBUILDDIR}/mlperf_dlc_prepare_docker.stamp \ - ${DLCBUILDDIR}/mobile/.stamp \ - ${DLCBUILDDIR}/mlperf_models.stamp - # Mobilenetedge TPU model conversion .... - # Batched Float model - mkdir -p ${DLCBUILDDIR} - docker run \ - -e PYTHONPATH=/snpe_sdk/lib/python \ - -v ${SNPE_SDK}:/snpe_sdk \ - -v ${MOBILENETEDGETPU_MODEL_PATH}:/models \ - -v ${DLCBUILDDIR}:/output \ - -u ${USERID}:${GROUPID} \ - mlperf_dlc_prepare \ - /snpe_sdk/bin/x86_64-linux-clang/snpe-tensorflow-to-dlc \ - -i /models/frozen_graph_tf1x_transform.pb \ - -d input "3,224,224,3" --out_node "MobilenetEdgeTPU/Predictions/Softmax" \ - -o /output/mobilenet_edgetpu_224_1.0_float_batched_3.dlc - -${DLCBUILDDIR}/mobilenet_edgetpu_224_1.0_quant_batched_3.stamp: \ - ${DLCBUILDDIR}/mlperf_dlc_prepare_docker.stamp \ - ${DLCBUILDDIR}/imagenet/imagenet_image_224_list.txt \ - ${DLCBUILDDIR}/mobilenet_edgetpu_224_1.0_float_batched_3.dlc - # Quantization of MobilenetEdgeTPU Batched DLC for HTP - docker run \ - -e PYTHONPATH=/snpe_sdk/lib/python \ - -e LD_LIBRARY_PATH=/snpe_sdk/lib/x86_64-linux-clang \ - -v ${SNPE_SDK}:/snpe_sdk \ - -v ${DLCBUILDDIR}:/output \ - -v ${DLCBUILDDIR}/imagenet:/imagenet-out \ - -w /imagenet-out \ - -u ${USERID}:${GROUPID} \ - mlperf_dlc_prepare \ - /snpe_sdk/bin/x86_64-linux-clang/snpe-dlc-quant \ - --input_dlc=/output/mobilenet_edgetpu_224_1.0_float_batched_3.dlc \ - 
--input_list=imagenet_image_224_list.txt \ - --output_dlc=/output/mobilenet_edgetpu_224_1.0_quant_batched_3.dlc - # Mobilenetedge TPU model conversion completed - -${DLCBUILDDIR}/mobilenet_edgetpu_224_1.0_htp_batched_3.stamp: \ - ${DLCBUILDDIR}/mlperf_dlc_prepare_docker.stamp \ - ${DLCBUILDDIR}/mobilenet_edgetpu_224_1.0_quant_batched_3.stamp - # Offline prepare of MobilenetEdgeTPU Batched DLC for HTP - docker run \ - -e PYTHONPATH=/snpe_sdk/lib/python \ - -e LD_LIBRARY_PATH=/snpe_sdk/lib/x86_64-linux-clang \ - -v ${SNPE_SDK}:/snpe_sdk \ - -v ${DLCBUILDDIR}:/output \ - -u ${USERID}:${GROUPID} \ - mlperf_dlc_prepare \ - /snpe_sdk/bin/x86_64-linux-clang/snpe-dlc-graph-prepare \ - --input_dlc=/output/mobilenet_edgetpu_224_1.0_quant_batched_3.dlc \ - --output_dlc=/output/mobilenet_edgetpu_224_1.0_htp_batched_3.dlc \ - --optimization_level 3 \ - --htp_socs=sm8650,sm8635,sm8350,sm7325,sm8450,sm8550,sm7475,sm7550,sc8380xp,sc8280x \ - --memorymapped_buffer_hint=true - # Mobilenetedge TPU model conversion completed - cp ${DLCBUILDDIR}/mobilenet_edgetpu_224_1.0_htp_batched_3.dlc ${MLPERF_MODELS_PATH} - touch $@ - -${DLCBUILDDIR}/mobilenet_edgetpu_224_1.0_htp_batched_3_O2.stamp: \ - ${DLCBUILDDIR}/mlperf_dlc_prepare_docker.stamp \ - ${DLCBUILDDIR}/mobilenet_edgetpu_224_1.0_quant_batched_3.stamp - # Offline prepare of MobilenetEdgeTPU Batched DLC for HTP - docker run \ - -e PYTHONPATH=/snpe_sdk/lib/python \ - -e LD_LIBRARY_PATH=/snpe_sdk/lib/x86_64-linux-clang \ - -v ${SNPE_SDK}:/snpe_sdk \ - -v ${DLCBUILDDIR}:/output \ - -u ${USERID}:${GROUPID} \ - mlperf_dlc_prepare \ - /snpe_sdk/bin/x86_64-linux-clang/snpe-dlc-graph-prepare \ - --input_dlc=/output/mobilenet_edgetpu_224_1.0_quant_batched_3.dlc \ - --output_dlc=/output/mobilenet_edgetpu_224_1.0_htp_batched_3_O2.dlc \ - --optimization_level 2 \ - --htp_socs=sm8650,sm8635,sm8350,sm7325,sm8450,sm8550,sm7475,sm7550,sc8380xp,sc8280x \ - --memorymapped_buffer_hint=true - # Mobilenetedge TPU model conversion completed - cp 
${DLCBUILDDIR}/mobilenet_edgetpu_224_1.0_htp_batched_3_O2.dlc ${MLPERF_MODELS_PATH} - touch $@ - -${DLCBUILDDIR}/mobilenet_edgetpu_224_1.0_float_batched_4.dlc: \ - ${DLCBUILDDIR}/mlperf_dlc_prepare_docker.stamp \ - ${DLCBUILDDIR}/mobile/.stamp \ - ${DLCBUILDDIR}/mlperf_models.stamp - # Mobilenetedge TPU model conversion .... - # Batched Float model - mkdir -p ${DLCBUILDDIR} - docker run \ - -e PYTHONPATH=/snpe_sdk/lib/python \ - -v ${SNPE_SDK}:/snpe_sdk \ - -v ${MOBILENETEDGETPU_MODEL_PATH}:/models \ - -v ${DLCBUILDDIR}:/output \ - -u ${USERID}:${GROUPID} \ - mlperf_dlc_prepare \ - /snpe_sdk/bin/x86_64-linux-clang/snpe-tensorflow-to-dlc \ - -i /models/frozen_graph_tf1x_transform.pb \ - -d input "4,224,224,3" --out_node "MobilenetEdgeTPU/Predictions/Softmax" \ - -o /output/mobilenet_edgetpu_224_1.0_float_batched_4.dlc - -${DLCBUILDDIR}/mobilenet_edgetpu_224_1.0_quant_batched_4.stamp: \ - ${DLCBUILDDIR}/mlperf_dlc_prepare_docker.stamp \ - ${DLCBUILDDIR}/imagenet/imagenet_image_224_list.txt \ - ${DLCBUILDDIR}/mobilenet_edgetpu_224_1.0_float_batched_4.dlc - # Quantization of MobilenetEdgeTPU Batched DLC for HTP - docker run \ - -e PYTHONPATH=/snpe_sdk/lib/python \ - -e LD_LIBRARY_PATH=/snpe_sdk/lib/x86_64-linux-clang \ - -v ${SNPE_SDK}:/snpe_sdk \ - -v ${DLCBUILDDIR}:/output \ - -v ${DLCBUILDDIR}/imagenet:/imagenet-out \ - -w /imagenet-out \ - -u ${USERID}:${GROUPID} \ - mlperf_dlc_prepare \ - /snpe_sdk/bin/x86_64-linux-clang/snpe-dlc-quant \ - --input_dlc=/output/mobilenet_edgetpu_224_1.0_float_batched_4.dlc \ - --input_list=imagenet_image_224_list.txt \ - --output_dlc=/output/mobilenet_edgetpu_224_1.0_quant_batched_4.dlc - # Mobilenetedge TPU model conversion completed - -${DLCBUILDDIR}/mobilenet_edgetpu_224_1.0_htp_batched_4.stamp: \ - ${DLCBUILDDIR}/mlperf_dlc_prepare_docker.stamp \ - ${DLCBUILDDIR}/mobilenet_edgetpu_224_1.0_quant_batched_4.stamp - # Offline prepare of MobilenetEdgeTPU Batched DLC for HTP - docker run \ - -e PYTHONPATH=/snpe_sdk/lib/python \ - -e 
LD_LIBRARY_PATH=/snpe_sdk/lib/x86_64-linux-clang \ - -v ${SNPE_SDK}:/snpe_sdk \ - -v ${DLCBUILDDIR}:/output \ - -u ${USERID}:${GROUPID} \ - mlperf_dlc_prepare \ - /snpe_sdk/bin/x86_64-linux-clang/snpe-dlc-graph-prepare \ - --input_dlc=/output/mobilenet_edgetpu_224_1.0_quant_batched_4.dlc \ - --output_dlc=/output/mobilenet_edgetpu_224_1.0_htp_batched_4.dlc \ - --optimization_level 3 \ - --htp_socs=sm8650,sm8635,sm8350,sm7325,sm8450,sm8550,sm7475,sm7550,sc8380xp,sc8280x \ - --memorymapped_buffer_hint=true - # Mobilenetedge TPU model conversion completed - cp ${DLCBUILDDIR}/mobilenet_edgetpu_224_1.0_htp_batched_4.dlc ${MLPERF_MODELS_PATH} - touch $@ - -${DLCBUILDDIR}/mobilenet_edgetpu_224_1.0_htp_batched_4_O2.stamp: \ - ${DLCBUILDDIR}/mlperf_dlc_prepare_docker.stamp \ - ${DLCBUILDDIR}/mobilenet_edgetpu_224_1.0_quant_batched_4.stamp - # Offline prepare of MobilenetEdgeTPU Batched DLC for HTP - docker run \ - -e PYTHONPATH=/snpe_sdk/lib/python \ - -e LD_LIBRARY_PATH=/snpe_sdk/lib/x86_64-linux-clang \ - -v ${SNPE_SDK}:/snpe_sdk \ - -v ${DLCBUILDDIR}:/output \ - -u ${USERID}:${GROUPID} \ - mlperf_dlc_prepare \ - /snpe_sdk/bin/x86_64-linux-clang/snpe-dlc-graph-prepare \ - --input_dlc=/output/mobilenet_edgetpu_224_1.0_quant_batched_4.dlc \ - --output_dlc=/output/mobilenet_edgetpu_224_1.0_htp_batched_4_O2.dlc \ - --optimization_level 2 \ - --htp_socs=sm8650,sm8635,sm8350,sm7325,sm8450,sm8550,sm7475,sm7550,sc8380xp,sc8280x \ - --memorymapped_buffer_hint=true - # Mobilenetedge TPU model conversion completed - cp ${DLCBUILDDIR}/mobilenet_edgetpu_224_1.0_htp_batched_4_O2.dlc ${MLPERF_MODELS_PATH} - touch $@ - -${DLCBUILDDIR}/mobilenet_edgetpu_224_1.0_float_batched_8.dlc: \ - ${DLCBUILDDIR}/mlperf_dlc_prepare_docker.stamp \ - ${DLCBUILDDIR}/mobile/.stamp \ - ${DLCBUILDDIR}/mlperf_models.stamp - # Mobilenetedge TPU model conversion .... 
- # Batched Float model - mkdir -p ${DLCBUILDDIR} - docker run \ - -e PYTHONPATH=/snpe_sdk/lib/python \ - -v ${SNPE_SDK}:/snpe_sdk \ - -v ${MOBILENETEDGETPU_MODEL_PATH}:/models \ - -v ${DLCBUILDDIR}:/output \ - -u ${USERID}:${GROUPID} \ - mlperf_dlc_prepare \ - /snpe_sdk/bin/x86_64-linux-clang/snpe-tensorflow-to-dlc \ - -i /models/frozen_graph_tf1x_transform.pb \ - -d input "8,224,224,3" --out_node "MobilenetEdgeTPU/Predictions/Softmax" \ - -o /output/mobilenet_edgetpu_224_1.0_float_batched_8.dlc - -${DLCBUILDDIR}/mobilenet_edgetpu_224_1.0_quant_batched_8.stamp: \ - ${DLCBUILDDIR}/mlperf_dlc_prepare_docker.stamp \ - ${DLCBUILDDIR}/imagenet/imagenet_image_224_list.txt \ - ${DLCBUILDDIR}/mobilenet_edgetpu_224_1.0_float_batched_8.dlc - # Quantization of MobilenetEdgeTPU Batched DLC for HTP - docker run \ - -e PYTHONPATH=/snpe_sdk/lib/python \ - -e LD_LIBRARY_PATH=/snpe_sdk/lib/x86_64-linux-clang \ - -v ${SNPE_SDK}:/snpe_sdk \ - -v ${DLCBUILDDIR}:/output \ - -v ${DLCBUILDDIR}/imagenet:/imagenet-out \ - -w /imagenet-out \ - -u ${USERID}:${GROUPID} \ - mlperf_dlc_prepare \ - /snpe_sdk/bin/x86_64-linux-clang/snpe-dlc-quant \ - --input_dlc=/output/mobilenet_edgetpu_224_1.0_float_batched_8.dlc \ - --input_list=imagenet_image_224_list.txt \ - --output_dlc=/output/mobilenet_edgetpu_224_1.0_quant_batched_8.dlc - # Mobilenetedge TPU model conversion completed - -${DLCBUILDDIR}/mobilenet_edgetpu_224_1.0_htp_batched_8.stamp: \ - ${DLCBUILDDIR}/mlperf_dlc_prepare_docker.stamp \ - ${DLCBUILDDIR}/mobilenet_edgetpu_224_1.0_quant_batched_8.stamp - # Offline prepare of MobilenetEdgeTPU Batched DLC for HTP - docker run \ - -e PYTHONPATH=/snpe_sdk/lib/python \ - -e LD_LIBRARY_PATH=/snpe_sdk/lib/x86_64-linux-clang \ - -v ${SNPE_SDK}:/snpe_sdk \ - -v ${DLCBUILDDIR}:/output \ - -u ${USERID}:${GROUPID} \ - mlperf_dlc_prepare \ - /snpe_sdk/bin/x86_64-linux-clang/snpe-dlc-graph-prepare \ - --input_dlc=/output/mobilenet_edgetpu_224_1.0_quant_batched_8.dlc \ - 
--output_dlc=/output/mobilenet_edgetpu_224_1.0_htp_batched_8.dlc \ - --optimization_level 3 \ - --htp_socs=sm8650,sm8635,sm8350,sm7325,sm8450,sm8550,sm7475,sm7550,sc8380xp,sc8280x \ - --memorymapped_buffer_hint=true - # Mobilenetedge TPU model conversion completed - cp ${DLCBUILDDIR}/mobilenet_edgetpu_224_1.0_htp_batched_8.dlc ${MLPERF_MODELS_PATH} - touch $@ - -${DLCBUILDDIR}/mobilenet_edgetpu_224_1.0_htp_batched_8_O2.stamp: \ - ${DLCBUILDDIR}/mlperf_dlc_prepare_docker.stamp \ - ${DLCBUILDDIR}/mobilenet_edgetpu_224_1.0_quant_batched_8.stamp - # Offline prepare of MobilenetEdgeTPU Batched DLC for HTP - docker run \ - -e PYTHONPATH=/snpe_sdk/lib/python \ - -e LD_LIBRARY_PATH=/snpe_sdk/lib/x86_64-linux-clang \ - -v ${SNPE_SDK}:/snpe_sdk \ - -v ${DLCBUILDDIR}:/output \ - -u ${USERID}:${GROUPID} \ - mlperf_dlc_prepare \ - /snpe_sdk/bin/x86_64-linux-clang/snpe-dlc-graph-prepare \ - --input_dlc=/output/mobilenet_edgetpu_224_1.0_quant_batched_8.dlc \ - --output_dlc=/output/mobilenet_edgetpu_224_1.0_htp_batched_8_O2.dlc \ - --optimization_level 2 \ - --htp_socs=sm8650,sm8635,sm8350,sm7325,sm8450,sm8550,sm7475,sm7550,sc8380xp,sc8280x \ - --memorymapped_buffer_hint=true - # Mobilenetedge TPU model conversion completed - cp ${DLCBUILDDIR}/mobilenet_edgetpu_224_1.0_htp_batched_8_O2.dlc ${MLPERF_MODELS_PATH} - touch $@ - ${DLCBUILDDIR}/mobilenet_v4_float_batched_4.dlc: \ ${DLCBUILDDIR}/mlperf_dlc_prepare_docker.stamp \ ${DLCBUILDDIR}/mobile/.stamp \ @@ -742,9 +401,10 @@ ${DLCBUILDDIR}/ssd_mobiledet_qat_htp.stamp: \ -u ${USERID}:${GROUPID} \ mlperf_dlc_prepare \ /snpe_sdk/bin/x86_64-linux-clang/snpe-dlc-graph-prepare \ + --htp_dlbc=true \ --input_dlc=/output/ssd_mobiledet_qat_quant.dlc \ --output_dlc=/output/ssd_mobiledet_qat_htp.dlc \ - --set_output_layers=Postprocessor/BatchMultiClassNonMaxSuppression \ + 
--set_output_tensors="Postprocessor/BatchMultiClassNonMaxSuppression_classes,Postprocessor/BatchMultiClassNonMaxSuppression_num_detections,detection_scores:0,detection_boxes:0"\ --optimization_level 3 \ --htp_socs=sm8650,sm8635,sm8350,sm7325,sm8450,sm8550,sm7475,sm7550,sc8380xp,sc8280x \ --memorymapped_buffer_hint=true @@ -766,7 +426,7 @@ ${DLCBUILDDIR}/ssd_mobiledet_qat_htp_O2.stamp: \ /snpe_sdk/bin/x86_64-linux-clang/snpe-dlc-graph-prepare \ --input_dlc=/output/ssd_mobiledet_qat_quant.dlc \ --output_dlc=/output/ssd_mobiledet_qat_htp_O2.dlc \ - --set_output_layers=Postprocessor/BatchMultiClassNonMaxSuppression \ + --set_output_tensors="Postprocessor/BatchMultiClassNonMaxSuppression_classes,Postprocessor/BatchMultiClassNonMaxSuppression_num_detections,detection_scores:0,detection_boxes:0"\ --optimization_level 2 \ --htp_socs=sm8650,sm8635,sm8350,sm7325,sm8450,sm8550,sm7475,sm7550,sc8380xp,sc8280x \ --memorymapped_buffer_hint=true @@ -792,7 +452,7 @@ ${DLCBUILDDIR}/mobilebert_quant.pb: \ -v ${MOBILEBERT_MODEL_PATH}:/models \ -u ${USERID}:${GROUPID} \ mlperf_dlc_prepare \ - python3 /usr/local/lib/python3.8/dist-packages/tensorflow/python/tools/freeze_graph.py \ + python3 /usr/local/lib/python3.10/dist-packages/tensorflow/python/tools/freeze_graph.py \ --input_graph=/models/saved_model.pb --input_checkpoint=/models/checkpoints/quant \ --output_graph=/output/mobilebert_quant.pb \ --output_node_names=end_logits,start_logits \ @@ -864,9 +524,10 @@ ${DLCBUILDDIR}/mobilebert_htp.stamp: \ -u ${USERID}:${GROUPID} \ mlperf_dlc_prepare \ /snpe_sdk/bin/x86_64-linux-clang/snpe-dlc-graph-prepare \ + --htp_dlbc=true \ --input_dlc=/output/mobilebert_quant.dlc \ --output_dlc=/output/mobilebert_quantized_htp.dlc \ - --set_output_layers=transpose \ + --set_output_tensors="transpose:0" \ --optimization_level 3 \ --htp_socs=sm8650,sm8635,sm7325,sm8350,sm8450,sm8550,sm7475,sm7550,sc8380xp,sc8280x \ --memorymapped_buffer_hint=true @@ -886,9 +547,10 @@ 
${DLCBUILDDIR}/mobilebert_htp_O2.stamp: \ -u ${USERID}:${GROUPID} \ mlperf_dlc_prepare \ /snpe_sdk/bin/x86_64-linux-clang/snpe-dlc-graph-prepare \ + --htp_dlbc=true \ --input_dlc=/output/mobilebert_quant.dlc \ --output_dlc=/output/mobilebert_quantized_htp_O2.dlc \ - --set_output_layers=transpose \ + --set_output_tensors="transpose:0" \ --optimization_level 2 \ --htp_socs=sm8650,sm8635,sm7325,sm8350,sm8450,sm8550,sm7475,sm7550,sc8380xp,sc8280x \ --memorymapped_buffer_hint=true @@ -949,6 +611,7 @@ ${DLCBUILDDIR}/mobile_mosaic_htp.stamp: \ -u ${USERID}:${GROUPID} \ mlperf_dlc_prepare \ /snpe_sdk/bin/x86_64-linux-clang/snpe-dlc-graph-prepare \ + --htp_dlbc=true \ --input_dlc=/output/mobile_mosaic_quant.dlc \ --output_dlc=/output/mobile_mosaic_htp.dlc \ --optimization_level 3 \ @@ -1036,6 +699,7 @@ ${DLCBUILDDIR}/snusr_htp.stamp: \ -u ${USERID}:${GROUPID} \ mlperf_dlc_prepare \ /snpe_sdk/bin/x86_64-linux-clang/snpe-dlc-graph-prepare \ + --htp_dlbc=true \ --input_dlc=/output/snusr_quant.dlc \ --output_dlc=/output/snusr_htp.dlc \ --optimization_level 3 \ @@ -1067,6 +731,240 @@ ${DLCBUILDDIR}/snusr_htp_O2.stamp: \ cp ${DLCBUILDDIR}/snusr_htp_O2.dlc ${MLPERF_MODELS_PATH} touch $@ +${DLCBUILDDIR}/sd_precompute_data.tar: \ + ${DLCBUILDDIR}/mlperf_dlc_prepare_docker.stamp \ + ${DLCBUILDDIR}/mobile/.stamp \ + # Preparing sd_precompute_data.tar + docker run \ + -v ${SNPE_SDK}:/qnn_sdk \ + -v ${TOPDIR}/mobile_back_qti/DLC/util/:/util \ + -e PYTHONPATH=/qnn_sdk/lib/python \ + -e LD_LIBRARY_PATH=/usr/local/clang-9.0.0/lib:qnn_sdk/lib/x86_64-linux-clang:${LD_LIBRARY_PATH} \ + -v ${DLCBUILDDIR}:/output \ + -v ${DLCBUILDDIR}/stable_diffusion/:/stable_diffusion \ + -w /stable_diffusion \ + mlperf_dlc_prepare \ + python3 /util/StableDiffusion/flatten.py \ + --random_latent_init /stable_diffusion/random_latent_init/random_init_1.pkl \ + --time_step_embedding /stable_diffusion/time_step_embeddings/unet_time_step_embeddings_20.pkl \ + --time_step_list 
/stable_diffusion/scheduler/scheduler_time_steps_20.pkl \ + --unconditional_text_emb /stable_diffusion/unconditional_text_emb.pkl \ + --dumpdir /stable_diffusion/. + mkdir -p ${MLPERF_MODELS_PATH}/stable_diffusion + chmod -R 777 ${MLPERF_MODELS_PATH}/stable_diffusion + cp ${DLCBUILDDIR}/stable_diffusion/sd_precompute_data.tar ${MLPERF_MODELS_PATH}/stable_diffusion/. + cp ${DLCBUILDDIR}/stable_diffusion/scheduler/lambdas.bin ${MLPERF_MODELS_PATH}/stable_diffusion/. + cp ${DLCBUILDDIR}/stable_diffusion/scheduler/betas.bin ${MLPERF_MODELS_PATH}/stable_diffusion/. + chmod -R 777 ${MLPERF_MODELS_PATH}/stable_diffusion + +${DLCBUILDDIR}/text_encoder_qnn.cpp: \ + ${DLCBUILDDIR}/mlperf_dlc_prepare_docker.stamp \ + ${DLCBUILDDIR}/mobile/.stamp \ + ${DLCBUILDDIR}/mlperf_models.stamp + # TEXT-ENCODER conversion and quantization + # cpp & bin files + docker run \ + -v ${SNPE_SDK}:/qnn_sdk \ + -v ${TEXTENCODER_MODEL_PATH}:/models \ + -e PYTHONPATH=/qnn_sdk/lib/python \ + -e LD_LIBRARY_PATH=/usr/local/clang-9.0.0/lib:qnn_sdk/lib/x86_64-linux-clang:${LD_LIBRARY_PATH} \ + -v ${DLCBUILDDIR}:/output \ + -v ${DLCBUILDDIR}/stable_diffusion/:/stable_diffusion \ + -w /stable_diffusion/text_encoder \ + -v ${TOPDIR}:/dir \ + mlperf_dlc_prepare \ + /qnn_sdk/bin/x86_64-linux-clang/qnn-onnx-converter \ + --input_network text_encoder_onnx/text_encoder.onnx \ + --input_list stable_diffusion_models/text_encoder_onnx/text_encoder_input_list.txt \ + --act_bitwidth 16 \ + --bias_bitwidth 32 \ + --quantization_overrides text_encoder_onnx/text_encoder.encodings \ + --output_path /output/text_encoder.cpp + +${DLCBUILDDIR}/text_encoder_qnn_model_generator.stamp: \ + ${DLCBUILDDIR}/mlperf_dlc_prepare_docker.stamp \ + ${DLCBUILDDIR}/mobile/.stamp \ + ${DLCBUILDDIR}/text_encoder_qnn.cpp + # TEXT-ENCODER lib generation started + docker run \ + -v ${SNPE_SDK}:/qnn_sdk \ + -e PYTHONPATH=/qnn_sdk/lib/python \ + -e LD_LIBRARY_PATH=/usr/local/clang-9.0.0/lib:qnn_sdk/lib/x86_64-linux-clang:${LD_LIBRARY_PATH} \ 
+ -v ${DLCBUILDDIR}:/output \ + -u ${USERID}:${GROUPID} \ + mlperf_dlc_prepare \ + /qnn_sdk/bin/x86_64-linux-clang/qnn-model-lib-generator \ + -c /output/text_encoder.cpp \ + -b /output/text_encoder.bin \ + -o /output/model_libs \ + -t x86_64-linux-clang + # Text-encoder lib generation completed + +${DLCBUILDDIR}/text_encoder_qnn_context_binary_generator.stamp: \ + ${DLCBUILDDIR}/mlperf_dlc_prepare_docker.stamp \ + ${DLCBUILDDIR}/mobile/.stamp \ + ${DLCBUILDDIR}/text_encoder_qnn_model_generator.stamp + # TEXT-ENCODER context-binary generation started + docker run \ + -v ${SNPE_SDK}:/qnn_sdk \ + -v ${TEXTENCODER_MODEL_PATH}:/models \ + -e PYTHONPATH=/qnn_sdk/lib/python \ + -e LD_LIBRARY_PATH=/usr/local/clang-9.0.0/lib:qnn_sdk/lib/x86_64-linux-clang:${LD_LIBRARY_PATH} \ + -v ${DLCBUILDDIR}:/output \ + -v ${DLCBUILDDIR}/stable_diffusion/:/stable_diffusion \ + -w /stable_diffusion/text_encoder \ + -u ${USERID}:${GROUPID} \ + mlperf_dlc_prepare \ + /qnn_sdk/bin/x86_64-linux-clang/qnn-context-binary-generator \ + --backend /qnn_sdk/lib/x86_64-linux-clang/libQnnHtp.so \ + --model /output/model_libs/x86_64-linux-clang/libtext_encoder.so \ + --binary_file /output/text_encoder.serialized \ + --config_file mcp_config.json + mkdir -p ${MLPERF_MODELS_PATH}/stable_diffusion + chmod -R 777 ${MLPERF_MODELS_PATH}/stable_diffusion + cp ${DLCBUILDDIR}/text_encoder.serialized.bin ${MLPERF_MODELS_PATH}/stable_diffusion/. 
+ chmod -R 777 ${MLPERF_MODELS_PATH}/stable_diffusion + chmod -R 777 ${DLCBUILDDIR}/stable_diffusion + # TEXT-ENCODER context binary generation completed + +${DLCBUILDDIR}/vae_decoder_qnn.cpp: \ + ${DLCBUILDDIR}/mlperf_dlc_prepare_docker.stamp \ + ${DLCBUILDDIR}/mobile/.stamp \ + ${DLCBUILDDIR}/mlperf_models.stamp + # VAE-DECODER conversion and quantization + # cpp & bin files + docker run \ + -v ${SNPE_SDK}:/qnn_sdk \ + -v ${VAEDECODER_MODEL_PATH}:/models \ + -e PYTHONPATH=/qnn_sdk/lib/python \ + -e LD_LIBRARY_PATH=/usr/local/clang-9.0.0/lib:qnn_sdk/lib/x86_64-linux-clang:${LD_LIBRARY_PATH} \ + -v ${DLCBUILDDIR}:/output \ + -v ${DLCBUILDDIR}/stable_diffusion/:/stable_diffusion \ + -w /stable_diffusion/vae_decoder \ + -v ${TOPDIR}:/dir \ + mlperf_dlc_prepare \ + /qnn_sdk/bin/x86_64-linux-clang/qnn-onnx-converter \ + --input_network vae_decoder_onnx/vae_decoder.onnx \ + --input_list stable_diffusion_models/vae_decoder_onnx/vae_decoder_input_list.txt \ + --act_bitwidth 16 \ + --bias_bitwidth 32 \ + --quantization_overrides vae_decoder_onnx/vae_decoder.encodings \ + --output_path /output/vae_decoder.cpp + +${DLCBUILDDIR}/vae_decoder_qnn_model_generator.stamp: \ + ${DLCBUILDDIR}/mlperf_dlc_prepare_docker.stamp \ + ${DLCBUILDDIR}/mobile/.stamp \ + ${DLCBUILDDIR}/vae_decoder_qnn.cpp + # VAE-DECODER lib generation started + docker run \ + -v ${SNPE_SDK}:/qnn_sdk \ + -e PYTHONPATH=/qnn_sdk/lib/python \ + -e LD_LIBRARY_PATH=/usr/local/clang-9.0.0/lib:qnn_sdk/lib/x86_64-linux-clang:${LD_LIBRARY_PATH} \ + -v ${DLCBUILDDIR}:/output \ + -u ${USERID}:${GROUPID} \ + mlperf_dlc_prepare \ + /qnn_sdk/bin/x86_64-linux-clang/qnn-model-lib-generator \ + -c /output/vae_decoder.cpp \ + -b /output/vae_decoder.bin \ + -o /output/model_libs \ + -t x86_64-linux-clang + # vae-decoder lib generation completed + +${DLCBUILDDIR}/vae_decoder_qnn_context_binary_generator.stamp: \ + ${DLCBUILDDIR}/mlperf_dlc_prepare_docker.stamp \ + ${DLCBUILDDIR}/mobile/.stamp \ + 
${DLCBUILDDIR}/vae_decoder_qnn_model_generator.stamp + # VAE-DECODER lib generation started + docker run \ + -v ${SNPE_SDK}:/qnn_sdk \ + -v ${VAEDECODER_MODEL_PATH}:/models \ + -e PYTHONPATH=/qnn_sdk/lib/python \ + -e LD_LIBRARY_PATH=/usr/local/clang-9.0.0/lib:qnn_sdk/lib/x86_64-linux-clang:${LD_LIBRARY_PATH} \ + -v ${DLCBUILDDIR}:/output \ + -u ${USERID}:${GROUPID} \ + mlperf_dlc_prepare \ + /qnn_sdk/bin/x86_64-linux-clang/qnn-context-binary-generator \ + --backend /qnn_sdk/lib/x86_64-linux-clang/libQnnHtp.so \ + --model /output/model_libs/x86_64-linux-clang/libvae_decoder.so \ + --binary_file /output/vae_decoder.serialized \ + --config_file /models/mcp_config.json + mkdir -p ${MLPERF_MODELS_PATH}/stable_diffusion + chmod -R 777 ${MLPERF_MODELS_PATH}/stable_diffusion + cp ${DLCBUILDDIR}/vae_decoder.serialized.bin ${MLPERF_MODELS_PATH}/stable_diffusion/. + chmod -R 777 ${MLPERF_MODELS_PATH}/stable_diffusion + chmod -R 777 ${DLCBUILDDIR}/stable_diffusion + # VAE context binary generation completed + +${DLCBUILDDIR}/unet_qnn.cpp: \ + ${DLCBUILDDIR}/mlperf_dlc_prepare_docker.stamp \ + ${DLCBUILDDIR}/mobile/.stamp \ + ${DLCBUILDDIR}/mlperf_models.stamp + # UNET conversion and quantization + # cpp & bin files + docker run \ + -v ${SNPE_SDK}:/qnn_sdk \ + -v ${UNET_MODEL_PATH}:/models \ + -e PYTHONPATH=/qnn_sdk/lib/python \ + -e LD_LIBRARY_PATH=/usr/local/clang-9.0.0/lib:qnn_sdk/lib/x86_64-linux-clang:${LD_LIBRARY_PATH} \ + -v ${DLCBUILDDIR}:/output \ + -v ${DLCBUILDDIR}/stable_diffusion/:/stable_diffusion \ + -w /stable_diffusion/unet \ + -v ${TOPDIR}:/dir \ + mlperf_dlc_prepare \ + /qnn_sdk/bin/x86_64-linux-clang/qnn-onnx-converter \ + --input_network unet_onnx_batch_1/unet.onnx \ + -l input_3 NONTRIVIAL \ + --input_list stable_diffusion_models/unet_onnx/unet_input_list.txt \ + --act_bitwidth 16 \ + --bias_bitwidth 32 \ + --quantization_overrides unet_onnx_batch_1/unet.encodings \ + --output_path /output/unet.cpp + +${DLCBUILDDIR}/unet_qnn_model_generator.stamp: \ + 
${DLCBUILDDIR}/mlperf_dlc_prepare_docker.stamp \ + ${DLCBUILDDIR}/mobile/.stamp \ + ${DLCBUILDDIR}/unet_qnn.cpp + # UNET lib generation started + docker run \ + -v ${SNPE_SDK}:/qnn_sdk \ + -e PYTHONPATH=/qnn_sdk/lib/python \ + -e LD_LIBRARY_PATH=/usr/local/clang-9.0.0/lib:qnn_sdk/lib/x86_64-linux-clang:${LD_LIBRARY_PATH} \ + -v ${DLCBUILDDIR}:/output \ + -u ${USERID}:${GROUPID} \ + mlperf_dlc_prepare \ + /qnn_sdk/bin/x86_64-linux-clang/qnn-model-lib-generator \ + -c /output/unet.cpp \ + -b /output/unet.bin \ + -o /output/model_libs \ + -t x86_64-linux-clang \ + -t aarch64-android + # UNET lib generation completed + +${DLCBUILDDIR}/unet_qnn_context_binary_generator.stamp: \ + ${DLCBUILDDIR}/mlperf_dlc_prepare_docker.stamp \ + ${DLCBUILDDIR}/mobile/.stamp \ + ${DLCBUILDDIR}/unet_qnn_model_generator.stamp + # UNET context binary generation started + docker run \ + -v ${SNPE_SDK}:/qnn_sdk \ + -v ${UNET_MODEL_PATH}:/models \ + -e PYTHONPATH=/qnn_sdk/lib/python \ + -e LD_LIBRARY_PATH=/usr/local/clang-9.0.0/lib:qnn_sdk/lib/x86_64-linux-clang:${LD_LIBRARY_PATH} \ + -v ${DLCBUILDDIR}:/output \ + -u ${USERID}:${GROUPID} \ + mlperf_dlc_prepare \ + /qnn_sdk/bin/x86_64-linux-clang/qnn-context-binary-generator \ + --backend /qnn_sdk/lib/x86_64-linux-clang/libQnnHtp.so \ + --model /output/model_libs/x86_64-linux-clang/libunet.so \ + --binary_file /output/unet.serialized \ + --config_file /models/mcp_config.json + mkdir -p ${MLPERF_MODELS_PATH}/stable_diffusion + chmod -R 777 ${MLPERF_MODELS_PATH}/stable_diffusion + cp ${DLCBUILDDIR}/unet.serialized.bin ${MLPERF_MODELS_PATH}/stable_diffusion/. 
+ chmod -R 777 ${MLPERF_MODELS_PATH}/stable_diffusion + chmod -R 777 ${DLCBUILDDIR}/stable_diffusion + # UNET context binary generation completed + #################################################################################### # CALIBRATION / QUANTIZATION #################################################################################### @@ -1126,39 +1024,6 @@ ${DLCBUILDDIR}/ade20k/state/quantdata.stamp: \ mv ${DLCBUILDDIR}/ade20k/resized_raw ${DLCBUILDDIR}/ade20k/quantdata touch $@ -# Imagenet 224x224 calibration data preprocessing -${DLCBUILDDIR}/imagenet/state/resized_224.stamp: \ - ${DATASETS_OUT}/state/calibration.stamp \ - ${DLCBUILDDIR}/mlperf_mobile_docker_1_1.stamp - # Scaling Imagenet images to 224x224 - rm -rf ${DLCBUILDDIR}/imagenet/resized_224 - mkdir -p ${DLCBUILDDIR}/imagenet/resized_224 - docker run \ - -v ${TOPDIR}/datasets/util:/util \ - -v ${CALIBRATION_DATA}/imagenet:/imagenet \ - -v ${DLCBUILDDIR}:/output \ - -u ${USERID}:${GROUPID} \ - mlperf_mobile:1.1 \ - /bin/bash -c "python3 /util/imagenet/Resize224.py /imagenet/images /output/imagenet/resized_224" - mkdir -p ${DLCBUILDDIR}/imagenet/state - touch $@ - -${DLCBUILDDIR}/imagenet/state/quantdata_224.stamp: \ - ${DLCBUILDDIR}/imagenet/state/resized_224.stamp \ - ${DLCBUILDDIR}/mlperf_mobile_docker_1_1.stamp - # Generating Imagenet quantization data for SNPE - rm -rf ${DLCBUILDDIR}/imagenet/resized_224_raw - mkdir -p ${DLCBUILDDIR}/imagenet/resized_224_raw - docker run \ - -v ${TOPDIR}/datasets/util:/util \ - -v ${DLCBUILDDIR}/imagenet:/imagenet \ - -u ${USERID}:${GROUPID} \ - mlperf_mobile:1.1 \ - /bin/bash -c "python3 /util/common/jpg_to_raw.py /imagenet/resized_224" - rm -rf ${DLCBUILDDIR}/imagenet/quantdata_224 - mv ${DLCBUILDDIR}/imagenet/resized_224_raw ${DLCBUILDDIR}/imagenet/quantdata_224 - touch $@ - # Imagenet 384x384 calibration data preprocessing ${DLCBUILDDIR}/imagenet/state/resized_384.stamp: \ ${DATASETS_OUT}/state/calibration.stamp \ @@ -1246,12 +1111,8 @@ 
gen-htp-dlc-info: \ -u ${USERID}:${GROUPID} \ mlperf_dlc_prepare \ /bin/bash -c '\ - /snpe_sdk/bin/x86_64-linux-clang/snpe-dlc-info -i /dlc/mobilenet_edgetpu_224_1.0_htp.dlc && \ /snpe_sdk/bin/x86_64-linux-clang/snpe-dlc-info -i /dlc/mobilenet_v4_htp.dlc && \ /snpe_sdk/bin/x86_64-linux-clang/snpe-dlc-info -i /dlc/ssd_mobiledet_qat_htp.dlc && \ - /snpe_sdk/bin/x86_64-linux-clang/snpe-dlc-info -i /dlc/mobilenet_edgetpu_224_1.0_htp_batched_3.dlc && \ - /snpe_sdk/bin/x86_64-linux-clang/snpe-dlc-info -i /dlc/mobilenet_edgetpu_224_1.0_htp_batched_4.dlc && \ - /snpe_sdk/bin/x86_64-linux-clang/snpe-dlc-info -i /dlc/mobilenet_edgetpu_224_1.0_htp_batched_8.dlc && \ /snpe_sdk/bin/x86_64-linux-clang/snpe-dlc-info -i /dlc/mobilenet_v4_htp_batched_4.dlc && \ /snpe_sdk/bin/x86_64-linux-clang/snpe-dlc-info -i /dlc/mobile_mosaic_htp.dlc && \ /snpe_sdk/bin/x86_64-linux-clang/snpe-dlc-info -i /dlc/snusr_htp.dlc && \ @@ -1261,4 +1122,5 @@ gen-htp-dlc-info: \ # Clean #################################################################################### clean: - rm -rf ${BUILDDIR}/DLC \ No newline at end of file + rm -rf ${BUILDDIR}/DLC + diff --git a/mobile_back_qti/DLC/README.md b/mobile_back_qti/DLC/README.md index d467d983e..e4a951579 100644 --- a/mobile_back_qti/DLC/README.md +++ b/mobile_back_qti/DLC/README.md @@ -1,6 +1,6 @@ # DLC -This Makefile will create the DLCs used by the QTI backend. +This Makefile will create the DLCs / Bins used by the QTI backend. 
The DLCs are already checked into [Mobile Models](https://github.com/mlcommons/mobile_models) diff --git a/mobile_back_qti/DLC/util/StableDiffusion/AIMET/Dockerfile b/mobile_back_qti/DLC/util/StableDiffusion/AIMET/Dockerfile new file mode 100644 index 000000000..d7b9b2dae --- /dev/null +++ b/mobile_back_qti/DLC/util/StableDiffusion/AIMET/Dockerfile @@ -0,0 +1,6 @@ +FROM artifacts.codelinaro.org/codelinaro-aimet/aimet-dev:1.29.0.torch-gpu + +RUN pip install --upgrade huggingface_hub +RUN python3 -m pip install --quiet --upgrade jax ftfy diffusers==0.30.1 transformers==4.30.1 tokenizers==0.11.1 onnx==1.11.0 onnxsim==0.4.33 safetensors==0.3.3 accelerate +RUN pip install --quiet --upgrade https://github.com/quic/aimet/releases/download/1.29.0/AimetCommon-torch_gpu_1.29.0-cp38-cp38-linux_x86_64.whl +RUN pip install --quiet --upgrade https://github.com/quic/aimet/releases/download/1.29.0/AimetTorch-torch_gpu_1.29.0-cp38-cp38-linux_x86_64.whl \ No newline at end of file diff --git a/mobile_back_qti/DLC/util/StableDiffusion/AIMET/Makefile b/mobile_back_qti/DLC/util/StableDiffusion/AIMET/Makefile new file mode 100644 index 000000000..8b2c218ba --- /dev/null +++ b/mobile_back_qti/DLC/util/StableDiffusion/AIMET/Makefile @@ -0,0 +1,122 @@ +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+########################################################################## + +this_mkfile:=$(abspath $(lastword $(MAKEFILE_LIST))) +AIMETBUILDDIR:=$(abspath $(shell dirname ${this_mkfile})) +TOPDIR:=${AIMETBUILDDIR}/../../../../.. + +.PHONY: clean aimet_calibration + +${AIMETBUILDDIR}/nvidia_docker_runtime.stamp: \ + ### Executing nvidia docker container + curl -fsSL https\://nvidia.github.io/libnvidia-container/gpgkey | sudo gpg --dearmor -o /usr/share/keyrings/nvidia-container-toolkit-keyring.gpg \ + && curl -s -L https\://nvidia.github.io/libnvidia-container/stable/deb/nvidia-container-toolkit.list | sed 's#deb https://#deb [signed-by=/usr/share/keyrings/nvidia-container-toolkit-keyring.gpg] https://#g' | sudo tee /etc/apt/sources.list.d/nvidia-container-toolkit.list + sed -i -e '/experimental/ s/^#//g' /etc/apt/sources.list.d/nvidia-container-toolkit.list + sudo apt-get update + sudo apt-get install -y nvidia-container-toolkit + sudo nvidia-ctk runtime configure --runtime=docker + sudo systemctl restart docker + touch $@ + +aimet_calibration: \ + clean \ + ${AIMETBUILDDIR}/aimet_calibration.stamp \ + ${AIMETBUILDDIR}/input_vectors_generation.stamp \ + copy_files_to_ouptut \ + copy_configs_to_ouptut + +${AIMETBUILDDIR}/aimet_torch_gpu_docker.stamp: \ + ## Building aimet-docker + docker image build -t aimet_torch_gpu_docker . + touch $@ + +${AIMETBUILDDIR}/aimet_calibration.stamp: \ + ${AIMETBUILDDIR}/nvidia_docker_runtime.stamp \ + ${AIMETBUILDDIR}/aimet_torch_gpu_docker.stamp + # Executing docker instance + docker run \ + -v /etc/localtime:/etc/localtime:ro \ + -v /etc/timezone:/etc/timezone:ro \ + -v ${AIMETBUILDDIR}:/tmp \ + -w /tmp/example1 \ + --network=host \ + --ulimit core=-1 \ + --ipc=host \ + --shm-size=8G \ + --cap-add=SYS_PTRACE \ + --security-opt seccomp=unconfined \ + aimet_torch_gpu_docker \ + /bin/bash -c "cp -rv ../aimet.py . 
&& python3 aimet.py" + +${AIMETBUILDDIR}/input_vectors_generation.stamp: \ + ${AIMETBUILDDIR}/aimet_torch_gpu_docker.stamp + # Creates input vectors to be passed during conversion + docker run \ + -v /etc/localtime:/etc/localtime:ro \ + -v /etc/timezone:/etc/timezone:ro \ + -v ${AIMETBUILDDIR}:/tmp \ + -w /tmp \ + --network=host \ + --ulimit core=-1 \ + --ipc=host \ + --shm-size=8G \ + --cap-add=SYS_PTRACE \ + --security-opt seccomp=unconfined \ + aimet_torch_gpu_docker \ + /bin/bash -c "python3 example2/generate_inputs.py --pickle_path example1/_exports_/fp32.npy --working_dir ." + +copy_files_to_ouptut: + #Copies generated artifacts to output/DLC/stable_diffusion + chmod -R 777 ${TOPDIR}/output/DLC/ + mkdir -p ${TOPDIR}/output/DLC/stable_diffusion + cp -rv ${AIMETBUILDDIR}/example1/_exports_/* ${TOPDIR}/output/DLC/stable_diffusion/. + cp -rv ${AIMETBUILDDIR}/stable_diffusion_models ${TOPDIR}/output/DLC/stable_diffusion/. + +copy_configs_to_ouptut: + #Copies configs to output + mkdir -p ${TOPDIR}/output/DLC/stable_diffusion/text_encoder + mkdir -p ${TOPDIR}/output/DLC/stable_diffusion/vae_decoder + mkdir -p ${TOPDIR}/output/DLC/stable_diffusion/unet + mv ${TOPDIR}/output/DLC/stable_diffusion/text_encoder_onnx ${TOPDIR}/output/DLC/stable_diffusion/text_encoder/. + mv ${TOPDIR}/output/DLC/stable_diffusion/vae_decoder_onnx ${TOPDIR}/output/DLC/stable_diffusion/vae_decoder/. + mv ${TOPDIR}/output/DLC/stable_diffusion/unet_onnx_batch_1 ${TOPDIR}/output/DLC/stable_diffusion/unet/. + cp -rv ${AIMETBUILDDIR}/graph_config_text_encoder.json ${TOPDIR}/output/DLC/stable_diffusion/text_encoder/graph_config.json + cp -rv ${AIMETBUILDDIR}/mcp_config.json ${TOPDIR}/output/DLC/stable_diffusion/text_encoder/. + cp -rv ${AIMETBUILDDIR}/graph_config_vae.json ${TOPDIR}/output/DLC/stable_diffusion/vae_decoder/graph_config.json + cp -rv ${AIMETBUILDDIR}/mcp_config.json ${TOPDIR}/output/DLC/stable_diffusion/vae_decoder/. 
+ cp -rv ${AIMETBUILDDIR}/graph_config_unet.json ${TOPDIR}/output/DLC/stable_diffusion/unet/graph_config.json + cp -rv ${AIMETBUILDDIR}/mcp_config.json ${TOPDIR}/output/DLC/stable_diffusion/unet/. + mkdir -p ${TOPDIR}/output/DLC/stable_diffusion/text_encoder/stable_diffusion_models + cp -rv ${AIMETBUILDDIR}/stable_diffusion_models/text_encoder_onnx ${TOPDIR}/output/DLC/stable_diffusion/text_encoder/stable_diffusion_models/. + mkdir -p ${TOPDIR}/output/DLC/stable_diffusion/vae_decoder/stable_diffusion_models + cp -rv ${AIMETBUILDDIR}/stable_diffusion_models/vae_decoder_onnx ${TOPDIR}/output/DLC/stable_diffusion/vae_decoder/stable_diffusion_models/. + mkdir -p ${TOPDIR}/output/DLC/stable_diffusion/unet/stable_diffusion_models + cp -rv ${AIMETBUILDDIR}/stable_diffusion_models/unet_onnx ${TOPDIR}/output/DLC/stable_diffusion/unet/stable_diffusion_models/. + chmod -R 777 ${TOPDIR}/output/DLC/stable_diffusion + +clean: + # Removes all generated outputs (except example1/_exports_) from AIMET and DLC/stable_diffusion + rm -rf ${AIMETBUILDDIR}/*.stamp + rm -rf ${AIMETBUILDDIR}/example1/*.png + rm -rf ${AIMETBUILDDIR}/stable_diffusion_models + rm -rf ${TOPDIR}/output/DLC/stable_diffusion/* + +deep_clean: + # Caution:: Removes all generated outputs (including example1/_exports_) from AIMET and DLC/stable_diffusion + rm -rf ${AIMETBUILDDIR}/*.stamp + rm -rf ${AIMETBUILDDIR}/example1/_exports_ + rm -rf ${AIMETBUILDDIR}/example1/*.png + rm -rf ${AIMETBUILDDIR}/stable_diffusion_models + rm -rf ${TOPDIR}/output/DLC/stable_diffusion/* \ No newline at end of file diff --git a/mobile_back_qti/DLC/util/StableDiffusion/AIMET/aimet.py b/mobile_back_qti/DLC/util/StableDiffusion/AIMET/aimet.py new file mode 100644 index 000000000..bf989b04c --- /dev/null +++ b/mobile_back_qti/DLC/util/StableDiffusion/AIMET/aimet.py @@ -0,0 +1,193 @@ +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +########################################################################## + +from huggingface_hub.hf_api import HfFolder +HfFolder.save_token('') + +print("##############Json import########################") +import json +from argparse import Namespace + +with open('config.json', 'rt') as f: + config = Namespace(**json.load(f)) + +import sys +import os +sys.path.append('../') + +from utilities.nsptargets import NspTargets + +# Android GEN2 and GEN3 are supported for this notebook +nsp_target = NspTargets.Android.GEN3 + +# Select quantsim config based on target +config.config_file = f'./quantsim_configs/htp_quantsim_config_{nsp_target.dsp_arch}.json' +print(f"Using {config.config_file}") + +# Uncomment the cell below to sanity check the pipeline before doing a full run +# os.environ['SANITY_CHECK_NOTEBOOK_FLOW'] = "True" + +if os.environ.get("SANITY_CHECK_NOTEBOOK_FLOW") == "True": + config.num_calibration_samples = 1 + config.adaround_iter_text_encoder = 1 + config.adaround_samples_text_encoder = 1 + config.adaround_iter_unet = 1 + config.adaround_samples_unet = 1 + config.adaround_iter_vae = 1 + config.adaround_samples_vae = 1 + + + +print("##############Package import########################") +import torch +from redefined_modules.transformers.models.clip.modeling_clip import CLIPTextModel +from redefined_modules.diffusers.models.unet_2d_condition import UNet2DConditionModel +from 
redefined_modules.diffusers.models.vae import AutoencoderKLDecoder +from diffusers import DPMSolverMultistepScheduler +from transformers import CLIPTokenizer + +if config.stable_diffusion_variant == "1.5": + text_encoder_repo = 'benjamin-paine/stable-diffusion-v1-5' + text_encoder_subfolder = 'text_encoder' + text_encoder_revision = 'main' + unet_repo = 'benjamin-paine/stable-diffusion-v1-5' + unet_subfolder = 'unet' + unet_revision = 'main' + vae_repo = 'benjamin-paine/stable-diffusion-v1-5' + vae_subfolder = 'vae' + vae_revision = 'main' + tokenizer_repo = 'openai/clip-vit-large-patch14' + tokenizer_subfolder = '' + tokenizer_revision = 'main' +elif config.stable_diffusion_variant == "2.1": + text_encoder_repo = "stabilityai/stable-diffusion-2-1-base" + text_encoder_subfolder = 'text_encoder' + text_encoder_revision = 'main' + unet_repo = "stabilityai/stable-diffusion-2-1-base" + unet_subfolder = 'unet' + unet_revision = 'main' + vae_repo = "stabilityai/stable-diffusion-2-1-base" + vae_subfolder = 'vae' + vae_revision = 'main' + tokenizer_repo = "stabilityai/stable-diffusion-2-1-base" + tokenizer_subfolder = 'tokenizer' + tokenizer_revision = 'main' +else: + raise Exception(f"config.stable_diffusion_variant must be either '1.5' or '2.1', found {config.stable_diffusion_variant}") + + + +print("############## Hugging face pipeline initialization ########################") +device = 'cuda' +dtype = torch.half if config.half_precision else torch.float + +print("Loading pre-trained TextEncoder model") +text_encoder = CLIPTextModel.from_pretrained(text_encoder_repo, + subfolder=text_encoder_subfolder, revision=text_encoder_revision, + torch_dtype=dtype, cache_dir=config.cache_dir).to(device) +text_encoder.config.return_dict = False + +print("Loading pre-trained UNET model") +unet = UNet2DConditionModel.from_pretrained(unet_repo, + subfolder=unet_subfolder, revision=unet_revision, + torch_dtype=dtype, cache_dir=config.cache_dir).to(device) + +print("Loading pre-trained 
VAE model") +vae = AutoencoderKLDecoder.from_pretrained(vae_repo, + subfolder=vae_subfolder, revision=vae_revision, + torch_dtype=dtype, cache_dir=config.cache_dir).to(device) +vae.config.return_dict = False + +print("Loading scheduler") +scheduler = DPMSolverMultistepScheduler(beta_start=0.00085, + beta_end=0.012, + beta_schedule="scaled_linear", + num_train_timesteps=1000) +scheduler.set_timesteps(config.diffusion_steps) +scheduler.config.prediction_type = 'epsilon' + +print("Loading tokenizer") +tokenizer = CLIPTokenizer.from_pretrained(tokenizer_repo, + subfolder=tokenizer_subfolder, revision=tokenizer_revision, + cache_dir=config.cache_dir) + + + +print("############## Floating pt evaluation ########################") +from stable_diff_pipeline import run_the_pipeline, run_tokenizer, run_text_encoder, run_diffusion_steps, run_vae_decoder, save_image + +prompt = "decorated modern country house interior, 8 k, light reflections" +image = run_the_pipeline(prompt, unet, text_encoder, vae, tokenizer, scheduler, config, test_name='fp32') +save_image(image.squeeze(0), 'generated.png') + +from IPython.display import Image, display +display(Image(filename='generated.png')) + + + +print("############## Calibrating TE ########################") +from aimet_quantsim import apply_adaround_te, calibrate_te + +with open(config.calibration_prompts, "rt") as f: + print(f'Loading prompts from {config.calibration_prompts}') + prompts = f.readlines() + prompts = prompts[:config.num_calibration_samples] + +tokens = [run_tokenizer(tokenizer, prompt) for prompt in prompts] + +text_encoder_sim = calibrate_te(text_encoder, tokens, config) + + +print("############## Calibrating UNET ########################") +from aimet_quantsim import calibrate_unet, replace_mha_with_sha_blocks + +embeddings = [(run_text_encoder(text_encoder, uncond), + run_text_encoder(text_encoder, cond)) for cond, uncond in tokens] +embeddings = [torch.cat([uncond, cond])for uncond, cond in embeddings] + +unet_sim 
= calibrate_unet(unet, embeddings, scheduler, config) + +replace_mha_with_sha_blocks(unet) # convert unet to SHA so it has same expected inputs as unet_sim which is SHA + + +print("############## Calibrating VAE ########################") +from aimet_quantsim import calibrate_vae +from tqdm.auto import tqdm + +latents = [run_diffusion_steps(unet, emb, scheduler, config, randomize_seed=True) for emb in tqdm(embeddings)] +print('Obtained latents using UNET QuantSim') + +vae_sim = calibrate_vae(vae, latents, config) + + + +print("############## Running quantized off target inference ########################") +image = run_the_pipeline(prompt, unet_sim.model, text_encoder_sim.model, vae_sim.model, tokenizer, scheduler, config, test_name="quantized") +save_image(image.squeeze(0), 'generated_after_quant.png') + +display(Image(filename='generated_after_quant.png')) + + + +print("############## Export all models ########################") +from aimet_quantsim import export_all_models + +export_all_models(text_encoder_sim, unet_sim, vae_sim, tokens, embeddings, latents, batch_sizes_unet=[1]) + + +print("############## Generate artifacts ########################") +from utilities.generate_target_artifacts import generate_target_artifacts + +generate_target_artifacts(text_encoder, unet, None, tokenizer, scheduler, config, diffusion_steps=[20,50], seed_list=[1], min_seed=633994880, max_seed=633994880) diff --git a/mobile_back_qti/DLC/util/StableDiffusion/AIMET/graph_config_text_encoder.json b/mobile_back_qti/DLC/util/StableDiffusion/AIMET/graph_config_text_encoder.json new file mode 100644 index 000000000..57345a74f --- /dev/null +++ b/mobile_back_qti/DLC/util/StableDiffusion/AIMET/graph_config_text_encoder.json @@ -0,0 +1,17 @@ +{ + "graphs": [{ + "graph_names":["text_encoder"], + "vtcm_mb":8, + "O" : 3, + "fp16_relaxed_precision": 0 + }], + "devices": [ + { "pd_session": "unsigned", + "dsp_arch": "v75", + "cores":[{ + "rpc_control_latency": 100, + "perf_profile": "burst" + 
}] + } + ] + } \ No newline at end of file diff --git a/mobile_back_qti/DLC/util/StableDiffusion/AIMET/graph_config_unet.json b/mobile_back_qti/DLC/util/StableDiffusion/AIMET/graph_config_unet.json new file mode 100644 index 000000000..9d2d4ee11 --- /dev/null +++ b/mobile_back_qti/DLC/util/StableDiffusion/AIMET/graph_config_unet.json @@ -0,0 +1,17 @@ +{ + "graphs": [{ + "graph_names":["unet"], + "vtcm_mb":8, + "O" : 3, + "fp16_relaxed_precision": 0 + }], + "devices": [ + { "pd_session": "unsigned", + "dsp_arch": "v75", + "cores":[{ + "rpc_control_latency": 100, + "perf_profile": "burst" + }] + } + ] + } diff --git a/mobile_back_qti/DLC/util/StableDiffusion/AIMET/graph_config_vae.json b/mobile_back_qti/DLC/util/StableDiffusion/AIMET/graph_config_vae.json new file mode 100644 index 000000000..39b312bb1 --- /dev/null +++ b/mobile_back_qti/DLC/util/StableDiffusion/AIMET/graph_config_vae.json @@ -0,0 +1,17 @@ +{ + "graphs": [{ + "graph_names":["vae_decoder"], + "vtcm_mb":8, + "O" : 3, + "fp16_relaxed_precision": 0 + }], + "devices": [ + { "pd_session": "unsigned", + "dsp_arch": "v75", + "cores":[{ + "rpc_control_latency": 100, + "perf_profile": "burst" + }] + } + ] + } diff --git a/mobile_back_qti/DLC/util/StableDiffusion/AIMET/mcp_config.json b/mobile_back_qti/DLC/util/StableDiffusion/AIMET/mcp_config.json new file mode 100644 index 000000000..7a3a1927a --- /dev/null +++ b/mobile_back_qti/DLC/util/StableDiffusion/AIMET/mcp_config.json @@ -0,0 +1,5 @@ +{ +"backend_extensions" : + {"shared_library_path" : "/qnn_sdk/lib/x86_64-linux-clang/libQnnHtpNetRunExtensions.so", + "config_file_path" : "/models/graph_config.json"} +} \ No newline at end of file diff --git a/mobile_back_qti/DLC/util/StableDiffusion/README.md b/mobile_back_qti/DLC/util/StableDiffusion/README.md new file mode 100644 index 000000000..31c9b7f97 --- /dev/null +++ b/mobile_back_qti/DLC/util/StableDiffusion/README.md @@ -0,0 +1,61 @@ +# Stable Diffusion + +### This readme contains necessary steps to: +* Run 
AIMET quantization +* Convert generated onnx files to bin files +* To generate all the artifacts needed for stable diffusion inference on a Qualcomm SoC + + +### Platform requirements + +* Machine running Ubuntu 20.04 at least +* AIMET PRO version 1.29.0 (the make script will install it automatically) +* Docker version 20.10.24 +* Machine enabled with Nvidia Tesla A100 or Tesla V100 (32GB at least) +* NVIDIA driver version equivalent to 525.60.13 + + +### Steps to execute + +`Please follow below steps in the mentioned order and run them as root to avoid permission issues` + +#### Prerequisites +* Clone the mobile_app_open repository + +* Install Qualcomm Package manager on the linux machine + +```shell +sudo dpkg -i ./QualcommPackageManager3.3.0.111.1.Linux-x86.deb +``` + +* Extract the SNPE SDK (from Requirements above) to mobile_app_open/mobile_back_qti + +```shell +qpm-cli --extract ./qualcomm_neural_processing_sdk.2.25.0.240728.Linux-AnyCPU.qik +mkdir mobile_app_open/mobile_back_qti/qairt/ +cp -rv /opt/qcom/aistack/qairt/2.25.0.240728 mobile_app_open/mobile_back_qti/qairt/ +``` + +Once done, + +* Clone the AIMET SD notebook repository inside + /mobile_back_qti/DLC/util/StableDiffusion/AIMET + +* Create a Hugging Face access token and paste it on line 2 of the aimet.py script, inside the /mobile_back_qti/DLC/util/StableDiffusion/AIMET folder. + A placeholder is provided in aimet.py. 
 + +* Inside AIMET directory run this make command + ```shell + sudo make aimet_calibration + ``` +* Once the above make command completes successfully, move to + /mobile_back_qti/DLC or type + ```shell + cd ../../../ + ``` +* After reaching /mobile_back_qti/DLC run this make command + ```shell + sudo make stable_diffusion_qnn SNPE_SDK=/mobile_back_qti/qairt/ + ``` +* After successful execution, all the artifacts needed to run stable diffusion inference on device will be located in + /output/DLC/mlperf_models/stable_diffusion diff --git a/mobile_back_qti/DLC/util/StableDiffusion/flatten.py b/mobile_back_qti/DLC/util/StableDiffusion/flatten.py new file mode 100644 index 000000000..4a0f25d68 --- /dev/null +++ b/mobile_back_qti/DLC/util/StableDiffusion/flatten.py @@ -0,0 +1,294 @@ +# Copyright (c) 2022-2024 Qualcomm Innovation Center, Inc. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+########################################################################## + +#!/usr/bin/python3 +import argparse +from datetime import datetime +import pickle +import copy +import sys +import os +import numpy as np +import pprint +pp = pprint.PrettyPrinter(indent=4) + +def check_shape_type(name, tensor_np, expected_shape, expected_np_type): + if tensor_np.shape != expected_shape: + raise Exception(f"{name} shape is {tensor_np.shape}, expect {expected_shape}") + if tensor_np.dtype != expected_np_type: + raise Exception(f"{name} is {tensor_np.dtype}, expect {expected_np_type}") + +def extract_data(ts_dict): + iter = ts_dict['iteration'] + if not 'time_step' in ts_dict: + raise Exception(f'Picke file parsing error: key "time_step" is not found') + ts_np = ts_dict['time_step'].astype(np.int32) + print(f'iteration {iter}, time_step: {ts_np}') + + if not 'timeembedding' in ts_dict: + raise Exception(f'Picke file parsing error: key "timeembedding" is not found') + + ts_embed_np = ts_dict['timeembedding'] + print(f'iteration {iter}, timeembedding: ', type(ts_embed_np), ts_embed_np.shape, ts_embed_np.dtype,ts_embed_np[0,0:4]) + check_shape_type('timeembedding', ts_embed_np, (1,1280), np.float32) + if not 'random_init' in ts_dict: + raise Exception(f'Picke file parsing error: key "random_init" is not found') + + rad = ts_dict['random_init'] + if not 'seed' in rad: + raise Exception(f'Picke file parsing error: key "seed" is not found') + if not 'latent_vector' in rad: + raise Exception(f'Picke file parsing error: key "latent_vector" is not found') + + seed = rad['seed'] + lv_np = rad['latent_vector'] + print('random_init.latent_vector_nchw', type(lv_np), lv_np.shape, lv_np.dtype) + check_shape_type('random_init.latent_vector', lv_np, (1,4,64,64), np.float32) + + lv_nhwc_np = np.moveaxis(lv_np, [0,1,2,3], [0,3,1,2]) + print(f'iteration {iter}, seed: {seed}, random_init.latent_vector_nhwc', type(lv_nhwc_np), lv_nhwc_np.shape, lv_nhwc_np.dtype) + print(lv_nhwc_np[0,0,0:2,:]) + 
+ # extract uncond_text_embedding_np, TBD + uncond_embedding_key = 'uncond_text_embedding' + if not uncond_embedding_key in ts_dict: + print(f'Picke file parsing error: key {uncond_embedding_key} is not found') + uncond_text_embedding_np = None + else: + uncond_text_embedding_np = ts_dict[uncond_embedding_key] + print(f'iteration {iter}, {uncond_embedding_key}: ', type(uncond_text_embedding_np), + uncond_text_embedding_np.shape, uncond_text_embedding_np.dtype,uncond_text_embedding_np[0,0,0:4]) + check_shape_type(uncond_embedding_key, uncond_text_embedding_np, (1,77,768), np.float32) + + return (iter, ts_np, ts_embed_np, seed, lv_nhwc_np, uncond_text_embedding_np) + + +def parse_pickle(pickle_file): + random_init_dict = {} + ts_list_dict = {} + ts_embed_list_dict = {} + num_rec = 0 + + print(f'parsing {pickle_file} ...') + fd = open(pickle_file, 'rb') + tensor_dict_list = pickle.load(fd) + + num_steps = 0 + ts_list = [] + ts_embed_list = [] + for ts_dict in tensor_dict_list: + if not 'iteration' in ts_dict: + raise Exception(f'Picke file parsing error: key "iteration" is not found') + + iter = ts_dict['iteration'] + (iter, ts_np, ts_embed_np, seed, lv_nhwc_np, uncond_text_embedding_np) = extract_data(ts_dict) + num_rec += 1 + if iter == 0 and iter < num_steps: + print(f'num_steps is {num_steps}, iteration:{iter} wrap around, reset num_steps...') + # iter wrap around start of next session, the first session is done + ts_list_dict[num_steps] = ts_list + ts_embed_list_dict[num_steps] = ts_embed_list + num_steps = 0 + ts_list = [] + ts_embed_list = [] + + num_steps += 1 + ts_list.append(ts_np) + ts_embed_list.append(ts_embed_np) + + random_init_dict[seed] = lv_nhwc_np + + ts_list_dict[num_steps] = ts_list + ts_embed_list_dict[num_steps] = ts_embed_list + + return (num_rec, ts_list_dict, ts_embed_list_dict, random_init_dict, uncond_text_embedding_np) + +def parse_random_latent_init_pickle(pickle_file): + random_init_dict = {} + + print(f'parsing {pickle_file} ...') + fd 
= open(pickle_file, 'rb') + tensor_dict_list = pickle.load(fd) + + for seed in tensor_dict_list: + lv_np = tensor_dict_list[seed] + print('random_init.latent_vector_nchw', type(lv_np), lv_np.shape, lv_np.dtype) + check_shape_type('random_init.latent_vector', lv_np, (1,4,64,64), np.float32) + + lv_nhwc_np = np.moveaxis(lv_np, [0,1,2,3], [0,3,1,2]) + print(f'seed: {seed}, random_init.latent_vector_nhwc', type(lv_nhwc_np), lv_nhwc_np.shape, lv_nhwc_np.dtype) + print(lv_nhwc_np[0,0,0:2,:]) + + random_init_dict[int(seed)] = lv_nhwc_np + + return len(tensor_dict_list.keys()), random_init_dict + +def parse_ts_embedding_pickle(pickle_file): + print(f'parsing {pickle_file} ...') + fd = open(pickle_file, 'rb') + tensor_dict_list = pickle.load(fd) + + keys = sorted([int(key) for key in tensor_dict_list.keys()]) + print(keys) + + ts_embed_list = [] + for iter_num in keys: + ts_embed_np = tensor_dict_list[str(iter_num)] + print(f'iteration {iter_num}, timeembedding: ', type(ts_embed_np), ts_embed_np.shape, ts_embed_np.dtype,ts_embed_np[0,0:4]) + check_shape_type('timeembedding', ts_embed_np, (1,1280), np.float32) + ts_embed_list.append(ts_embed_np) + + return len(keys), ts_embed_list + +def parse_ts_list_pickle(pickle_file): + print(f'parsing {pickle_file} ...') + fd = open(pickle_file, 'rb') + ts_data = pickle.load(fd) + + ts_np = ts_data.astype(np.int32) + print(f'time_step: {ts_np}') + + return len(ts_np), ts_np + +def parse_unconditional_encoding_pickle(pickle_file): + print(f'parsing {pickle_file} ...') + fd = open(pickle_file, 'rb') + uncond_text_embedding_np = pickle.load(fd) + + print(f'uncond_text_embedding: ', type(uncond_text_embedding_np), + uncond_text_embedding_np.shape, uncond_text_embedding_np.dtype,uncond_text_embedding_np[0,0,0:4]) + check_shape_type('uncond_text_embedding', uncond_text_embedding_np, (1,77,768), np.float32) + + return uncond_text_embedding_np + +def dump_data(pickle_stats, + ts_list_dict, ts_embed_list_dict, random_init_dict, 
uncond_text_embedding_np, dumpdir): + + file_list =[] + #dump random_init varaibles + seed_list = list(sorted(random_init_dict.keys())) + seed = seed_list[0] + shape_str = 'x'.join([str(e) for e in random_init_dict[seed].shape]) + f_name = os.path.join(dumpdir, f'rand_init_{len(seed_list)}_seeds_{shape_str}_float32.bin.rand') + rand_file = open(f_name, 'wb') + + v_np = np.array(len(seed_list)).astype(np.int32) + v_np.tofile(rand_file) + for seed in seed_list: + v_np = np.array(seed).astype(np.int32) + v_np.tofile(rand_file) + for seed in seed_list: + random_init_dict[seed].tofile(rand_file) + rand_file.close() + file_list.append(f_name) + + for num_steps in ts_list_dict: + ts_list = ts_list_dict[num_steps] + ts_embed_list = ts_embed_list_dict[num_steps] + shape_str = 'x'.join([str(e) for e in ts_embed_list[0].shape]) + f_name = os.path.join(dumpdir, f'timestep_steps_{num_steps}_int32_embedding_{shape_str}_float32.bin.ts') + ts_file = open(f_name, 'wb') + v_np = np.array(len(ts_list)).astype(np.int32) + v_np.tofile(ts_file) + for l in ts_list: + l.tofile(ts_file) + for l in ts_embed_list: + l.tofile(ts_file) + ts_file.close() + file_list.append(f_name) + + shape_str = 'x'.join([str(e) for e in uncond_text_embedding_np.shape]) + uncond_text_embedding_file = os.path.join(dumpdir, f'batch_1_uncond_text_embedding_{shape_str}_float32.bin.cte') + with open(uncond_text_embedding_file, 'wb') as f: + uncond_text_embedding_np.tofile(f) + + with open(os.path.join(dumpdir, 'readme.txt'), 'w') as f: + print('From:', file=f) + for pfile in pickle_stats: + f_stat = os.stat(pfile) + dts = datetime.fromtimestamp(f_stat.st_ctime) + (v0, v1, v2) = pickle_stats[pfile] + print(f' {pfile}, total_rec: {v0}, created on {dts} ', file=f) + print(f' num_steps: {v1}', file=f) + print(f' {len(v2)} unique_random_seeds: {v2}', file=f) + (v1,v2) = (list(sorted(ts_list_dict.keys())), sorted(seed_list)) + print('\nTotal:', file=f) + print(f' num_steps: {v1}', file=f) + print(f' {len(v2)} 
unique_random_seeds: {v2}', file=f) + print(f' number of uncond_text_embedding: 1', file=f) + #tar the files together + tar_file = os.path.join(dumpdir, f'sd_precompute_data.tar') + cmd = f'/bin/tar cvf {tar_file} {os.path.join(dumpdir, "readme.txt")} ' + ' '.join(file_list) + f' {uncond_text_embedding_file}' + print(f'Run {cmd}') + os.system(cmd) + +# for debugging only +def create_smaller_pickle(file_name, count=10): + s_file_name = "small_" + file_name + with open(file_name, 'rb') as f: + data_dict_seq = pickle.load(f) + with open(s_file_name, 'wb') as f: + pickle.dump(data_dict_seq[0:count],f) + +if __name__ == '__main__': + default_logdir = os.path.join("tar_output", datetime.now().strftime("%Y-%m-%d-%H-%M-%S")) + + parser = argparse.ArgumentParser( + description='Generates sd_precompute_data.tar file based onprovided pkl files.') + + parser.add_argument('--random_latent_init', type=str, + required=True, + help="Path to a random-latent-init pkl file containing random initial latents.") + parser.add_argument('--time_step_embedding', type=str, + required=True, + help="Comma seperated time-step-embedding pkl files containing ts-embedding.") + parser.add_argument('--time_step_list', type=str, + required=True, + help="Comma seperated time-step-list pkl files containing timestamp.") + parser.add_argument('--unconditional_text_emb', type=str, + required=True, + help="Path to a unconditional-text-emb pkl file containing unconditional text embedding.") + parser.add_argument('--dumpdir', type=str, + default=default_logdir, + help="Path to a directory for dumping.\ + Default value is 'tar_output/'") + + config = parser.parse_args() + config.time_step_embedding = config.time_step_embedding.split(',') + config.time_step_list = config.time_step_list.split(',') + + os.makedirs(config.dumpdir, exist_ok=True) + + pickle_stats ={} + + num_rec, random_init_dict = parse_random_latent_init_pickle(config.random_latent_init) + + ts_embed_list_dict = {} + for ts_embedd_file in 
config.time_step_embedding: + length, data = parse_ts_embedding_pickle(ts_embedd_file) + ts_embed_list_dict[length] = data + + ts_list_dict = {} + for ts_list_file in config.time_step_list: + length, data = parse_ts_list_pickle(ts_list_file) + ts_list_dict[length] = data + + uncond_text_embedding_np = parse_unconditional_encoding_pickle(config.unconditional_text_emb) + + if sorted(ts_embed_list_dict.keys()) != sorted(ts_list_dict.keys()): + raise Exception('Wrong files for time_step_embedding and time_step_list') + + dump_data(pickle_stats, + ts_list_dict, ts_embed_list_dict, random_init_dict, uncond_text_embedding_np, config.dumpdir) diff --git a/mobile_back_qti/DLC/util/StableDiffusion/readme.txt b/mobile_back_qti/DLC/util/StableDiffusion/readme.txt new file mode 100644 index 000000000..e69de29bb diff --git a/mobile_back_qti/README.md b/mobile_back_qti/README.md index 15201db4a..8fdf7d5ef 100644 --- a/mobile_back_qti/README.md +++ b/mobile_back_qti/README.md @@ -13,12 +13,12 @@ uploaded with the other submission files to here: ` * [Qualcomm Package Manager 3](https://qpm.qualcomm.com/#/main/tools/details/QPM3) -* [SNPE SDK](https://qpm.qualcomm.com/#/main/tools/details/qualcomm_neural_processing_sdk) (Version 2.20.0) +* [SNPE SDK](https://qpm.qualcomm.com/#/main/tools/details/qualcomm_neural_processing_sdk) (Version 2.25.0.240728) * Linux machine capable of running Ubuntu docker images ### Optional -If you wish to rebuild the DLC files yourself, you will have these additional requirements: +To rebuild the DLC files yourself, you will have these additional requirements: * Imagenet dataset (LSVRC2012_img_val.tar) put in the build/imagenet/downloads directory * Linux machine also capable of running Tensorflow debian based docker images @@ -27,6 +27,12 @@ Use your browser to download the SNPE SDK using the links above. Create your Github personal access token. 
+ +### Note for Stable Diffusion + +To generate the stable diffusion model, please follow the instructions mentioned at +[Stable Diffusion README](DLC/util/StableDiffusion/README.md) + ```shell export SNPE_SDK= cd DLC/ && make @@ -46,14 +52,15 @@ cd mobile_app_open * Install Qualcomm Package manager on the linux machine ```shell -sudo apt-get install ./QualcommPackageManager3.3.0.99.0.Linux-x86.deb +sudo dpkg -i ./QualcommPackageManager3.3.0.111.1.Linux-x86.deb ``` -* Extract the SNPE SDK (from Requirements above) to mobile_app_open/mobile_back_qti +* Extract the QAIRT SDK (from Requirements above) to mobile_app_open/mobile_back_qti ```shell -qpm-cli --extract ./qualcomm_neural_processing_sdk.2.20.0.240223.Linux-AnyCPU.qik -cp -rv /opt/qcom/aistack/snpe/2.20.0.240223/. mobile_app_open/mobile_back_qti/qaisw-2.20.0.240223 +qpm-cli --extract ./qualcomm_neural_processing_sdk.2.25.0.240728.Linux-AnyCPU.qik +mkdir mobile_app_open/mobile_back_qti/qairt/ +cp -rv /opt/qcom/aistack/qairt/2.25.0.240728 mobile_app_open/mobile_back_qti/qairt/ ``` * If you have an HTTP proxy, you may need the following @@ -66,7 +73,16 @@ export USE_PROXY_WORKAROUND=1 Build with the following build command. ```shell -make OFFICIAL_BUILD=true FLUTTER_BUILD_NUMBER=1 WITH_QTI=1 docker/flutter/android/release +make OFFICIAL_BUILD=true FLUTTER_BUILD_NUMBER=1 WITH_QTI=1 docker/flutter/android/release +``` + +Build with the following build command to include stable_diffusion. +* Download Notebook for stable diffusion from QPM Manager. 
+* Copy include folder from <path_to_notebook>/model/example3/host_linux_target_android_with_MLPerf/include to mobile_back_qti/cpp/backend_qti/StableDiffusionShared/ +* Copy libStableDiffusionShared.so from <path_to_notebook>/model/example3/host_linux_target_android_with_MLPerf/libs/aarch64-android/ to mobile_back_qti/cpp/backend_qti/StableDiffusionShared/ +* Run the command below: +```shell +make OFFICIAL_BUILD=true FLUTTER_BUILD_NUMBER=1 WITH_STABLEDIFFUSION=1 WITH_QTI=1 docker/flutter/android/release ``` This will generate the MLPerf flutter app with QTI backend in ```mobile_app_open/output/android-apks/_mlperfbench--.apk``` @@ -95,9 +111,8 @@ uploaded with the other submission files to here: ` -* [SNPE windows SDK] () - * Version 2.20.0 +* [SNPE windows SDK](https://qpm.qualcomm.com/#/main/tools/details/qualcomm_neural_processing_sdk) + * Version 2.25.0.240728 * Windows x86 machine ## Setting up the environment @@ -152,16 +167,20 @@ Run accuracy mode with following command .\run_mlperf_test.bat --models --dataset --usecase --mode accuracy ``` -* --usecase parameter can take one of these arguments => image_classification_v2, image_classification, object_detection, image_segmentation, language_understanding, super_resolution, image_classification_offline_v2, image_classification_offline +* --usecase parameter can take one of these arguments => image_classification_v2, object_detection, image_segmentation, language_understanding, super_resolution, image_classification_offline_v2 * see the results in accuracy_results.txt and performance_results.txt ## FAQ ### What devices does this backend support? -This backend only supports SDX_Elite, SD7G3, SD8SG3, SD8G3 devices. +This backend only supports SDX_Elite, SD8SG3, SD8G3 devices. Other already launched Snapdragon based devices can also run the MLPerf app as default fallback. ### Is SNPE used to run all the models? -Yes. All the models use SNPE for execution for current version. +Yes. 
All the models use Qualcomm AI Runtime(QAIRT) for execution for current version. + +### What devices supports stable diffusion? + +Currently, SD8G3 device supports stable_diffusion. diff --git a/mobile_back_qti/cpp/backend_qti/BUILD b/mobile_back_qti/cpp/backend_qti/BUILD index 5fe46ce8f..5320caf10 100644 --- a/mobile_back_qti/cpp/backend_qti/BUILD +++ b/mobile_back_qti/cpp/backend_qti/BUILD @@ -21,6 +21,7 @@ # load("@bazel_skylib//rules:common_settings.bzl", "string_flag") +load("@bazel_skylib//lib:selects.bzl", "selects") load("@org_tensorflow//tensorflow/lite:build_def.bzl", "tflite_copts", "tflite_jni_binary") load("@snpe_version_loader//:snpe_var_def.bzl", "SNPE_VERSION") load("//flutter/cpp/proto:pbtxt2header.bzl", "pbtxt2header") @@ -49,6 +50,35 @@ string_flag( ], ) +string_flag( + name = "with_stablediffusion", + build_setting_default = "2", + values = [ + "1", + "2", + ], +) + +config_setting( + name = "stablediffusion_option", + flag_values = {":with_stablediffusion": "1"}, +) + +config_setting( + name = "nostablediffusion_option", + flag_values = {":with_stablediffusion": "2"}, +) + +selects.config_setting_group( + name = "android_with_stablediffusion", + match_all = ["android_arm64","stablediffusion_option"], +) + +selects.config_setting_group( + name = "android_without_stablediffusion", + match_all = ["android_arm64","nostablediffusion_option"], +) + config_setting( name = "debug_option", flag_values = {":with_qti": "2"}, @@ -69,11 +99,14 @@ config_setting( ) snpe_copts = [ - "-Imobile_back_qti/" + SNPE_VERSION + "/include/SNPE", - "-fexceptions", + "-Imobile_back_qti/" + "qairt/" + SNPE_VERSION + "/include/SNPE", + "-Imobile_back_qti/" + "qairt/" + SNPE_VERSION + "/include/QNN", + "-Imobile_back_qti/" + "qairt/" + SNPE_VERSION + "/include/QNN/HTP", + "-fcxx-exceptions", "-lc++_shared", ] + pbtxt2header( name = "qti_settings", srcs = [ @@ -94,6 +127,7 @@ pbtxt2header( "settings/qti_settings_sm4450.pbtxt", "settings/qti_settings_sm7550.pbtxt", 
"settings/qti_settings_sm8635.pbtxt", + "settings/qti_settings_stablediffusion.pbtxt", ], ) @@ -166,18 +200,29 @@ cc_library( "debug_option": ["DEBUG_FLAG"], "//conditions:default": [], "config_option": ["EXTERNAL_CONFIG"], + }) + + select({ + "stablediffusion_option": ["STABLEDIFFUSION_FLAG"], + "//conditions:default": [], }), deps = [ + "//flutter/cpp/c:headers", ":qti_allocator", ":qti_settings", - "//flutter/cpp/c:headers", "@org_tensorflow//tensorflow/core:tflite_portable_logging", ] + select({ - "android_arm64": [ - "//mobile_back_qti:snpe", + ":android_without_stablediffusion": [ + "@org_tensorflow//tensorflow/lite/delegates/gpu:delegate", "@org_tensorflow//tensorflow/lite/c:c_api", "@org_tensorflow//tensorflow/lite/c:common", + "//mobile_back_qti:snpe", + ], + ":android_with_stablediffusion": [ "@org_tensorflow//tensorflow/lite/delegates/gpu:delegate", + "@org_tensorflow//tensorflow/lite/c:c_api", + "@org_tensorflow//tensorflow/lite/c:common", + "//mobile_back_qti:snpe", + "//mobile_back_qti/cpp/backend_qti/StableDiffusionShared:StableDiffusionShared", ], "windows_arm64": [ "//mobile_back_qti:snpewindowslib", diff --git a/mobile_back_qti/cpp/backend_qti/StableDiffusionShared/BUILD b/mobile_back_qti/cpp/backend_qti/StableDiffusionShared/BUILD new file mode 100644 index 000000000..a72b94dc8 --- /dev/null +++ b/mobile_back_qti/cpp/backend_qti/StableDiffusionShared/BUILD @@ -0,0 +1,49 @@ +# Copyright (c) 2020-2024 Qualcomm Innovation Center, Inc. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +########################################################################## + +load("@stable_diffusion_external_deps_shared//:stable_diffusion_var_def_shared.bzl", "OPENCV_ROOT_DIR") +load("@snpe_version_loader//:snpe_var_def.bzl", "SNPE_VERSION") + +package( + default_visibility = ["//visibility:public"], + licenses = ["notice"], # Apache 2.0 +) + +config_setting( + name = "android_arm64", + values = {"cpu": "arm64-v8a"}, +) + +cc_library( + name = "opencv_core", + srcs = [OPENCV_ROOT_DIR + "/libs/arm64-v8a/libopencv_core.so", + OPENCV_ROOT_DIR + "/libs/arm64-v8a/libopencv_imgcodecs.so",], + includes = [OPENCV_ROOT_DIR + "/jni/include/"], + hdrs = glob([ + OPENCV_ROOT_DIR + "/jni/include/opencv2/*.hpp", + OPENCV_ROOT_DIR + "/jni/include/opencv2/**/*.hpp", + ]), + visibility = ["//visibility:public"], +) + +cc_library( + name = "StableDiffusionShared", + srcs = glob(["libStableDiffusion.so"]), + hdrs = glob(["include/*.hpp","include/**/*.hpp","include/**/*.h"]), + visibility = ["//visibility:public"], + deps = [ + ":opencv_core", + ], +) diff --git a/mobile_back_qti/cpp/backend_qti/StableDiffusionShared/variables.bzl b/mobile_back_qti/cpp/backend_qti/StableDiffusionShared/variables.bzl new file mode 100644 index 000000000..67c9a8b9d --- /dev/null +++ b/mobile_back_qti/cpp/backend_qti/StableDiffusionShared/variables.bzl @@ -0,0 +1,31 @@ +# Copyright (c) 2020-2024 Qualcomm Innovation Center, Inc. All rights reserved. + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +"""Find OPENCV folder and pass it to bazel build config""" + +def _impl(repository_ctx): + opencv_path = "opencv" + repository_ctx.file("BUILD", "") + repository_ctx.file( + "stable_diffusion_var_def_shared.bzl", + "OPENCV_ROOT_DIR = \"include/%s\"" % opencv_path, + ) + +stable_diffusion_external_deps_shared = repository_rule( + implementation = _impl, + environ = ["OPENCV_ROOT_DIR"], + local = True, + attrs = {"workspace_dir": attr.string(mandatory = True)}, +) + diff --git a/mobile_back_qti/cpp/backend_qti/mlperf_helper.h b/mobile_back_qti/cpp/backend_qti/mlperf_helper.h index d7ca688cb..af4c3f244 100644 --- a/mobile_back_qti/cpp/backend_qti/mlperf_helper.h +++ b/mobile_back_qti/cpp/backend_qti/mlperf_helper.h @@ -57,6 +57,8 @@ static void process_config(const mlperf_backend_configuration_t *configs, backend_data->scenario_ = configs->values[i]; } else if (strcmp(configs->keys[i], "snpe_output_layers") == 0) { backend_data->snpeOutputLayers_ = configs->values[i]; + } else if (strcmp(configs->keys[i], "snpe_output_tensors") == 0) { + backend_data->snpeOutputTensors_ = configs->values[i]; } else if (strcmp(configs->keys[i], "bg_load") == 0) { if (strcmp(configs->values[i], "true") == 0) { backend_data->bgLoad_ = true; @@ -141,12 +143,19 @@ static void process_config(const mlperf_backend_configuration_t *configs, } else { backend_data->useCpuInt8_ = false; } + } else if (strcmp(configs->keys[i], "pipeline") == 0) { + if (std::strcmp(configs->values[i], "StableDiffusionPipeline") == 0) { + backend_data->isStableDiffusion = true; + } else { + backend_data->isStableDiffusion = false; + } } } LOG(INFO) << "Config: delegate: " << delegate << " | scenario: " << backend_data->scenario_ - << " | output: " << backend_data->snpeOutputLayers_ + << " | output layer: " << backend_data->snpeOutputLayers_ + << " | output tensor: " << backend_data->snpeOutputTensors_ << " | 
isTfLite: " << backend_data->isTflite_ << " | batchSize: " << backend_data->batchSize_ << " | useSNPE: " << backend_data->useSnpe_ @@ -159,7 +168,8 @@ static void process_config(const mlperf_backend_configuration_t *configs, << " | profileLevel: " << profileLevel << " | useIonBuffer: " << backend_data->useIonBuffers_ << " | acceleratorName: " << backend_data->acceleratorName_ - << " | useCpuInt8: " << backend_data->useCpuInt8_; + << " | useCpuInt8: " << backend_data->useCpuInt8_ + << " | isStableDiffusion: " << backend_data->isStableDiffusion; } #endif diff --git a/mobile_back_qti/cpp/backend_qti/qti_backend_helper.cc b/mobile_back_qti/cpp/backend_qti/qti_backend_helper.cc index b7009ab3b..10a795cf2 100644 --- a/mobile_back_qti/cpp/backend_qti/qti_backend_helper.cc +++ b/mobile_back_qti/cpp/backend_qti/qti_backend_helper.cc @@ -17,6 +17,7 @@ limitations under the License. #include #include +#include #include "DiagLog/IDiagLog.h" #include "DlContainer/DlContainer.h" @@ -67,15 +68,15 @@ static void split(std::vector &split_string, } } -static Snpe_StringList_Handle_t ResolveOutputLayerNames(std::string &line) { - Snpe_StringList_Handle_t outputLayersHandle = Snpe_StringList_Create(); +static Snpe_StringList_Handle_t ResolveCommaSeparatedList(std::string &line) { + Snpe_StringList_Handle_t stringListHandle = Snpe_StringList_Create(); if (!line.empty()) { std::vector names; split(names, line.substr(0), ','); for (auto &name : names) - Snpe_StringList_Append(outputLayersHandle, name.c_str()); + Snpe_StringList_Append(stringListHandle, name.c_str()); } - return outputLayersHandle; + return stringListHandle; } static Snpe_TensorShape_Handle_t calcStrides( @@ -116,19 +117,17 @@ static Snpe_Runtime_t Str2Delegate(const snpe_runtimes_t delegate) { break; } - if (Snpe_Util_IsRuntimeAvailableCheckOption( - runtime, SNPE_RUNTIME_CHECK_OPTION_UNSIGNEDPD_CHECK)) { + if (Snpe_Util_IsRuntimeAvailableCheckOption(runtime, SNPE_RUNTIME_CHECK_OPTION_UNSIGNEDPD_CHECK)) { LOG(INFO) << 
"runtime " << delegate << " is available on this platform"; } else { - LOG(FATAL) << "runtime " << delegate - << " is not available on this platform"; + LOG(FATAL) << "runtime " << delegate << " is not available on this platform"; } return runtime; } bool QTIBackendHelper::IsRuntimeAvailable(const snpe_runtimes_t delegate) { - return (Str2Delegate(delegate) != SNPE_RUNTIME_UNSET); + return (Str2Delegate (delegate) != SNPE_RUNTIME_UNSET); } void QTIBackendHelper::use_psnpe(const char *model_path) { @@ -168,7 +167,10 @@ void QTIBackendHelper::use_psnpe(const char *model_path) { SNPE_PSNPE_INPUTOUTPUTTRANSMISSIONMODE_SYNC)); Snpe_StringList_Handle_t outputLayers = - ResolveOutputLayerNames(snpeOutputLayers_); + ResolveCommaSeparatedList(snpeOutputLayers_); + + Snpe_StringList_Handle_t outputTensors = + ResolveCommaSeparatedList(snpeOutputTensors_); Snpe_SNPEBuilder_Handle_t snpeBuilderHandle = Snpe_SNPEBuilder_Create(containerHandle); @@ -180,12 +182,13 @@ void QTIBackendHelper::use_psnpe(const char *model_path) { Snpe_SNPEBuilder_SetRuntimeProcessorOrder(snpeBuilderHandle, dummyInputRuntimeListHandle); Snpe_SNPEBuilder_SetOutputLayers(snpeBuilderHandle, outputLayers); + Snpe_SNPEBuilder_SetOutputTensors(snpeBuilderHandle, outputTensors); if (Snpe_StringList_Size(outputLayers) > 0) Snpe_BuildConfig_SetOutputBufferNames(buildConfigHandle, outputLayers); std::string platformOptionStr = ""; - if (useCpuInt8_) { + if(useCpuInt8_){ platformOptionStr = "enableCpuFxpMode:ON"; } if (Socs::get_use_dsp_features()) { @@ -230,22 +233,22 @@ void QTIBackendHelper::use_psnpe(const char *model_path) { LOG(FATAL) << "Error in init of snpe_ " << snpe_->snpeHandle; } - if (profilingLevel_ != SNPE_PROFILING_LEVEL_OFF) { + if(profilingLevel_ != SNPE_PROFILING_LEVEL_OFF){ auto diagLogHandle = Snpe_SNPE_GetDiagLogInterface_Ref(snpe_->snpeHandle); - if (!diagLogHandle) LOG(INFO) << "Get diagLogHandle failed"; + if (!diagLogHandle) + LOG(INFO)<<"Get diagLogHandle failed"; auto optionsHandle = 
Snpe_IDiagLog_GetOptions(diagLogHandle); std::string OutputDir = ".\diaglogs"; #ifdef __ANDROID__ - OutputDir = - "/sdcard/Android/data/org.mlcommons.android.mlperfbench/files/diaglogs"; + OutputDir = "/sdcard/Android/data/org.mlcommons.android.mlperfbench/files/diaglogs"; #endif Snpe_Options_SetLogFileDirectory(optionsHandle, OutputDir.c_str()); - if (Snpe_IDiagLog_SetOptions(diagLogHandle, optionsHandle) != SNPE_SUCCESS) - LOG(INFO) << "Failed to set DiagLog options"; + if(Snpe_IDiagLog_SetOptions(diagLogHandle, optionsHandle) != SNPE_SUCCESS) + LOG(INFO)<<"Failed to set DiagLog options"; if (Snpe_IDiagLog_Start(diagLogHandle) != SNPE_SUCCESS) - LOG(INFO) << "Failed to start logger "; + LOG(INFO)<<"Failed to start logger "; } // Snpe_DlContainer_Delete(containerHandle); } @@ -302,7 +305,9 @@ void QTIBackendHelper::use_snpe(const char *model_path) { Snpe_SNPEBuilder_Create(containerHandle); Snpe_SNPEBuilder_SetCpuFixedPointMode(snpeBuilderHandle, useCpuInt8_); Snpe_StringList_Handle_t outputLayers = - ResolveOutputLayerNames(snpeOutputLayers_); + ResolveCommaSeparatedList(snpeOutputLayers_); + Snpe_StringList_Handle_t outputTensors = + ResolveCommaSeparatedList(snpeOutputTensors_); Snpe_SNPEBuilder_SetPerformanceProfile(snpeBuilderHandle, perfProfile_); Snpe_SNPEBuilder_SetProfilingLevel(snpeBuilderHandle, profilingLevel_); Snpe_SNPEBuilder_SetExecutionPriorityHint(snpeBuilderHandle, @@ -311,6 +316,7 @@ void QTIBackendHelper::use_snpe(const char *model_path) { inputRuntimeListHandle); Snpe_SNPEBuilder_SetUseUserSuppliedBuffers(snpeBuilderHandle, true); Snpe_SNPEBuilder_SetOutputLayers(snpeBuilderHandle, outputLayers); + Snpe_SNPEBuilder_SetOutputTensors(snpeBuilderHandle, outputTensors); std::string platformOptionStr = ""; if (Socs::soc_check_feature(useIonBuffers_, platformOptionStr)) { @@ -338,30 +344,29 @@ void QTIBackendHelper::use_snpe(const char *model_path) { LOG(FATAL) << "Error in init of the model " << snpe_; } - if (profilingLevel_ != 
SNPE_PROFILING_LEVEL_OFF) { + if(profilingLevel_ != SNPE_PROFILING_LEVEL_OFF){ auto diagLogHandle = Snpe_SNPE_GetDiagLogInterface_Ref(snpe_->snpeHandle); - if (!diagLogHandle) LOG(INFO) << "Get diagLogHandle failed"; + if (!diagLogHandle) + LOG(INFO)<<"Get diagLogHandle failed"; auto optionsHandle = Snpe_IDiagLog_GetOptions(diagLogHandle); std::string OutputDir = ".\diaglogs"; -#ifdef __ANDROID__ - OutputDir = - "/sdcard/Android/data/org.mlcommons.android.mlperfbench/files/diaglogs"; -#endif + #ifdef __ANDROID__ + OutputDir = "/sdcard/Android/data/org.mlcommons.android.mlperfbench/files/diaglogs"; + #endif Snpe_Options_SetLogFileDirectory(optionsHandle, OutputDir.c_str()); - if (Snpe_IDiagLog_SetOptions(diagLogHandle, optionsHandle) != SNPE_SUCCESS) - LOG(INFO) << "Failed to set DiagLog options"; + if(Snpe_IDiagLog_SetOptions(diagLogHandle, optionsHandle) != SNPE_SUCCESS) + LOG(INFO)<<"Failed to set DiagLog options"; if (Snpe_IDiagLog_Start(diagLogHandle) != SNPE_SUCCESS) - LOG(INFO) << "Failed to start logger "; + LOG(INFO)<<"Failed to start logger "; } } inline int QTIBackendHelper::get_num_inits() { return Socs::soc_num_inits(); } -void QTIBackendHelper::get_accelerator_instances(int &num_dsp, int &num_gpu, - int &num_cpu, - int &num_gpu_fp16) { +void QTIBackendHelper::get_accelerator_instances(int &num_dsp, + int &num_gpu, int &num_cpu, int &num_gpu_fp16) { std::string &delegate = delegate_; num_dsp = 0; num_gpu = 0; @@ -383,13 +388,14 @@ void QTIBackendHelper::get_accelerator_instances(int &num_dsp, int &num_gpu, } else if (delegate == "snpe_gpu_fp16" || delegate == "psnpe_gpu_fp16") { num_gpu_fp16 = 1; Socs::set_use_dsp_features(false); - } else { - LOG(FATAL) << "Error: Unsupported delegate " << delegate << " SoC ID " - << Socs::get_soc_name(); } - } - LOG(INFO) << "Using " << num_dsp << " dsp " << num_gpu << " gpu" << num_cpu - << " cpu" << num_gpu_fp16 << " gpu_fp16"; + else { + LOG(FATAL) << "Error: Unsupported delegate " << delegate << " SoC ID " + << 
Socs::get_soc_name(); + } + } + LOG(INFO) << "Using " << num_dsp << " dsp " << num_gpu + << " gpu" << num_cpu << " cpu" << num_gpu_fp16 << " gpu_fp16"; } void QTIBackendHelper::map_inputs() { @@ -417,9 +423,11 @@ void QTIBackendHelper::map_inputs() { Snpe_IBufferAttributes_GetDims(ubaOptHandle), sizeof(float)); Snpe_UserBufferEncoding_Handle_t ubeFloatHandle = Snpe_UserBufferEncodingFloat_Create(); - ubPtr.push_back(Snpe_Util_CreateUserBufferShared( - std::move(inputBuffer.data()), inputBuffer.size(), 0, stridesHandle, - ubeFloatHandle)); + ubPtr.push_back(Snpe_Util_CreateUserBufferShared(std::move(inputBuffer.data()), + inputBuffer.size(), + 0, + stridesHandle, + ubeFloatHandle)); Snpe_UserBufferMap_Add(inputMapHandle, name, ubPtr.back()); Snpe_TensorShape_Delete(stridesHandle); @@ -440,9 +448,11 @@ void QTIBackendHelper::map_inputs() { if (!ubeTfN) ubeTfN = Snpe_UserBufferEncodingTfN_Create(128.0, 1.0 / 255, 8); - ubPtr.push_back(Snpe_Util_CreateUserBufferShared( - std::move(inputBuffer.data()), inputBuffer.size(), 0, stridesHandle, - ubeTfN)); + ubPtr.push_back(Snpe_Util_CreateUserBufferShared(std::move(inputBuffer.data()), + inputBuffer.size(), + 0, + stridesHandle, + ubeTfN)); Snpe_UserBufferMap_Add(inputMapHandle, name, ubPtr.back()); Snpe_TensorShape_Delete(stridesHandle); @@ -494,7 +504,7 @@ void QTIBackendHelper::map_outputs() { Snpe_UserBufferMap_Add(outputMapHandle, name, x.back()); if (useIonBuffers_) Snpe_UserMemoryMap_Add(userMemoryMappedBufferMapHandle_, name, - bufs_[bi].at(name).data()); + bufs_[bi].at(name).data()); Snpe_UserBufferEncodingTfN_Delete(ubeTfN); Snpe_TensorShape_Delete(stridesHandle); @@ -603,7 +613,7 @@ void QTIBackendHelper::get_data_formats() { long bufSize = calcSizeFromDims(Snpe_TensorShape_Rank(dimsHandle), Snpe_TensorShape_GetDimensions(dimsHandle)); if (outputBufferType_ == FLOAT_32) { - if (snpeOutputLayers_ == "transpose") { + if (snpeOutputLayers_ == "transpose" || snpeOutputTensors_ == "transpose:0") { // For mobileBERT, 
return output size as half the size of computed // values, // because the DLC returns only single layer as output but the app needs @@ -634,7 +644,7 @@ void QTIBackendHelper::get_data_formats() { } void QTIBackendHelper::set_runtime_config() { - int numDSP = 0, numGPU = 0, numCPU = 0, numGPU_FP16 = 0; + int numDSP = 0, numGPU = 0, numCPU = 0, numGPU_FP16=0; get_accelerator_instances(numDSP, numGPU, numCPU, numGPU_FP16); Snpe_Runtime_t runtime; @@ -698,3 +708,108 @@ std::string QTIBackendHelper::get_snpe_version() { Snpe_DlVersion_Handle_t version = Snpe_Util_GetLibraryVersion(); return Snpe_DlVersion_GetBuild(version); } + +std::vector get_normal(unsigned numbers, unsigned seed = 5, + float mean = 0.0, float stddev = 1.0) { + std::default_random_engine generator(seed); + std::normal_distribution distribution(mean, stddev); + + std::vector d; + for (unsigned i = 0; i < numbers; i++) d.push_back(distribution(generator)); + + return d; +} + +void QTIBackendHelper::initSd(const char *model_path, const char *lib_path) { +#ifdef STABLEDIFFUSION_FLAG + bool use_mmap = false; // we don't want to use cached + uint64_t context_bin_mmap_read_budget = 100000; + std::string temp(lib_path); + native_lib_path = temp ; + std::string newtemp (model_path); + data_folder_path = newtemp; + + // TODO: Below vars are using in preprocessInputSd + // May need to be set from the configuration from MLC. Hardcoded for now. 
+ num_steps = 20; + seed = 633994880; + guidance_scale = 7.5; + + mlperf_data_t input; + input.type = mlperf_data_t::Int32; + input.size = 77*1; // tokenized inputs 77 numbers + inputFormat_.push_back(input); + + mlperf_data_t output; + output.type = mlperf_data_t::Uint8; + output.size = 512*512*3; + outputFormat_.push_back(output); + + sd_pipeline = new QnnApiHelpers(); + + if (0 != sd_pipeline->Init(data_folder_path, native_lib_path, + 768, 77, 1.0, + 512, 512, 3.0, + use_mmap, context_bin_mmap_read_budget)) { + LOG(FATAL) << "Initialization Failure"; + } +#endif +} + +bool QTIBackendHelper::preprocessInputSd(void *data) { +#ifdef STABLEDIFFUSION_FLAG + int32_t *input_prompt_ids = (int32_t *) data; + std::vector noise = get_normal(64 * 64 * 4, seed); + return sd_pipeline->PreProcessInput(input_prompt_ids, noise, num_steps, guidance_scale); +#else + return false; +#endif +} + +bool QTIBackendHelper::executeSd() { +#ifdef STABLEDIFFUSION_FLAG + for (int stepIdx = 0; stepIdx < num_steps; stepIdx++) { + bool runVAE = ((stepIdx + 1) == num_steps); + if (true != sd_pipeline->RunInference(runVAE)) { + LOG(FATAL) << "RunInference failure"; + return false; + } + } + return true; +#else + return false; +#endif +} + +bool QTIBackendHelper::getOutputSd(void **data) { +#ifdef STABLEDIFFUSION_FLAG + JniHelpers::InferenceReturn inferenceReturn; + if (true != sd_pipeline->PostProcessOutput(false, false, inferenceReturn)) { + LOG(FATAL) << "PostProcessOutput failure"; + return false; + } + *data = inferenceReturn.m_ImageData; + + //delete sd_pipeline; + //sd_pipeline = new QnnApiHelpers(); + return true; +#else + return false; +#endif +} + +void QTIBackendHelper::deinitSd() { +#ifdef STABLEDIFFUSION_FLAG + bool use_mmap = false; // we don't want to use cached + uint64_t context_bin_mmap_read_budget = 100000; + /*if (0 != sd_pipeline->Init(data_folder_path, native_lib_path, + 768, 77, 1.0, + 512, 512, 3.0, + use_mmap, context_bin_mmap_read_budget)) { + LOG(FATAL) << 
"Initialization Failure"; + } +*/ + delete sd_pipeline; + sd_pipeline = nullptr; +#endif +} \ No newline at end of file diff --git a/mobile_back_qti/cpp/backend_qti/qti_backend_helper.h b/mobile_back_qti/cpp/backend_qti/qti_backend_helper.h index bc5c45893..38d7e5fd6 100644 --- a/mobile_back_qti/cpp/backend_qti/qti_backend_helper.h +++ b/mobile_back_qti/cpp/backend_qti/qti_backend_helper.h @@ -25,6 +25,10 @@ limitations under the License. #include "flutter/cpp/c/backend_c.h" #include "flutter/cpp/c/type.h" +#ifdef STABLEDIFFUSION_FLAG +#include "StableDiffusionShared/include/QnnApiHelpers.hpp" +#endif + class snpe_handler { public: Snpe_SNPE_Handle_t snpeHandle; @@ -63,10 +67,16 @@ class QTIBackendHelper { const char *name_ = "snpe"; const char *acceleratorName_; std::string snpeOutputLayers_; + std::string snpeOutputTensors_; std::vector inputFormat_; std::vector outputFormat_; std::unique_ptr psnpe_; std::unique_ptr snpe_; +#ifdef STABLEDIFFUSION_FLAG + QnnApiHelpers *sd_pipeline; +#else + void *sd_pipeline; +#endif Snpe_UserBufferList_Handle_t inputMapListHandle_, outputMapListHandle_; Snpe_UserMemoryMap_Handle_t userMemoryMappedBufferMapHandle_; std::vector< @@ -95,6 +105,7 @@ class QTIBackendHelper { bool useIonBuffers_ = true; bool useCpuInt8_ = false; bool isIonRegistered; + bool isStableDiffusion = false; /* exposed functions */ void use_psnpe(const char *model_path); @@ -106,6 +117,18 @@ class QTIBackendHelper { void set_runtime_config(); std::string get_snpe_version(); + void initSd(const char *model_path, const char *native_lib_path); + bool preprocessInputSd(void *data); + bool executeSd(); + void deinitSd(); + bool getOutputSd(void **data); + + int num_steps; + int seed; + float guidance_scale; + std::string native_lib_path; + std::string data_folder_path; + static bool IsRuntimeAvailable(const snpe_runtimes_t delegate); QTIBackendHelper() @@ -117,7 +140,8 @@ class QTIBackendHelper { inputMapListHandle_(Snpe_UserBufferList_Create()), 
outputMapListHandle_(Snpe_UserBufferList_Create()), snpe_(new snpe_handler()), - psnpe_(new psnpe_handler()) { + psnpe_(new psnpe_handler()), + sd_pipeline(nullptr) { odLayerMap[0] = "detection_boxes:0"; odLayerMap[1] = "Postprocessor/BatchMultiClassNonMaxSuppression_classes"; odLayerMap[2] = "detection_scores:0"; @@ -125,15 +149,23 @@ class QTIBackendHelper { "Postprocessor/BatchMultiClassNonMaxSuppression_num_detections"; userMemoryMappedBufferMapHandle_ = Snpe_UserMemoryMap_Create(); isIonRegistered = false; + + num_steps = 20; + seed = 0; + guidance_scale = 7.5; } ~QTIBackendHelper() { - Snpe_RuntimeList_Delete(inputRuntimeListHandle); - Snpe_RuntimeList_Delete(dummyInputRuntimeListHandle); - Snpe_StringList_Delete(networkInputTensorNamesHandle_); - Snpe_StringList_Delete(networkOutputTensorNamesHandle_); - Snpe_UserBufferList_Delete(inputMapListHandle_); - Snpe_UserBufferList_Delete(outputMapListHandle_); + if (isStableDiffusion) { + deinitSd(); + } else { + Snpe_RuntimeList_Delete(inputRuntimeListHandle); + Snpe_RuntimeList_Delete(dummyInputRuntimeListHandle); + Snpe_StringList_Delete(networkInputTensorNamesHandle_); + Snpe_StringList_Delete(networkOutputTensorNamesHandle_); + Snpe_UserBufferList_Delete(inputMapListHandle_); + Snpe_UserBufferList_Delete(outputMapListHandle_); + } } }; diff --git a/mobile_back_qti/cpp/backend_qti/qti_c.cc b/mobile_back_qti/cpp/backend_qti/qti_c.cc index a6d37acaa..e343eab4d 100644 --- a/mobile_back_qti/cpp/backend_qti/qti_c.cc +++ b/mobile_back_qti/cpp/backend_qti/qti_c.cc @@ -44,18 +44,8 @@ bool useIonBuffer_g; extern "C" { #endif // __cplusplus -// Should return true if current hardware is supported. 
-bool mlperf_backend_matches_hardware(const char **not_allowed_message, - const char **settings, - const mlperf_device_info_t *device_info) { - if (device_info && device_info->model && device_info->manufacturer) { - LOG(INFO) << "QTI HW supported check: model: " << device_info->model - << ", manufacturer: " << device_info->manufacturer; - } - - std::ifstream in_file; +bool set_system_paths(const char *native_lib_path) { #ifdef __ANDROID__ - const char *native_lib_path = device_info->native_lib_path; std::stringstream adsp_lib_path; adsp_lib_path << native_lib_path << ";"; adsp_lib_path << "/system/lib/rfsa/adsp;/system/vendor/lib/rfsa/adsp;/dsp"; @@ -68,6 +58,21 @@ bool mlperf_backend_matches_hardware(const char **not_allowed_message, setenv("LD_LIBRARY_PATH", ld_lib_path.str().c_str(), 1 /*override*/); #endif + return false; +} + +// Should return true if current hardware is supported. +bool mlperf_backend_matches_hardware(const char **not_allowed_message, + const char **settings, + const mlperf_device_info_t *device_info) { + if (device_info && device_info->model && device_info->manufacturer) { + LOG(INFO) << "QTI HW supported check: model: " << device_info->model + << ", manufacturer: " << device_info->manufacturer; + } + + std::ifstream in_file; + set_system_paths(device_info->native_lib_path); + *not_allowed_message = nullptr; bool isQSoC = Socs::isSnapDragon(device_info->manufacturer); LOG(INFO) << "Is QTI SOC: " << isQSoC; @@ -117,14 +122,8 @@ mlperf_backend_ptr_t mlperf_backend_create( // use lowLatency cores for all snpe models CpuCtrl::lowLatency(); + set_system_paths(native_lib_path); -#ifdef __ANDROID__ - std::stringstream adsp_lib_path; - adsp_lib_path << native_lib_path << ";"; - adsp_lib_path << "/system/lib/rfsa/adsp;/system/vendor/lib/rfsa/adsp;/dsp"; - LOG(INFO) << "lib_path: " << adsp_lib_path.str(); - setenv("ADSP_LIBRARY_PATH", adsp_lib_path.str().c_str(), 1 /*override*/); -#endif std::string snpe_version = xverstr(SNPE_VERSION_STRING); if 
(snpe_version.compare("default") != 0) { int dotPosition = snpe_version.find_last_of("."); @@ -137,23 +136,31 @@ mlperf_backend_ptr_t mlperf_backend_create( } LOG(INFO) << "snpe_version: " << snpe_version; - // set runtime config - backend_data->set_runtime_config(); - // Use PSNPE or SNPE - if (backend_data->useSnpe_) { - backend_data->use_snpe(model_path); + // Stable Diffusion initialization + if(backend_data->isStableDiffusion) + { + backend_data->initSd(model_path, native_lib_path); + + LOG(INFO) << "StableDiffusion build completed successfully"; } else { - backend_data->use_psnpe(model_path); - } - backend_data->queryCount_ = 0; + // set runtime config + backend_data->set_runtime_config(); + // Use PSNPE or SNPE + if (backend_data->useSnpe_) { + backend_data->use_snpe(model_path); + } else { + backend_data->use_psnpe(model_path); + } - backend_data->get_data_formats(); - backend_data->map_inputs(); - backend_data->map_outputs(); + backend_data->queryCount_ = 0; - LOG(INFO) << "SNPE build completed successfully"; + backend_data->get_data_formats(); + backend_data->map_inputs(); + backend_data->map_outputs(); + LOG(INFO) << "SNPE build completed successfully"; + } return backend_data; } @@ -185,6 +192,9 @@ void mlperf_backend_delete(mlperf_backend_ptr_t backend_ptr) { if (backend_data->isTflite_) { tflite_backend_delete(backend_data->tfliteBackend_); } + if (backend_data->isStableDiffusion) { + backend_data->deinitSd(); + } delete backend_data; backend_data_ = nullptr; } @@ -201,7 +211,15 @@ mlperf_status_t mlperf_backend_issue_query(mlperf_backend_ptr_t backend_ptr) { return tflite_backend_issue_query(backend_data->tfliteBackend_); } - ret = backend_data->execute(); + if (backend_data->isStableDiffusion) { + if (backend_data->executeSd()) { + ret = MLPERF_SUCCESS; + } else { + ret = MLPERF_FAILURE; + } + } else { + ret = backend_data->execute(); + } #ifdef DEBUG_FLAG auto end = high_resolution_clock::now(); @@ -250,6 +268,16 @@ mlperf_status_t 
mlperf_backend_set_input(mlperf_backend_ptr_t backend_ptr, return tflite_backend_set_input(backend_data->tfliteBackend_, batchIndex, i, data); } + + if(backend_data->isStableDiffusion) + { + if (backend_data->preprocessInputSd(data)) { + return MLPERF_SUCCESS; + } else { + return MLPERF_FAILURE; + } + } + void *batchedDataPtr = ((backend_data->useIonBuffers_ == false) && (backend_data->inputBatch_ <= 1)) ? data @@ -304,13 +332,24 @@ mlperf_status_t mlperf_backend_get_output(mlperf_backend_ptr_t backend_ptr, return tflite_backend_get_output(backend_data->tfliteBackend_, batchIndex, outputIndex, data); } - if (backend_data->snpeOutputLayers_ == - "Postprocessor/BatchMultiClassNonMaxSuppression") { + + if (backend_data->isStableDiffusion) { + if (backend_data->getOutputSd(data)) { + return MLPERF_SUCCESS; + } else { + *data = nullptr; + return MLPERF_FAILURE; + } + } + + if (backend_data->snpeOutputTensors_.find("Postprocessor/BatchMultiClassNonMaxSuppression_classes") != std::string::npos + || backend_data->snpeOutputLayers_ == "Postprocessor/BatchMultiClassNonMaxSuppression") { // Reorder snpeOutputLayers_ for coco process_output const char *outputLayerName = backend_data->odLayerMap[outputIndex].c_str(); *data = backend_data->bufs_[batchIndex].at(outputLayerName).data(); return MLPERF_SUCCESS; - } else if (backend_data->snpeOutputLayers_ == "transpose") { + } else if (backend_data->snpeOutputTensors_.find("transpose:0") != std::string::npos + || backend_data->snpeOutputLayers_ == "transpose") { *data = backend_data->bufs_[int(batchIndex / backend_data->inputBatch_)] .at(Snpe_StringList_At( backend_data->networkOutputTensorNamesHandle_, 0)) diff --git a/mobile_back_qti/cpp/backend_qti/qti_settings.h b/mobile_back_qti/cpp/backend_qti/qti_settings.h index ef88863fd..a052aa15e 100644 --- a/mobile_back_qti/cpp/backend_qti/qti_settings.h +++ b/mobile_back_qti/cpp/backend_qti/qti_settings.h @@ -47,6 +47,7 @@ const std::string empty_settings = ""; #include 
INCLUDE_SETTINGS(default_dsp) #include INCLUDE_SETTINGS(default_cpu) #include INCLUDE_SETTINGS(default_gpu) +#include INCLUDE_SETTINGS(stablediffusion) STRING_SETTINGS(sd7g1) STRING_SETTINGS(sd7pg2) @@ -65,5 +66,6 @@ STRING_SETTINGS(sm7550) STRING_SETTINGS(default_dsp) STRING_SETTINGS(default_cpu) STRING_SETTINGS(default_gpu) +STRING_SETTINGS(stablediffusion) #endif diff --git a/mobile_back_qti/cpp/backend_qti/rpcmem.cc b/mobile_back_qti/cpp/backend_qti/rpcmem.cc index 3daf45440..dbaf4b22b 100644 --- a/mobile_back_qti/cpp/backend_qti/rpcmem.cc +++ b/mobile_back_qti/cpp/backend_qti/rpcmem.cc @@ -31,7 +31,6 @@ RpcMem::RpcMem() { libHandle_ = dlopen("libcdsprpc.so", RTLD_NOW); #else std::string windowsRpcPath = Socs::getServiceBinaryPath(L"qcnspmcdm"); - ; std::string windowsLibName = "libcdsprpc.dll"; windowsRpcPath = windowsRpcPath + '\\' + windowsLibName; libHandle_ = LoadLibrary(windowsRpcPath.c_str()); @@ -55,6 +54,7 @@ RpcMem::RpcMem() { rpcmemFree_ = reinterpret_cast( (void *)(intptr_t)GetProcAddress((HINSTANCE)libHandle_, "rpcmem_free")); #endif + if (rpcmemAlloc_ && rpcmemFree_) { isSuccess_ = true; } else { diff --git a/mobile_back_qti/cpp/backend_qti/settings/qti_settings_default_cpu.pbtxt b/mobile_back_qti/cpp/backend_qti/settings/qti_settings_default_cpu.pbtxt index d03328481..ad04f7d1c 100644 --- a/mobile_back_qti/cpp/backend_qti/settings/qti_settings_default_cpu.pbtxt +++ b/mobile_back_qti/cpp/backend_qti/settings/qti_settings_default_cpu.pbtxt @@ -19,26 +19,6 @@ common_setting { } } -benchmark_setting { - benchmark_id: "image_classification" - framework: "SNPE" - custom_setting { - id: "use_ion_buffer" - value: "false" - } - delegate_choice: { - priority: 1 - delegate_name: "SNPE_CPU" - accelerator_name: "snpe_cpu" - accelerator_desc: "CPU" - model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.0-snpe/mobilenet_edgetpu_224_1.0_htp.dlc" - model_checksum: "cdf1fe622b309f692e05781661248a2b" - } - } - 
delegate_selected: "SNPE_CPU" -} - benchmark_setting { benchmark_id: "image_classification_v2" framework: "SNPE" @@ -56,39 +36,14 @@ benchmark_setting { accelerator_name: "snpe_cpu" accelerator_desc: "CPU" model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.0-snpe/mobilenet_v4_htp.dlc" - model_checksum: "dbab3e231e5f83aabc80d5b69e6dad32" + model_path: "local:///mlperf_models/mobilenet_v4_htp.dlc" + model_checksum: "" } } single_stream_expected_latency_ns: 250000 delegate_selected: "SNPE_CPU" } -benchmark_setting { - benchmark_id: "image_classification_offline" - framework: "SNPE" - custom_setting { - id: "use_ion_buffer" - value: "false" - } - custom_setting { - id: "scenario" - value: "Offline" - } - delegate_choice: { - priority: 1 - delegate_name: "SNPE_CPU" - accelerator_name: "psnpe_cpu" - accelerator_desc: "CPU" - batch_size: 128 - model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.0-snpe/mobilenet_edgetpu_224_1.0_htp_batched_4.dlc" - model_checksum: "6523060565b8d3f326f3f323c531fc1c" - } - } - delegate_selected: "SNPE_CPU" -} - benchmark_setting { benchmark_id: "image_classification_offline_v2" framework: "SNPE" @@ -115,8 +70,8 @@ benchmark_setting { accelerator_desc: "CPU" batch_size: 12360 model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.0-snpe/mobilenet_v4_htp_batched_4.dlc" - model_checksum: "0de3b75022ce5c27d5902a080ec1cea0" + model_path: "local:///mlperf_models/mobilenet_v4_htp_batched_4.dlc" + model_checksum: "" } } delegate_selected: "SNPE_CPU" @@ -126,8 +81,8 @@ benchmark_setting { benchmark_id: "object_detection" framework: "SNPE" custom_setting { - id: "snpe_output_layers" - value: "Postprocessor/BatchMultiClassNonMaxSuppression" + id: "snpe_output_tensors" + value: "Postprocessor/BatchMultiClassNonMaxSuppression_classes,Postprocessor/BatchMultiClassNonMaxSuppression_num_detections,detection_scores:0,detection_boxes:0" } 
custom_setting { id: "use_ion_buffer" @@ -139,8 +94,8 @@ benchmark_setting { accelerator_name: "snpe_cpu" accelerator_desc: "CPU" model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.0-snpe/ssd_mobiledet_qat_htp.dlc" - model_checksum: "c333fc135a8c474679d716fe391a9e2a" + model_path: "local:///mlperf_models/ssd_mobiledet_qat_htp.dlc" + model_checksum: "" } } delegate_selected: "SNPE_CPU" @@ -150,8 +105,8 @@ benchmark_setting { benchmark_id: "natural_language_processing" framework: "SNPE" custom_setting { - id: "snpe_output_layers" - value: "transpose" + id: "snpe_output_tensors" + value: "transpose:0" } custom_setting { id: "input_buffer_type" @@ -167,8 +122,8 @@ benchmark_setting { accelerator_name: "snpe_cpu" accelerator_desc: "CPU" model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.0-snpe/mobilebert_quantized_htp.dlc" - model_checksum: "7a641e4df84fc06a1237b7fe1b1c5b08" + model_path: "local:///mlperf_models/mobilebert_quantized_htp.dlc" + model_checksum: "" } } delegate_selected: "SNPE_CPU" @@ -195,10 +150,9 @@ benchmark_setting { accelerator_name: "snpe_cpu" accelerator_desc: "CPU" model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.0-snpe/mobile_mosaic_htp.dlc" - model_checksum: "e870526444c1e48df4f0505e530ecfdf" + model_path: "local:///mlperf_models/mobile_mosaic_htp.dlc" + model_checksum: "" } - } delegate_selected: "SNPE_CPU" } @@ -223,8 +177,8 @@ benchmark_setting { accelerator_name: "snpe_cpu" accelerator_desc: "CPU" model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.0-snpe/snusr_htp.dlc" - model_checksum: "84ef0d9c2e7b710381cea962a22a0b41" + model_path: "local:///mlperf_models/snusr_htp.dlc" + model_checksum: "" } } delegate_selected: "SNPE_CPU" diff --git a/mobile_back_qti/cpp/backend_qti/settings/qti_settings_default_dsp.pbtxt 
b/mobile_back_qti/cpp/backend_qti/settings/qti_settings_default_dsp.pbtxt index 6a0b9fbb1..113501891 100644 --- a/mobile_back_qti/cpp/backend_qti/settings/qti_settings_default_dsp.pbtxt +++ b/mobile_back_qti/cpp/backend_qti/settings/qti_settings_default_dsp.pbtxt @@ -19,30 +19,6 @@ common_setting { } } -benchmark_setting { - benchmark_id: "image_classification" - framework: "SNPE" - custom_setting { - id: "bg_load" - value: "false" - } - custom_setting { - id: "use_ion_buffer" - value: "false" - } - delegate_choice: { - priority: 1 - delegate_name: "SNPE_DSP" - accelerator_name: "snpe_dsp" - accelerator_desc: "HTP" - model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.0-snpe/mobilenet_edgetpu_224_1.0_htp.dlc" - model_checksum: "cdf1fe622b309f692e05781661248a2b" - } - } - delegate_selected: "SNPE_DSP" -} - benchmark_setting { benchmark_id: "image_classification_v2" framework: "SNPE" @@ -67,35 +43,6 @@ benchmark_setting { delegate_selected: "SNPE_DSP" } -benchmark_setting { - benchmark_id: "image_classification_offline" - framework: "SNPE" - custom_setting { - id: "scenario" - value: "Offline" - } - custom_setting { - id: "bg_load" - value: "false" - } - custom_setting { - id: "use_ion_buffer" - value: "false" - } - delegate_choice: { - priority: 1 - delegate_name: "SNPE_DSP" - accelerator_name: "psnpe_dsp" - accelerator_desc: "HTP" - batch_size: 12288 - model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.0-snpe/mobilenet_edgetpu_224_1.0_htp_batched_4.dlc" - model_checksum: "6523060565b8d3f326f3f323c531fc1c" - } - } - delegate_selected: "SNPE_DSP" -} - benchmark_setting { benchmark_id: "image_classification_offline_v2" framework: "SNPE" @@ -118,8 +65,8 @@ benchmark_setting { accelerator_desc: "HTP" batch_size: 12288 model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.0-snpe/mobilenet_v4_htp_batched_4.dlc" - model_checksum: 
"0de3b75022ce5c27d5902a080ec1cea0" + model_path: "local:///mlperf_models/mobilenet_v4_htp_batched_4.dlc" + model_checksum: "" } } delegate_selected: "SNPE_DSP" @@ -129,8 +76,8 @@ benchmark_setting { benchmark_id: "object_detection" framework: "SNPE" custom_setting { - id: "snpe_output_layers" - value: "Postprocessor/BatchMultiClassNonMaxSuppression" + id: "snpe_output_tensors" + value: "Postprocessor/BatchMultiClassNonMaxSuppression_classes,Postprocessor/BatchMultiClassNonMaxSuppression_num_detections,detection_scores:0,detection_boxes:0" } custom_setting { id: "bg_load" @@ -146,8 +93,8 @@ benchmark_setting { accelerator_name: "snpe_dsp" accelerator_desc: "HTP" model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.0-snpe/ssd_mobiledet_qat_htp.dlc" - model_checksum: "c333fc135a8c474679d716fe391a9e2a" + model_path: "local:///mlperf_models/ssd_mobiledet_qat_htp.dlc" + model_checksum: "" } } delegate_selected: "SNPE_DSP" @@ -157,8 +104,8 @@ benchmark_setting { benchmark_id: "natural_language_processing" framework: "SNPE" custom_setting { - id: "snpe_output_layers" - value: "transpose" + id: "snpe_output_tensors" + value: "transpose:0" } custom_setting { id: "bg_load" @@ -182,8 +129,8 @@ benchmark_setting { accelerator_name: "snpe_dsp" accelerator_desc: "DSP" model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.0-snpe/mobilebert_quantized_htp.dlc" - model_checksum: "7a641e4df84fc06a1237b7fe1b1c5b08" + model_path: "local:///mlperf_models/mobilebert_quantized_htp.dlc" + model_checksum: "" } } delegate_selected: "SNPE_DSP" @@ -214,8 +161,8 @@ benchmark_setting { accelerator_name: "snpe_dsp" accelerator_desc: "DSP" model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.0-snpe/mobile_mosaic_htp.dlc" - model_checksum: "e870526444c1e48df4f0505e530ecfdf" + model_path: "local:///mlperf_models/mobile_mosaic_htp.dlc" + model_checksum: "" } } delegate_selected: 
"SNPE_DSP" @@ -250,8 +197,8 @@ benchmark_setting { accelerator_name: "snpe_dsp" accelerator_desc: "DSP" model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.0-snpe/snusr_htp.dlc" - model_checksum: "84ef0d9c2e7b710381cea962a22a0b41" + model_path: "local:///mlperf_models/snusr_htp.dlc" + model_checksum: "" } } delegate_selected: "SNPE_DSP" diff --git a/mobile_back_qti/cpp/backend_qti/settings/qti_settings_default_gpu.pbtxt b/mobile_back_qti/cpp/backend_qti/settings/qti_settings_default_gpu.pbtxt index 492235273..23350c8cf 100644 --- a/mobile_back_qti/cpp/backend_qti/settings/qti_settings_default_gpu.pbtxt +++ b/mobile_back_qti/cpp/backend_qti/settings/qti_settings_default_gpu.pbtxt @@ -19,36 +19,6 @@ common_setting { } } -benchmark_setting { - benchmark_id: "image_classification" - framework: "SNPE" - custom_setting { - id: "use_ion_buffer" - value: "false" - } - delegate_choice: { - priority: 1 - delegate_name: "SNPE_GPU" - accelerator_name: "snpe_gpu" - accelerator_desc: "GPU" - model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.0-snpe/mobilenet_edgetpu_224_1.0_htp.dlc" - model_checksum: "cdf1fe622b309f692e05781661248a2b" - } - } - delegate_choice: { - priority: 2 - delegate_name: "SNPE_CPU" - accelerator_name: "snpe_cpu" - accelerator_desc: "CPU" - model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.0-snpe/mobilenet_edgetpu_224_1.0_htp.dlc" - model_checksum: "cdf1fe622b309f692e05781661248a2b" - } - } - delegate_selected: "SNPE_GPU" -} - benchmark_setting { benchmark_id: "image_classification_v2" framework: "SNPE" @@ -62,8 +32,8 @@ benchmark_setting { accelerator_name: "snpe_gpu" accelerator_desc: "GPU" model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.0-snpe/mobilenet_v4_htp.dlc" - model_checksum: "dbab3e231e5f83aabc80d5b69e6dad32" + model_path: "local:///mlperf_models/mobilenet_v4_htp.dlc" + 
model_checksum: "" } } delegate_choice: { @@ -73,44 +43,8 @@ benchmark_setting { accelerator_desc: "CPU" batch_size: 128 model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.0-snpe/mobilenet_v4_htp.dlc" - model_checksum: "dbab3e231e5f83aabc80d5b69e6dad32" - } - } - delegate_selected: "SNPE_GPU" -} - -benchmark_setting { - benchmark_id: "image_classification_offline" - framework: "SNPE" - custom_setting { - id: "use_ion_buffer" - value: "false" - } - custom_setting { - id: "scenario" - value: "Offline" - } - delegate_choice: { - priority: 1 - delegate_name: "SNPE_GPU" - accelerator_name: "psnpe_gpu" - accelerator_desc: "GPU" - batch_size: 128 - model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.0-snpe/mobilenet_edgetpu_224_1.0_htp_batched_4.dlc" - model_checksum: "6523060565b8d3f326f3f323c531fc1c" - } - } - delegate_choice: { - priority: 2 - delegate_name: "SNPE_CPU" - accelerator_name: "psnpe_cpu" - accelerator_desc: "CPU" - batch_size: 128 - model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.0-snpe/mobilenet_edgetpu_224_1.0_htp_batched_4.dlc" - model_checksum: "6523060565b8d3f326f3f323c531fc1c" + model_path: "local:///mlperf_models/mobilenet_v4_htp.dlc" + model_checksum: "" } } delegate_selected: "SNPE_GPU" @@ -134,8 +68,8 @@ benchmark_setting { accelerator_desc: "GPU" batch_size: 12360 model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.0-snpe/mobilenet_v4_htp_batched_4.dlc" - model_checksum: "0de3b75022ce5c27d5902a080ec1cea0" + model_path: "local:///mlperf_models/mobilenet_v4_htp_batched_4.dlc" + model_checksum: "" } } delegate_choice: { @@ -145,8 +79,8 @@ benchmark_setting { accelerator_desc: "CPU" batch_size: 128 model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.0-snpe/mobilenet_v4_htp_batched_4.dlc" - model_checksum: "0de3b75022ce5c27d5902a080ec1cea0" + 
model_path: "local:///mlperf_models/mobilenet_v4_htp_batched_4.dlc" + model_checksum: "" } } delegate_selected: "SNPE_GPU" @@ -156,8 +90,8 @@ benchmark_setting { benchmark_id: "object_detection" framework: "SNPE" custom_setting { - id: "snpe_output_layers" - value: "Postprocessor/BatchMultiClassNonMaxSuppression" + id: "snpe_output_tensors" + value: "Postprocessor/BatchMultiClassNonMaxSuppression_classes,Postprocessor/BatchMultiClassNonMaxSuppression_num_detections,detection_scores:0,detection_boxes:0" } custom_setting { id: "use_ion_buffer" @@ -169,8 +103,8 @@ benchmark_setting { accelerator_name: "snpe_gpu" accelerator_desc: "GPU" model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.0-snpe/ssd_mobiledet_qat_htp.dlc" - model_checksum: "c333fc135a8c474679d716fe391a9e2a" + model_path: "local:///mlperf_models/ssd_mobiledet_qat_htp.dlc" + model_checksum: "" } } delegate_choice: { @@ -179,8 +113,8 @@ benchmark_setting { accelerator_name: "snpe_cpu" accelerator_desc: "CPU" model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.0-snpe/ssd_mobiledet_qat_htp.dlc" - model_checksum: "c333fc135a8c474679d716fe391a9e2a" + model_path: "local:///mlperf_models/ssd_mobiledet_qat_htp.dlc" + model_checksum: "" } } delegate_selected: "SNPE_GPU" @@ -190,8 +124,8 @@ benchmark_setting { benchmark_id: "natural_language_processing" framework: "SNPE" custom_setting { - id: "snpe_output_layers" - value: "transpose" + id: "snpe_output_tensors" + value: "transpose:0" } custom_setting { id: "input_buffer_type" @@ -207,8 +141,8 @@ benchmark_setting { accelerator_name: "snpe_gpu" accelerator_desc: "GPU" model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.0-snpe/mobilebert_quantized_htp.dlc" - model_checksum: "7a641e4df84fc06a1237b7fe1b1c5b08" + model_path: "local:///mlperf_models/mobilebert_quantized_htp.dlc" + model_checksum: "" } } delegate_choice: { @@ -217,8 +151,8 @@ 
benchmark_setting { accelerator_name: "snpe_cpu" accelerator_desc: "CPU" model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.0-snpe/mobilebert_quantized_htp.dlc" - model_checksum: "7a641e4df84fc06a1237b7fe1b1c5b08" + model_path: "local:///mlperf_models/mobilebert_quantized_htp.dlc" + model_checksum: "" } } delegate_selected: "SNPE_GPU" @@ -245,8 +179,8 @@ benchmark_setting { accelerator_name: "snpe_gpu" accelerator_desc: "GPU" model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.0-snpe/mobile_mosaic_htp.dlc" - model_checksum: "e870526444c1e48df4f0505e530ecfdf" + model_path: "local:///mlperf_models/mobile_mosaic_htp.dlc" + model_checksum: "" } } delegate_choice: { @@ -255,8 +189,8 @@ benchmark_setting { accelerator_name: "snpe_cpu" accelerator_desc: "CPU" model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.0-snpe/mobile_mosaic_htp.dlc" - model_checksum: "e870526444c1e48df4f0505e530ecfdf" + model_path: "local:///mlperf_models/mobile_mosaic_htp.dlc" + model_checksum: "" } } delegate_selected: "SNPE_GPU" @@ -283,8 +217,8 @@ benchmark_setting { accelerator_name: "snpe_gpu" accelerator_desc: "GPU" model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.0-snpe/snusr_htp.dlc" - model_checksum: "84ef0d9c2e7b710381cea962a22a0b41" + model_path: "local:///mlperf_models/snusr_htp.dlc" + model_checksum: "" } } delegate_choice: { @@ -293,9 +227,9 @@ benchmark_setting { accelerator_name: "snpe_cpu" accelerator_desc: "CPU" model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.0-snpe/snusr_htp.dlc" - model_checksum: "84ef0d9c2e7b710381cea962a22a0b41" + model_path: "local:///mlperf_models/snusr_htp.dlc" + model_checksum: "" } } delegate_selected: "SNPE_GPU" -} \ No newline at end of file +} diff --git a/mobile_back_qti/cpp/backend_qti/settings/qti_settings_gpufp16.pbtxt 
b/mobile_back_qti/cpp/backend_qti/settings/qti_settings_gpufp16.pbtxt index 687a1dcc5..3b620b299 100644 --- a/mobile_back_qti/cpp/backend_qti/settings/qti_settings_gpufp16.pbtxt +++ b/mobile_back_qti/cpp/backend_qti/settings/qti_settings_gpufp16.pbtxt @@ -19,27 +19,6 @@ common_setting { } } -benchmark_setting { - benchmark_id: "image_classification" - framework: "SNPE" - custom_setting { - id: "bg_load" - value: "false" - } - delegate_choice: { - priority: 1 - delegate_name: "SNPE_GPU_FP16" - accelerator_name: "snpe_gpu_fp16" - accelerator_desc: "GPU_FP16" - model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.0-snpe/mobilenet_edgetpu_224_1.0_htp.dlc" - model_checksum: "cdf1fe622b309f692e05781661248a2b" - } - } - single_stream_expected_latency_ns: 500000 - delegate_selected: "SNPE_GPU_FP16" -} - benchmark_setting { benchmark_id: "image_classification_v2" framework: "SNPE" @@ -53,37 +32,14 @@ benchmark_setting { accelerator_name: "snpe_gpu_fp16" accelerator_desc: "GPU_FP16" model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.0-snpe/mobilenet_v4_htp.dlc" - model_checksum: "dbab3e231e5f83aabc80d5b69e6dad32" + model_path: "local:///mlperf_models/mobilenet_v4_htp.dlc" + model_checksum: "" } } single_stream_expected_latency_ns: 500000 delegate_selected: "SNPE_GPU_FP16" } -benchmark_setting { - benchmark_id: "image_classification_offline" - framework: "SNPE" - custom_setting { - id: "scenario" - value: "Offline" - } - custom_setting { - id: "bg_load" - value: "false" - } - delegate_choice: { - priority: 1 - delegate_name: "SNPE_GPU_FP16" - accelerator_name: "psnpe_gpu_fp16" - accelerator_desc: "GPU_FP16" - batch_size: 12288 - model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.0-snpe/mobilenet_edgetpu_224_1.0_htp_batched_4.dlc" - model_checksum: "6523060565b8d3f326f3f323c531fc1c" - } - } - delegate_selected: "SNPE_GPU_FP16" -} + benchmark_setting { 
benchmark_id: "image_classification_offline_v2" framework: "SNPE" @@ -102,18 +58,19 @@ benchmark_setting { accelerator_desc: "GPU_FP16" batch_size: 12360 model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.0-snpe/mobilenet_v4_htp_batched_4.dlc" - model_checksum: "0de3b75022ce5c27d5902a080ec1cea0" + model_path: "local:///mlperf_models/mobilenet_v4_htp_batched_4.dlc" + model_checksum: "" } } delegate_selected: "SNPE_GPU_FP16" } + benchmark_setting { benchmark_id: "natural_language_processing" framework: "SNPE" custom_setting { - id: "snpe_output_layers" - value: "transpose" + id: "snpe_output_tensors" + value: "transpose:0" } custom_setting { id: "bg_load" @@ -137,12 +94,13 @@ benchmark_setting { accelerator_name: "snpe_gpu_fp16" accelerator_desc: "GPU_FP16" model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.0-snpe/mobilebert_quantized_htp.dlc" - model_checksum: "7a641e4df84fc06a1237b7fe1b1c5b08" + model_path: "local:///mlperf_models/mobilebert_quantized_htp.dlc" + model_checksum: "" } } delegate_selected: "SNPE_GPU_FP16" } + benchmark_setting { benchmark_id: "image_segmentation_v2" framework: "SNPE" @@ -164,8 +122,8 @@ benchmark_setting { accelerator_name: "snpe_gpu_fp16" accelerator_desc: "GPU_FP16" model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.0-snpe/mobile_mosaic_htp.dlc" - model_checksum: "e870526444c1e48df4f0505e530ecfdf" + model_path: "local:///mlperf_models/mobile_mosaic_htp.dlc" + model_checksum: "" } } delegate_selected: "SNPE_GPU_FP16" @@ -199,9 +157,9 @@ benchmark_setting { accelerator_name: "snpe_gpu_fp16" accelerator_desc: "GPU_FP16" model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.0-snpe/snusr_htp.dlc" - model_checksum: "84ef0d9c2e7b710381cea962a22a0b41" + model_path: "local:///mlperf_models/snusr_htp.dlc" + model_checksum: "" } } delegate_selected: "SNPE_GPU_FP16" -} \ No newline at end of file 
+} diff --git a/mobile_back_qti/cpp/backend_qti/settings/qti_settings_sd7cxg3.pbtxt b/mobile_back_qti/cpp/backend_qti/settings/qti_settings_sd7cxg3.pbtxt index a5764ab61..59db16e3d 100644 --- a/mobile_back_qti/cpp/backend_qti/settings/qti_settings_sd7cxg3.pbtxt +++ b/mobile_back_qti/cpp/backend_qti/settings/qti_settings_sd7cxg3.pbtxt @@ -19,34 +19,6 @@ common_setting { } } -benchmark_setting { - benchmark_id: "image_classification" - framework: "SNPE" - custom_setting { - id: "bg_load" - value: "false" - } - custom_setting { - id: "output_buffer_type" - value: "uint_8" - } - custom_setting { - id: "use_ion_buffer" - value: "false" - } - delegate_choice: { - priority: 1 - delegate_name: "SNPE_DSP" - accelerator_name: "snpe_dsp" - accelerator_desc: "HTP" - model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.0-snpe/mobilenet_edgetpu_224_1.0_htp.dlc" - model_checksum: "cdf1fe622b309f692e05781661248a2b" - } - } - delegate_selected: "SNPE_DSP" -} - benchmark_setting { benchmark_id: "image_classification_v2" framework: "SNPE" @@ -68,47 +40,14 @@ benchmark_setting { accelerator_name: "snpe_dsp" accelerator_desc: "HTP" model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.0-snpe/mobilenet_v4_htp.dlc" - model_checksum: "dbab3e231e5f83aabc80d5b69e6dad32" + model_path: "local:///mlperf_models/mobilenet_v4_htp.dlc" + model_checksum: "" } } single_stream_expected_latency_ns: 250000 delegate_selected: "SNPE_DSP" } -benchmark_setting { - benchmark_id: "image_classification_offline" - framework: "SNPE" - custom_setting { - id: "scenario" - value: "Offline" - } - custom_setting { - id: "bg_load" - value: "false" - } - custom_setting { - id: "use_ion_buffer" - value: "false" - } - custom_setting { - id: "output_buffer_type" - value: "uint_8" - } - delegate_choice: { - priority: 1 - delegate_name: "SNPE_DSP" - accelerator_name: "psnpe_dsp" - accelerator_desc: "HTP" - batch_size: 12288 - model_file: { - 
model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.0-snpe/mobilenet_edgetpu_224_1.0_htp_batched_8.dlc" - model_checksum: "1e09cab7d0d381ef02cfd5ea5b85da92" - } - } - delegate_selected: "SNPE_DSP" -} - benchmark_setting { benchmark_id: "image_classification_offline_v2" framework: "SNPE" @@ -139,19 +78,20 @@ benchmark_setting { accelerator_desc: "HTP" batch_size: 12288 model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.0-snpe/mobilenet_v4_htp_batched_4.dlc" - model_checksum: "0de3b75022ce5c27d5902a080ec1cea0" + model_path: "local:///mlperf_models/mobilenet_v4_htp_batched_4.dlc" + model_checksum: "" } } delegate_selected: "SNPE_DSP" } + benchmark_setting { benchmark_id: "object_detection" framework: "SNPE" custom_setting { - id: "snpe_output_layers" - value: "Postprocessor/BatchMultiClassNonMaxSuppression" + id: "snpe_output_tensors" + value: "Postprocessor/BatchMultiClassNonMaxSuppression_classes,Postprocessor/BatchMultiClassNonMaxSuppression_num_detections,detection_scores:0,detection_boxes:0" } custom_setting { id: "bg_load" @@ -167,8 +107,8 @@ benchmark_setting { accelerator_name: "snpe_dsp" accelerator_desc: "HTP" model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.0-snpe/ssd_mobiledet_qat_htp.dlc" - model_checksum: "c333fc135a8c474679d716fe391a9e2a" + model_path: "local:///mlperf_models/ssd_mobiledet_qat_htp.dlc" + model_checksum: "" } } delegate_selected: "SNPE_DSP" @@ -178,8 +118,8 @@ benchmark_setting { benchmark_id: "natural_language_processing" framework: "SNPE" custom_setting { - id: "snpe_output_layers" - value: "transpose" + id: "snpe_output_tensors" + value: "transpose:0" } custom_setting { id: "bg_load" @@ -199,8 +139,8 @@ benchmark_setting { accelerator_name: "snpe_dsp" accelerator_desc: "DSP" model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.0-snpe/mobilebert_quantized_htp.dlc" - model_checksum: 
"7a641e4df84fc06a1237b7fe1b1c5b08" + model_path: "local:///mlperf_models/mobilebert_quantized_htp.dlc" + model_checksum: "" } } delegate_selected: "SNPE_DSP" @@ -231,8 +171,8 @@ benchmark_setting { accelerator_name: "snpe_dsp" accelerator_desc: "DSP" model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.0-snpe/mobile_mosaic_htp.dlc" - model_checksum: "e870526444c1e48df4f0505e530ecfdf" + model_path: "local:///mlperf_models/mobile_mosaic_htp.dlc" + model_checksum: "" } } delegate_selected: "SNPE_DSP" @@ -259,9 +199,9 @@ benchmark_setting { accelerator_name: "snpe_dsp" accelerator_desc: "DSP" model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.0-snpe/snusr_htp.dlc" - model_checksum: "84ef0d9c2e7b710381cea962a22a0b41" + model_path: "local:///mlperf_models//snusr_htp.dlc" + model_checksum: "" } } delegate_selected: "SNPE_DSP" -} \ No newline at end of file +} diff --git a/mobile_back_qti/cpp/backend_qti/settings/qti_settings_sd7g1.pbtxt b/mobile_back_qti/cpp/backend_qti/settings/qti_settings_sd7g1.pbtxt index 959b339a4..9c1e8f8d5 100644 --- a/mobile_back_qti/cpp/backend_qti/settings/qti_settings_sd7g1.pbtxt +++ b/mobile_back_qti/cpp/backend_qti/settings/qti_settings_sd7g1.pbtxt @@ -19,30 +19,6 @@ common_setting { } } -benchmark_setting { - benchmark_id: "image_classification" - framework: "SNPE" - custom_setting { - id: "bg_load" - value: "false" - } - custom_setting { - id: "output_buffer_type" - value: "uint_8" - } - delegate_choice: { - priority: 1 - delegate_name: "SNPE_DSP" - accelerator_name: "snpe_dsp" - accelerator_desc: "HTP" - model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.0-snpe/mobilenet_edgetpu_224_1.0_htp.dlc" - model_checksum: "cdf1fe622b309f692e05781661248a2b" - } - } - delegate_selected: "SNPE_DSP" -} - benchmark_setting { benchmark_id: "image_classification_v2" framework: "SNPE" @@ -64,41 +40,8 @@ benchmark_setting { 
accelerator_name: "snpe_dsp" accelerator_desc: "HTP" model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.0-snpe/mobilenet_v4_htp.dlc" - model_checksum: "dbab3e231e5f83aabc80d5b69e6dad32" - } - } - delegate_selected: "SNPE_DSP" -} - -benchmark_setting { - benchmark_id: "image_classification_offline" - framework: "SNPE" - custom_setting { - id: "scenario" - value: "Offline" - } - custom_setting { - id: "bg_load" - value: "false" - } - custom_setting { - id: "output_buffer_type" - value: "uint_8" - } - custom_setting { - id: "use_ion_buffer" - value: "false" - } - delegate_choice: { - priority: 1 - delegate_name: "SNPE_DSP" - accelerator_name: "psnpe_dsp" - accelerator_desc: "HTP" - batch_size: 12288 - model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.0-snpe/mobilenet_v4_htp_batched_4.dlc" - model_checksum: "0de3b75022ce5c27d5902a080ec1cea0" + model_path: "local:///mlperf_models/mobilenet_v4_htp.dlc" + model_checksum: "" } } delegate_selected: "SNPE_DSP" @@ -132,10 +75,10 @@ benchmark_setting { delegate_name: "SNPE_DSP" accelerator_name: "psnpe_dsp" accelerator_desc: "HTP" - batch_size: 12360 + batch_size: 12288 model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.0-snpe/mobilenet_edgetpu_224_1.0_htp_batched_4.dlc" - model_checksum: "6523060565b8d3f326f3f323c531fc1c" + model_path: "local:///mlperf_models/mobilenet_v4_htp_batched_4.dlc" + model_checksum: "" } } delegate_selected: "SNPE_DSP" @@ -145,8 +88,8 @@ benchmark_setting { benchmark_id: "object_detection" framework: "SNPE" custom_setting { - id: "snpe_output_layers" - value: "Postprocessor/BatchMultiClassNonMaxSuppression" + id: "snpe_output_tensors" + value: "Postprocessor/BatchMultiClassNonMaxSuppression_classes,Postprocessor/BatchMultiClassNonMaxSuppression_num_detections,detection_scores:0,detection_boxes:0" } custom_setting { id: "bg_load" @@ -162,8 +105,8 @@ benchmark_setting { 
accelerator_name: "snpe_dsp" accelerator_desc: "HTP" model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.0-snpe/ssd_mobiledet_qat_htp.dlc" - model_checksum: "c333fc135a8c474679d716fe391a9e2a" + model_path: "local:///mlperf_models/ssd_mobiledet_qat_htp.dlc" + model_checksum: "" } } delegate_selected: "SNPE_DSP" @@ -173,8 +116,8 @@ benchmark_setting { benchmark_id: "natural_language_processing" framework: "SNPE" custom_setting { - id: "snpe_output_layers" - value: "transpose" + id: "snpe_output_tensors" + value: "transpose:0" } custom_setting { id: "bg_load" @@ -198,8 +141,8 @@ benchmark_setting { accelerator_name: "snpe_dsp" accelerator_desc: "DSP" model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.0-snpe/mobilebert_quantized_htp.dlc" - model_checksum: "7a641e4df84fc06a1237b7fe1b1c5b08" + model_path: "local:///mlperf_models/mobilebert_quantized_htp.dlc" + model_checksum: "" } } delegate_selected: "SNPE_DSP" @@ -226,8 +169,8 @@ benchmark_setting { accelerator_name: "snpe_dsp" accelerator_desc: "DSP" model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.0-snpe/mobile_mosaic_htp.dlc" - model_checksum: "e870526444c1e48df4f0505e530ecfdf" + model_path: "local:///mlperf_models/mobile_mosaic_htp.dlc" + model_checksum: "" } } delegate_selected: "SNPE_DSP" @@ -258,9 +201,9 @@ benchmark_setting { accelerator_name: "snpe_dsp" accelerator_desc: "DSP" model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.0-snpe/snusr_htp.dlc" - model_checksum: "84ef0d9c2e7b710381cea962a22a0b41" + model_path: "local:///mlperf_models//snusr_htp.dlc" + model_checksum: "" } } delegate_selected: "SNPE_DSP" -} \ No newline at end of file +} diff --git a/mobile_back_qti/cpp/backend_qti/settings/qti_settings_sd7pg2.pbtxt b/mobile_back_qti/cpp/backend_qti/settings/qti_settings_sd7pg2.pbtxt index c8a7dd85b..cb5cfbb44 100644 --- 
a/mobile_back_qti/cpp/backend_qti/settings/qti_settings_sd7pg2.pbtxt +++ b/mobile_back_qti/cpp/backend_qti/settings/qti_settings_sd7pg2.pbtxt @@ -19,31 +19,6 @@ common_setting { } } -benchmark_setting { - benchmark_id: "image_classification" - framework: "SNPE" - custom_setting { - id: "bg_load" - value: "false" - } - custom_setting { - id: "output_buffer_type" - value: "uint_8" - } - delegate_choice: { - priority: 1 - delegate_name: "SNPE_DSP" - accelerator_name: "snpe_dsp" - accelerator_desc: "HTP" - model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.0-snpe/mobilenet_edgetpu_224_1.0_htp.dlc" - model_checksum: "cdf1fe622b309f692e05781661248a2b" - } - } - single_stream_expected_latency_ns: 600000 - delegate_selected: "SNPE_DSP" -} - benchmark_setting { benchmark_id: "image_classification_v2" framework: "SNPE" @@ -65,47 +40,14 @@ benchmark_setting { accelerator_name: "snpe_dsp" accelerator_desc: "HTP" model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.0-snpe/mobilenet_v4_htp.dlc" - model_checksum: "dbab3e231e5f83aabc80d5b69e6dad32" + model_path: "local:///mlperf_models/mobilenet_v4_htp.dlc" + model_checksum: "" } } single_stream_expected_latency_ns: 250000 delegate_selected: "SNPE_DSP" } -benchmark_setting { - benchmark_id: "image_classification_offline" - framework: "SNPE" - custom_setting { - id: "scenario" - value: "Offline" - } - custom_setting { - id: "bg_load" - value: "false" - } - custom_setting { - id: "output_buffer_type" - value: "uint_8" - } - custom_setting { - id: "use_ion_buffer" - value: "false" - } - delegate_choice: { - priority: 1 - delegate_name: "SNPE_DSP" - accelerator_name: "psnpe_dsp" - accelerator_desc: "HTP" - batch_size: 12288 - model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.0-snpe/mobilenet_edgetpu_224_1.0_htp_batched_4.dlc" - model_checksum: "6523060565b8d3f326f3f323c531fc1c" - } - } - delegate_selected: 
"SNPE_DSP" -} - benchmark_setting { benchmark_id: "image_classification_offline_v2" framework: "SNPE" @@ -136,8 +78,8 @@ benchmark_setting { accelerator_desc: "HTP" batch_size: 12360 model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.0-snpe/mobilenet_v4_htp_batched_4.dlc" - model_checksum: "0de3b75022ce5c27d5902a080ec1cea0" + model_path: "local:///mlperf_models/mobilenet_v4_htp_batched_4.dlc" + model_checksum: "" } } delegate_selected: "SNPE_DSP" @@ -147,8 +89,8 @@ benchmark_setting { benchmark_id: "object_detection" framework: "SNPE" custom_setting { - id: "snpe_output_layers" - value: "Postprocessor/BatchMultiClassNonMaxSuppression" + id: "snpe_output_tensors" + value: "Postprocessor/BatchMultiClassNonMaxSuppression_classes,Postprocessor/BatchMultiClassNonMaxSuppression_num_detections,detection_scores:0,detection_boxes:0" } custom_setting { id: "bg_load" @@ -164,8 +106,8 @@ benchmark_setting { accelerator_name: "snpe_dsp" accelerator_desc: "HTP" model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.0-snpe/ssd_mobiledet_qat_htp.dlc" - model_checksum: "c333fc135a8c474679d716fe391a9e2a" + model_path: "local:///mlperf_models/ssd_mobiledet_qat_htp.dlc" + model_checksum: "" } } delegate_selected: "SNPE_DSP" @@ -175,8 +117,8 @@ benchmark_setting { benchmark_id: "natural_language_processing" framework: "SNPE" custom_setting { - id: "snpe_output_layers" - value: "transpose" + id: "snpe_output_tensors" + value: "transpose:0" } custom_setting { id: "bg_load" @@ -200,8 +142,8 @@ benchmark_setting { accelerator_name: "snpe_dsp" accelerator_desc: "DSP" model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.0-snpe/mobilebert_quantized_htp.dlc" - model_checksum: "7a641e4df84fc06a1237b7fe1b1c5b08" + model_path: "local:///mlperf_models/mobilebert_quantized_htp.dlc" + model_checksum: "" } } delegate_selected: "SNPE_DSP" @@ -228,8 +170,8 @@ benchmark_setting { 
accelerator_name: "snpe_dsp" accelerator_desc: "DSP" model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.0-snpe/mobile_mosaic_htp.dlc" - model_checksum: "e870526444c1e48df4f0505e530ecfdf" + model_path: "local:///mlperf_models/mobile_mosaic_htp.dlc" + model_checksum: "" } } delegate_selected: "SNPE_DSP" @@ -260,9 +202,9 @@ benchmark_setting { accelerator_name: "snpe_dsp" accelerator_desc: "DSP" model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.0-snpe/snusr_htp.dlc" - model_checksum: "84ef0d9c2e7b710381cea962a22a0b41" + model_path: "local:///mlperf_models//snusr_htp.dlc" + model_checksum: "" } } delegate_selected: "SNPE_DSP" -} \ No newline at end of file +} diff --git a/mobile_back_qti/cpp/backend_qti/settings/qti_settings_sd8cxg3.pbtxt b/mobile_back_qti/cpp/backend_qti/settings/qti_settings_sd8cxg3.pbtxt index 5fdf92bd2..be30c60ab 100644 --- a/mobile_back_qti/cpp/backend_qti/settings/qti_settings_sd8cxg3.pbtxt +++ b/mobile_back_qti/cpp/backend_qti/settings/qti_settings_sd8cxg3.pbtxt @@ -19,35 +19,6 @@ common_setting { } } -benchmark_setting { - benchmark_id: "image_classification" - framework: "SNPE" - custom_setting { - id: "bg_load" - value: "false" - } - custom_setting { - id: "use_ion_buffer" - value: "true" - } - custom_setting { - id: "output_buffer_type" - value: "uint_8" - } - delegate_choice: { - priority: 1 - delegate_name: "SNPE_DSP" - accelerator_name: "snpe_dsp" - accelerator_desc: "HTP" - model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.0-snpe/mobilenet_edgetpu_224_1.0_htp.dlc" - model_checksum: "cdf1fe622b309f692e05781661248a2b" - } - } - single_stream_expected_latency_ns: 600000 - delegate_selected: "SNPE_DSP" -} - benchmark_setting { benchmark_id: "image_classification_v2" framework: "SNPE" @@ -73,47 +44,14 @@ benchmark_setting { accelerator_name: "snpe_dsp" accelerator_desc: "HTP" model_file: { - model_path: 
"https://github.com/mlcommons/mobile_models/releases/download/v4.0-snpe/mobilenet_v4_htp.dlc" - model_checksum: "dbab3e231e5f83aabc80d5b69e6dad32" + model_path: "local:///mlperf_models/mobilenet_v4_htp.dlc" + model_checksum: "" } } single_stream_expected_latency_ns: 60000 delegate_selected: "SNPE_DSP" } -benchmark_setting { - benchmark_id: "image_classification_offline" - framework: "SNPE" - custom_setting { - id: "scenario" - value: "Offline" - } - custom_setting { - id: "bg_load" - value: "false" - } - custom_setting { - id: "use_ion_buffer" - value: "false" - } - custom_setting { - id: "output_buffer_type" - value: "uint_8" - } - delegate_choice: { - priority: 1 - delegate_name: "SNPE_DSP" - accelerator_name: "psnpe_dsp" - accelerator_desc: "HTP" - batch_size: 12288 - model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.0-snpe/mobilenet_edgetpu_224_1.0_htp_batched_8.dlc" - model_checksum: "1e09cab7d0d381ef02cfd5ea5b85da92" - } - } - delegate_selected: "SNPE_DSP" -} - benchmark_setting { benchmark_id: "image_classification_offline_v2" framework: "SNPE" @@ -144,8 +82,8 @@ benchmark_setting { accelerator_desc: "HTP" batch_size: 12360 model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.0-snpe/mobilenet_v4_htp_batched_4.dlc" - model_checksum: "0de3b75022ce5c27d5902a080ec1cea0" + model_path: "local:///mlperf_models/mobilenet_v4_htp_batched_4.dlc" + model_checksum: "" } } delegate_selected: "SNPE_DSP" @@ -155,8 +93,8 @@ benchmark_setting { benchmark_id: "object_detection" framework: "SNPE" custom_setting { - id: "snpe_output_layers" - value: "Postprocessor/BatchMultiClassNonMaxSuppression" + id: "snpe_output_tensors" + value: "Postprocessor/BatchMultiClassNonMaxSuppression_classes,Postprocessor/BatchMultiClassNonMaxSuppression_num_detections,detection_scores:0,detection_boxes:0" } custom_setting { id: "bg_load" @@ -172,8 +110,8 @@ benchmark_setting { accelerator_name: "snpe_dsp" 
accelerator_desc: "HTP" model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.0-snpe/ssd_mobiledet_qat_htp.dlc" - model_checksum: "c333fc135a8c474679d716fe391a9e2a" + model_path: "local:///mlperf_models/ssd_mobiledet_qat_htp.dlc" + model_checksum: "" } } delegate_selected: "SNPE_DSP" @@ -183,8 +121,8 @@ benchmark_setting { benchmark_id: "natural_language_processing" framework: "SNPE" custom_setting { - id: "snpe_output_layers" - value: "transpose" + id: "snpe_output_tensors" + value: "transpose:0" } custom_setting { id: "input_buffer_type" @@ -204,8 +142,8 @@ benchmark_setting { accelerator_name: "snpe_dsp" accelerator_desc: "DSP" model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.0-snpe/mobilebert_quantized_htp.dlc" - model_checksum: "7a641e4df84fc06a1237b7fe1b1c5b08" + model_path: "local:///mlperf_models/mobilebert_quantized_htp.dlc" + model_checksum: "" } } delegate_selected: "SNPE_DSP" @@ -236,8 +174,8 @@ benchmark_setting { accelerator_name: "snpe_dsp" accelerator_desc: "DSP" model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.0-snpe/mobile_mosaic_htp.dlc" - model_checksum: "e870526444c1e48df4f0505e530ecfdf" + model_path: "local:///mlperf_models/mobile_mosaic_htp.dlc" + model_checksum: "" } } delegate_selected: "SNPE_DSP" @@ -264,9 +202,9 @@ benchmark_setting { accelerator_name: "snpe_dsp" accelerator_desc: "DSP" model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.0-snpe/snusr_htp.dlc" - model_checksum: "84ef0d9c2e7b710381cea962a22a0b41" + model_path: "local:///mlperf_models//snusr_htp.dlc" + model_checksum: "" } } delegate_selected: "SNPE_DSP" -} \ No newline at end of file +} diff --git a/mobile_back_qti/cpp/backend_qti/settings/qti_settings_sd8g1.pbtxt b/mobile_back_qti/cpp/backend_qti/settings/qti_settings_sd8g1.pbtxt index 2aba61269..afdc96265 100644 --- 
a/mobile_back_qti/cpp/backend_qti/settings/qti_settings_sd8g1.pbtxt +++ b/mobile_back_qti/cpp/backend_qti/settings/qti_settings_sd8g1.pbtxt @@ -19,35 +19,6 @@ common_setting { } } -benchmark_setting { - benchmark_id: "image_classification" - framework: "SNPE" - custom_setting { - id: "bg_load" - value: "false" - } - custom_setting { - id: "use_ion_buffer" - value: "false" - } - custom_setting { - id: "output_buffer_type" - value: "uint_8" - } - delegate_choice: { - priority: 1 - delegate_name: "SNPE_DSP" - accelerator_name: "snpe_dsp" - accelerator_desc: "HTP" - model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.0-snpe/mobilenet_edgetpu_224_1.0_htp.dlc" - model_checksum: "cdf1fe622b309f692e05781661248a2b" - } - } - single_stream_expected_latency_ns: 800000 - delegate_selected: "SNPE_DSP" -} - benchmark_setting { benchmark_id: "image_classification_v2" framework: "SNPE" @@ -69,47 +40,14 @@ benchmark_setting { accelerator_name: "snpe_dsp" accelerator_desc: "HTP" model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.0-snpe/mobilenet_v4_htp.dlc" - model_checksum: "dbab3e231e5f83aabc80d5b69e6dad32" + model_path: "local:///mlperf_models/mobilenet_v4_htp.dlc" + model_checksum: "" } } single_stream_expected_latency_ns: 80000 delegate_selected: "SNPE_DSP" } -benchmark_setting { - benchmark_id: "image_classification_offline" - framework: "SNPE" - custom_setting { - id: "scenario" - value: "Offline" - } - custom_setting { - id: "bg_load" - value: "false" - } - custom_setting { - id: "use_ion_buffer" - value: "false" - } - custom_setting { - id: "output_buffer_type" - value: "uint_8" - } - delegate_choice: { - priority: 1 - delegate_name: "SNPE_DSP" - accelerator_name: "psnpe_dsp" - accelerator_desc: "HTP" - batch_size: 12288 - model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.0-snpe/mobilenet_edgetpu_224_1.0_htp_batched_3.dlc" - model_checksum: 
"550f807bc7ef40f77018a64a47507d09" - } - } - delegate_selected: "SNPE_DSP" -} - benchmark_setting { benchmark_id: "image_classification_offline_v2" framework: "SNPE" @@ -140,8 +78,8 @@ benchmark_setting { accelerator_desc: "HTP" batch_size: 12360 model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.0-snpe/mobilenet_v4_htp_batched_4.dlc" - model_checksum: "0de3b75022ce5c27d5902a080ec1cea0" + model_path: "local:///mlperf_models/mobilenet_v4_htp_batched_4.dlc" + model_checksum: "" } } delegate_selected: "SNPE_DSP" @@ -151,8 +89,8 @@ benchmark_setting { benchmark_id: "object_detection" framework: "SNPE" custom_setting { - id: "snpe_output_layers" - value: "Postprocessor/BatchMultiClassNonMaxSuppression" + id: "snpe_output_tensors" + value: "Postprocessor/BatchMultiClassNonMaxSuppression_classes,Postprocessor/BatchMultiClassNonMaxSuppression_num_detections,detection_scores:0,detection_boxes:0" } custom_setting { id: "bg_load" @@ -168,8 +106,8 @@ benchmark_setting { accelerator_name: "snpe_dsp" accelerator_desc: "HTP" model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.0-snpe/ssd_mobiledet_qat_htp.dlc" - model_checksum: "c333fc135a8c474679d716fe391a9e2a" + model_path: "local:///mlperf_models/ssd_mobiledet_qat_htp.dlc" + model_checksum: "" } } delegate_selected: "SNPE_DSP" @@ -179,8 +117,8 @@ benchmark_setting { benchmark_id: "natural_language_processing" framework: "SNPE" custom_setting { - id: "snpe_output_layers" - value: "transpose" + id: "snpe_output_tensors" + value: "transpose:0" } custom_setting { id: "bg_load" @@ -204,8 +142,8 @@ benchmark_setting { accelerator_name: "snpe_dsp" accelerator_desc: "DSP" model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.0-snpe/mobilebert_quantized_htp.dlc" - model_checksum: "7a641e4df84fc06a1237b7fe1b1c5b08" + model_path: "local:///mlperf_models/mobilebert_quantized_htp.dlc" + model_checksum: "" } } 
delegate_selected: "SNPE_DSP" @@ -232,8 +170,8 @@ benchmark_setting { accelerator_name: "snpe_dsp" accelerator_desc: "DSP" model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.0-snpe/mobile_mosaic_htp.dlc" - model_checksum: "e870526444c1e48df4f0505e530ecfdf" + model_path: "local:///mlperf_models/mobile_mosaic_htp.dlc" + model_checksum: "" } } delegate_selected: "SNPE_DSP" @@ -264,8 +202,8 @@ benchmark_setting { accelerator_name: "snpe_dsp" accelerator_desc: "DSP" model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.0-snpe/snusr_htp.dlc" - model_checksum: "84ef0d9c2e7b710381cea962a22a0b41" + model_path: "local:///mlperf_models//snusr_htp.dlc" + model_checksum: "" } } delegate_selected: "SNPE_DSP" diff --git a/mobile_back_qti/cpp/backend_qti/settings/qti_settings_sd8g2.pbtxt b/mobile_back_qti/cpp/backend_qti/settings/qti_settings_sd8g2.pbtxt index f962df30c..2990cc1b3 100644 --- a/mobile_back_qti/cpp/backend_qti/settings/qti_settings_sd8g2.pbtxt +++ b/mobile_back_qti/cpp/backend_qti/settings/qti_settings_sd8g2.pbtxt @@ -19,31 +19,6 @@ common_setting { } } -benchmark_setting { - benchmark_id: "image_classification" - framework: "SNPE" - custom_setting { - id: "bg_load" - value: "false" - } - custom_setting { - id: "output_buffer_type" - value: "uint_8" - } - delegate_choice: { - priority: 1 - delegate_name: "SNPE_DSP" - accelerator_name: "snpe_dsp" - accelerator_desc: "HTP" - model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.0-snpe/mobilenet_edgetpu_224_1.0_htp_O2.dlc" - model_checksum: "25977982896e607bceb55340c8d76223" - } - } - single_stream_expected_latency_ns: 300000 - delegate_selected: "SNPE_DSP" -} - benchmark_setting { benchmark_id: "image_classification_v2" framework: "SNPE" @@ -65,44 +40,11 @@ benchmark_setting { accelerator_name: "snpe_dsp" accelerator_desc: "HTP" model_file: { - model_path: 
"https://github.com/mlcommons/mobile_models/releases/download/v4.0-snpe/mobilenet_v4_htp.dlc" - model_checksum: "dbab3e231e5f83aabc80d5b69e6dad32" - } - } - single_stream_expected_latency_ns: 50000 - delegate_selected: "SNPE_DSP" -} - -benchmark_setting { - benchmark_id: "image_classification_offline" - framework: "SNPE" - custom_setting { - id: "scenario" - value: "Offline" - } - custom_setting { - id: "bg_load" - value: "false" - } - custom_setting { - id: "output_buffer_type" - value: "uint_8" - } - custom_setting { - id: "use_ion_buffer" - value: "false" - } - delegate_choice: { - priority: 1 - delegate_name: "SNPE_DSP" - accelerator_name: "psnpe_dsp" - accelerator_desc: "HTP" - batch_size: 12288 - model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.0-snpe/mobilenet_edgetpu_224_1.0_htp_batched_4_O2.dlc" - model_checksum: "b836e404b3aa5ff7914fac8376643fe4" + model_path: "local:///mlperf_models/mobilenet_v4_htp.dlc" + model_checksum: "" } } + single_stream_expected_latency_ns: 500000 delegate_selected: "SNPE_DSP" } @@ -134,10 +76,10 @@ benchmark_setting { delegate_name: "SNPE_DSP" accelerator_name: "psnpe_dsp" accelerator_desc: "HTP" - batch_size: 12288 + batch_size: 12360 model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.0-snpe/mobilenet_v4_htp_batched_4.dlc" - model_checksum: "0de3b75022ce5c27d5902a080ec1cea0" + model_path: "local:///mlperf_models/mobilenet_v4_htp_batched_4.dlc" + model_checksum: "" } } delegate_selected: "SNPE_DSP" @@ -147,8 +89,8 @@ benchmark_setting { benchmark_id: "object_detection" framework: "SNPE" custom_setting { - id: "snpe_output_layers" - value: "Postprocessor/BatchMultiClassNonMaxSuppression" + id: "snpe_output_tensors" + value: "Postprocessor/BatchMultiClassNonMaxSuppression_classes,Postprocessor/BatchMultiClassNonMaxSuppression_num_detections,detection_scores:0,detection_boxes:0" } custom_setting { id: "bg_load" @@ -164,8 +106,8 @@ 
benchmark_setting { accelerator_name: "snpe_dsp" accelerator_desc: "HTP" model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.0-snpe/ssd_mobiledet_qat_htp_O2.dlc" - model_checksum: "5802abfad10a7fc5c5849b13943d6d44" + model_path: "local:///mlperf_models/ssd_mobiledet_qat_htp.dlc" + model_checksum: "" } } delegate_selected: "SNPE_DSP" @@ -175,8 +117,8 @@ benchmark_setting { benchmark_id: "natural_language_processing" framework: "SNPE" custom_setting { - id: "snpe_output_layers" - value: "transpose" + id: "snpe_output_tensors" + value: "transpose:0" } custom_setting { id: "bg_load" @@ -192,7 +134,7 @@ benchmark_setting { } custom_setting { id: "perf_profile" - value: "sustained_high_performance" + value: "high_performance" } delegate_choice: { priority: 1 @@ -200,8 +142,8 @@ benchmark_setting { accelerator_name: "snpe_dsp" accelerator_desc: "DSP" model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.0-snpe/mobilebert_quantized_htp_O2.dlc" - model_checksum: "9d0dadbb6014289916a6078c4c991dd5" + model_path: "local:///mlperf_models/mobilebert_quantized_htp.dlc" + model_checksum: "" } } delegate_selected: "SNPE_DSP" @@ -226,10 +168,10 @@ benchmark_setting { priority: 1 delegate_name: "SNPE_DSP" accelerator_name: "snpe_dsp" - accelerator_desc: "HTP" + accelerator_desc: "DSP" model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.0-snpe/mobile_mosaic_htp_O2.dlc" - model_checksum: "99b39c2b9ea84ff13e00eaa82f00136b" + model_path: "local:///mlperf_models/mobile_mosaic_htp.dlc" + model_checksum: "" } } delegate_selected: "SNPE_DSP" @@ -260,8 +202,8 @@ benchmark_setting { accelerator_name: "snpe_dsp" accelerator_desc: "DSP" model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.0-snpe/snusr_htp_O2.dlc" - model_checksum: "18fa274659e14c57b4f6bedb6871c83f" + model_path: "local:///mlperf_models//snusr_htp.dlc" + model_checksum: 
"" } } delegate_selected: "SNPE_DSP" diff --git a/mobile_back_qti/cpp/backend_qti/settings/qti_settings_sd8g3.pbtxt b/mobile_back_qti/cpp/backend_qti/settings/qti_settings_sd8g3.pbtxt index 74179f5e9..052813051 100644 --- a/mobile_back_qti/cpp/backend_qti/settings/qti_settings_sd8g3.pbtxt +++ b/mobile_back_qti/cpp/backend_qti/settings/qti_settings_sd8g3.pbtxt @@ -19,39 +19,6 @@ common_setting { } } -benchmark_setting { - benchmark_id: "image_classification" - framework: "SNPE" - custom_setting { - id: "bg_load" - value: "false" - } - custom_setting { - id: "perf_profile" - value: "burst" - } - custom_setting { - id: "output_buffer_type" - value: "uint_8" - } - custom_setting { - id: "use_ion_buffer" - value: "false" - } - delegate_choice: { - priority: 1 - delegate_name: "SNPE_DSP" - accelerator_name: "snpe_dsp" - accelerator_desc: "HTP" - model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.0-snpe/mobilenet_edgetpu_224_1.0_htp_O2.dlc" - model_checksum: "25977982896e607bceb55340c8d76223" - } - } - single_stream_expected_latency_ns: 250000 - delegate_selected: "SNPE_DSP" -} - benchmark_setting { benchmark_id: "image_classification_v2" framework: "SNPE" @@ -73,48 +40,11 @@ benchmark_setting { accelerator_name: "snpe_dsp" accelerator_desc: "HTP" model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.0-snpe/mobilenet_v4_htp.dlc" - model_checksum: "dbab3e231e5f83aabc80d5b69e6dad32" - } - } - single_stream_expected_latency_ns: 250000 - delegate_selected: "SNPE_DSP" -} - -benchmark_setting { - benchmark_id: "image_classification_offline" - framework: "SNPE" - custom_setting { - id: "scenario" - value: "Offline" - } - custom_setting { - id: "bg_load" - value: "false" - } - custom_setting { - id: "perf_profile" - value: "burst" - } - custom_setting { - id: "use_ion_buffer" - value: "false" - } - custom_setting { - id: "output_buffer_type" - value: "uint_8" - } - delegate_choice: { - priority: 1 - 
delegate_name: "SNPE_DSP" - accelerator_name: "psnpe_dsp" - accelerator_desc: "HTP" - batch_size: 12360 - model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.0-snpe/mobilenet_edgetpu_224_1.0_htp_batched_3_O2.dlc" - model_checksum: "aca3f4430fe98bbfe5c3a358ae9687e1" + model_path: "local:///mlperf_models/mobilenet_v4_htp.dlc" + model_checksum: "" } } + single_stream_expected_latency_ns: 500000 delegate_selected: "SNPE_DSP" } @@ -148,8 +78,8 @@ benchmark_setting { accelerator_desc: "HTP" batch_size: 12360 model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.0-snpe/mobilenet_v4_htp_batched_4.dlc" - model_checksum: "0de3b75022ce5c27d5902a080ec1cea0" + model_path: "local:///mlperf_models/mobilenet_v4_htp_batched_4.dlc" + model_checksum: "" } } delegate_selected: "SNPE_DSP" @@ -159,16 +89,12 @@ benchmark_setting { benchmark_id: "object_detection" framework: "SNPE" custom_setting { - id: "snpe_output_layers" - value: "Postprocessor/BatchMultiClassNonMaxSuppression" + id: "snpe_output_tensors" + value: "Postprocessor/BatchMultiClassNonMaxSuppression_classes,Postprocessor/BatchMultiClassNonMaxSuppression_num_detections,detection_scores:0,detection_boxes:0" } custom_setting { id: "bg_load" - value: "true" - } - custom_setting { - id: "perf_profile" - value: "burst" + value: "false" } custom_setting { id: "use_ion_buffer" @@ -180,20 +106,20 @@ benchmark_setting { accelerator_name: "snpe_dsp" accelerator_desc: "HTP" model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.0-snpe/ssd_mobiledet_qat_htp_O2.dlc" - model_checksum: "5802abfad10a7fc5c5849b13943d6d44" + model_path: "local:///mlperf_models/ssd_mobiledet_qat_htp.dlc" + model_checksum: "" } } - delegate_selected: "SNPE_DSP" single_stream_expected_latency_ns: 500000 + delegate_selected: "SNPE_DSP" } benchmark_setting { benchmark_id: "natural_language_processing" framework: "SNPE" custom_setting { - id: 
"snpe_output_layers" - value: "transpose" + id: "snpe_output_tensors" + value: "transpose:0" } custom_setting { id: "bg_load" @@ -209,7 +135,7 @@ benchmark_setting { } custom_setting { id: "perf_profile" - value: "sustained_high_performance" + value: "burst" } delegate_choice: { priority: 1 @@ -217,8 +143,8 @@ benchmark_setting { accelerator_name: "snpe_dsp" accelerator_desc: "DSP" model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.0-snpe/mobilebert_quantized_htp_O2.dlc" - model_checksum: "9d0dadbb6014289916a6078c4c991dd5" + model_path: "local:///mlperf_models/mobilebert_quantized_htp_O2.dlc" + model_checksum: "" } } delegate_selected: "SNPE_DSP" @@ -247,10 +173,10 @@ benchmark_setting { priority: 1 delegate_name: "SNPE_DSP" accelerator_name: "snpe_dsp" - accelerator_desc: "HTP" + accelerator_desc: "DSP" model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.0-snpe/mobile_mosaic_htp.dlc" - model_checksum: "e870526444c1e48df4f0505e530ecfdf" + model_path: "local:///mlperf_models/mobile_mosaic_htp.dlc" + model_checksum: "" } } delegate_selected: "SNPE_DSP" @@ -277,7 +203,7 @@ benchmark_setting { } custom_setting { id: "use_ion_buffer" - value: "false" + value: "true" } delegate_choice: { priority: 1 @@ -285,8 +211,8 @@ benchmark_setting { accelerator_name: "snpe_dsp" accelerator_desc: "DSP" model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.0-snpe/snusr_htp_O2.dlc" - model_checksum: "18fa274659e14c57b4f6bedb6871c83f" + model_path: "local:///mlperf_models/snusr_htp_O2.dlc" + model_checksum: "" } } delegate_selected: "SNPE_DSP" diff --git a/mobile_back_qti/cpp/backend_qti/settings/qti_settings_sd8pg1.pbtxt b/mobile_back_qti/cpp/backend_qti/settings/qti_settings_sd8pg1.pbtxt index a46560fd0..5c7814093 100644 --- a/mobile_back_qti/cpp/backend_qti/settings/qti_settings_sd8pg1.pbtxt +++ b/mobile_back_qti/cpp/backend_qti/settings/qti_settings_sd8pg1.pbtxt 
@@ -19,31 +19,6 @@ common_setting { } } -benchmark_setting { - benchmark_id: "image_classification" - framework: "SNPE" - custom_setting { - id: "bg_load" - value: "false" - } - custom_setting { - id: "output_buffer_type" - value: "uint_8" - } - delegate_choice: { - priority: 1 - delegate_name: "SNPE_DSP" - accelerator_name: "snpe_dsp" - accelerator_desc: "HTP" - model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.0-snpe/mobilenet_edgetpu_224_1.0_htp.dlc" - model_checksum: "cdf1fe622b309f692e05781661248a2b" - } - } - single_stream_expected_latency_ns: 600000 - delegate_selected: "SNPE_DSP" -} - benchmark_setting { benchmark_id: "image_classification_v2" framework: "SNPE" @@ -65,47 +40,14 @@ benchmark_setting { accelerator_name: "snpe_dsp" accelerator_desc: "HTP" model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.0-snpe/mobilenet_v4_htp.dlc" - model_checksum: "dbab3e231e5f83aabc80d5b69e6dad32" + model_path: "local:///mlperf_models/mobilenet_v4_htp.dlc" + model_checksum: "" } } single_stream_expected_latency_ns: 600000 delegate_selected: "SNPE_DSP" } -benchmark_setting { - benchmark_id: "image_classification_offline" - framework: "SNPE" - custom_setting { - id: "scenario" - value: "Offline" - } - custom_setting { - id: "bg_load" - value: "false" - } - custom_setting { - id: "output_buffer_type" - value: "uint_8" - } - custom_setting { - id: "use_ion_buffer" - value: "false" - } - delegate_choice: { - priority: 1 - delegate_name: "SNPE_DSP" - accelerator_name: "psnpe_dsp" - accelerator_desc: "HTP" - batch_size: 12288 - model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.0-snpe/mobilenet_edgetpu_224_1.0_htp_batched_4.dlc" - model_checksum: "6523060565b8d3f326f3f323c531fc1c" - } - } - delegate_selected: "SNPE_DSP" -} - benchmark_setting { benchmark_id: "image_classification_offline_v2" framework: "SNPE" @@ -134,10 +76,10 @@ benchmark_setting { 
delegate_name: "SNPE_DSP" accelerator_name: "psnpe_dsp" accelerator_desc: "HTP" - batch_size: 12288 + batch_size: 12360 model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.0-snpe/mobilenet_v4_htp_batched_4.dlc" - model_checksum: "0de3b75022ce5c27d5902a080ec1cea0" + model_path: "local:///mlperf_models/mobilenet_v4_htp_batched_4.dlc" + model_checksum: "" } } delegate_selected: "SNPE_DSP" @@ -147,8 +89,8 @@ benchmark_setting { benchmark_id: "object_detection" framework: "SNPE" custom_setting { - id: "snpe_output_layers" - value: "Postprocessor/BatchMultiClassNonMaxSuppression" + id: "snpe_output_tensors" + value: "Postprocessor/BatchMultiClassNonMaxSuppression_classes,Postprocessor/BatchMultiClassNonMaxSuppression_num_detections,detection_scores:0,detection_boxes:0" } custom_setting { id: "bg_load" @@ -164,8 +106,8 @@ benchmark_setting { accelerator_name: "snpe_dsp" accelerator_desc: "HTP" model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.0-snpe/ssd_mobiledet_qat_htp.dlc" - model_checksum: "c333fc135a8c474679d716fe391a9e2a" + model_path: "local:///mlperf_models/ssd_mobiledet_qat_htp.dlc" + model_checksum: "" } } delegate_selected: "SNPE_DSP" @@ -175,8 +117,8 @@ benchmark_setting { benchmark_id: "natural_language_processing" framework: "SNPE" custom_setting { - id: "snpe_output_layers" - value: "transpose" + id: "snpe_output_tensors" + value: "transpose:0" } custom_setting { id: "bg_load" @@ -200,8 +142,8 @@ benchmark_setting { accelerator_name: "snpe_dsp" accelerator_desc: "DSP" model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.0-snpe/mobilebert_quantized_htp.dlc" - model_checksum: "7a641e4df84fc06a1237b7fe1b1c5b08" + model_path: "local:///mlperf_models/mobilebert_quantized_htp.dlc" + model_checksum: "" } } delegate_selected: "SNPE_DSP" @@ -222,14 +164,18 @@ benchmark_setting { id: "output_buffer_type" value: "int_32" } + custom_setting { + 
id: "perf_profile" + value: "burst" + } delegate_choice: { priority: 1 delegate_name: "SNPE_DSP" accelerator_name: "snpe_dsp" accelerator_desc: "DSP" model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.0-snpe/mobile_mosaic_htp.dlc" - model_checksum: "e870526444c1e48df4f0505e530ecfdf" + model_path: "local:///mlperf_models/mobile_mosaic_htp.dlc" + model_checksum: "" } } delegate_selected: "SNPE_DSP" @@ -254,14 +200,18 @@ benchmark_setting { id: "perf_profile" value: "burst" } + custom_setting { + id: "use_ion_buffer" + value: "false" + } delegate_choice: { priority: 1 delegate_name: "SNPE_DSP" accelerator_name: "snpe_dsp" accelerator_desc: "DSP" model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.0-snpe/snusr_htp.dlc" - model_checksum: "84ef0d9c2e7b710381cea962a22a0b41" + model_path: "local:///mlperf_models//snusr_htp.dlc" + model_checksum: "" } } delegate_selected: "SNPE_DSP" diff --git a/mobile_back_qti/cpp/backend_qti/settings/qti_settings_sdm778.pbtxt b/mobile_back_qti/cpp/backend_qti/settings/qti_settings_sdm778.pbtxt index fd9fabe31..df26e4efd 100644 --- a/mobile_back_qti/cpp/backend_qti/settings/qti_settings_sdm778.pbtxt +++ b/mobile_back_qti/cpp/backend_qti/settings/qti_settings_sdm778.pbtxt @@ -19,30 +19,6 @@ common_setting { } } -benchmark_setting { - benchmark_id: "image_classification" - framework: "SNPE" - custom_setting { - id: "bg_load" - value: "false" - } - custom_setting { - id: "output_buffer_type" - value: "uint_8" - } - delegate_choice: { - priority: 1 - delegate_name: "SNPE_DSP" - accelerator_name: "snpe_dsp" - accelerator_desc: "HTP" - model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.0-snpe/mobilenet_edgetpu_224_1.0_htp.dlc" - model_checksum: "cdf1fe622b309f692e05781661248a2b" - } - } - delegate_selected: "SNPE_DSP" -} - benchmark_setting { benchmark_id: "image_classification_v2" framework: "SNPE" @@ -64,43 +40,11 @@ 
benchmark_setting { accelerator_name: "snpe_dsp" accelerator_desc: "HTP" model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.0-snpe/mobilenet_v4_htp.dlc" - model_checksum: "dbab3e231e5f83aabc80d5b69e6dad32" - } - } - delegate_selected: "SNPE_DSP" -} - -benchmark_setting { - benchmark_id: "image_classification_offline" - framework: "SNPE" - custom_setting { - id: "scenario" - value: "Offline" - } - custom_setting { - id: "bg_load" - value: "false" - } - custom_setting { - id: "use_ion_buffer" - value: "false" - } - custom_setting { - id: "output_buffer_type" - value: "uint_8" - } - delegate_choice: { - priority: 1 - delegate_name: "SNPE_DSP" - accelerator_name: "psnpe_dsp" - accelerator_desc: "HTP" - batch_size: 12288 - model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.0-snpe/mobilenet_edgetpu_224_1.0_htp_batched_3.dlc" - model_checksum: "550f807bc7ef40f77018a64a47507d09" + model_path: "local:///mlperf_models/mobilenet_v4_htp.dlc" + model_checksum: "" } } + single_stream_expected_latency_ns: 500000 delegate_selected: "SNPE_DSP" } @@ -132,10 +76,10 @@ benchmark_setting { delegate_name: "SNPE_DSP" accelerator_name: "psnpe_dsp" accelerator_desc: "HTP" - batch_size: 12288 + batch_size: 12360 model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.0-snpe/mobilenet_v4_htp_batched_4.dlc" - model_checksum: "0de3b75022ce5c27d5902a080ec1cea0" + model_path: "local:///mlperf_models/mobilenet_v4_htp_batched_4.dlc" + model_checksum: "" } } delegate_selected: "SNPE_DSP" @@ -145,8 +89,8 @@ benchmark_setting { benchmark_id: "object_detection" framework: "SNPE" custom_setting { - id: "snpe_output_layers" - value: "Postprocessor/BatchMultiClassNonMaxSuppression" + id: "snpe_output_tensors" + value: "Postprocessor/BatchMultiClassNonMaxSuppression_classes,Postprocessor/BatchMultiClassNonMaxSuppression_num_detections,detection_scores:0,detection_boxes:0" } 
custom_setting { id: "bg_load" @@ -162,8 +106,8 @@ benchmark_setting { accelerator_name: "snpe_dsp" accelerator_desc: "HTP" model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.0-snpe/ssd_mobiledet_qat_htp.dlc" - model_checksum: "c333fc135a8c474679d716fe391a9e2a" + model_path: "local:///mlperf_models/ssd_mobiledet_qat_htp.dlc" + model_checksum: "" } } delegate_selected: "SNPE_DSP" @@ -173,8 +117,8 @@ benchmark_setting { benchmark_id: "natural_language_processing" framework: "SNPE" custom_setting { - id: "snpe_output_layers" - value: "transpose" + id: "snpe_output_tensors" + value: "transpose:0" } custom_setting { id: "bg_load" @@ -190,7 +134,7 @@ benchmark_setting { } custom_setting { id: "perf_profile" - value: "high_performance" + value: "sustained_high_performance" } delegate_choice: { priority: 1 @@ -198,8 +142,8 @@ benchmark_setting { accelerator_name: "snpe_dsp" accelerator_desc: "DSP" model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.0-snpe/mobilebert_quantized_htp.dlc" - model_checksum: "7a641e4df84fc06a1237b7fe1b1c5b08" + model_path: "local:///mlperf_models/mobilebert_quantized_htp.dlc" + model_checksum: "" } } delegate_selected: "SNPE_DSP" @@ -220,14 +164,18 @@ benchmark_setting { id: "output_buffer_type" value: "int_32" } + custom_setting { + id: "perf_profile" + value: "burst" + } delegate_choice: { priority: 1 delegate_name: "SNPE_DSP" accelerator_name: "snpe_dsp" accelerator_desc: "DSP" model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.0-snpe/mobile_mosaic_htp.dlc" - model_checksum: "e870526444c1e48df4f0505e530ecfdf" + model_path: "local:///mlperf_models/mobile_mosaic_htp.dlc" + model_checksum: "" } } delegate_selected: "SNPE_DSP" @@ -252,14 +200,18 @@ benchmark_setting { id: "perf_profile" value: "burst" } + custom_setting { + id: "use_ion_buffer" + value: "false" + } delegate_choice: { priority: 1 delegate_name: "SNPE_DSP" 
accelerator_name: "snpe_dsp" accelerator_desc: "DSP" model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.0-snpe/snusr_htp.dlc" - model_checksum: "84ef0d9c2e7b710381cea962a22a0b41" + model_path: "local:///mlperf_models//snusr_htp.dlc" + model_checksum: "" } } delegate_selected: "SNPE_DSP" diff --git a/mobile_back_qti/cpp/backend_qti/settings/qti_settings_sdm888.pbtxt b/mobile_back_qti/cpp/backend_qti/settings/qti_settings_sdm888.pbtxt index 69cdcef86..652448ac4 100644 --- a/mobile_back_qti/cpp/backend_qti/settings/qti_settings_sdm888.pbtxt +++ b/mobile_back_qti/cpp/backend_qti/settings/qti_settings_sdm888.pbtxt @@ -19,30 +19,6 @@ common_setting { } } -benchmark_setting { - benchmark_id: "image_classification" - framework: "SNPE" - custom_setting { - id: "bg_load" - value: "false" - } - custom_setting { - id: "output_buffer_type" - value: "uint_8" - } - delegate_choice: { - priority: 1 - delegate_name: "SNPE_DSP" - accelerator_name: "snpe_dsp" - accelerator_desc: "HTP" - model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.0-snpe/mobilenet_edgetpu_224_1.0_htp.dlc" - model_checksum: "cdf1fe622b309f692e05781661248a2b" - } - } - delegate_selected: "SNPE_DSP" -} - benchmark_setting { benchmark_id: "image_classification_v2" framework: "SNPE" @@ -64,39 +40,11 @@ benchmark_setting { accelerator_name: "snpe_dsp" accelerator_desc: "HTP" model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.0-snpe/mobilenet_v4_htp.dlc" - model_checksum: "dbab3e231e5f83aabc80d5b69e6dad32" - } - } - delegate_selected: "SNPE_DSP" -} - -benchmark_setting { - benchmark_id: "image_classification_offline" - framework: "SNPE" - custom_setting { - id: "scenario" - value: "Offline" - } - custom_setting { - id: "use_ion_buffer" - value: "false" - } - custom_setting { - id: "output_buffer_type" - value: "uint_8" - } - delegate_choice: { - priority: 1 - delegate_name: "SNPE_DSP" - 
accelerator_name: "psnpe_dsp" - accelerator_desc: "HTP" - batch_size: 12288 - model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.0-snpe/mobilenet_edgetpu_224_1.0_htp_batched_3.dlc" - model_checksum: "550f807bc7ef40f77018a64a47507d09" + model_path: "local:///mlperf_models/mobilenet_v4_htp.dlc" + model_checksum: "" } } + single_stream_expected_latency_ns: 500000 delegate_selected: "SNPE_DSP" } @@ -128,10 +76,10 @@ benchmark_setting { delegate_name: "SNPE_DSP" accelerator_name: "psnpe_dsp" accelerator_desc: "HTP" - batch_size: 12288 + batch_size: 12360 model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.0-snpe/mobilenet_v4_htp_batched_4.dlc" - model_checksum: "0de3b75022ce5c27d5902a080ec1cea0" + model_path: "local:///mlperf_models/mobilenet_v4_htp_batched_4.dlc" + model_checksum: "" } } delegate_selected: "SNPE_DSP" @@ -141,8 +89,8 @@ benchmark_setting { benchmark_id: "object_detection" framework: "SNPE" custom_setting { - id: "snpe_output_layers" - value: "Postprocessor/BatchMultiClassNonMaxSuppression" + id: "snpe_output_tensors" + value: "Postprocessor/BatchMultiClassNonMaxSuppression_classes,Postprocessor/BatchMultiClassNonMaxSuppression_num_detections,detection_scores:0,detection_boxes:0" } custom_setting { id: "bg_load" @@ -158,8 +106,8 @@ benchmark_setting { accelerator_name: "snpe_dsp" accelerator_desc: "HTP" model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.0-snpe/ssd_mobiledet_qat_htp.dlc" - model_checksum: "c333fc135a8c474679d716fe391a9e2a" + model_path: "local:///mlperf_models/ssd_mobiledet_qat_htp.dlc" + model_checksum: "" } } delegate_selected: "SNPE_DSP" @@ -169,8 +117,8 @@ benchmark_setting { benchmark_id: "natural_language_processing" framework: "SNPE" custom_setting { - id: "snpe_output_layers" - value: "transpose" + id: "snpe_output_tensors" + value: "transpose:0" } custom_setting { id: "bg_load" @@ -194,8 +142,8 @@ 
benchmark_setting { accelerator_name: "snpe_dsp" accelerator_desc: "DSP" model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.0-snpe/mobilebert_quantized_htp.dlc" - model_checksum: "7a641e4df84fc06a1237b7fe1b1c5b08" + model_path: "local:///mlperf_models/mobilebert_quantized_htp.dlc" + model_checksum: "" } } delegate_selected: "SNPE_DSP" @@ -216,14 +164,18 @@ benchmark_setting { id: "output_buffer_type" value: "int_32" } + custom_setting { + id: "perf_profile" + value: "burst" + } delegate_choice: { priority: 1 delegate_name: "SNPE_DSP" accelerator_name: "snpe_dsp" accelerator_desc: "DSP" model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.0-snpe/mobile_mosaic_htp.dlc" - model_checksum: "e870526444c1e48df4f0505e530ecfdf" + model_path: "local:///mlperf_models/mobile_mosaic_htp.dlc" + model_checksum: "" } } delegate_selected: "SNPE_DSP" @@ -248,14 +200,18 @@ benchmark_setting { id: "perf_profile" value: "burst" } + custom_setting { + id: "use_ion_buffer" + value: "false" + } delegate_choice: { priority: 1 delegate_name: "SNPE_DSP" accelerator_name: "snpe_dsp" accelerator_desc: "DSP" model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.0-snpe/snusr_htp.dlc" - model_checksum: "84ef0d9c2e7b710381cea962a22a0b41" + model_path: "local:///mlperf_models//snusr_htp.dlc" + model_checksum: "" } } delegate_selected: "SNPE_DSP" diff --git a/mobile_back_qti/cpp/backend_qti/settings/qti_settings_sm4450.pbtxt b/mobile_back_qti/cpp/backend_qti/settings/qti_settings_sm4450.pbtxt index de8fea559..a81b4d359 100644 --- a/mobile_back_qti/cpp/backend_qti/settings/qti_settings_sm4450.pbtxt +++ b/mobile_back_qti/cpp/backend_qti/settings/qti_settings_sm4450.pbtxt @@ -19,34 +19,6 @@ common_setting { } } -benchmark_setting { - benchmark_id: "image_classification" - framework: "SNPE" - custom_setting { - id: "bg_load" - value: "false" - } - custom_setting { - id: 
"use_ion_buffer" - value: "false" - } - delegate_choice: { - priority: 1 - custom_setting { - id: "cpu_int8" - value: "true" - } - delegate_name: "SNPE_CPU" - accelerator_name: "snpe_cpu" - accelerator_desc: "CPU" - model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.0-snpe/mobilenet_edgetpu_224_1.0_htp.dlc" - model_checksum: "cdf1fe622b309f692e05781661248a2b" - } - } - delegate_selected: "SNPE_CPU" -} - benchmark_setting { benchmark_id: "image_classification_v2" framework: "SNPE" @@ -66,43 +38,10 @@ benchmark_setting { priority: 1 delegate_name: "SNPE_CPU" accelerator_name: "snpe_cpu" - accelerator_desc: "HTP" - model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.0-snpe/mobilenet_v4_htp.dlc" - model_checksum: "dbab3e231e5f83aabc80d5b69e6dad32" - } - } - delegate_selected: "SNPE_CPU" -} - -benchmark_setting { - benchmark_id: "image_classification_offline" - framework: "SNPE" - custom_setting { - id: "scenario" - value: "Offline" - } - custom_setting { - id: "bg_load" - value: "false" - } - custom_setting { - id: "use_ion_buffer" - value: "false" - } - delegate_choice: { - priority: 1 - custom_setting { - id: "cpu_int8" - value: "true" - } - delegate_name: "SNPE_CPU" - accelerator_name: "psnpe_cpu" accelerator_desc: "CPU" - batch_size: 12288 model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.0-snpe/mobilenet_edgetpu_224_1.0_htp_batched_3.dlc" - model_checksum: "550f807bc7ef40f77018a64a47507d09" + model_path: "local:///mlperf_models/mobilenet_v4_htp.dlc" + model_checksum: "" } } delegate_selected: "SNPE_CPU" @@ -136,10 +75,10 @@ benchmark_setting { delegate_name: "SNPE_CPU" accelerator_name: "psnpe_cpu" accelerator_desc: "CPU" - batch_size: 12288 + batch_size: 12360 model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.0-snpe/mobilenet_v4_htp_batched_4.dlc" - model_checksum: 
"0de3b75022ce5c27d5902a080ec1cea0" + model_path: "local:///mlperf_models/mobilenet_v4_htp_batched_4.dlc" + model_checksum: "" } } delegate_selected: "SNPE_CPU" @@ -149,8 +88,8 @@ benchmark_setting { benchmark_id: "object_detection" framework: "SNPE" custom_setting { - id: "snpe_output_layers" - value: "Postprocessor/BatchMultiClassNonMaxSuppression" + id: "snpe_output_tensors" + value: "Postprocessor/BatchMultiClassNonMaxSuppression_classes,Postprocessor/BatchMultiClassNonMaxSuppression_num_detections,detection_scores:0,detection_boxes:0" } custom_setting { id: "bg_load" @@ -170,8 +109,8 @@ benchmark_setting { accelerator_name: "snpe_cpu" accelerator_desc: "CPU" model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.0-snpe/ssd_mobiledet_qat_htp.dlc" - model_checksum: "c333fc135a8c474679d716fe391a9e2a" + model_path: "local:///mlperf_models/ssd_mobiledet_qat_htp.dlc" + model_checksum: "" } } delegate_selected: "SNPE_CPU" @@ -181,8 +120,8 @@ benchmark_setting { benchmark_id: "natural_language_processing" framework: "SNPE" custom_setting { - id: "snpe_output_layers" - value: "transpose" + id: "snpe_output_tensors" + value: "transpose:0" } custom_setting { id: "bg_load" @@ -206,8 +145,8 @@ benchmark_setting { accelerator_name: "snpe_cpu" accelerator_desc: "CPU" model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.0-snpe/mobilebert_quantized_htp.dlc" - model_checksum: "7a641e4df84fc06a1237b7fe1b1c5b08" + model_path: "local:///mlperf_models/mobilebert_quantized_htp.dlc" + model_checksum: "" } } delegate_selected: "SNPE_CPU" @@ -242,8 +181,8 @@ benchmark_setting { value: "true" } model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.0-snpe/mobile_mosaic_htp.dlc" - model_checksum: "e870526444c1e48df4f0505e530ecfdf" + model_path: "local:///mlperf_models/mobile_mosaic_htp.dlc" + model_checksum: "" } } delegate_selected: "SNPE_CPU" @@ -286,8 +225,8 @@ 
benchmark_setting { accelerator_name: "snpe_cpu" accelerator_desc: "CPU" model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.0-snpe/snusr_htp.dlc" - model_checksum: "84ef0d9c2e7b710381cea962a22a0b41" + model_path: "local:///mlperf_models/snusr_htp.dlc" + model_checksum: "" } } delegate_selected: "SNPE_CPU" diff --git a/mobile_back_qti/cpp/backend_qti/settings/qti_settings_sm7550.pbtxt b/mobile_back_qti/cpp/backend_qti/settings/qti_settings_sm7550.pbtxt index 4bae03981..934cff8a2 100644 --- a/mobile_back_qti/cpp/backend_qti/settings/qti_settings_sm7550.pbtxt +++ b/mobile_back_qti/cpp/backend_qti/settings/qti_settings_sm7550.pbtxt @@ -19,35 +19,6 @@ common_setting { } } -benchmark_setting { - benchmark_id: "image_classification" - framework: "SNPE" - custom_setting { - id: "bg_load" - value: "false" - } - custom_setting { - id: "output_buffer_type" - value: "uint_8" - } - custom_setting { - id: "perf_profile" - value: "burst" - } - delegate_choice: { - priority: 1 - delegate_name: "SNPE_DSP" - accelerator_name: "snpe_dsp" - accelerator_desc: "HTP" - model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.0-snpe/mobilenet_edgetpu_224_1.0_htp.dlc" - model_checksum: "cdf1fe622b309f692e05781661248a2b" - } - } - single_stream_expected_latency_ns: 500000 - delegate_selected: "SNPE_DSP" -} - benchmark_setting { benchmark_id: "image_classification_v2" framework: "SNPE" @@ -69,51 +40,14 @@ benchmark_setting { accelerator_name: "snpe_dsp" accelerator_desc: "HTP" model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.0-snpe/mobilenet_v4_htp.dlc" - model_checksum: "dbab3e231e5f83aabc80d5b69e6dad32" + model_path: "local:///mlperf_models/mobilenet_v4_htp.dlc" + model_checksum: "" } } single_stream_expected_latency_ns: 50000 delegate_selected: "SNPE_DSP" } -benchmark_setting { - benchmark_id: "image_classification_offline" - framework: "SNPE" - custom_setting { - id: 
"scenario" - value: "Offline" - } - custom_setting { - id: "bg_load" - value: "false" - } - custom_setting { - id: "perf_profile" - value: "burst" - } - custom_setting { - id: "output_buffer_type" - value: "uint_8" - } - custom_setting { - id: "use_ion_buffer" - value: "false" - } - delegate_choice: { - priority: 1 - delegate_name: "SNPE_DSP" - accelerator_name: "psnpe_dsp" - accelerator_desc: "HTP" - batch_size: 12288 - model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.0-snpe/mobilenet_edgetpu_224_1.0_htp_batched_4.dlc" - model_checksum: "6523060565b8d3f326f3f323c531fc1c" - } - } - delegate_selected: "SNPE_DSP" -} - benchmark_setting { benchmark_id: "image_classification_offline_v2" framework: "SNPE" @@ -142,10 +76,10 @@ benchmark_setting { delegate_name: "SNPE_DSP" accelerator_name: "psnpe_dsp" accelerator_desc: "HTP" - batch_size: 12288 + batch_size: 12360 model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.0-snpe/mobilenet_v4_htp_batched_4_O2.dlc" - model_checksum: "d349e3fb8a74a5037ecc3b2770dbd188" + model_path: "local:///mlperf_models/mobilenet_v4_htp_batched_4_O2.dlc" + model_checksum: "" } } delegate_selected: "SNPE_DSP" @@ -155,8 +89,8 @@ benchmark_setting { benchmark_id: "object_detection" framework: "SNPE" custom_setting { - id: "snpe_output_layers" - value: "Postprocessor/BatchMultiClassNonMaxSuppression" + id: "snpe_output_tensors" + value: "Postprocessor/BatchMultiClassNonMaxSuppression_classes,Postprocessor/BatchMultiClassNonMaxSuppression_num_detections,detection_scores:0,detection_boxes:0" } custom_setting { id: "bg_load" @@ -176,8 +110,8 @@ benchmark_setting { accelerator_name: "snpe_dsp" accelerator_desc: "HTP" model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.0-snpe/ssd_mobiledet_qat_htp_O2.dlc" - model_checksum: "5802abfad10a7fc5c5849b13943d6d44" + model_path: "local:///mlperf_models/ssd_mobiledet_qat_htp.dlc" + 
model_checksum: "" } } delegate_selected: "SNPE_DSP" @@ -187,8 +121,8 @@ benchmark_setting { benchmark_id: "natural_language_processing" framework: "SNPE" custom_setting { - id: "snpe_output_layers" - value: "transpose" + id: "snpe_output_tensors" + value: "transpose:0" } custom_setting { id: "bg_load" @@ -212,8 +146,8 @@ benchmark_setting { accelerator_name: "snpe_dsp" accelerator_desc: "DSP" model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.0-snpe/mobilebert_quantized_htp.dlc" - model_checksum: "7a641e4df84fc06a1237b7fe1b1c5b08" + model_path: "local:///mlperf_models/mobilebert_quantized_htp.dlc" + model_checksum: "" } } delegate_selected: "SNPE_DSP" @@ -242,10 +176,10 @@ benchmark_setting { priority: 1 delegate_name: "SNPE_DSP" accelerator_name: "snpe_dsp" - accelerator_desc: "HTP" + accelerator_desc: "DSP" model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.0-snpe/mobile_mosaic_htp.dlc" - model_checksum: "e870526444c1e48df4f0505e530ecfdf" + model_path: "local:///mlperf_models/mobile_mosaic_htp.dlc" + model_checksum: "" } } delegate_selected: "SNPE_DSP" @@ -270,14 +204,18 @@ benchmark_setting { id: "perf_profile" value: "burst" } + custom_setting { + id: "use_ion_buffer" + value: "false" + } delegate_choice: { priority: 1 delegate_name: "SNPE_DSP" accelerator_name: "snpe_dsp" accelerator_desc: "DSP" model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.0-snpe/snusr_htp_O2.dlc" - model_checksum: "18fa274659e14c57b4f6bedb6871c83f" + model_path: "local:///mlperf_models//snusr_htp.dlc" + model_checksum: "" } } delegate_selected: "SNPE_DSP" diff --git a/mobile_back_qti/cpp/backend_qti/settings/qti_settings_sm8635.pbtxt b/mobile_back_qti/cpp/backend_qti/settings/qti_settings_sm8635.pbtxt index 3b3b3b5ea..35a5dc4f8 100644 --- a/mobile_back_qti/cpp/backend_qti/settings/qti_settings_sm8635.pbtxt +++ 
b/mobile_back_qti/cpp/backend_qti/settings/qti_settings_sm8635.pbtxt @@ -19,35 +19,6 @@ common_setting { } } -benchmark_setting { - benchmark_id: "image_classification" - framework: "SNPE" - custom_setting { - id: "bg_load" - value: "false" - } - custom_setting { - id: "output_buffer_type" - value: "uint_8" - } - custom_setting { - id: "perf_profile" - value: "burst" - } - delegate_choice: { - priority: 1 - delegate_name: "SNPE_DSP" - accelerator_name: "snpe_dsp" - accelerator_desc: "HTP" - model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.0-snpe/mobilenet_edgetpu_224_1.0_htp_O2.dlc" - model_checksum: "25977982896e607bceb55340c8d76223" - } - } - single_stream_expected_latency_ns: 500000 - delegate_selected: "SNPE_DSP" -} - benchmark_setting { benchmark_id: "image_classification_v2" framework: "SNPE" @@ -69,51 +40,14 @@ benchmark_setting { accelerator_name: "snpe_dsp" accelerator_desc: "HTP" model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.0-snpe/mobilenet_v4_htp.dlc" - model_checksum: "dbab3e231e5f83aabc80d5b69e6dad32" + model_path: "local:///mlperf_models/mobilenet_v4_htp.dlc" + model_checksum: "" } } single_stream_expected_latency_ns: 50000 delegate_selected: "SNPE_DSP" } -benchmark_setting { - benchmark_id: "image_classification_offline" - framework: "SNPE" - custom_setting { - id: "scenario" - value: "Offline" - } - custom_setting { - id: "bg_load" - value: "false" - } - custom_setting { - id: "perf_profile" - value: "burst" - } - custom_setting { - id: "output_buffer_type" - value: "uint_8" - } - custom_setting { - id: "use_ion_buffer" - value: "false" - } - delegate_choice: { - priority: 1 - delegate_name: "SNPE_DSP" - accelerator_name: "psnpe_dsp" - accelerator_desc: "HTP" - batch_size: 12288 - model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.0-snpe/mobilenet_edgetpu_224_1.0_htp_batched_4.dlc" - model_checksum: 
"6523060565b8d3f326f3f323c531fc1c" - } - } - delegate_selected: "SNPE_DSP" -} - benchmark_setting { benchmark_id: "image_classification_offline_v2" framework: "SNPE" @@ -142,10 +76,10 @@ benchmark_setting { delegate_name: "SNPE_DSP" accelerator_name: "psnpe_dsp" accelerator_desc: "HTP" - batch_size: 12288 + batch_size: 12360 model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.0-snpe/mobilenet_v4_htp_batched_4.dlc" - model_checksum: "0de3b75022ce5c27d5902a080ec1cea0" + model_path: "local:///mlperf_models/mobilenet_v4_htp_batched_4.dlc" + model_checksum: "" } } delegate_selected: "SNPE_DSP" @@ -155,8 +89,8 @@ benchmark_setting { benchmark_id: "object_detection" framework: "SNPE" custom_setting { - id: "snpe_output_layers" - value: "Postprocessor/BatchMultiClassNonMaxSuppression" + id: "snpe_output_tensors" + value: "Postprocessor/BatchMultiClassNonMaxSuppression_classes,Postprocessor/BatchMultiClassNonMaxSuppression_num_detections,detection_scores:0,detection_boxes:0" } custom_setting { id: "bg_load" @@ -176,8 +110,8 @@ benchmark_setting { accelerator_name: "snpe_dsp" accelerator_desc: "HTP" model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.0-snpe/ssd_mobiledet_qat_htp.dlc" - model_checksum: "c333fc135a8c474679d716fe391a9e2a" + model_path: "local:///mlperf_models/ssd_mobiledet_qat_htp.dlc" + model_checksum: "" } } delegate_selected: "SNPE_DSP" @@ -187,8 +121,8 @@ benchmark_setting { benchmark_id: "natural_language_processing" framework: "SNPE" custom_setting { - id: "snpe_output_layers" - value: "transpose" + id: "snpe_output_tensors" + value: "transpose:0" } custom_setting { id: "bg_load" @@ -212,8 +146,8 @@ benchmark_setting { accelerator_name: "snpe_dsp" accelerator_desc: "DSP" model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.0-snpe/mobilebert_quantized_htp.dlc" - model_checksum: "7a641e4df84fc06a1237b7fe1b1c5b08" + model_path: 
"local:///mlperf_models/mobilebert_quantized_htp.dlc" + model_checksum: "" } } delegate_selected: "SNPE_DSP" @@ -242,10 +176,10 @@ benchmark_setting { priority: 1 delegate_name: "SNPE_DSP" accelerator_name: "snpe_dsp" - accelerator_desc: "HTP" + accelerator_desc: "DSP" model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.0-snpe/mobile_mosaic_htp.dlc" - model_checksum: "e870526444c1e48df4f0505e530ecfdf" + model_path: "local:///mlperf_models/mobile_mosaic_htp.dlc" + model_checksum: "" } } delegate_selected: "SNPE_DSP" @@ -276,8 +210,8 @@ benchmark_setting { accelerator_name: "snpe_dsp" accelerator_desc: "DSP" model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.0-snpe/snusr_htp.dlc" - model_checksum: "84ef0d9c2e7b710381cea962a22a0b41" + model_path: "local:///mlperf_models/snusr_htp.dlc" + model_checksum: "" } } delegate_selected: "SNPE_DSP" diff --git a/mobile_back_qti/cpp/backend_qti/settings/qti_settings_stablediffusion.pbtxt b/mobile_back_qti/cpp/backend_qti/settings/qti_settings_stablediffusion.pbtxt new file mode 100644 index 000000000..a936689fb --- /dev/null +++ b/mobile_back_qti/cpp/backend_qti/settings/qti_settings_stablediffusion.pbtxt @@ -0,0 +1,46 @@ +# proto-file: flutter/cpp/proto/backend_setting.proto +# proto-message: BackendSetting + +benchmark_setting { + benchmark_id: "stable_diffusion" + framework: "QNN" + custom_setting { + id: "pipeline" + value: "StableDiffusionPipeline" + } + custom_setting { + id: "bg_load" + value: "false" + } + delegate_choice: { + priority: 1 + delegate_name: "QNN_DSP" + accelerator_name: "snpe_dsp" + accelerator_desc: "DSP" + model_file: { + model_path: "local:///mlperf_models/stable_diffusion/betas.bin" + model_checksum: "" + } + model_file: { + model_path: "local:///mlperf_models/stable_diffusion/lambdas.bin" + model_checksum: "" + } + model_file: { + model_path: "local:///mlperf_models/stable_diffusion/sd_precompute_data.tar" + 
model_checksum: "" + } + model_file: { + model_path: "local:///mlperf_models/stable_diffusion/text_encoder.serialized.bin" + model_checksum: "" + } + model_file: { + model_path: "local:///mlperf_models/stable_diffusion/unet.serialized.bin" + model_checksum: "" + } + model_file: { + model_path: "local:///mlperf_models/stable_diffusion/vae_decoder.serialized.bin" + model_checksum: "" + } + } + delegate_selected: "QNN_DSP" +} \ No newline at end of file diff --git a/mobile_back_qti/cpp/backend_qti/soc_utility.cc b/mobile_back_qti/cpp/backend_qti/soc_utility.cc index c03e41336..64f6ade8f 100644 --- a/mobile_back_qti/cpp/backend_qti/soc_utility.cc +++ b/mobile_back_qti/cpp/backend_qti/soc_utility.cc @@ -110,7 +110,7 @@ std::map socDetails = std::vector({4, 5, 6, 7}), 8, false)}, {557, SocInfo(2, 0, 0, 0, true, qti_settings_sd8g3, "SD8G3", 1, std::vector({0, 1, 2, 3}), - std::vector({4, 5, 6, 7}), 8, true)}, + std::vector({4, 5, 6, 7}), 8, true, /* stable_diffusion */ true)}, {614, SocInfo(2, 0, 0, 0, true, qti_settings_sm8635, "SM8635", 1, std::vector({0, 1, 2, 3}), std::vector({4, 5, 6, 7}), 8, true)}, @@ -314,29 +314,22 @@ void Socs::soc_info_init() { #endif LOG(INFO) << "Soc ID: " << soc_id; - if (soc_id != UNSUPPORTED_SOC_ID) { - if (socDetails.find(soc_id) == socDetails.end()) { - soc_id = UNSUPPORTED_SOC_ID; - } - m_soc_info = socDetails.find(soc_id)->second; + m_soc_info = socDetails.find(soc_id)->second; - if (external_config) { - LOG(INFO) << "Config settings derived externally from " - "//data/local/tmp/external/qti_settings.pbtxt"; - m_soc_info.m_settings = get_external_config_string(); - } - if (soc_id == UNSUPPORTED_SOC_ID) { - if (QTIBackendHelper::IsRuntimeAvailable(SNPE_DSP)) { - m_soc_info.m_settings = qti_settings_default_dsp; - } else if (QTIBackendHelper::IsRuntimeAvailable(SNPE_GPU)) { - m_soc_info.m_settings = qti_settings_default_gpu; - } else { - m_soc_info.m_settings = qti_settings_default_cpu; - } + if (external_config) { + LOG(INFO) << "Config 
settings derived externally from " + "//data/local/tmp/external/qti_settings.pbtxt"; + m_soc_info.m_settings = get_external_config_string(); + } + if (soc_id == UNSUPPORTED_SOC_ID) { + if (QTIBackendHelper::IsRuntimeAvailable(SNPE_DSP)) { + m_soc_info.m_settings = qti_settings_default_dsp; + } else if (QTIBackendHelper::IsRuntimeAvailable(SNPE_GPU)) { + m_soc_info.m_settings = qti_settings_default_gpu; + } else { + m_soc_info.m_settings = qti_settings_default_cpu; } - } else { - m_soc_info = unsupportedSoc; } } @@ -429,20 +422,10 @@ bool Socs::soc_settings(const char **settings, const char **not_allowed_message) { soc_info_init(); - if (m_soc_info.m_soc_name == UNSUPPORTED_SOC_STR) { - // it's a QTI SOC, but can't access soc_id - *not_allowed_message = "Unsupported app"; - *settings = empty_settings.c_str(); - return true; - } - // Check if this SoC is supported *not_allowed_message = nullptr; *settings = m_soc_info.m_settings.c_str(); - if (m_soc_info.m_soc_name == DEFAULT_SOC_STR) { - // it's a QTI SOC, but the chipset is not yet supported - *not_allowed_message = "Unsupported QTI SoC"; - } + return true; } diff --git a/mobile_back_qti/cpp/backend_qti/soc_utility.h b/mobile_back_qti/cpp/backend_qti/soc_utility.h index cd6cf3a15..946d2ab6d 100644 --- a/mobile_back_qti/cpp/backend_qti/soc_utility.h +++ b/mobile_back_qti/cpp/backend_qti/soc_utility.h @@ -1,4 +1,4 @@ -/* Copyright (c) 2020-2022 Qualcomm Innovation Center, Inc. All rights reserved. +/* Copyright (c) 2020-2024 Qualcomm Innovation Center, Inc. All rights reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
You may obtain a copy of the License at @@ -43,12 +43,13 @@ class SocInfo { m_soc_name(soc_name), m_num_inits(0), m_max_cores(0), - m_needs_rpcmem(false) {} + m_needs_rpcmem(false), + m_needs_stablediffusion(false) {} SocInfo(int num_dsp, int num_gpu, int num_cpu, int num_gpu_fp16, bool useDspFeatures, const std::string settings, std::string soc_name, int num_inits, std::vector hlc, std::vector llc, - int max_cores, bool needs_rpcmem) + int max_cores, bool needs_rpcmem, bool needs_stablediffusion = false) : m_num_dsp(num_dsp), m_num_gpu(num_gpu), m_num_cpu(num_cpu), @@ -60,10 +61,17 @@ class SocInfo { m_high_latency_cores(hlc), m_low_latency_cores(llc), m_max_cores(max_cores), - m_needs_rpcmem(needs_rpcmem) { + m_needs_rpcmem(needs_rpcmem), + m_needs_stablediffusion(needs_stablediffusion) { if (m_useDspFeatures == false) { m_num_inits = 1; } + if (m_needs_stablediffusion) { +#ifdef STABLEDIFFUSION_FLAG + m_settings += qti_settings_stablediffusion; +#endif + } + } int m_num_dsp; @@ -78,6 +86,7 @@ class SocInfo { std::vector m_low_latency_cores; int m_max_cores; bool m_needs_rpcmem; + bool m_needs_stablediffusion; }; class SocProperties { diff --git a/mobile_back_qti/docker/mlperf_dlc_prepare/Dockerfile b/mobile_back_qti/docker/mlperf_dlc_prepare/Dockerfile index 14b82d36c..1a71fff64 100644 --- a/mobile_back_qti/docker/mlperf_dlc_prepare/Dockerfile +++ b/mobile_back_qti/docker/mlperf_dlc_prepare/Dockerfile @@ -14,7 +14,7 @@ ########################################################################## # Docker image name: mlcommons/mlperf_dlc_prepare -FROM ubuntu:focal +FROM ubuntu:jammy RUN groupadd -r mlperfuser \ && useradd -r mlperfuser -g mlperfuser LABEL maintainer="quic_mmundhra@quicinc.com" @@ -26,20 +26,42 @@ RUN apt-get update && apt-get upgrade -y && apt-get autoremove -y && \ RUN apt-get update && apt-get upgrade -y && apt-get autoremove -y && \ apt-get install -y --no-install-recommends \ - python3 python3-pip libpython3.8-dev python3.8-venv libgl1-mesa-glx 
libglib2.0-0 cython3 gcc make curl unzip libc++1-8 \ + python3 python3-pip libpython3.10-dev python3.10-venv libgl1-mesa-glx libglib2.0-0 cython3 gcc make curl unzip libc++1-14 \ git locales openssh-client ca-certificates tar gzip parallel \ - zip bzip2 gnupg wget python3-six python3-pip libncurses5 openjdk-17-jdk-headless clang-format-10 golang-1.13-go build-essential + zip bzip2 gnupg wget python3-six python3-pip libncurses5 openjdk-11-jdk-headless clang-format golang-1.13-go build-essential -RUN pip3 install pip==21.3.1 setuptools==31.0.1 -RUN pip3 install tensorflow-cpu==2.13.1 -RUN pip3 install protobuf==3.20.3 -RUN pip3 install Pillow opencv-python==4.3.0.38 setuptools matplotlib tensorflow_hub tf-slim \ - absl-py numpy pyyaml decorator scipy attrs pytest tflite psutil +RUN pip3 install pip==24.1.1 setuptools==45.0.0 +RUN pip3 install numpy==1.23.1 opencv-python tensorflow-cpu==2.13.1 +RUN pip3 install protobuf==3.6.0 +RUN pip3 install Pillow tensorflow_hub tf-slim \ + absl-py pyyaml +RUN pip3 install pandas matplotlib +RUN pip3 install onnx==1.12.0 onnxruntime packaging +#RUN pip3 install protobuf==3.19.4 + +RUN curl -SL http://releases.llvm.org/9.0.0/clang+llvm-9.0.0-x86_64-linux-gnu-ubuntu-16.04.tar.xz | tar -xJC /usr/local/. 
&& \ + mv /usr/local/clang+llvm-9.0.0-x86_64-linux-gnu-ubuntu-16.04 /usr/local/clang-9.0.0 + +RUN apt-get update && apt-get install -y bash coreutils +RUN which readlink + +ARG ndk_version=android-ndk-r25c +ARG android_ndk_home=/opt/android/${ndk_version} +# Install the NDK +# Use wget instead of curl to avoid "Error in the HTTP2 framing layer" +RUN cd /tmp && wget -nv https://dl.google.com/android/repository/${ndk_version}-linux.zip && \ + unzip -q /tmp/${ndk_version}-linux.zip -d /opt/android && \ + rm /tmp/${ndk_version}-linux.zip + +ENV ANDROID_NDK_HOME ${android_ndk_home} +ENV ANDROID_NDK_ROOT ${android_ndk_home} + +ENV PATH=${ANDROID_NDK_ROOT}:/usr/local/clang-9.0.0/bin:${PATH} RUN ln -s /usr/bin/python3 /usr/bin/python RUN apt-get clean -RUN mkdir -p /home/mlperf && chmod 754 /home/mlperf +RUN mkdir -p /home/mlperf && chmod 777 /home/mlperf ENV HOME /home/mlperf -USER mlperfuser +USER mlperfuser \ No newline at end of file diff --git a/mobile_back_qti/make/qti_backend.mk b/mobile_back_qti/make/qti_backend.mk index bde9675f4..37732a543 100644 --- a/mobile_back_qti/make/qti_backend.mk +++ b/mobile_back_qti/make/qti_backend.mk @@ -19,10 +19,9 @@ else ifeq ($(WITH_QTI),$(filter $(WITH_QTI),1 2)) backend_qti_flutter_docker_args=-v "${SNPE_SDK}:/mnt/project/mobile_back_qti/$(shell basename ${SNPE_SDK})" endif $(info WITH_QTI=$(WITH_QTI)) - local_snpe_sdk_root=$(shell echo mobile_back_qti/qaisw-* | awk '{print $$NF}') + local_snpe_sdk_root=$(shell echo mobile_back_qti/qairt/* | awk '{print $$NF}') $(info detected SNPE SDK: ${local_snpe_sdk_root}) backend_qti_android_files=${BAZEL_LINKS_PREFIX}bin/mobile_back_qti/cpp/backend_qti/libqtibackend.so \ - ${BAZEL_LINKS_PREFIX}bin/flutter/android/commonlibs/lib_arm64/libc++_shared.so \ ${local_snpe_sdk_root}/lib/aarch64-android/libSNPE.so \ ${local_snpe_sdk_root}/lib/aarch64-android/libSnpeHtpV75Stub.so \ ${local_snpe_sdk_root}/lib/aarch64-android/libSnpeHtpV73Stub.so \ @@ -32,9 +31,14 @@ else ifeq ($(WITH_QTI),$(filter 
$(WITH_QTI),1 2)) ${local_snpe_sdk_root}/lib/hexagon-v75/unsigned/libSnpeHtpV75Skel.so \ ${local_snpe_sdk_root}/lib/hexagon-v73/unsigned/libSnpeHtpV73Skel.so \ ${local_snpe_sdk_root}/lib/hexagon-v69/unsigned/libSnpeHtpV69Skel.so \ - ${local_snpe_sdk_root}/lib/hexagon-v68/unsigned/libSnpeHtpV68Skel.so + ${local_snpe_sdk_root}/lib/hexagon-v68/unsigned/libSnpeHtpV68Skel.so \ + ${local_snpe_sdk_root}/lib/aarch64-android/libQnnHtp.so \ + ${local_snpe_sdk_root}/lib/aarch64-android/libQnnHtpNetRunExtensions.so \ + ${local_snpe_sdk_root}/lib/aarch64-android/libQnnHtpV75Stub.so \ + ${local_snpe_sdk_root}/lib/hexagon-v75/unsigned/libQnnHtpV75Skel.so \ + ${local_snpe_sdk_root}/lib/aarch64-android/libQnnSystem.so \ + ${BAZEL_LINKS_PREFIX}bin/flutter/android/commonlibs/lib_arm64/libc++_shared.so backend_qti_cmdline_files=${BAZEL_LINKS_PREFIX}bin/mobile_back_qti/cpp/backend_qti/libqtibackend.so \ - ${BAZEL_LINKS_PREFIX}bin/flutter/android/commonlibs/lib_arm64/libc++_shared.so \ ${local_snpe_sdk_root}/lib/aarch64-android/libSNPE.so \ ${local_snpe_sdk_root}/lib/aarch64-android/libSnpeHtpV75Stub.so \ ${local_snpe_sdk_root}/lib/aarch64-android/libSnpeHtpV73Stub.so \ @@ -45,7 +49,15 @@ else ifeq ($(WITH_QTI),$(filter $(WITH_QTI),1 2)) ${local_snpe_sdk_root}/lib/hexagon-v73/unsigned/libSnpeHtpV73Skel.so \ ${local_snpe_sdk_root}/lib/hexagon-v69/unsigned/libSnpeHtpV69Skel.so \ ${local_snpe_sdk_root}/lib/hexagon-v68/unsigned/libSnpeHtpV68Skel.so \ - mobile_back_qti/run_mlperf_tests.sh + mobile_back_qti/run_mlperf_tests.sh \ + ${local_snpe_sdk_root}/lib/aarch64-android/libQnnHtp.so \ + ${local_snpe_sdk_root}/lib/aarch64-android/libQnnHtpNetRunExtensions.so \ + ${local_snpe_sdk_root}/lib/aarch64-android/libQnnHtpV75Stub.so \ + ${local_snpe_sdk_root}/lib/hexagon-v75/unsigned/libQnnHtpV75Skel.so \ + ${local_snpe_sdk_root}/lib/aarch64-android/libQnnSystem.so \ + ${BAZEL_LINKS_PREFIX}bin/flutter/android/commonlibs/lib_arm64/libc++_shared.so + + 
backend_qti_android_target_sd=//mobile_back_qti/cpp/backend_qti/StableDiffusion:stableDiffusion backend_qti_android_target=//mobile_back_qti/cpp/backend_qti:libqtibackend.so \ //flutter/android/commonlibs:commonlibs @@ -54,6 +66,19 @@ else ifeq ($(WITH_QTI),$(filter $(WITH_QTI),1 2)) backend_qti_android_target+=--//mobile_back_qti/cpp/backend_qti:external_config=${EXTERNAL_CONFIG} endif + ifeq ($(WITH_STABLEDIFFUSION),1) + backend_qti_flutter_docker_args = --env WITH_STABLEDIFFUSION=${WITH_STABLEDIFFUSION} + backend_qti_android_target+=--//mobile_back_qti/cpp/backend_qti:with_stablediffusion=${WITH_STABLEDIFFUSION} + backend_qti_cmdline_files+=mobile_back_qti/cpp/backend_qti/StableDiffusionShared/include/opencv/libs/arm64-v8a/libopencv_core.so \ + mobile_back_qti/cpp/backend_qti/StableDiffusionShared/include/opencv/libs/arm64-v8a/libopencv_imgcodecs.so \ + mobile_back_qti/cpp/backend_qti/StableDiffusionShared/include/opencv/libs/arm64-v8a/libopencv_imgproc.so \ + mobile_back_qti/cpp/backend_qti/StableDiffusionShared/libStableDiffusion.so + backend_qti_android_files+=mobile_back_qti/cpp/backend_qti/StableDiffusionShared/include/opencv/libs/arm64-v8a/libopencv_core.so \ + mobile_back_qti/cpp/backend_qti/StableDiffusionShared/include/opencv/libs/arm64-v8a/libopencv_imgcodecs.so \ + mobile_back_qti/cpp/backend_qti/StableDiffusionShared/include/opencv/libs/arm64-v8a/libopencv_imgproc.so \ + mobile_back_qti/cpp/backend_qti/StableDiffusionShared/libStableDiffusion.so + endif + backend_qti_windows_files=${BAZEL_LINKS_PREFIX}bin/mobile_back_qti/cpp/backend_qti/libqtibackend.dll \ ${BAZEL_LINKS_PREFIX}bin/mobile_back_qti/cpp/backend_qti/libqtibackend.pdb \ ${local_snpe_sdk_root}/lib/aarch64-windows-msvc/SNPE.dll \ diff --git a/mobile_back_qti/run_mlperf_tests.bat b/mobile_back_qti/run_mlperf_tests.bat index 5b7a155cc..95caab306 100644 --- a/mobile_back_qti/run_mlperf_tests.bat +++ b/mobile_back_qti/run_mlperf_tests.bat @@ -34,7 +34,7 @@ rem # use --models argument to pass 
models path as value rem # use --mode argument to run in performance or accuracy mode. Defaults to performance mode. rem # valid values for --mode argument: performance, accuracy. rem # use --usecase argument to pass name of usecase to run as value (if not mentioned, by default runs all 8 usecases) -rem # valid values for --usecase argument: image_classification_v2, image_classification, object_detection, image_segmentation, language_understanding, super_resolution, image_classification_offline_v2, image_classification_offline +rem # valid values for --usecase argument: image_classification_v2, object_detection, image_segmentation, language_understanding, super_resolution, image_classification_offline_v2 :loop IF NOT "%1"=="" ( @@ -115,14 +115,6 @@ IF "%usecase_name%"=="image_classification_offline_v2" ( call :image_classification_offline_v2_performance goto :eof ) -IF "%usecase_name%"=="image_classification" ( - call :image_classification_performance - goto :eof -) -IF "%usecase_name%"=="image_classification_offline" ( - call :image_classification_offline_performance - goto :eof -) IF %usecase_name%=="" ( call :image_classification_v2_performance echo ## cooldown intitated ## @@ -140,12 +132,6 @@ IF %usecase_name%=="" ( echo ## cooldown intitated ## timeout /t %cooldown_period% /nobreak call :image_classification_offline_v2_performance - echo ## cooldown intitated ## - timeout /t %cooldown_period% /nobreak - call :image_classification_performance - echo ## cooldown intitated ## - timeout /t %cooldown_period% /nobreak - call :image_classification_offline_performance goto :eof ) ) @@ -176,14 +162,6 @@ IF "%usecase_name%"=="image_classification_offline_v2" ( call :image_classification_offline_v2_accuracy goto :eof ) -IF "%usecase_name%"=="image_classification" ( - call :image_classification_accuracy - goto :eof -) -IF "%usecase_name%"=="image_classification_offline" ( - call :image_classification_offline_accuracy - goto :eof -) IF %usecase_name%=="" ( call 
:image_classification_v2_accuracy echo ## cooldown intitated ## @@ -201,12 +179,6 @@ IF %usecase_name%=="" ( echo ## cooldown intitated ## timeout /t %cooldown_period% /nobreak call :image_classification_offline_v2_accuracy - echo ## cooldown intitated ## - timeout /t %cooldown_period% /nobreak - call :image_classification_accuracy - echo ## cooldown intitated ## - timeout /t %cooldown_period% /nobreak - call :image_classification_offline_accuracy goto :eof ) ) @@ -293,31 +265,6 @@ findstr /C:"Samples per second" %use_case_results_file% >> %results_file% echo ####### Image classification offline V2 is complete ####### EXIT /B 0 -:image_classification_performance -echo ####### Performance:: Image classification in progress ####### -set test_case=image_classification -mkdir %test_case%%test_case_suffix% -set use_case_results_file=%results_prefix%%test_case%%results_suffix% -.\main.exe EXTERNAL %test_case% --mode=PerformanceOnly --images_directory=%dataset_path%\imagenet\img --offset=1 --output_dir=%test_case%%test_case_suffix% --min_query_count=%min_query% --min_duration_ms=%min_duration_ms% --single_stream_expected_latency_ns=600000 --groundtruth_file=%dataset_path% --model_file=%models_path%\mobilenet_edgetpu_224_1.0_htp.dlc --lib_path=libqtibackend.dll --native_lib_path=. 
> %use_case_results_file% 2>&1 -echo #######%test_case%###### >> %results_file% -findstr /C:"90th percentile latency (ns)" %use_case_results_file% >> %results_file% -findstr /C:"Result is" %use_case_results_file% >> %results_file% -findstr /C:"QPS w/o loadgen overhead" %use_case_results_file% >> %results_file% -echo ####### Image classification is complete ####### -EXIT /B 0 - -:image_classification_offline_performance -echo ####### Performance:: Image classification offline in progress ####### -set test_case=image_classification_offline -mkdir %test_case%%test_case_suffix% -set use_case_results_file=%results_prefix%%test_case%%results_suffix% -.\main.exe EXTERNAL %test_case% --mode=PerformanceOnly --scenario=Offline --batch_size=12288 --images_directory=%dataset_path%\imagenet\img --offset=1 --output_dir=%test_case%%test_case_suffix% --min_query_count=24576 --min_duration_ms=0 --single_stream_expected_latency_ns=1000000 --groundtruth_file=%dataset_path% --model_file=%models_path%\mobilenet_edgetpu_224_1.0_htp_batched_8.dlc --lib_path=libqtibackend.dll --native_lib_path=. 
> %use_case_results_file% 2>&1 -echo #######%test_case%###### >> %results_file% -findstr /C:"Result is" %use_case_results_file% >> %results_file% -findstr /C:"Samples per second" %use_case_results_file% >> %results_file% -echo ####### Image classification offline is complete ####### -EXIT /B 0 - rem ####### Accuracy usecase functions ####### :image_classification_v2_accuracy @@ -386,28 +333,6 @@ findstr "Accuracy" %use_case_results_file% >> %results_file% echo ####### Image classification offline V2 is complete ####### EXIT /B 0 -:image_classification_accuracy -echo ####### Accuracy:: Image classification in progress ####### -set test_case=image_classification -mkdir %test_case%%test_case_suffix% -set use_case_results_file=%results_prefix%%test_case%%results_suffix% -.\main.exe EXTERNAL %test_case% --mode=AccuracyOnly --images_directory=%dataset_path%\imagenet\img --offset=1 --output_dir=%test_case%%test_case_suffix% --min_query_count=%min_query% --min_duration_ms=%min_duration_ms% --single_stream_expected_latency_ns=600000 --groundtruth_file=%dataset_path%\imagenet\imagenet_val_full.txt --model_file=%models_path%\mobilenet_edgetpu_224_1.0_htp.dlc --lib_path=libqtibackend.dll --native_lib_path=. 
> %use_case_results_file% 2>&1 -echo #######%test_case%###### >> %results_file% -findstr "Accuracy" %use_case_results_file% >> %results_file% -echo ####### Image classification is complete ####### -EXIT /B 0 - -:image_classification_offline_accuracy -echo ####### Accuracy:: Image classification offline in progress ####### -set test_case=image_classification_offline -mkdir %test_case%%test_case_suffix% -set use_case_results_file=%results_prefix%%test_case%%results_suffix% -.\main.exe EXTERNAL %test_case% --mode=AccuracyOnly --scenario=Offline --batch_size=12288 --images_directory=%dataset_path%\imagenet\img --offset=1 --output_dir=%test_case%%test_case_suffix% --min_query_count=24576 --min_duration_ms=0 --single_stream_expected_latency_ns=1000000 --groundtruth_file=%dataset_path%\imagenet\imagenet_val_full.txt --model_file=%models_path%\mobilenet_edgetpu_224_1.0_htp_batched_8.dlc --lib_path=libqtibackend.dll --native_lib_path=. > %use_case_results_file% 2>&1 -echo #######%test_case%###### >> %results_file% -findstr "Accuracy" %use_case_results_file% >> %results_file% -echo ####### Image classification offline is complete ####### -EXIT /B 0 - :dataset_end echo "set dataset path using --dataset" diff --git a/mobile_back_qti/run_mlperf_tests.sh b/mobile_back_qti/run_mlperf_tests.sh index 354870677..2f21cecd9 100644 --- a/mobile_back_qti/run_mlperf_tests.sh +++ b/mobile_back_qti/run_mlperf_tests.sh @@ -32,7 +32,7 @@ export LD_LIBRARY_PATH=. # use --mode argument to run in performance or accuracy mode. Defaults to performance mode. # valid values for --mode argument: performance, accuracy. 
# use --usecase argument to pass name of usecase to run as value (if not mentioned, by default runs all 8 usecases) -# valid values for --usecase argument: image_classification_v2, image_classification, object_detection, image_segmentation, language_understanding, super_resolution, image_classification_offline_v2, image_classification_offline +# valid values for --usecase argument: image_classification_v2, object_detection, image_segmentation, language_understanding, super_resolution, image_classification_offline_v2 while [[ $# -gt 0 ]] do @@ -170,32 +170,19 @@ grep "Samples per second" $use_case_results_file >> $results_file echo "####### Image classification offline V2 is complete #######" } -image_classification_performance(){ -echo "####### Performance:: Image classification in progress #######" -export test_case=image_classification +stable_diffusion_performance(){ +echo "####### Performance:: Stable diffusion in progress #######" +export test_case=stable_diffusion mkdir -p $test_case$test_case_suffix export use_case_results_file=$results_prefix$test_case$results_suffix -./main EXTERNAL $test_case --mode=PerformanceOnly --images_directory=$dataset_path/imagenet/img --offset=1 --output_dir=$test_case$test_case_suffix --min_query_count=$min_query --min_duration_ms=$min_duration_ms --single_stream_expected_latency_ns=500000 --groundtruth_file="" --model_file=$models_path/mobilenet_edgetpu_224_1.0_htp.dlc --lib_path=libqtibackend.so --native_lib_path=. > $use_case_results_file 2>&1 +./main EXTERNAL $test_case --mode=PerformanceOnly --input_tfrecord=$dataset_path/stable_diffusion/coco_gen_full.tfrecord --output_dir=$test_case$test_case_suffix --min_query_count=1024 --min_duration_ms=60000 --max_duration_ms=300000 --single_stream_expected_latency_ns=1000000 --model_file=$models_path/stable_diffusion --lib_path=libqtibackend.so --native_lib_path=. 
> $use_case_results_file 2>&1 echo "#######$test_case######" >> $results_file grep "90th percentile latency (ns)" $use_case_results_file >> $results_file grep "Result is" $use_case_results_file >> $results_file grep "QPS w/o loadgen overhead" $use_case_results_file >> $results_file -echo "####### Image classification is complete #######" +echo "####### Stable Diffusion is complete #######" } -image_classification_offline_performance(){ -echo "####### Performance:: Image classification offline in progress #######" -export test_case=image_classification_offline -mkdir -p $test_case$test_case_suffix -export use_case_results_file=$results_prefix$test_case$results_suffix -./main EXTERNAL $test_case --mode=PerformanceOnly --scenario=Offline --batch_size=12288 --images_directory=$dataset_path/imagenet/img --offset=1 --output_dir=$test_case$test_case_suffix --min_query_count=24576 --min_duration_ms=0 --single_stream_expected_latency_ns=1000000 --groundtruth_file= --model_file=$models_path/mobilenet_edgetpu_224_1.0_htp_batched_4.dlc --lib_path=libqtibackend.so --native_lib_path=. 
> $use_case_results_file 2>&1 -echo "#######$test_case######" >> $results_file -grep "Result is" $use_case_results_file >> $results_file -grep "Samples per second" $use_case_results_file >> $results_file -echo "####### Image classification offline is complete #######" -} - - ####### Accuracy usecase functions ####### image_classification_v2_accuracy(){ @@ -236,7 +223,7 @@ echo "####### Accuracy:: Natural language processing in progress #######" export test_case=natural_language_processing mkdir -p $test_case$test_case_suffix export use_case_results_file=$results_prefix$test_case$results_suffix -./main EXTERNAL $test_case --mode=AccuracyOnly --input_file=$dataset_path/squad/squad_eval.tfrecord --output_dir=$test_case$test_case_suffix --min_query_count=$min_query --min_duration_ms=$min_duration_ms --single_stream_expected_latency_ns=1000000 --groundtruth_file=$dataset_path/squad/squad_groundtruth.tfrecord --model_file=$models_path/mobilebert_quantized_htp.dlc --lib_path=libqtibackend.so --native_lib_path=. > $use_case_results_file 2>&1 +./main EXTERNAL $test_case --mode=AccuracyOnly --input_file=$dataset_path/squad/squad_eval_mini.tfrecord --output_dir=$test_case$test_case_suffix --min_query_count=$min_query --min_duration_ms=$min_duration_ms --single_stream_expected_latency_ns=1000000 --groundtruth_file=$dataset_path/squad/squad_groundtruth.tfrecord --model_file=$models_path/mobilebert_quantized_htp.dlc --lib_path=libqtibackend.so --native_lib_path=. 
> $use_case_results_file 2>&1 echo "#######$test_case######" >> $results_file grep "Accuracy" $use_case_results_file >> $results_file echo "####### Natural language processing is complete #######" @@ -264,29 +251,18 @@ grep "Accuracy" $use_case_results_file >> $results_file echo "####### Image classification offline V2 is complete #######" } -image_classification_accuracy(){ -echo "####### Accuracy:: Image classification in progress #######" -export test_case=image_classification +stable_diffusion_accuracy(){ +echo "####### Accuracy:: Stable diffusion in progress #######" +export test_case=stable_diffusion mkdir -p $test_case$test_case_suffix export use_case_results_file=$results_prefix$test_case$results_suffix -./main EXTERNAL $test_case --mode=AccuracyOnly --images_directory=$dataset_path/imagenet/img --offset=1 --output_dir=$test_case$test_case_suffix --min_query_count=$min_query --min_duration_ms=$min_duration_ms --single_stream_expected_latency_ns=1000000 --groundtruth_file=$dataset_path/imagenet/imagenet_val_full.txt --model_file=$models_path/mobilenet_edgetpu_224_1.0_htp.dlc --lib_path=libqtibackend.so --native_lib_path=. > $use_case_results_file 2>&1 +./main EXTERNAL $test_case --mode=AccuracyOnly --input_tfrecord=$dataset_path/stable_diffusion/coco_gen_test.tfrecord --input_clip_model=$models_path/stable_diffusion/clip_model_512x512.tflite --output_dir=$test_case$test_case_suffix --min_query_count=100 --min_duration_ms=0 --single_stream_expected_latency_ns=1000000 --model_file=$models_path/stable_diffusion --lib_path=libqtibackend.so --native_lib_path=. 
> $use_case_results_file 2>&1 echo "#######$test_case######" >> $results_file grep "Accuracy" $use_case_results_file >> $results_file -echo "####### Image classification is complete #######" +echo "####### Stable Diffusion is complete #######" } -image_classification_offline_accuracy(){ -echo "####### Accuracy:: Image classification offline in progress #######" -export test_case=image_classification_offline -mkdir -p $test_case$test_case_suffix -export use_case_results_file=$results_prefix$test_case$results_suffix -./main EXTERNAL $test_case --mode=AccuracyOnly --scenario=Offline --batch_size=12288 --images_directory=$dataset_path/imagenet/img --offset=1 --output_dir=$test_case$test_case_suffix --min_query_count=24576 --min_duration_ms=0 --single_stream_expected_latency_ns=1000000 --groundtruth_file=$dataset_path/imagenet/imagenet_val_full.txt --model_file=$models_path/mobilenet_edgetpu_224_1.0_htp_batched_4.dlc --lib_path=libqtibackend.so --native_lib_path=. > $use_case_results_file 2>&1 -echo "#######$test_case######" >> $results_file -grep "Accuracy" $use_case_results_file >> $results_file -echo "####### Image classification offline is complete #######" -} - -if [[ "$mode" == "performance" || "$mode" == "" ]] +if [[ "$mode" == "performance" || "$mode" == "" ]] then case $usecase_name in "image_classification_v2") @@ -307,11 +283,8 @@ case $usecase_name in "image_classification_offline_v2") image_classification_offline_v2_performance ;; - "image_classification") - image_classification_performance - ;; - "image_classification_offline") - image_classification_offline_performance + "stable_diffusion") + stable_diffusion_performance ;; *) image_classification_v2_performance @@ -332,10 +305,7 @@ case $usecase_name in image_classification_offline_v2_performance echo "## cooldown intitated ##" sleep $cooldown_period - image_classification_performance - echo "## cooldown intitated ##" - sleep $cooldown_period - image_classification_offline_performance + 
stable_diffusion_performance ;; esac fi @@ -361,11 +331,8 @@ case $usecase_name in "image_classification_offline_v2") image_classification_offline_v2_accuracy ;; - "image_classification") - image_classification_accuracy - ;; - "image_classification_offline") - image_classification_offline_accuracy + "stable_diffusion") + stable_diffusion_accuracy ;; *) image_classification_v2_accuracy @@ -386,10 +353,7 @@ case $usecase_name in image_classification_offline_v2_accuracy echo "## cooldown intitated ##" sleep $cooldown_period - image_classification_accuracy - echo "## cooldown intitated ##" - sleep $cooldown_period - image_classification_offline_accuracy + stable_diffusion_accuracy ;; esac fi diff --git a/mobile_back_qti/variables.bzl b/mobile_back_qti/variables.bzl index ef0704d14..986ab29c4 100644 --- a/mobile_back_qti/variables.bzl +++ b/mobile_back_qti/variables.bzl @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2023 Qualcomm Innovation Center, Inc. All rights reserved. +# Copyright (c) 2020-2024 Qualcomm Innovation Center, Inc. All rights reserved. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -17,26 +17,26 @@ def _impl(repository_ctx): if "windows" in repository_ctx.os.name: # print(repository_ctx.attr.workspace_dir + "/mobile_back_qti/") - found = repository_ctx.execute(["ls", repository_ctx.attr.workspace_dir + "/mobile_back_qti"]) + found = repository_ctx.execute(["ls", repository_ctx.attr.workspace_dir + "/mobile_back_qti/qairt/"]) if found.return_code != 0 or found.stdout == "" or found.stdout == "\n": - fail("qaisw folder is not found in the repo: " + found.stderr) + fail("qairt folder is not found in the repo: " + found.stderr) filelist = found.stdout.split("\n") filepath = "" for x in filelist: - if x.find("qaisw-") == 0: + if x.startswith("2"): filepath = x break if filepath == "": - fail("qaisw folder is not found in the repo") + fail("qairt folder is not found in the repo") else: - found = repository_ctx.execute(["find", repository_ctx.attr.workspace_dir + "/mobile_back_qti/", "-maxdepth", "1", "-name", "qaisw-*", "-type", "d", "-print", "-quit"]) + found = repository_ctx.execute(["find", repository_ctx.attr.workspace_dir + "/mobile_back_qti/qairt/", "-maxdepth", "1", "-name", "2.*", "-type", "d", "-print", "-quit"]) if found.return_code != 0 or found.stdout == "" or found.stdout == "\n": - fail("qaisw folder is not found in the repo") + fail("qairt folder is not found in the repo") filepath = found.stdout[:-1] - sdk_version = filepath[found.stdout.rfind("/") + 1:] + print("Update SNPE version: " + sdk_version) # buildifier: disable=print - repository_ctx.read(Label("@//:mobile_back_qti/" + sdk_version + "/ReleaseNotes.txt")) + repository_ctx.read(Label("@//:mobile_back_qti/qairt/" + sdk_version + "/ReleaseNotes.txt")) repository_ctx.file("BUILD", "") repository_ctx.file( From bcf4215ee0024f561e8b30026193705354d32b55 Mon Sep 17 00:00:00 2001 From: Mohit Mundhra Date: Fri, 27 Sep 2024 16:25:15 +0530 Subject: [PATCH 04/18] Addressing review comments --- flutter/android/android-docker.mk | 3 -- flutter/android/android.mk | 6 +++- 
flutter/android/docker/Dockerfile | 45 ++--------------------------- flutter/cpp/binary/cmdline.mk | 6 +++- mobile_back_qti/make/qti_backend.mk | 2 ++ 5 files changed, 14 insertions(+), 48 deletions(-) diff --git a/flutter/android/android-docker.mk b/flutter/android/android-docker.mk index 675ac2475..3e38a8386 100644 --- a/flutter/android/android-docker.mk +++ b/flutter/android/android-docker.mk @@ -19,9 +19,6 @@ user_id=$(shell id -u) .PHONY: flutter/android/docker/image flutter/android/docker/image: output/docker/mlperf_mobile_flutter_android_${user_id}.stamp output/docker/mlperf_mobile_flutter_android_${user_id}.stamp: flutter/android/docker/Dockerfile - ## TODO: change if clause according to make file - rm -f ./mobile_back_qti/cpp/backend_qti/StableDiffusionShared/include/opencv - ln -s /opt/opencv-3.4.7_android/sdk/native ./mobile_back_qti/cpp/backend_qti/StableDiffusionShared/include/opencv docker image build -t ${DOCKER_IMAGE_TAG} flutter/android/docker mkdir -p output/docker touch $@ diff --git a/flutter/android/android.mk b/flutter/android/android.mk index effa10ab0..6a35953a8 100644 --- a/flutter/android/android.mk +++ b/flutter/android/android.mk @@ -20,7 +20,7 @@ ANDROID_NDK_API_LEVEL?=33 flutter/android: flutter/android/libs flutter/android/release: flutter/check-release-env flutter/android flutter/prepare flutter/android/apk flutter/android/appbundle -flutter/android/libs: flutter/android/libs/checksum flutter/android/libs/build flutter/android/libs/copy +flutter/android/libs: flutter/android/libs/deps flutter/android/libs/checksum flutter/android/libs/build flutter/android/libs/copy # run `make flutter/android/apk` before `flutter/android/test-apk` flutter/android/test-apk: flutter/android/test-apk/main flutter/android/test-apk/helper @@ -35,6 +35,10 @@ else @echo "Skip checksum validation" endif +.PHONY: flutter/android/libs/deps +flutter/android/libs/deps: + ${backend_qti_libs_deps} + .PHONY: flutter/android/libs/build flutter/android/libs/build: 
bazel ${BAZEL_OUTPUT_ROOT_ARG} ${proxy_bazel_args} ${sonar_bazel_startup_options} \ diff --git a/flutter/android/docker/Dockerfile b/flutter/android/docker/Dockerfile index fdad27713..48f19dea3 100644 --- a/flutter/android/docker/Dockerfile +++ b/flutter/android/docker/Dockerfile @@ -30,6 +30,7 @@ RUN curl --proto '=https' -L https://github.com/bazelbuild/bazelisk/releases/dow chmod +x /usr/local/bin/bazel ENV ANDROID_SDK_ROOT=/opt/android +ENV ANDROID_HOME=/opt/android WORKDIR $ANDROID_SDK_ROOT/cmdline-tools # sdkmanager expects to be placed into `$ANDROID_SDK_ROOT/cmdline-tools/tools` RUN curl --proto '=https' -L https://dl.google.com/android/repository/commandlinetools-linux-7583922_latest.zip | jar x && \ @@ -120,48 +121,6 @@ RUN apt-get update && apt-get install --no-install-recommends -y mc \ zlib1g-dev && \ apt-get clean autoclean -# Flatbuffers before installing Python3 -# RUN apt-add-repository ppa:hnakamur/flatbuffers -RUN apt update && apt install -y 'flatbuffers-compiler' && apt install -y 'libflatbuffers-dev' -ENV PATH "/usr/bin/flatc:${PATH}" - -# OpenJDK-11 -# NOTE: JDK is required for Android SDK which is required for openCV 3.4.7 -RUN apt-get update && \ - apt-get install -y openjdk-17-jdk && \ - apt-get install -y ant && \ - apt-get clean -# Fix certificate issues -RUN apt-get update && \ - apt-get install ca-certificates-java && \ - apt-get clean && \ - update-ca-certificates -f -# Setup JAVA_HOME -ENV JAVA_HOME /usr/lib/jvm/java-17-openjdk-amd64 - -# Get and install android command line tool which will be used to install android-sdk -RUN cd /opt && mkdir -p ./Android/cmdline-tools && \ - wget -q https://dl.google.com/android/repository/commandlinetools-linux-8512546_latest.zip && \ - unzip commandlinetools-linux-8512546_latest.zip -d ./Android/cmdline-tools/ && \ - mv ./Android/cmdline-tools/cmdline-tools ./Android/cmdline-tools/tools && \ - rm -rf commandlinetools-linux-8512546_latest.zip - -# Android SDK -RUN cd 
/opt/Android/cmdline-tools/tools/bin && \ - yes | ./sdkmanager "platforms;android-33" "build-tools;30.0.2" "sources;android-33" \ - "extras;google;m2repository" "extras;android;m2repository" && \ - yes | ./sdkmanager --licenses && \ - yes | ./sdkmanager --update -# Setup ANDROID_HOME -ENV ANDROID_HOME /opt/Android - -# Android NDK -RUN cd /opt/Android/cmdline-tools/tools/bin && \ - yes | ./sdkmanager --install "ndk;25.2.9519653" -# Setup ANDROID_NDK and add into PATH -ENV ANDROID_NDK /opt/Android/ndk/25.2.9519653 -ENV PATH "${ANDROID_NDK}:${PATH}" - # Get cmake-3.19.3 # apt has 3.10.2 as latest version, so remove it RUN apt-get remove --no-install-recommends -y cmake @@ -173,7 +132,7 @@ RUN wget -q https://github.com/Kitware/CMake/releases/download/v3.19.3/cmake-3.1 ENV PATH "/opt/cmake/bin:${PATH}" # OpenCV -ENV CMAKE_TOOLCHAIN_FILE "${ANDROID_NDK}/build/cmake/android.toolchain.cmake" +ENV CMAKE_TOOLCHAIN_FILE "${ANDROID_NDK_ROOT}/build/cmake/android.toolchain.cmake" ENV ANDROID_ABI "arm64-v8a" ENV API_LEVEL "31" ENV ANDROID_TOOLCHAIN_NAME "aarch64-linux-android-4.9" diff --git a/flutter/cpp/binary/cmdline.mk b/flutter/cpp/binary/cmdline.mk index fce713b17..f920044ae 100644 --- a/flutter/cpp/binary/cmdline.mk +++ b/flutter/cpp/binary/cmdline.mk @@ -15,7 +15,11 @@ include flutter/cpp/binary/cmdline-docker.mk -cmdline/android/bins/release: cmdline/android/bins/build cmdline/android/bins/copy +cmdline/android/bins/release: cmdline/android/libs/deps cmdline/android/bins/build cmdline/android/bins/copy + +.PHONY: cmdline/android/libs/deps +cmdline/android/libs/deps: + ${backend_qti_libs_deps} .PHONY: cmdline/android/bins/build cmdline/android/bins/build: diff --git a/mobile_back_qti/make/qti_backend.mk b/mobile_back_qti/make/qti_backend.mk index 37732a543..bb6cbd270 100644 --- a/mobile_back_qti/make/qti_backend.mk +++ b/mobile_back_qti/make/qti_backend.mk @@ -67,6 +67,8 @@ else ifeq ($(WITH_QTI),$(filter $(WITH_QTI),1 2)) endif ifeq ($(WITH_STABLEDIFFUSION),1) + 
backend_qti_libs_deps = rm -f ./mobile_back_qti/cpp/backend_qti/StableDiffusionShared/include/opencv && \ + ln -s /opt/opencv-3.4.7_android/sdk/native mobile_back_qti/cpp/backend_qti/StableDiffusionShared/include/opencv backend_qti_flutter_docker_args = --env WITH_STABLEDIFFUSION=${WITH_STABLEDIFFUSION} backend_qti_android_target+=--//mobile_back_qti/cpp/backend_qti:with_stablediffusion=${WITH_STABLEDIFFUSION} backend_qti_cmdline_files+=mobile_back_qti/cpp/backend_qti/StableDiffusionShared/include/opencv/libs/arm64-v8a/libopencv_core.so \ From 0ae020ee65a3c7238c2b8c9201a1288faeed2780 Mon Sep 17 00:00:00 2001 From: Anh Date: Thu, 3 Oct 2024 09:27:02 +0900 Subject: [PATCH 05/18] Update seed and num_steps for TFLite SD task (#16) --- .../cpp/backend_tflite/stable_diffusion_pipeline.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mobile_back_tflite/cpp/backend_tflite/stable_diffusion_pipeline.h b/mobile_back_tflite/cpp/backend_tflite/stable_diffusion_pipeline.h index c16a7d5c8..adf460530 100644 --- a/mobile_back_tflite/cpp/backend_tflite/stable_diffusion_pipeline.h +++ b/mobile_back_tflite/cpp/backend_tflite/stable_diffusion_pipeline.h @@ -37,8 +37,8 @@ struct SDBackendData { std::vector input_prompt_tokens; std::vector unconditional_tokens; - int num_steps{10}; - int seed{0}; + int num_steps{20}; + int seed{633994880}; std::vector output; std::unique_ptr executer; From acb17bff137bb14bb53d0914d74d78f51791588c Mon Sep 17 00:00:00 2001 From: Mohit Mundhra Date: Thu, 3 Oct 2024 22:30:04 +0530 Subject: [PATCH 06/18] Ran make format --- WORKSPACE | 2 +- mobile_back_qti/BUILD | 13 +- .../DLC/util/StableDiffusion/README.md | 34 +++-- mobile_back_qti/README.md | 15 +- mobile_back_qti/cpp/backend_qti/BUILD | 11 +- .../backend_qti/StableDiffusionShared/BUILD | 20 ++- .../StableDiffusionShared/variables.bzl | 1 - .../cpp/backend_qti/mlperf_helper.h | 10 +- .../cpp/backend_qti/qti_backend_helper.cc | 135 +++++++++--------- 
.../cpp/backend_qti/qti_backend_helper.h | 14 +- mobile_back_qti/cpp/backend_qti/qti_c.cc | 45 +++--- .../cpp/backend_qti/soc_utility.cc | 3 +- mobile_back_qti/cpp/backend_qti/soc_utility.h | 1 - mobile_back_qti/variables.bzl | 2 +- 14 files changed, 163 insertions(+), 143 deletions(-) diff --git a/WORKSPACE b/WORKSPACE index 34db3f4b6..49e0b0c63 100644 --- a/WORKSPACE +++ b/WORKSPACE @@ -113,4 +113,4 @@ load("//mobile_back_qti/cpp/backend_qti/StableDiffusionShared:variables.bzl", "s stable_diffusion_external_deps_shared( name = "stable_diffusion_external_deps_shared", workspace_dir = __workspace_dir__, -) \ No newline at end of file +) diff --git a/mobile_back_qti/BUILD b/mobile_back_qti/BUILD index b34f03292..661b866b1 100644 --- a/mobile_back_qti/BUILD +++ b/mobile_back_qti/BUILD @@ -49,15 +49,18 @@ cc_library( cc_library( name = "snpe", srcs = [ - "qairt/" + SNPE_VERSION + "/lib/aarch64-android/libSNPE.so" + "qairt/" + SNPE_VERSION + "/lib/aarch64-android/libSNPE.so", ], hdrs = glob([ "qairt/" + SNPE_VERSION + "/include/SNPE/**/*.hpp", "qairt/" + SNPE_VERSION + "/include/SNPE/**/*.h", - "qairt/" + SNPE_VERSION + "/include/QNN/**/*.h", - "qairt/" + SNPE_VERSION + "/include/QNN/*.h" + "qairt/" + SNPE_VERSION + "/include/QNN/**/*.h", + "qairt/" + SNPE_VERSION + "/include/QNN/*.h", ]), - copts = ["-I" + "qairt/" + SNPE_VERSION + "/include/SNPE", "-I" + SNPE_VERSION + "/include/QNN"], + copts = [ + "-I" + "qairt/" + SNPE_VERSION + "/include/SNPE", + "-I" + SNPE_VERSION + "/include/QNN", + ], visibility = ["//visibility:public"], ) @@ -75,7 +78,7 @@ cc_library( "qairt/" + SNPE_VERSION + "/lib/hexagon-v69/unsigned/libSnpeHtpV69Skel.so", "qairt/" + SNPE_VERSION + "/lib/hexagon-v73/unsigned/libSnpeHtpV73Skel.so", "qairt/" + SNPE_VERSION + "/lib/hexagon-v75/unsigned/libSnpeHtpV75Skel.so", - "qairt/" + SNPE_VERSION + "/lib/aarch64-android/libQnnHtpV73Stub.so", + "qairt/" + SNPE_VERSION + "/lib/aarch64-android/libQnnHtpV73Stub.so", "qairt/" + SNPE_VERSION + 
"/lib/aarch64-android/libQnnHtp.so", "qairt/" + SNPE_VERSION + "/lib/aarch64-android/libQnnHtpPrepare.so", "qairt/" + SNPE_VERSION + "/lib/hexagon-v73/unsigned/libQnnHtpV73Skel.so", diff --git a/mobile_back_qti/DLC/util/StableDiffusion/README.md b/mobile_back_qti/DLC/util/StableDiffusion/README.md index 31c9b7f97..44c1339b4 100644 --- a/mobile_back_qti/DLC/util/StableDiffusion/README.md +++ b/mobile_back_qti/DLC/util/StableDiffusion/README.md @@ -1,25 +1,25 @@ # Stable Diffusion -### This readme contains necessary steps to: +## This readme contains necessary steps to + * Run AIMET quantization * Convert generated onnx files to bin files * To generate all the artifacts needed for stable diffusion inference on Qualcomm Soc - ### Platform requirements * Machine running Ubuntu 20.04 at least -* AIMET PRO version 1.29.0 (make script will automatically be installing it) +* AIMET PRO version 1.29.0 `(make script will automatically be installing it)` * Docker version 20.10.24 * Machine enabled with Nvidia Tesla A100 or Tesla V100 (32GB at least) * NVIDIA driver version equivalent to 525.60.13 - ### Steps to execute `Please follow below steps in the mentioned order and run them as root to avoid permission issues` #### Prerequisites + * Clone the mobile_app_open repository * Install Qualcomm Package manager on the linux machine @@ -38,24 +38,30 @@ cp -rv /opt/qcom/aistack/qairt/2.25.0.240728 mobile_app_open/mobile_back_qti/qai Once done, -* Clone the AIMET SD notebook repository inside - /mobile_back_qti/DLC/util/StableDiffusion/AIMET +* Clone the AIMET SD notebook repository inside + `/mobile_back_qti/DLC/util/StableDiffusion/AIMET` -* Create hugging face access token and paste it on line 2 of aimet.py script, inside /mobile_back_qti/DLC/util/StableDiffusion/AIMET folder. - Place holder provided in aimet.py. +* Create hugging face access token and paste it on `line 2 of aimet.py` script, inside `/mobile_back_qti/DLC/util/StableDiffusion/AIMET` folder. 
+ Place holder provided in `aimet.py`. * Inside AIMET directory run this make command + ```shell sudo make aimet_calibration ``` -* Once, the above make command completes successfully, move to - /mobile_back_qti/DLC or type + +* Once, the above make command completes successfully, move to + `/mobile_back_qti/DLC` or type + ```shell cd ../../../ ``` -* After reaching /mobile_back_qti/DLC run this make command - ```shell + +* After reaching `/mobile_back_qti/DLC` run this make command + + ```shell sudo make stable_diffusion_qnn SNPE_SDK=/mobile_back_qti/qairt/ ``` -* After successful execution, all the artifacts needed to run stable diffusion inference on device will be located in - /output/DLC/mlperf_models/stable_diffusion + +* After successful execution, all the artifacts needed to run stable diffusion inference on device will be located in + `/output/DLC/mlperf_models/stable_diffusion` diff --git a/mobile_back_qti/README.md b/mobile_back_qti/README.md index 8fdf7d5ef..60e6cf88d 100644 --- a/mobile_back_qti/README.md +++ b/mobile_back_qti/README.md @@ -27,11 +27,10 @@ Use your browser to download the SNPE SDK using the links above. Create your Github personal access token. - ### Note for Stable Diffusion -To generate stable diffusion model, please follow the instructions mentioned at -[](DLC/util/StableDiffusion/README.md) +To generate stable diffusion model, please follow the instructions mentioned at +``(DLC/util/StableDiffusion/README.md) ```shell export SNPE_SDK= @@ -76,11 +75,13 @@ Build with the following build command. make OFFICIAL_BUILD=true FLUTTER_BUILD_NUMBER=1 WITH_QTI=1 docker/flutter/android/release ``` -Build with the following build command to include stable_diffusion. -* Download Notebook for stable diffusion from QPM Manager. 
-* Copy include folder from path_to_notebook>/model/example3/host_linux_target_android_with_MLPerf/include to mobile_back_qti/cpp/backend_qti/StableDiffusionShared/ -* Copy libStableDiffusionShared.so from /model/example3/host_linux_target_android_with_MLPerf/libs/aarch64-android/ to mobile_back_qti/cpp/backend_qti/StableDiffusionShared/ +Build with the following build command to include `stable_diffusion` + +* Download `Tutorial for stable diffusion` from QPM Manager. +* Copy `include folder` from `/model/example3/host_linux_target_android_with_MLPerf/include` to `mobile_back_qti/cpp/backend_qti/StableDiffusionShared/` +* Copy `libStableDiffusionShared.so` from `/model/example3/host_linux_target_android_with_MLPerf/libs/aarch64-android/` to `mobile_back_qti/cpp/backend_qti/StableDiffusionShared/` * Run the command below: + ```shell make OFFICIAL_BUILD=true FLUTTER_BUILD_NUMBER=1 WITH_STABLEDIFFUSION=1 WITH_QTI=1 docker/flutter/android/release ``` diff --git a/mobile_back_qti/cpp/backend_qti/BUILD b/mobile_back_qti/cpp/backend_qti/BUILD index 5320caf10..36f6d6415 100644 --- a/mobile_back_qti/cpp/backend_qti/BUILD +++ b/mobile_back_qti/cpp/backend_qti/BUILD @@ -71,12 +71,18 @@ config_setting( selects.config_setting_group( name = "android_with_stablediffusion", - match_all = ["android_arm64","stablediffusion_option"], + match_all = [ + "android_arm64", + "stablediffusion_option", + ], ) selects.config_setting_group( name = "android_without_stablediffusion", - match_all = ["android_arm64","nostablediffusion_option"], + match_all = [ + "android_arm64", + "nostablediffusion_option", + ], ) config_setting( @@ -106,7 +112,6 @@ snpe_copts = [ "-lc++_shared", ] - pbtxt2header( name = "qti_settings", srcs = [ diff --git a/mobile_back_qti/cpp/backend_qti/StableDiffusionShared/BUILD b/mobile_back_qti/cpp/backend_qti/StableDiffusionShared/BUILD index a72b94dc8..d447ec463 100644 --- a/mobile_back_qti/cpp/backend_qti/StableDiffusionShared/BUILD +++ 
b/mobile_back_qti/cpp/backend_qti/StableDiffusionShared/BUILD @@ -28,22 +28,28 @@ config_setting( cc_library( name = "opencv_core", - srcs = [OPENCV_ROOT_DIR + "/libs/arm64-v8a/libopencv_core.so", - OPENCV_ROOT_DIR + "/libs/arm64-v8a/libopencv_imgcodecs.so",], - includes = [OPENCV_ROOT_DIR + "/jni/include/"], + srcs = [ + OPENCV_ROOT_DIR + "/libs/arm64-v8a/libopencv_core.so", + OPENCV_ROOT_DIR + "/libs/arm64-v8a/libopencv_imgcodecs.so", + ], hdrs = glob([ - OPENCV_ROOT_DIR + "/jni/include/opencv2/*.hpp", - OPENCV_ROOT_DIR + "/jni/include/opencv2/**/*.hpp", + OPENCV_ROOT_DIR + "/jni/include/opencv2/*.hpp", + OPENCV_ROOT_DIR + "/jni/include/opencv2/**/*.hpp", ]), + includes = [OPENCV_ROOT_DIR + "/jni/include/"], visibility = ["//visibility:public"], ) cc_library( name = "StableDiffusionShared", srcs = glob(["libStableDiffusion.so"]), - hdrs = glob(["include/*.hpp","include/**/*.hpp","include/**/*.h"]), + hdrs = glob([ + "include/*.hpp", + "include/**/*.hpp", + "include/**/*.h", + ]), visibility = ["//visibility:public"], deps = [ - ":opencv_core", + ":opencv_core", ], ) diff --git a/mobile_back_qti/cpp/backend_qti/StableDiffusionShared/variables.bzl b/mobile_back_qti/cpp/backend_qti/StableDiffusionShared/variables.bzl index 67c9a8b9d..d67e29abc 100644 --- a/mobile_back_qti/cpp/backend_qti/StableDiffusionShared/variables.bzl +++ b/mobile_back_qti/cpp/backend_qti/StableDiffusionShared/variables.bzl @@ -28,4 +28,3 @@ stable_diffusion_external_deps_shared = repository_rule( local = True, attrs = {"workspace_dir": attr.string(mandatory = True)}, ) - diff --git a/mobile_back_qti/cpp/backend_qti/mlperf_helper.h b/mobile_back_qti/cpp/backend_qti/mlperf_helper.h index af4c3f244..8e747697f 100644 --- a/mobile_back_qti/cpp/backend_qti/mlperf_helper.h +++ b/mobile_back_qti/cpp/backend_qti/mlperf_helper.h @@ -144,11 +144,11 @@ static void process_config(const mlperf_backend_configuration_t *configs, backend_data->useCpuInt8_ = false; } } else if (strcmp(configs->keys[i], 
"pipeline") == 0) { - if (std::strcmp(configs->values[i], "StableDiffusionPipeline") == 0) { - backend_data->isStableDiffusion = true; - } else { - backend_data->isStableDiffusion = false; - } + if (std::strcmp(configs->values[i], "StableDiffusionPipeline") == 0) { + backend_data->isStableDiffusion = true; + } else { + backend_data->isStableDiffusion = false; + } } } diff --git a/mobile_back_qti/cpp/backend_qti/qti_backend_helper.cc b/mobile_back_qti/cpp/backend_qti/qti_backend_helper.cc index 10a795cf2..839aac86b 100644 --- a/mobile_back_qti/cpp/backend_qti/qti_backend_helper.cc +++ b/mobile_back_qti/cpp/backend_qti/qti_backend_helper.cc @@ -15,9 +15,9 @@ limitations under the License. #include "qti_backend_helper.h" +#include #include #include -#include #include "DiagLog/IDiagLog.h" #include "DlContainer/DlContainer.h" @@ -117,17 +117,19 @@ static Snpe_Runtime_t Str2Delegate(const snpe_runtimes_t delegate) { break; } - if (Snpe_Util_IsRuntimeAvailableCheckOption(runtime, SNPE_RUNTIME_CHECK_OPTION_UNSIGNEDPD_CHECK)) { + if (Snpe_Util_IsRuntimeAvailableCheckOption( + runtime, SNPE_RUNTIME_CHECK_OPTION_UNSIGNEDPD_CHECK)) { LOG(INFO) << "runtime " << delegate << " is available on this platform"; } else { - LOG(FATAL) << "runtime " << delegate << " is not available on this platform"; + LOG(FATAL) << "runtime " << delegate + << " is not available on this platform"; } return runtime; } bool QTIBackendHelper::IsRuntimeAvailable(const snpe_runtimes_t delegate) { - return (Str2Delegate (delegate) != SNPE_RUNTIME_UNSET); + return (Str2Delegate(delegate) != SNPE_RUNTIME_UNSET); } void QTIBackendHelper::use_psnpe(const char *model_path) { @@ -188,7 +190,7 @@ void QTIBackendHelper::use_psnpe(const char *model_path) { Snpe_BuildConfig_SetOutputBufferNames(buildConfigHandle, outputLayers); std::string platformOptionStr = ""; - if(useCpuInt8_){ + if (useCpuInt8_) { platformOptionStr = "enableCpuFxpMode:ON"; } if (Socs::get_use_dsp_features()) { @@ -233,22 +235,22 @@ void 
QTIBackendHelper::use_psnpe(const char *model_path) { LOG(FATAL) << "Error in init of snpe_ " << snpe_->snpeHandle; } - if(profilingLevel_ != SNPE_PROFILING_LEVEL_OFF){ + if (profilingLevel_ != SNPE_PROFILING_LEVEL_OFF) { auto diagLogHandle = Snpe_SNPE_GetDiagLogInterface_Ref(snpe_->snpeHandle); - if (!diagLogHandle) - LOG(INFO)<<"Get diagLogHandle failed"; + if (!diagLogHandle) LOG(INFO) << "Get diagLogHandle failed"; auto optionsHandle = Snpe_IDiagLog_GetOptions(diagLogHandle); std::string OutputDir = ".\diaglogs"; #ifdef __ANDROID__ - OutputDir = "/sdcard/Android/data/org.mlcommons.android.mlperfbench/files/diaglogs"; + OutputDir = + "/sdcard/Android/data/org.mlcommons.android.mlperfbench/files/diaglogs"; #endif Snpe_Options_SetLogFileDirectory(optionsHandle, OutputDir.c_str()); - if(Snpe_IDiagLog_SetOptions(diagLogHandle, optionsHandle) != SNPE_SUCCESS) - LOG(INFO)<<"Failed to set DiagLog options"; + if (Snpe_IDiagLog_SetOptions(diagLogHandle, optionsHandle) != SNPE_SUCCESS) + LOG(INFO) << "Failed to set DiagLog options"; if (Snpe_IDiagLog_Start(diagLogHandle) != SNPE_SUCCESS) - LOG(INFO)<<"Failed to start logger "; + LOG(INFO) << "Failed to start logger "; } // Snpe_DlContainer_Delete(containerHandle); } @@ -344,29 +346,30 @@ void QTIBackendHelper::use_snpe(const char *model_path) { LOG(FATAL) << "Error in init of the model " << snpe_; } - if(profilingLevel_ != SNPE_PROFILING_LEVEL_OFF){ + if (profilingLevel_ != SNPE_PROFILING_LEVEL_OFF) { auto diagLogHandle = Snpe_SNPE_GetDiagLogInterface_Ref(snpe_->snpeHandle); - if (!diagLogHandle) - LOG(INFO)<<"Get diagLogHandle failed"; + if (!diagLogHandle) LOG(INFO) << "Get diagLogHandle failed"; auto optionsHandle = Snpe_IDiagLog_GetOptions(diagLogHandle); std::string OutputDir = ".\diaglogs"; - #ifdef __ANDROID__ - OutputDir = "/sdcard/Android/data/org.mlcommons.android.mlperfbench/files/diaglogs"; - #endif +#ifdef __ANDROID__ + OutputDir = + "/sdcard/Android/data/org.mlcommons.android.mlperfbench/files/diaglogs"; 
+#endif Snpe_Options_SetLogFileDirectory(optionsHandle, OutputDir.c_str()); - if(Snpe_IDiagLog_SetOptions(diagLogHandle, optionsHandle) != SNPE_SUCCESS) - LOG(INFO)<<"Failed to set DiagLog options"; + if (Snpe_IDiagLog_SetOptions(diagLogHandle, optionsHandle) != SNPE_SUCCESS) + LOG(INFO) << "Failed to set DiagLog options"; if (Snpe_IDiagLog_Start(diagLogHandle) != SNPE_SUCCESS) - LOG(INFO)<<"Failed to start logger "; + LOG(INFO) << "Failed to start logger "; } } inline int QTIBackendHelper::get_num_inits() { return Socs::soc_num_inits(); } -void QTIBackendHelper::get_accelerator_instances(int &num_dsp, - int &num_gpu, int &num_cpu, int &num_gpu_fp16) { +void QTIBackendHelper::get_accelerator_instances(int &num_dsp, int &num_gpu, + int &num_cpu, + int &num_gpu_fp16) { std::string &delegate = delegate_; num_dsp = 0; num_gpu = 0; @@ -388,14 +391,13 @@ void QTIBackendHelper::get_accelerator_instances(int &num_dsp, } else if (delegate == "snpe_gpu_fp16" || delegate == "psnpe_gpu_fp16") { num_gpu_fp16 = 1; Socs::set_use_dsp_features(false); + } else { + LOG(FATAL) << "Error: Unsupported delegate " << delegate << " SoC ID " + << Socs::get_soc_name(); } - else { - LOG(FATAL) << "Error: Unsupported delegate " << delegate << " SoC ID " - << Socs::get_soc_name(); - } - } - LOG(INFO) << "Using " << num_dsp << " dsp " << num_gpu - << " gpu" << num_cpu << " cpu" << num_gpu_fp16 << " gpu_fp16"; + } + LOG(INFO) << "Using " << num_dsp << " dsp " << num_gpu << " gpu" << num_cpu + << " cpu" << num_gpu_fp16 << " gpu_fp16"; } void QTIBackendHelper::map_inputs() { @@ -423,11 +425,9 @@ void QTIBackendHelper::map_inputs() { Snpe_IBufferAttributes_GetDims(ubaOptHandle), sizeof(float)); Snpe_UserBufferEncoding_Handle_t ubeFloatHandle = Snpe_UserBufferEncodingFloat_Create(); - ubPtr.push_back(Snpe_Util_CreateUserBufferShared(std::move(inputBuffer.data()), - inputBuffer.size(), - 0, - stridesHandle, - ubeFloatHandle)); + ubPtr.push_back(Snpe_Util_CreateUserBufferShared( + 
std::move(inputBuffer.data()), inputBuffer.size(), 0, stridesHandle, + ubeFloatHandle)); Snpe_UserBufferMap_Add(inputMapHandle, name, ubPtr.back()); Snpe_TensorShape_Delete(stridesHandle); @@ -448,11 +448,9 @@ void QTIBackendHelper::map_inputs() { if (!ubeTfN) ubeTfN = Snpe_UserBufferEncodingTfN_Create(128.0, 1.0 / 255, 8); - ubPtr.push_back(Snpe_Util_CreateUserBufferShared(std::move(inputBuffer.data()), - inputBuffer.size(), - 0, - stridesHandle, - ubeTfN)); + ubPtr.push_back(Snpe_Util_CreateUserBufferShared( + std::move(inputBuffer.data()), inputBuffer.size(), 0, stridesHandle, + ubeTfN)); Snpe_UserBufferMap_Add(inputMapHandle, name, ubPtr.back()); Snpe_TensorShape_Delete(stridesHandle); @@ -504,7 +502,7 @@ void QTIBackendHelper::map_outputs() { Snpe_UserBufferMap_Add(outputMapHandle, name, x.back()); if (useIonBuffers_) Snpe_UserMemoryMap_Add(userMemoryMappedBufferMapHandle_, name, - bufs_[bi].at(name).data()); + bufs_[bi].at(name).data()); Snpe_UserBufferEncodingTfN_Delete(ubeTfN); Snpe_TensorShape_Delete(stridesHandle); @@ -613,7 +611,8 @@ void QTIBackendHelper::get_data_formats() { long bufSize = calcSizeFromDims(Snpe_TensorShape_Rank(dimsHandle), Snpe_TensorShape_GetDimensions(dimsHandle)); if (outputBufferType_ == FLOAT_32) { - if (snpeOutputLayers_ == "transpose" || snpeOutputTensors_ == "transpose:0") { + if (snpeOutputLayers_ == "transpose" || + snpeOutputTensors_ == "transpose:0") { // For mobileBERT, return output size as half the size of computed // values, // because the DLC returns only single layer as output but the app needs @@ -644,7 +643,7 @@ void QTIBackendHelper::get_data_formats() { } void QTIBackendHelper::set_runtime_config() { - int numDSP = 0, numGPU = 0, numCPU = 0, numGPU_FP16=0; + int numDSP = 0, numGPU = 0, numCPU = 0, numGPU_FP16 = 0; get_accelerator_instances(numDSP, numGPU, numCPU, numGPU_FP16); Snpe_Runtime_t runtime; @@ -722,11 +721,11 @@ std::vector get_normal(unsigned numbers, unsigned seed = 5, void 
QTIBackendHelper::initSd(const char *model_path, const char *lib_path) { #ifdef STABLEDIFFUSION_FLAG - bool use_mmap = false; // we don't want to use cached + bool use_mmap = false; // we don't want to use cached uint64_t context_bin_mmap_read_budget = 100000; std::string temp(lib_path); - native_lib_path = temp ; - std::string newtemp (model_path); + native_lib_path = temp; + std::string newtemp(model_path); data_folder_path = newtemp; // TODO: Below vars are using in preprocessInputSd @@ -737,30 +736,30 @@ void QTIBackendHelper::initSd(const char *model_path, const char *lib_path) { mlperf_data_t input; input.type = mlperf_data_t::Int32; - input.size = 77*1; // tokenized inputs 77 numbers + input.size = 77 * 1; // tokenized inputs 77 numbers inputFormat_.push_back(input); mlperf_data_t output; output.type = mlperf_data_t::Uint8; - output.size = 512*512*3; + output.size = 512 * 512 * 3; outputFormat_.push_back(output); sd_pipeline = new QnnApiHelpers(); - if (0 != sd_pipeline->Init(data_folder_path, native_lib_path, - 768, 77, 1.0, - 512, 512, 3.0, - use_mmap, context_bin_mmap_read_budget)) { - LOG(FATAL) << "Initialization Failure"; - } + if (0 != sd_pipeline->Init(data_folder_path, native_lib_path, 768, 77, 1.0, + 512, 512, 3.0, use_mmap, + context_bin_mmap_read_budget)) { + LOG(FATAL) << "Initialization Failure"; + } #endif } bool QTIBackendHelper::preprocessInputSd(void *data) { #ifdef STABLEDIFFUSION_FLAG - int32_t *input_prompt_ids = (int32_t *) data; + int32_t *input_prompt_ids = (int32_t *)data; std::vector noise = get_normal(64 * 64 * 4, seed); - return sd_pipeline->PreProcessInput(input_prompt_ids, noise, num_steps, guidance_scale); + return sd_pipeline->PreProcessInput(input_prompt_ids, noise, num_steps, + guidance_scale); #else return false; #endif @@ -790,8 +789,8 @@ bool QTIBackendHelper::getOutputSd(void **data) { } *data = inferenceReturn.m_ImageData; - //delete sd_pipeline; - //sd_pipeline = new QnnApiHelpers(); + // delete sd_pipeline; + // 
sd_pipeline = new QnnApiHelpers(); return true; #else return false; @@ -800,16 +799,16 @@ bool QTIBackendHelper::getOutputSd(void **data) { void QTIBackendHelper::deinitSd() { #ifdef STABLEDIFFUSION_FLAG - bool use_mmap = false; // we don't want to use cached - uint64_t context_bin_mmap_read_budget = 100000; - /*if (0 != sd_pipeline->Init(data_folder_path, native_lib_path, - 768, 77, 1.0, - 512, 512, 3.0, - use_mmap, context_bin_mmap_read_budget)) { - LOG(FATAL) << "Initialization Failure"; - } + bool use_mmap = false; // we don't want to use cached + uint64_t context_bin_mmap_read_budget = 100000; + /*if (0 != sd_pipeline->Init(data_folder_path, native_lib_path, + 768, 77, 1.0, + 512, 512, 3.0, + use_mmap, context_bin_mmap_read_budget)) { + LOG(FATAL) << "Initialization Failure"; + } */ - delete sd_pipeline; - sd_pipeline = nullptr; + delete sd_pipeline; + sd_pipeline = nullptr; #endif } \ No newline at end of file diff --git a/mobile_back_qti/cpp/backend_qti/qti_backend_helper.h b/mobile_back_qti/cpp/backend_qti/qti_backend_helper.h index 38d7e5fd6..736dc85ab 100644 --- a/mobile_back_qti/cpp/backend_qti/qti_backend_helper.h +++ b/mobile_back_qti/cpp/backend_qti/qti_backend_helper.h @@ -157,14 +157,14 @@ class QTIBackendHelper { ~QTIBackendHelper() { if (isStableDiffusion) { - deinitSd(); + deinitSd(); } else { - Snpe_RuntimeList_Delete(inputRuntimeListHandle); - Snpe_RuntimeList_Delete(dummyInputRuntimeListHandle); - Snpe_StringList_Delete(networkInputTensorNamesHandle_); - Snpe_StringList_Delete(networkOutputTensorNamesHandle_); - Snpe_UserBufferList_Delete(inputMapListHandle_); - Snpe_UserBufferList_Delete(outputMapListHandle_); + Snpe_RuntimeList_Delete(inputRuntimeListHandle); + Snpe_RuntimeList_Delete(dummyInputRuntimeListHandle); + Snpe_StringList_Delete(networkInputTensorNamesHandle_); + Snpe_StringList_Delete(networkOutputTensorNamesHandle_); + Snpe_UserBufferList_Delete(inputMapListHandle_); + Snpe_UserBufferList_Delete(outputMapListHandle_); } } }; diff 
--git a/mobile_back_qti/cpp/backend_qti/qti_c.cc b/mobile_back_qti/cpp/backend_qti/qti_c.cc index e343eab4d..c9c18906c 100644 --- a/mobile_back_qti/cpp/backend_qti/qti_c.cc +++ b/mobile_back_qti/cpp/backend_qti/qti_c.cc @@ -137,20 +137,18 @@ mlperf_backend_ptr_t mlperf_backend_create( LOG(INFO) << "snpe_version: " << snpe_version; // Stable Diffusion initialization - if(backend_data->isStableDiffusion) - { + if (backend_data->isStableDiffusion) { backend_data->initSd(model_path, native_lib_path); LOG(INFO) << "StableDiffusion build completed successfully"; } else { - // set runtime config backend_data->set_runtime_config(); // Use PSNPE or SNPE if (backend_data->useSnpe_) { - backend_data->use_snpe(model_path); + backend_data->use_snpe(model_path); } else { - backend_data->use_psnpe(model_path); + backend_data->use_psnpe(model_path); } backend_data->queryCount_ = 0; @@ -193,7 +191,7 @@ void mlperf_backend_delete(mlperf_backend_ptr_t backend_ptr) { tflite_backend_delete(backend_data->tfliteBackend_); } if (backend_data->isStableDiffusion) { - backend_data->deinitSd(); + backend_data->deinitSd(); } delete backend_data; backend_data_ = nullptr; @@ -213,9 +211,9 @@ mlperf_status_t mlperf_backend_issue_query(mlperf_backend_ptr_t backend_ptr) { if (backend_data->isStableDiffusion) { if (backend_data->executeSd()) { - ret = MLPERF_SUCCESS; + ret = MLPERF_SUCCESS; } else { - ret = MLPERF_FAILURE; + ret = MLPERF_FAILURE; } } else { ret = backend_data->execute(); @@ -269,13 +267,12 @@ mlperf_status_t mlperf_backend_set_input(mlperf_backend_ptr_t backend_ptr, data); } - if(backend_data->isStableDiffusion) - { - if (backend_data->preprocessInputSd(data)) { - return MLPERF_SUCCESS; - } else { - return MLPERF_FAILURE; - } + if (backend_data->isStableDiffusion) { + if (backend_data->preprocessInputSd(data)) { + return MLPERF_SUCCESS; + } else { + return MLPERF_FAILURE; + } } void *batchedDataPtr = ((backend_data->useIonBuffers_ == false) && @@ -335,21 +332,25 @@ mlperf_status_t 
mlperf_backend_get_output(mlperf_backend_ptr_t backend_ptr, if (backend_data->isStableDiffusion) { if (backend_data->getOutputSd(data)) { - return MLPERF_SUCCESS; + return MLPERF_SUCCESS; } else { - *data = nullptr; - return MLPERF_FAILURE; + *data = nullptr; + return MLPERF_FAILURE; } } - if (backend_data->snpeOutputTensors_.find("Postprocessor/BatchMultiClassNonMaxSuppression_classes") != std::string::npos - || backend_data->snpeOutputLayers_ == "Postprocessor/BatchMultiClassNonMaxSuppression") { + if (backend_data->snpeOutputTensors_.find( + "Postprocessor/BatchMultiClassNonMaxSuppression_classes") != + std::string::npos || + backend_data->snpeOutputLayers_ == + "Postprocessor/BatchMultiClassNonMaxSuppression") { // Reorder snpeOutputLayers_ for coco process_output const char *outputLayerName = backend_data->odLayerMap[outputIndex].c_str(); *data = backend_data->bufs_[batchIndex].at(outputLayerName).data(); return MLPERF_SUCCESS; - } else if (backend_data->snpeOutputTensors_.find("transpose:0") != std::string::npos - || backend_data->snpeOutputLayers_ == "transpose") { + } else if (backend_data->snpeOutputTensors_.find("transpose:0") != + std::string::npos || + backend_data->snpeOutputLayers_ == "transpose") { *data = backend_data->bufs_[int(batchIndex / backend_data->inputBatch_)] .at(Snpe_StringList_At( backend_data->networkOutputTensorNamesHandle_, 0)) diff --git a/mobile_back_qti/cpp/backend_qti/soc_utility.cc b/mobile_back_qti/cpp/backend_qti/soc_utility.cc index 64f6ade8f..9105622d4 100644 --- a/mobile_back_qti/cpp/backend_qti/soc_utility.cc +++ b/mobile_back_qti/cpp/backend_qti/soc_utility.cc @@ -110,7 +110,8 @@ std::map socDetails = std::vector({4, 5, 6, 7}), 8, false)}, {557, SocInfo(2, 0, 0, 0, true, qti_settings_sd8g3, "SD8G3", 1, std::vector({0, 1, 2, 3}), - std::vector({4, 5, 6, 7}), 8, true, /* stable_diffusion */ true)}, + std::vector({4, 5, 6, 7}), 8, true, + /* stable_diffusion */ true)}, {614, SocInfo(2, 0, 0, 0, true, qti_settings_sm8635, 
"SM8635", 1, std::vector({0, 1, 2, 3}), std::vector({4, 5, 6, 7}), 8, true)}, diff --git a/mobile_back_qti/cpp/backend_qti/soc_utility.h b/mobile_back_qti/cpp/backend_qti/soc_utility.h index 946d2ab6d..4980ccd89 100644 --- a/mobile_back_qti/cpp/backend_qti/soc_utility.h +++ b/mobile_back_qti/cpp/backend_qti/soc_utility.h @@ -71,7 +71,6 @@ class SocInfo { m_settings += qti_settings_stablediffusion; #endif } - } int m_num_dsp; diff --git a/mobile_back_qti/variables.bzl b/mobile_back_qti/variables.bzl index 986ab29c4..b5919bfc5 100644 --- a/mobile_back_qti/variables.bzl +++ b/mobile_back_qti/variables.bzl @@ -34,7 +34,7 @@ def _impl(repository_ctx): fail("qairt folder is not found in the repo") filepath = found.stdout[:-1] sdk_version = filepath[found.stdout.rfind("/") + 1:] - + print("Update SNPE version: " + sdk_version) # buildifier: disable=print repository_ctx.read(Label("@//:mobile_back_qti/qairt/" + sdk_version + "/ReleaseNotes.txt")) From b6ee8e72e98ea1d9313e067854bc0fe7a71c8f68 Mon Sep 17 00:00:00 2001 From: Mohit Mundhra Date: Thu, 3 Oct 2024 23:02:37 +0530 Subject: [PATCH 07/18] Applying linter changes --- mobile_back_qti/cpp/backend_qti/StableDiffusionShared/BUILD | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/mobile_back_qti/cpp/backend_qti/StableDiffusionShared/BUILD b/mobile_back_qti/cpp/backend_qti/StableDiffusionShared/BUILD index d447ec463..43c2176c1 100644 --- a/mobile_back_qti/cpp/backend_qti/StableDiffusionShared/BUILD +++ b/mobile_back_qti/cpp/backend_qti/StableDiffusionShared/BUILD @@ -14,7 +14,6 @@ ########################################################################## load("@stable_diffusion_external_deps_shared//:stable_diffusion_var_def_shared.bzl", "OPENCV_ROOT_DIR") -load("@snpe_version_loader//:snpe_var_def.bzl", "SNPE_VERSION") package( default_visibility = ["//visibility:public"], @@ -42,7 +41,7 @@ cc_library( cc_library( name = "StableDiffusionShared", - srcs = glob(["libStableDiffusion.so"]), + srcs = 
"libStableDiffusion.so", hdrs = glob([ "include/*.hpp", "include/**/*.hpp", From 786a485055ea1289658047e935313cb1807a4d55 Mon Sep 17 00:00:00 2001 From: Anh Date: Tue, 15 Oct 2024 13:08:35 +0700 Subject: [PATCH 08/18] Update QTI backend for submission v4.1 (#13) * Ignore QTI private SDK and libs * Update path for QTI SDK * Download QTI libs for SD task * Fix QTI BUILD file * Update integration test * Set WITH_STABLEDIFFUSION=1 for CI build * Fix QTI pbtxt files * Disable SD task for Apple devices * Update model path and checksum for QTI * Remove tiny groundtruth_path for SD task * Update path for TFLite models * Update groundtruth_path for SD task --- .github/workflows/android-build-test.yml | 17 +++++-- .gitignore | 2 - flutter/assets/tasks.pbtxt | 8 ++-- .../integration_test/expected_accuracy.dart | 18 +++---- .../integration_test/expected_throughput.dart | 4 +- flutter/integration_test/first_test.dart | 4 +- flutter/integration_test/utils.dart | 15 ++++++ mobile_back_qti/.gitignore | 4 ++ mobile_back_qti/README.md | 4 +- .../backend_qti/StableDiffusionShared/BUILD | 2 +- .../settings/qti_settings_default_cpu.pbtxt | 25 +++++----- .../settings/qti_settings_default_dsp.pbtxt | 24 +++++----- .../settings/qti_settings_default_gpu.pbtxt | 48 +++++++++---------- .../settings/qti_settings_gpufp16.pbtxt | 22 ++++----- .../settings/qti_settings_sd7cxg3.pbtxt | 24 +++++----- .../settings/qti_settings_sd7g1.pbtxt | 24 +++++----- .../settings/qti_settings_sd7pg2.pbtxt | 24 +++++----- .../settings/qti_settings_sd8cxg3.pbtxt | 24 +++++----- .../settings/qti_settings_sd8g1.pbtxt | 24 +++++----- .../settings/qti_settings_sd8g2.pbtxt | 24 +++++----- .../settings/qti_settings_sd8g3.pbtxt | 22 ++++----- .../settings/qti_settings_sd8pg1.pbtxt | 24 +++++----- .../settings/qti_settings_sdm778.pbtxt | 24 +++++----- .../settings/qti_settings_sdm888.pbtxt | 24 +++++----- .../settings/qti_settings_sm4450.pbtxt | 24 +++++----- .../settings/qti_settings_sm7550.pbtxt | 24 +++++----- 
.../settings/qti_settings_sm8635.pbtxt | 24 +++++----- .../qti_settings_stablediffusion.pbtxt | 24 +++++----- .../tflite_settings_android.pbtxt | 16 +++---- .../tflite_settings_apple_main.pbtxt | 36 +++++++------- 30 files changed, 304 insertions(+), 279 deletions(-) create mode 100644 mobile_back_qti/.gitignore diff --git a/.github/workflows/android-build-test.yml b/.github/workflows/android-build-test.yml index 4d054ace8..e6034b210 100644 --- a/.github/workflows/android-build-test.yml +++ b/.github/workflows/android-build-test.yml @@ -100,15 +100,22 @@ jobs: rm /tmp/${SAMSUNG_LIB}.zip && \ mkdir -p mobile_back_samsung/samsung/lib/internal && \ mv /tmp/${SAMSUNG_LIB}/* mobile_back_samsung/samsung/lib/internal/ + - name: Download QTI SDK + env: + QTI_SDK: qairt-2.25.0.240728-linux + run: | + gsutil cp gs://mobile-app-build-290400_github-actions/lib/v4.1/${QTI_SDK}.zip /tmp/ && \ + unzip /tmp/${QTI_SDK}.zip -d /tmp/${QTI_SDK} && \ + rm /tmp/${QTI_SDK}.zip && \ + mv /tmp/${QTI_SDK}/* mobile_back_qti/ - name: Download QTI libraries env: - QTI_LIB: qaisw-2.20.0.240223_linux + QTI_LIB: StableDiffusionShared run: | - gsutil cp gs://mobile-app-build-290400_github-actions/lib/v4.0/${QTI_LIB}.zip /tmp/ && \ + gsutil cp gs://mobile-app-build-290400_github-actions/lib/v4.1/${QTI_LIB}.zip /tmp/ && \ unzip /tmp/${QTI_LIB}.zip -d /tmp/${QTI_LIB} && \ rm /tmp/${QTI_LIB}.zip && \ - mkdir -p mobile_back_qti/${QTI_LIB} && \ - mv /tmp/${QTI_LIB}/* mobile_back_qti/${QTI_LIB}/ + mv /tmp/${QTI_LIB}/* mobile_back_qti/cpp/backend_qti/StableDiffusionShared/ - name: Cache bazel uses: actions/cache@v4 with: @@ -143,6 +150,7 @@ jobs: PERF_TEST: true WITH_TFLITE: 0 WITH_QTI: 1 + WITH_STABLEDIFFUSION: 1 run: | make flutter/android/release flutter/android/apk flutter/android/test-apk gsutil mv output/android-apks/test-main.apk $GCLOUD_BUCKET_PATH/test-main-qti.apk @@ -177,6 +185,7 @@ jobs: WITH_QTI: 1 WITH_SAMSUNG: 1 WITH_APPLE: 0 + WITH_STABLEDIFFUSION: 1 run: | make flutter/android/release 
gsutil cp flutter/build/app/outputs/bundle/release/app-release.aab $GCLOUD_BUCKET_PATH/app-release.aab diff --git a/.gitignore b/.gitignore index 1c154391b..8a325c3db 100644 --- a/.gitignore +++ b/.gitignore @@ -9,8 +9,6 @@ /output_logs /datasets/output /datasets/downloads -/mobile_back_qti/snpe-* -/mobile_back_qti/qairt *.so * .apk * .tflite diff --git a/flutter/assets/tasks.pbtxt b/flutter/assets/tasks.pbtxt index db8020d0e..ece13c9d9 100644 --- a/flutter/assets/tasks.pbtxt +++ b/flutter/assets/tasks.pbtxt @@ -229,18 +229,18 @@ task { type: COCOGEN full { name: "COCO validation set for Stable Diffusion" - input_path: "https://github.com/anhappdev/tmp/releases/download/6/coco_gen_test.tfrecord" + input_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.1-dataset/coco_gen_test.tfrecord" groundtruth_path: "local:///mlperf_models/stable-diffusion/clip_model_512x512.tflite" } lite { name: "COCO validation set for Stable Diffusion" - input_path: "https://github.com/anhappdev/tmp/releases/download/6/coco_gen_full.tfrecord" + input_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.1-dataset/coco_gen_full.tfrecord" groundtruth_path: "" } tiny { name: "COCO validation set for Stable Diffusion" - input_path: "https://github.com/anhappdev/tmp/releases/download/2/coco_gen.tfrecord" - groundtruth_path: "local:///mlperf_models/stable_diffusion/clip_model_512x512.tflite" + input_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.1-dataset/coco_gen_test.tfrecord" + groundtruth_path: "" } } model { diff --git a/flutter/integration_test/expected_accuracy.dart b/flutter/integration_test/expected_accuracy.dart index d9c6439a3..7b8df525f 100644 --- a/flutter/integration_test/expected_accuracy.dart +++ b/flutter/integration_test/expected_accuracy.dart @@ -77,15 +77,15 @@ const Map _superResolution = { // TODO (anhappdev): update expected accuracy for stable diffusion const Map _stableDiffusion = { - 'cpu': Interval(min: 0, 
max: 100), - 'npu': Interval(min: 0, max: 100), - 'tpu': Interval(min: 0, max: 100), - 'ane|TFLite': Interval(min: 0, max: 100), - 'ane|Core ML': Interval(min: 0, max: 100), - 'cpu&gpu&ane': Interval(min: 0, max: 100), - 'snpe_dsp': Interval(min: 0, max: 100), - 'psnpe_dsp': Interval(min: 0, max: 100), - 'neuron': Interval(min: 0, max: 100), + 'cpu': Interval(min: 0, max: 1.0), + 'npu': Interval(min: 0, max: 1.0), + 'tpu': Interval(min: 0, max: 1.0), + 'ane|TFLite': Interval(min: 0, max: 1.0), + 'ane|Core ML': Interval(min: 0, max: 1.0), + 'cpu&gpu&ane': Interval(min: 0, max: 1.0), + 'snpe_dsp': Interval(min: 0, max: 1.0), + 'psnpe_dsp': Interval(min: 0, max: 1.0), + 'neuron': Interval(min: 0, max: 1.0), }; const benchmarkExpectedAccuracy = { diff --git a/flutter/integration_test/expected_throughput.dart b/flutter/integration_test/expected_throughput.dart index 342254e59..40f9e83d5 100644 --- a/flutter/integration_test/expected_throughput.dart +++ b/flutter/integration_test/expected_throughput.dart @@ -49,7 +49,7 @@ const Map> _imageClassificationV2 = { _kS22Ultra: Interval(min: 250, max: 400), }, _kMediatekBackend: { - _kDN2103: Interval(min: 5, max: 90), + _kDN2103: Interval(min: 4.5, max: 50), }, }; @@ -194,7 +194,7 @@ const Map> _imageClassificationOfflineV2 = { _kS22Ultra: Interval(min: 250, max: 450), }, _kMediatekBackend: { - _kDN2103: Interval(min: 5, max: 90), + _kDN2103: Interval(min: 4.5, max: 50), }, }; diff --git a/flutter/integration_test/first_test.dart b/flutter/integration_test/first_test.dart index 23bc1a48d..feaff2c7f 100644 --- a/flutter/integration_test/first_test.dart +++ b/flutter/integration_test/first_test.dart @@ -41,13 +41,15 @@ void main() { testWidgets('run benchmarks', (WidgetTester tester) async { await startApp(tester); await validateSettings(tester); + await setBenchmarks(tester); await runBenchmarks(tester); }); testWidgets('check results', (WidgetTester tester) async { final extendedResult = await obtainResult(); 
printResults(extendedResult); - checkTaskCount(extendedResult); + // TODO (anhappdev) uncomment when stable_diffusion is implemented for all backends. + // checkTaskCount(extendedResult); checkTasks(extendedResult); }); diff --git a/flutter/integration_test/utils.dart b/flutter/integration_test/utils.dart index f53b9a5e4..f78732a54 100644 --- a/flutter/integration_test/utils.dart +++ b/flutter/integration_test/utils.dart @@ -64,6 +64,21 @@ Future validateSettings(WidgetTester tester) async { } } +Future setBenchmarks(WidgetTester tester) async { + final state = tester.state(find.byType(MaterialApp)); + final benchmarkState = state.context.read(); + for (var benchmark in benchmarkState.benchmarks) { + // Disable test for stable diffusion since it take too long to finish. + if (benchmark.id == BenchmarkId.stableDiffusion) { + benchmark.isActive = false; + print('Benchmark ${benchmark.id} is disabled'); + } else { + benchmark.isActive = true; + print('Benchmark ${benchmark.id} is enabled'); + } + } +} + Future runBenchmarks(WidgetTester tester) async { const downloadTimeout = 20 * 60; // 20 minutes const runBenchmarkTimeout = 30 * 60; // 30 minutes diff --git a/mobile_back_qti/.gitignore b/mobile_back_qti/.gitignore new file mode 100644 index 000000000..96cd036db --- /dev/null +++ b/mobile_back_qti/.gitignore @@ -0,0 +1,4 @@ +snpe-* +qaisw-* +qairt +cpp/backend_qti/StableDiffusionShared/include/opencv diff --git a/mobile_back_qti/README.md b/mobile_back_qti/README.md index 60e6cf88d..b4c2d771a 100644 --- a/mobile_back_qti/README.md +++ b/mobile_back_qti/README.md @@ -78,8 +78,8 @@ make OFFICIAL_BUILD=true FLUTTER_BUILD_NUMBER=1 WITH_QTI=1 docker/flutter/androi Build with the following build command to include `stable_diffusion` * Download `Tutorial for stable diffusion` from QPM Manager. 
-* Copy `include folder` from `/model/example3/host_linux_target_android_with_MLPerf/include` to `mobile_back_qti/cpp/backend_qti/StableDiffusionShared/` -* Copy `libStableDiffusionShared.so` from `/model/example3/host_linux_target_android_with_MLPerf/libs/aarch64-android/` to `mobile_back_qti/cpp/backend_qti/StableDiffusionShared/` +* Copy `include` folder from `/model/example3/host_linux_target_android_with_MLPerf/include` to `mobile_back_qti/cpp/backend_qti/StableDiffusionShared/` +* Copy `libStableDiffusion.so` from `/model/example3/host_linux_target_android_with_MLPerf/libs/aarch64-android/` to `mobile_back_qti/cpp/backend_qti/StableDiffusionShared/` * Run the command below: ```shell diff --git a/mobile_back_qti/cpp/backend_qti/StableDiffusionShared/BUILD b/mobile_back_qti/cpp/backend_qti/StableDiffusionShared/BUILD index 43c2176c1..a81cff9c3 100644 --- a/mobile_back_qti/cpp/backend_qti/StableDiffusionShared/BUILD +++ b/mobile_back_qti/cpp/backend_qti/StableDiffusionShared/BUILD @@ -41,7 +41,7 @@ cc_library( cc_library( name = "StableDiffusionShared", - srcs = "libStableDiffusion.so", + srcs = ["libStableDiffusion.so"], hdrs = glob([ "include/*.hpp", "include/**/*.hpp", diff --git a/mobile_back_qti/cpp/backend_qti/settings/qti_settings_default_cpu.pbtxt b/mobile_back_qti/cpp/backend_qti/settings/qti_settings_default_cpu.pbtxt index ad04f7d1c..115183265 100644 --- a/mobile_back_qti/cpp/backend_qti/settings/qti_settings_default_cpu.pbtxt +++ b/mobile_back_qti/cpp/backend_qti/settings/qti_settings_default_cpu.pbtxt @@ -36,8 +36,8 @@ benchmark_setting { accelerator_name: "snpe_cpu" accelerator_desc: "CPU" model_file: { - model_path: "local:///mlperf_models/mobilenet_v4_htp.dlc" - model_checksum: "" + model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.1-qualcomm/mobilenet_v4_htp.dlc" + model_checksum: "56e5039260e20e5c2a0b54cc0fac8098" } } single_stream_expected_latency_ns: 250000 @@ -70,8 +70,8 @@ benchmark_setting { accelerator_desc: 
"CPU" batch_size: 12360 model_file: { - model_path: "local:///mlperf_models/mobilenet_v4_htp_batched_4.dlc" - model_checksum: "" + model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.1-qualcomm/mobilenet_v4_htp_batched_4.dlc" + model_checksum: "7863deea588936fe6e09565ed47dde95" } } delegate_selected: "SNPE_CPU" @@ -94,8 +94,8 @@ benchmark_setting { accelerator_name: "snpe_cpu" accelerator_desc: "CPU" model_file: { - model_path: "local:///mlperf_models/ssd_mobiledet_qat_htp.dlc" - model_checksum: "" + model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.1-qualcomm/ssd_mobiledet_qat_htp.dlc" + model_checksum: "49c6afbfefffb78269fe73a6ee1b4a85" } } delegate_selected: "SNPE_CPU" @@ -122,8 +122,8 @@ benchmark_setting { accelerator_name: "snpe_cpu" accelerator_desc: "CPU" model_file: { - model_path: "local:///mlperf_models/mobilebert_quantized_htp.dlc" - model_checksum: "" + model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.1-qualcomm/mobilebert_quantized_htp.dlc" + model_checksum: "96d947175f04950898a372890907dda1" } } delegate_selected: "SNPE_CPU" @@ -150,9 +150,10 @@ benchmark_setting { accelerator_name: "snpe_cpu" accelerator_desc: "CPU" model_file: { - model_path: "local:///mlperf_models/mobile_mosaic_htp.dlc" - model_checksum: "" + model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.1-qualcomm/mobile_mosaic_htp.dlc" + model_checksum: "3c0dfbacda053773d6afb34503d9991a" } + } delegate_selected: "SNPE_CPU" } @@ -177,8 +178,8 @@ benchmark_setting { accelerator_name: "snpe_cpu" accelerator_desc: "CPU" model_file: { - model_path: "local:///mlperf_models/snusr_htp.dlc" - model_checksum: "" + model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.1-qualcomm/snusr_htp.dlc" + model_checksum: "668da9816073d67972704e237137a50f" } } delegate_selected: "SNPE_CPU" diff --git a/mobile_back_qti/cpp/backend_qti/settings/qti_settings_default_dsp.pbtxt 
b/mobile_back_qti/cpp/backend_qti/settings/qti_settings_default_dsp.pbtxt index 113501891..8c5ae233f 100644 --- a/mobile_back_qti/cpp/backend_qti/settings/qti_settings_default_dsp.pbtxt +++ b/mobile_back_qti/cpp/backend_qti/settings/qti_settings_default_dsp.pbtxt @@ -36,8 +36,8 @@ benchmark_setting { accelerator_name: "snpe_dsp" accelerator_desc: "HTP" model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.0-snpe/mobilenet_v4_htp.dlc" - model_checksum: "dbab3e231e5f83aabc80d5b69e6dad32" + model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.1-qualcomm/mobilenet_v4_htp.dlc" + model_checksum: "56e5039260e20e5c2a0b54cc0fac8098" } } delegate_selected: "SNPE_DSP" @@ -65,8 +65,8 @@ benchmark_setting { accelerator_desc: "HTP" batch_size: 12288 model_file: { - model_path: "local:///mlperf_models/mobilenet_v4_htp_batched_4.dlc" - model_checksum: "" + model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.1-qualcomm/mobilenet_v4_htp_batched_4.dlc" + model_checksum: "7863deea588936fe6e09565ed47dde95" } } delegate_selected: "SNPE_DSP" @@ -93,8 +93,8 @@ benchmark_setting { accelerator_name: "snpe_dsp" accelerator_desc: "HTP" model_file: { - model_path: "local:///mlperf_models/ssd_mobiledet_qat_htp.dlc" - model_checksum: "" + model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.1-qualcomm/ssd_mobiledet_qat_htp.dlc" + model_checksum: "49c6afbfefffb78269fe73a6ee1b4a85" } } delegate_selected: "SNPE_DSP" @@ -129,8 +129,8 @@ benchmark_setting { accelerator_name: "snpe_dsp" accelerator_desc: "DSP" model_file: { - model_path: "local:///mlperf_models/mobilebert_quantized_htp.dlc" - model_checksum: "" + model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.1-qualcomm/mobilebert_quantized_htp.dlc" + model_checksum: "96d947175f04950898a372890907dda1" } } delegate_selected: "SNPE_DSP" @@ -161,8 +161,8 @@ benchmark_setting { accelerator_name: "snpe_dsp" 
accelerator_desc: "DSP" model_file: { - model_path: "local:///mlperf_models/mobile_mosaic_htp.dlc" - model_checksum: "" + model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.1-qualcomm/mobile_mosaic_htp.dlc" + model_checksum: "3c0dfbacda053773d6afb34503d9991a" } } delegate_selected: "SNPE_DSP" @@ -197,8 +197,8 @@ benchmark_setting { accelerator_name: "snpe_dsp" accelerator_desc: "DSP" model_file: { - model_path: "local:///mlperf_models/snusr_htp.dlc" - model_checksum: "" + model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.1-qualcomm/snusr_htp.dlc" + model_checksum: "668da9816073d67972704e237137a50f" } } delegate_selected: "SNPE_DSP" diff --git a/mobile_back_qti/cpp/backend_qti/settings/qti_settings_default_gpu.pbtxt b/mobile_back_qti/cpp/backend_qti/settings/qti_settings_default_gpu.pbtxt index 23350c8cf..37e41b7cc 100644 --- a/mobile_back_qti/cpp/backend_qti/settings/qti_settings_default_gpu.pbtxt +++ b/mobile_back_qti/cpp/backend_qti/settings/qti_settings_default_gpu.pbtxt @@ -32,8 +32,8 @@ benchmark_setting { accelerator_name: "snpe_gpu" accelerator_desc: "GPU" model_file: { - model_path: "local:///mlperf_models/mobilenet_v4_htp.dlc" - model_checksum: "" + model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.1-qualcomm/mobilenet_v4_htp.dlc" + model_checksum: "56e5039260e20e5c2a0b54cc0fac8098" } } delegate_choice: { @@ -43,8 +43,8 @@ benchmark_setting { accelerator_desc: "CPU" batch_size: 128 model_file: { - model_path: "local:///mlperf_models/mobilenet_v4_htp.dlc" - model_checksum: "" + model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.1-qualcomm/mobilenet_v4_htp.dlc" + model_checksum: "56e5039260e20e5c2a0b54cc0fac8098" } } delegate_selected: "SNPE_GPU" @@ -68,8 +68,8 @@ benchmark_setting { accelerator_desc: "GPU" batch_size: 12360 model_file: { - model_path: "local:///mlperf_models/mobilenet_v4_htp_batched_4.dlc" - model_checksum: "" + model_path: 
"https://github.com/mlcommons/mobile_models/releases/download/v4.1-qualcomm/mobilenet_v4_htp_batched_4.dlc" + model_checksum: "7863deea588936fe6e09565ed47dde95" } } delegate_choice: { @@ -79,8 +79,8 @@ benchmark_setting { accelerator_desc: "CPU" batch_size: 128 model_file: { - model_path: "local:///mlperf_models/mobilenet_v4_htp_batched_4.dlc" - model_checksum: "" + model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.1-qualcomm/mobilenet_v4_htp_batched_4.dlc" + model_checksum: "7863deea588936fe6e09565ed47dde95" } } delegate_selected: "SNPE_GPU" @@ -103,8 +103,8 @@ benchmark_setting { accelerator_name: "snpe_gpu" accelerator_desc: "GPU" model_file: { - model_path: "local:///mlperf_models/ssd_mobiledet_qat_htp.dlc" - model_checksum: "" + model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.1-qualcomm/ssd_mobiledet_qat_htp.dlc" + model_checksum: "49c6afbfefffb78269fe73a6ee1b4a85" } } delegate_choice: { @@ -113,8 +113,8 @@ benchmark_setting { accelerator_name: "snpe_cpu" accelerator_desc: "CPU" model_file: { - model_path: "local:///mlperf_models/ssd_mobiledet_qat_htp.dlc" - model_checksum: "" + model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.1-qualcomm/ssd_mobiledet_qat_htp.dlc" + model_checksum: "49c6afbfefffb78269fe73a6ee1b4a85" } } delegate_selected: "SNPE_GPU" @@ -141,8 +141,8 @@ benchmark_setting { accelerator_name: "snpe_gpu" accelerator_desc: "GPU" model_file: { - model_path: "local:///mlperf_models/mobilebert_quantized_htp.dlc" - model_checksum: "" + model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.1-qualcomm/mobilebert_quantized_htp.dlc" + model_checksum: "96d947175f04950898a372890907dda1" } } delegate_choice: { @@ -151,8 +151,8 @@ benchmark_setting { accelerator_name: "snpe_cpu" accelerator_desc: "CPU" model_file: { - model_path: "local:///mlperf_models/mobilebert_quantized_htp.dlc" - model_checksum: "" + model_path: 
"https://github.com/mlcommons/mobile_models/releases/download/v4.1-qualcomm/mobilebert_quantized_htp.dlc" + model_checksum: "96d947175f04950898a372890907dda1" } } delegate_selected: "SNPE_GPU" @@ -179,8 +179,8 @@ benchmark_setting { accelerator_name: "snpe_gpu" accelerator_desc: "GPU" model_file: { - model_path: "local:///mlperf_models/mobile_mosaic_htp.dlc" - model_checksum: "" + model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.1-qualcomm/mobile_mosaic_htp.dlc" + model_checksum: "3c0dfbacda053773d6afb34503d9991a" } } delegate_choice: { @@ -189,8 +189,8 @@ benchmark_setting { accelerator_name: "snpe_cpu" accelerator_desc: "CPU" model_file: { - model_path: "local:///mlperf_models/mobile_mosaic_htp.dlc" - model_checksum: "" + model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.1-qualcomm/mobile_mosaic_htp.dlc" + model_checksum: "3c0dfbacda053773d6afb34503d9991a" } } delegate_selected: "SNPE_GPU" @@ -217,8 +217,8 @@ benchmark_setting { accelerator_name: "snpe_gpu" accelerator_desc: "GPU" model_file: { - model_path: "local:///mlperf_models/snusr_htp.dlc" - model_checksum: "" + model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.1-qualcomm/snusr_htp.dlc" + model_checksum: "668da9816073d67972704e237137a50f" } } delegate_choice: { @@ -227,8 +227,8 @@ benchmark_setting { accelerator_name: "snpe_cpu" accelerator_desc: "CPU" model_file: { - model_path: "local:///mlperf_models/snusr_htp.dlc" - model_checksum: "" + model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.1-qualcomm/snusr_htp.dlc" + model_checksum: "668da9816073d67972704e237137a50f" } } delegate_selected: "SNPE_GPU" diff --git a/mobile_back_qti/cpp/backend_qti/settings/qti_settings_gpufp16.pbtxt b/mobile_back_qti/cpp/backend_qti/settings/qti_settings_gpufp16.pbtxt index 3b620b299..5f4328a4b 100644 --- a/mobile_back_qti/cpp/backend_qti/settings/qti_settings_gpufp16.pbtxt +++ 
b/mobile_back_qti/cpp/backend_qti/settings/qti_settings_gpufp16.pbtxt @@ -29,11 +29,11 @@ benchmark_setting { delegate_choice: { priority: 1 delegate_name: "SNPE_GPU_FP16" - accelerator_name: "snpe_gpu_fp16 + accelerator_name: "snpe_gpu_fp16" accelerator_desc: "GPU_FP16" model_file: { - model_path: "local:///mlperf_models/mobilenet_v4_htp.dlc" - model_checksum: "" + model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.1-qualcomm/mobilenet_v4_htp.dlc" + model_checksum: "56e5039260e20e5c2a0b54cc0fac8098" } } single_stream_expected_latency_ns: 500000 @@ -58,8 +58,8 @@ benchmark_setting { accelerator_desc: "GPU_FP16" batch_size: 12360 model_file: { - model_path: "local:///mobilenet_v4_htp_batched_4.dlc" - model_checksum: "" + model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.1-qualcomm/mobilenet_v4_htp_batched_4.dlc" + model_checksum: "7863deea588936fe6e09565ed47dde95" } } delegate_selected: "SNPE_GPU_FP16" @@ -94,8 +94,8 @@ benchmark_setting { accelerator_name: "snpe_gpu_fp16" accelerator_desc: "GPU_FP16" model_file: { - model_path: "local:///mlperf_models/mobilebert_quantized_htp.dlc" - model_checksum: "" + model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.1-qualcomm/mobilebert_quantized_htp.dlc" + model_checksum: "96d947175f04950898a372890907dda1" } } delegate_selected: "SNPE_GPU_FP16" @@ -122,8 +122,8 @@ benchmark_setting { accelerator_name: "snpe_gpu_fp16" accelerator_desc: "GPU_FP16" model_file: { - model_path: "local:///mlperf_models/mobile_mosaic_htp.dlc" - model_checksum: "" + model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.1-qualcomm/mobile_mosaic_htp.dlc" + model_checksum: "3c0dfbacda053773d6afb34503d9991a" } } delegate_selected: "SNPE_GPU_FP16" @@ -157,8 +157,8 @@ benchmark_setting { accelerator_name: "snpe_gpu_fp16" accelerator_desc: "GPU_FP16" model_file: { - model_path: "local:///mlperf_models/snusr_htp.dlc" - model_checksum: "" + model_path: 
"https://github.com/mlcommons/mobile_models/releases/download/v4.1-qualcomm/snusr_htp.dlc" + model_checksum: "668da9816073d67972704e237137a50f" } } delegate_selected: "SNPE_GPU_FP16" diff --git a/mobile_back_qti/cpp/backend_qti/settings/qti_settings_sd7cxg3.pbtxt b/mobile_back_qti/cpp/backend_qti/settings/qti_settings_sd7cxg3.pbtxt index 59db16e3d..10c926482 100644 --- a/mobile_back_qti/cpp/backend_qti/settings/qti_settings_sd7cxg3.pbtxt +++ b/mobile_back_qti/cpp/backend_qti/settings/qti_settings_sd7cxg3.pbtxt @@ -40,8 +40,8 @@ benchmark_setting { accelerator_name: "snpe_dsp" accelerator_desc: "HTP" model_file: { - model_path: "local:///mlperf_models/mobilenet_v4_htp.dlc" - model_checksum: "" + model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.1-qualcomm/mobilenet_v4_htp.dlc" + model_checksum: "56e5039260e20e5c2a0b54cc0fac8098" } } single_stream_expected_latency_ns: 250000 @@ -78,8 +78,8 @@ benchmark_setting { accelerator_desc: "HTP" batch_size: 12288 model_file: { - model_path: "local:///mlperf_models/mobilenet_v4_htp_batched_4.dlc" - model_checksum: "" + model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.1-qualcomm/mobilenet_v4_htp_batched_4.dlc" + model_checksum: "7863deea588936fe6e09565ed47dde95" } } delegate_selected: "SNPE_DSP" @@ -107,8 +107,8 @@ benchmark_setting { accelerator_name: "snpe_dsp" accelerator_desc: "HTP" model_file: { - model_path: "local:///mlperf_models/ssd_mobiledet_qat_htp.dlc" - model_checksum: "" + model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.1-qualcomm/ssd_mobiledet_qat_htp.dlc" + model_checksum: "49c6afbfefffb78269fe73a6ee1b4a85" } } delegate_selected: "SNPE_DSP" @@ -139,8 +139,8 @@ benchmark_setting { accelerator_name: "snpe_dsp" accelerator_desc: "DSP" model_file: { - model_path: "local:///mlperf_models/mobilebert_quantized_htp.dlc" - model_checksum: "" + model_path: 
"https://github.com/mlcommons/mobile_models/releases/download/v4.1-qualcomm/mobilebert_quantized_htp.dlc" + model_checksum: "96d947175f04950898a372890907dda1" } } delegate_selected: "SNPE_DSP" @@ -171,8 +171,8 @@ benchmark_setting { accelerator_name: "snpe_dsp" accelerator_desc: "DSP" model_file: { - model_path: "local:///mlperf_models/mobile_mosaic_htp.dlc" - model_checksum: "" + model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.1-qualcomm/mobile_mosaic_htp.dlc" + model_checksum: "3c0dfbacda053773d6afb34503d9991a" } } delegate_selected: "SNPE_DSP" @@ -199,8 +199,8 @@ benchmark_setting { accelerator_name: "snpe_dsp" accelerator_desc: "DSP" model_file: { - model_path: "local:///mlperf_models//snusr_htp.dlc" - model_checksum: "" + model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.1-qualcomm//snusr_htp.dlc" + model_checksum: "668da9816073d67972704e237137a50f" } } delegate_selected: "SNPE_DSP" diff --git a/mobile_back_qti/cpp/backend_qti/settings/qti_settings_sd7g1.pbtxt b/mobile_back_qti/cpp/backend_qti/settings/qti_settings_sd7g1.pbtxt index 9c1e8f8d5..03169e306 100644 --- a/mobile_back_qti/cpp/backend_qti/settings/qti_settings_sd7g1.pbtxt +++ b/mobile_back_qti/cpp/backend_qti/settings/qti_settings_sd7g1.pbtxt @@ -40,8 +40,8 @@ benchmark_setting { accelerator_name: "snpe_dsp" accelerator_desc: "HTP" model_file: { - model_path: "local:///mlperf_models/mobilenet_v4_htp.dlc" - model_checksum: "" + model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.1-qualcomm/mobilenet_v4_htp.dlc" + model_checksum: "56e5039260e20e5c2a0b54cc0fac8098" } } delegate_selected: "SNPE_DSP" @@ -77,8 +77,8 @@ benchmark_setting { accelerator_desc: "HTP" batch_size: 12288 model_file: { - model_path: "local:///mlperf_models/mobilenet_v4_htp_batched_4.dlc" - model_checksum: "" + model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.1-qualcomm/mobilenet_v4_htp_batched_4.dlc" + model_checksum: 
"7863deea588936fe6e09565ed47dde95" } } delegate_selected: "SNPE_DSP" @@ -105,8 +105,8 @@ benchmark_setting { accelerator_name: "snpe_dsp" accelerator_desc: "HTP" model_file: { - model_path: "local:///mlperf_models/ssd_mobiledet_qat_htp.dlc" - model_checksum: "" + model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.1-qualcomm/ssd_mobiledet_qat_htp.dlc" + model_checksum: "49c6afbfefffb78269fe73a6ee1b4a85" } } delegate_selected: "SNPE_DSP" @@ -141,8 +141,8 @@ benchmark_setting { accelerator_name: "snpe_dsp" accelerator_desc: "DSP" model_file: { - model_path: "local:///mlperf_models/mobilebert_quantized_htp.dlc" - model_checksum: "" + model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.1-qualcomm/mobilebert_quantized_htp.dlc" + model_checksum: "96d947175f04950898a372890907dda1" } } delegate_selected: "SNPE_DSP" @@ -169,8 +169,8 @@ benchmark_setting { accelerator_name: "snpe_dsp" accelerator_desc: "DSP" model_file: { - model_path: "local:///mlperf_models/mobile_mosaic_htp.dlc" - model_checksum: "" + model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.1-qualcomm/mobile_mosaic_htp.dlc" + model_checksum: "3c0dfbacda053773d6afb34503d9991a" } } delegate_selected: "SNPE_DSP" @@ -201,8 +201,8 @@ benchmark_setting { accelerator_name: "snpe_dsp" accelerator_desc: "DSP" model_file: { - model_path: "local:///mlperf_models//snusr_htp.dlc" - model_checksum: "" + model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.1-qualcomm//snusr_htp.dlc" + model_checksum: "668da9816073d67972704e237137a50f" } } delegate_selected: "SNPE_DSP" diff --git a/mobile_back_qti/cpp/backend_qti/settings/qti_settings_sd7pg2.pbtxt b/mobile_back_qti/cpp/backend_qti/settings/qti_settings_sd7pg2.pbtxt index cb5cfbb44..5641d1899 100644 --- a/mobile_back_qti/cpp/backend_qti/settings/qti_settings_sd7pg2.pbtxt +++ b/mobile_back_qti/cpp/backend_qti/settings/qti_settings_sd7pg2.pbtxt @@ -40,8 +40,8 @@ benchmark_setting { 
accelerator_name: "snpe_dsp" accelerator_desc: "HTP" model_file: { - model_path: "local:///mlperf_models/mobilenet_v4_htp.dlc" - model_checksum: "" + model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.1-qualcomm/mobilenet_v4_htp.dlc" + model_checksum: "56e5039260e20e5c2a0b54cc0fac8098" } } single_stream_expected_latency_ns: 250000 @@ -78,8 +78,8 @@ benchmark_setting { accelerator_desc: "HTP" batch_size: 12360 model_file: { - model_path: "local:///mlperf_models/mobilenet_v4_htp_batched_4.dlc" - model_checksum: "" + model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.1-qualcomm/mobilenet_v4_htp_batched_4.dlc" + model_checksum: "7863deea588936fe6e09565ed47dde95" } } delegate_selected: "SNPE_DSP" @@ -106,8 +106,8 @@ benchmark_setting { accelerator_name: "snpe_dsp" accelerator_desc: "HTP" model_file: { - model_path: "local:///mlperf_models/ssd_mobiledet_qat_htp.dlc" - model_checksum: "" + model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.1-qualcomm/ssd_mobiledet_qat_htp.dlc" + model_checksum: "49c6afbfefffb78269fe73a6ee1b4a85" } } delegate_selected: "SNPE_DSP" @@ -142,8 +142,8 @@ benchmark_setting { accelerator_name: "snpe_dsp" accelerator_desc: "DSP" model_file: { - model_path: "local:///mlperf_models/mobilebert_quantized_htp.dlc" - model_checksum: "" + model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.1-qualcomm/mobilebert_quantized_htp.dlc" + model_checksum: "96d947175f04950898a372890907dda1" } } delegate_selected: "SNPE_DSP" @@ -170,8 +170,8 @@ benchmark_setting { accelerator_name: "snpe_dsp" accelerator_desc: "DSP" model_file: { - model_path: "local:///mlperf_models/mobile_mosaic_htp.dlc" - model_checksum: "" + model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.1-qualcomm/mobile_mosaic_htp.dlc" + model_checksum: "3c0dfbacda053773d6afb34503d9991a" } } delegate_selected: "SNPE_DSP" @@ -202,8 +202,8 @@ benchmark_setting { accelerator_name: 
"snpe_dsp" accelerator_desc: "DSP" model_file: { - model_path: "local:///mlperf_models//snusr_htp.dlc" - model_checksum: "" + model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.1-qualcomm//snusr_htp.dlc" + model_checksum: "668da9816073d67972704e237137a50f" } } delegate_selected: "SNPE_DSP" diff --git a/mobile_back_qti/cpp/backend_qti/settings/qti_settings_sd8cxg3.pbtxt b/mobile_back_qti/cpp/backend_qti/settings/qti_settings_sd8cxg3.pbtxt index be30c60ab..c558d3fe9 100644 --- a/mobile_back_qti/cpp/backend_qti/settings/qti_settings_sd8cxg3.pbtxt +++ b/mobile_back_qti/cpp/backend_qti/settings/qti_settings_sd8cxg3.pbtxt @@ -44,8 +44,8 @@ benchmark_setting { accelerator_name: "snpe_dsp" accelerator_desc: "HTP" model_file: { - model_path: "local:///mlperf_models/mobilenet_v4_htp.dlc" - model_checksum: "" + model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.1-qualcomm/mobilenet_v4_htp.dlc" + model_checksum: "56e5039260e20e5c2a0b54cc0fac8098" } } single_stream_expected_latency_ns: 60000 @@ -82,8 +82,8 @@ benchmark_setting { accelerator_desc: "HTP" batch_size: 12360 model_file: { - model_path: "local:///mlperf_models/mobilenet_v4_htp_batched_4.dlc" - model_checksum: "" + model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.1-qualcomm/mobilenet_v4_htp_batched_4.dlc" + model_checksum: "7863deea588936fe6e09565ed47dde95" } } delegate_selected: "SNPE_DSP" @@ -110,8 +110,8 @@ benchmark_setting { accelerator_name: "snpe_dsp" accelerator_desc: "HTP" model_file: { - model_path: "local:///mlperf_models/ssd_mobiledet_qat_htp.dlc" - model_checksum: "" + model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.1-qualcomm/ssd_mobiledet_qat_htp.dlc" + model_checksum: "49c6afbfefffb78269fe73a6ee1b4a85" } } delegate_selected: "SNPE_DSP" @@ -142,8 +142,8 @@ benchmark_setting { accelerator_name: "snpe_dsp" accelerator_desc: "DSP" model_file: { - model_path: 
"local:///mlperf_models/mobilebert_quantized_htp.dlc" - model_checksum: "" + model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.1-qualcomm/mobilebert_quantized_htp.dlc" + model_checksum: "96d947175f04950898a372890907dda1" } } delegate_selected: "SNPE_DSP" @@ -174,8 +174,8 @@ benchmark_setting { accelerator_name: "snpe_dsp" accelerator_desc: "DSP" model_file: { - model_path: "local:///mlperf_models/mobile_mosaic_htp.dlc" - model_checksum: "" + model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.1-qualcomm/mobile_mosaic_htp.dlc" + model_checksum: "3c0dfbacda053773d6afb34503d9991a" } } delegate_selected: "SNPE_DSP" @@ -202,8 +202,8 @@ benchmark_setting { accelerator_name: "snpe_dsp" accelerator_desc: "DSP" model_file: { - model_path: "local:///mlperf_models//snusr_htp.dlc" - model_checksum: "" + model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.1-qualcomm//snusr_htp.dlc" + model_checksum: "668da9816073d67972704e237137a50f" } } delegate_selected: "SNPE_DSP" diff --git a/mobile_back_qti/cpp/backend_qti/settings/qti_settings_sd8g1.pbtxt b/mobile_back_qti/cpp/backend_qti/settings/qti_settings_sd8g1.pbtxt index afdc96265..d746a50c8 100644 --- a/mobile_back_qti/cpp/backend_qti/settings/qti_settings_sd8g1.pbtxt +++ b/mobile_back_qti/cpp/backend_qti/settings/qti_settings_sd8g1.pbtxt @@ -40,8 +40,8 @@ benchmark_setting { accelerator_name: "snpe_dsp" accelerator_desc: "HTP" model_file: { - model_path: "local:///mlperf_models/mobilenet_v4_htp.dlc" - model_checksum: "" + model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.1-qualcomm/mobilenet_v4_htp.dlc" + model_checksum: "56e5039260e20e5c2a0b54cc0fac8098" } } single_stream_expected_latency_ns: 80000 @@ -78,8 +78,8 @@ benchmark_setting { accelerator_desc: "HTP" batch_size: 12360 model_file: { - model_path: "local:///mlperf_models/mobilenet_v4_htp_batched_4.dlc" - model_checksum: "" + model_path: 
"https://github.com/mlcommons/mobile_models/releases/download/v4.1-qualcomm/mobilenet_v4_htp_batched_4.dlc" + model_checksum: "7863deea588936fe6e09565ed47dde95" } } delegate_selected: "SNPE_DSP" @@ -106,8 +106,8 @@ benchmark_setting { accelerator_name: "snpe_dsp" accelerator_desc: "HTP" model_file: { - model_path: "local:///mlperf_models/ssd_mobiledet_qat_htp.dlc" - model_checksum: "" + model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.1-qualcomm/ssd_mobiledet_qat_htp.dlc" + model_checksum: "49c6afbfefffb78269fe73a6ee1b4a85" } } delegate_selected: "SNPE_DSP" @@ -142,8 +142,8 @@ benchmark_setting { accelerator_name: "snpe_dsp" accelerator_desc: "DSP" model_file: { - model_path: "local:///mlperf_models/mobilebert_quantized_htp.dlc" - model_checksum: "" + model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.1-qualcomm/mobilebert_quantized_htp.dlc" + model_checksum: "96d947175f04950898a372890907dda1" } } delegate_selected: "SNPE_DSP" @@ -170,8 +170,8 @@ benchmark_setting { accelerator_name: "snpe_dsp" accelerator_desc: "DSP" model_file: { - model_path: "local:///mlperf_models/mobile_mosaic_htp.dlc" - model_checksum: "" + model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.1-qualcomm/mobile_mosaic_htp.dlc" + model_checksum: "3c0dfbacda053773d6afb34503d9991a" } } delegate_selected: "SNPE_DSP" @@ -202,8 +202,8 @@ benchmark_setting { accelerator_name: "snpe_dsp" accelerator_desc: "DSP" model_file: { - model_path: "local:///mlperf_models//snusr_htp.dlc" - model_checksum: "" + model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.1-qualcomm//snusr_htp.dlc" + model_checksum: "668da9816073d67972704e237137a50f" } } delegate_selected: "SNPE_DSP" diff --git a/mobile_back_qti/cpp/backend_qti/settings/qti_settings_sd8g2.pbtxt b/mobile_back_qti/cpp/backend_qti/settings/qti_settings_sd8g2.pbtxt index 2990cc1b3..3eae2f7da 100644 --- 
a/mobile_back_qti/cpp/backend_qti/settings/qti_settings_sd8g2.pbtxt +++ b/mobile_back_qti/cpp/backend_qti/settings/qti_settings_sd8g2.pbtxt @@ -40,8 +40,8 @@ benchmark_setting { accelerator_name: "snpe_dsp" accelerator_desc: "HTP" model_file: { - model_path: "local:///mlperf_models/mobilenet_v4_htp.dlc" - model_checksum: "" + model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.1-qualcomm/mobilenet_v4_htp.dlc" + model_checksum: "56e5039260e20e5c2a0b54cc0fac8098" } } single_stream_expected_latency_ns: 500000 @@ -78,8 +78,8 @@ benchmark_setting { accelerator_desc: "HTP" batch_size: 12360 model_file: { - model_path: "local:///mlperf_models/mobilenet_v4_htp_batched_4.dlc" - model_checksum: "" + model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.1-qualcomm/mobilenet_v4_htp_batched_4.dlc" + model_checksum: "7863deea588936fe6e09565ed47dde95" } } delegate_selected: "SNPE_DSP" @@ -106,8 +106,8 @@ benchmark_setting { accelerator_name: "snpe_dsp" accelerator_desc: "HTP" model_file: { - model_path: "local:///mlperf_models/ssd_mobiledet_qat_htp.dlc" - model_checksum: "" + model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.1-qualcomm/ssd_mobiledet_qat_htp.dlc" + model_checksum: "49c6afbfefffb78269fe73a6ee1b4a85" } } delegate_selected: "SNPE_DSP" @@ -142,8 +142,8 @@ benchmark_setting { accelerator_name: "snpe_dsp" accelerator_desc: "DSP" model_file: { - model_path: "local:///mlperf_models/mobilebert_quantized_htp.dlc" - model_checksum: "" + model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.1-qualcomm/mobilebert_quantized_htp.dlc" + model_checksum: "96d947175f04950898a372890907dda1" } } delegate_selected: "SNPE_DSP" @@ -170,8 +170,8 @@ benchmark_setting { accelerator_name: "snpe_dsp" accelerator_desc: "DSP" model_file: { - model_path: "local:///mlperf_models/mobile_mosaic_htp.dlc" - model_checksum: "" + model_path: 
"https://github.com/mlcommons/mobile_models/releases/download/v4.1-qualcomm/mobile_mosaic_htp.dlc" + model_checksum: "3c0dfbacda053773d6afb34503d9991a" } } delegate_selected: "SNPE_DSP" @@ -202,8 +202,8 @@ benchmark_setting { accelerator_name: "snpe_dsp" accelerator_desc: "DSP" model_file: { - model_path: "local:///mlperf_models//snusr_htp.dlc" - model_checksum: "" + model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.1-qualcomm/snusr_htp.dlc" + model_checksum: "668da9816073d67972704e237137a50f" } } delegate_selected: "SNPE_DSP" diff --git a/mobile_back_qti/cpp/backend_qti/settings/qti_settings_sd8g3.pbtxt b/mobile_back_qti/cpp/backend_qti/settings/qti_settings_sd8g3.pbtxt index 052813051..173631035 100644 --- a/mobile_back_qti/cpp/backend_qti/settings/qti_settings_sd8g3.pbtxt +++ b/mobile_back_qti/cpp/backend_qti/settings/qti_settings_sd8g3.pbtxt @@ -40,8 +40,8 @@ benchmark_setting { accelerator_name: "snpe_dsp" accelerator_desc: "HTP" model_file: { - model_path: "local:///mlperf_models/mobilenet_v4_htp.dlc" - model_checksum: "" + model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.1-qualcomm/mobilenet_v4_htp.dlc" + model_checksum: "56e5039260e20e5c2a0b54cc0fac8098" } } single_stream_expected_latency_ns: 500000 @@ -78,8 +78,8 @@ benchmark_setting { accelerator_desc: "HTP" batch_size: 12360 model_file: { - model_path: "local:///mlperf_models/mobilenet_v4_htp_batched_4.dlc" - model_checksum: "" + model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.1-qualcomm/mobilenet_v4_htp_batched_4.dlc" + model_checksum: "7863deea588936fe6e09565ed47dde95" } } delegate_selected: "SNPE_DSP" @@ -106,7 +106,7 @@ benchmark_setting { accelerator_name: "snpe_dsp" accelerator_desc: "HTP" model_file: { - model_path: "local:///mlperf_models/ssd_mobiledet_qat_htp.dlc" - model_checksum: "" + model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.1-qualcomm/ssd_mobiledet_qat_htp.dlc" + model_checksum: "49c6afbfefffb78269fe73a6ee1b4a85" } } @@ -143,8
+143,8 @@ benchmark_setting { accelerator_name: "snpe_dsp" accelerator_desc: "DSP" model_file: { - model_path: "local:///mlperf_models/mobilebert_quantized_htp_O2.dlc" - model_checksum: "" + model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.1-qualcomm/mobilebert_quantized_htp_O2.dlc" + model_checksum: "f8631dbd69819438d6b317c204fa80d7" } } delegate_selected: "SNPE_DSP" @@ -175,8 +175,8 @@ benchmark_setting { accelerator_name: "snpe_dsp" accelerator_desc: "DSP" model_file: { - model_path: "local:///mlperf_models/mobile_mosaic_htp.dlc" - model_checksum: "" + model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.1-qualcomm/mobile_mosaic_htp.dlc" + model_checksum: "3c0dfbacda053773d6afb34503d9991a" } } delegate_selected: "SNPE_DSP" @@ -211,8 +211,8 @@ benchmark_setting { accelerator_name: "snpe_dsp" accelerator_desc: "DSP" model_file: { - model_path: "local:///mlperf_models/snusr_htp_O2.dlc" - model_checksum: "" + model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.1-qualcomm/snusr_htp_O2.dlc" + model_checksum: "76b33f02ebfa6294a0e973aaf91116fa" } } delegate_selected: "SNPE_DSP" diff --git a/mobile_back_qti/cpp/backend_qti/settings/qti_settings_sd8pg1.pbtxt b/mobile_back_qti/cpp/backend_qti/settings/qti_settings_sd8pg1.pbtxt index 5c7814093..86775bbfb 100644 --- a/mobile_back_qti/cpp/backend_qti/settings/qti_settings_sd8pg1.pbtxt +++ b/mobile_back_qti/cpp/backend_qti/settings/qti_settings_sd8pg1.pbtxt @@ -40,8 +40,8 @@ benchmark_setting { accelerator_name: "snpe_dsp" accelerator_desc: "HTP" model_file: { - model_path: "local:///mlperf_models/mobilenet_v4_htp.dlc" - model_checksum: "" + model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.1-qualcomm/mobilenet_v4_htp.dlc" + model_checksum: "56e5039260e20e5c2a0b54cc0fac8098" } } single_stream_expected_latency_ns: 600000 @@ -78,8 +78,8 @@ benchmark_setting { accelerator_desc: "HTP" batch_size: 12360 model_file: { - 
model_path: "local:///mlperf_models/mobilenet_v4_htp_batched_4.dlc" - model_checksum: "" + model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.1-qualcomm/mobilenet_v4_htp_batched_4.dlc" + model_checksum: "7863deea588936fe6e09565ed47dde95" } } delegate_selected: "SNPE_DSP" @@ -106,8 +106,8 @@ benchmark_setting { accelerator_name: "snpe_dsp" accelerator_desc: "HTP" model_file: { - model_path: "local:///mlperf_models/ssd_mobiledet_qat_htp.dlc" - model_checksum: "" + model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.1-qualcomm/ssd_mobiledet_qat_htp.dlc" + model_checksum: "49c6afbfefffb78269fe73a6ee1b4a85" } } delegate_selected: "SNPE_DSP" @@ -142,8 +142,8 @@ benchmark_setting { accelerator_name: "snpe_dsp" accelerator_desc: "DSP" model_file: { - model_path: "local:///mlperf_models/mobilebert_quantized_htp.dlc" - model_checksum: "" + model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.1-qualcomm/mobilebert_quantized_htp.dlc" + model_checksum: "96d947175f04950898a372890907dda1" } } delegate_selected: "SNPE_DSP" @@ -174,8 +174,8 @@ benchmark_setting { accelerator_name: "snpe_dsp" accelerator_desc: "DSP" model_file: { - model_path: "local:///mlperf_models/mobile_mosaic_htp.dlc" - model_checksum: "" + model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.1-qualcomm/mobile_mosaic_htp.dlc" + model_checksum: "3c0dfbacda053773d6afb34503d9991a" } } delegate_selected: "SNPE_DSP" @@ -210,8 +210,8 @@ benchmark_setting { accelerator_name: "snpe_dsp" accelerator_desc: "DSP" model_file: { - model_path: "local:///mlperf_models//snusr_htp.dlc" - model_checksum: "" + model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.1-qualcomm//snusr_htp.dlc" + model_checksum: "668da9816073d67972704e237137a50f" } } delegate_selected: "SNPE_DSP" diff --git a/mobile_back_qti/cpp/backend_qti/settings/qti_settings_sdm778.pbtxt 
b/mobile_back_qti/cpp/backend_qti/settings/qti_settings_sdm778.pbtxt index df26e4efd..2652a864b 100644 --- a/mobile_back_qti/cpp/backend_qti/settings/qti_settings_sdm778.pbtxt +++ b/mobile_back_qti/cpp/backend_qti/settings/qti_settings_sdm778.pbtxt @@ -40,8 +40,8 @@ benchmark_setting { accelerator_name: "snpe_dsp" accelerator_desc: "HTP" model_file: { - model_path: "local:///mlperf_models/mobilenet_v4_htp.dlc" - model_checksum: "" + model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.1-qualcomm/mobilenet_v4_htp.dlc" + model_checksum: "56e5039260e20e5c2a0b54cc0fac8098" } } single_stream_expected_latency_ns: 500000 @@ -78,8 +78,8 @@ benchmark_setting { accelerator_desc: "HTP" batch_size: 12360 model_file: { - model_path: "local:///mlperf_models/mobilenet_v4_htp_batched_4.dlc" - model_checksum: "" + model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.1-qualcomm/mobilenet_v4_htp_batched_4.dlc" + model_checksum: "7863deea588936fe6e09565ed47dde95" } } delegate_selected: "SNPE_DSP" @@ -106,8 +106,8 @@ benchmark_setting { accelerator_name: "snpe_dsp" accelerator_desc: "HTP" model_file: { - model_path: "local:///mlperf_models/ssd_mobiledet_qat_htp.dlc" - model_checksum: "" + model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.1-qualcomm/ssd_mobiledet_qat_htp.dlc" + model_checksum: "49c6afbfefffb78269fe73a6ee1b4a85" } } delegate_selected: "SNPE_DSP" @@ -142,8 +142,8 @@ benchmark_setting { accelerator_name: "snpe_dsp" accelerator_desc: "DSP" model_file: { - model_path: "local:///mlperf_models/mobilebert_quantized_htp.dlc" - model_checksum: "" + model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.1-qualcomm/mobilebert_quantized_htp.dlc" + model_checksum: "96d947175f04950898a372890907dda1" } } delegate_selected: "SNPE_DSP" @@ -174,8 +174,8 @@ benchmark_setting { accelerator_name: "snpe_dsp" accelerator_desc: "DSP" model_file: { - model_path: 
"local:///mlperf_models/mobile_mosaic_htp.dlc" - model_checksum: "" + model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.1-qualcomm/mobile_mosaic_htp.dlc" + model_checksum: "3c0dfbacda053773d6afb34503d9991a" } } delegate_selected: "SNPE_DSP" @@ -210,8 +210,8 @@ benchmark_setting { accelerator_name: "snpe_dsp" accelerator_desc: "DSP" model_file: { - model_path: "local:///mlperf_models//snusr_htp.dlc" - model_checksum: "" + model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.1-qualcomm//snusr_htp.dlc" + model_checksum: "668da9816073d67972704e237137a50f" } } delegate_selected: "SNPE_DSP" diff --git a/mobile_back_qti/cpp/backend_qti/settings/qti_settings_sdm888.pbtxt b/mobile_back_qti/cpp/backend_qti/settings/qti_settings_sdm888.pbtxt index 652448ac4..5fa0435d9 100644 --- a/mobile_back_qti/cpp/backend_qti/settings/qti_settings_sdm888.pbtxt +++ b/mobile_back_qti/cpp/backend_qti/settings/qti_settings_sdm888.pbtxt @@ -40,8 +40,8 @@ benchmark_setting { accelerator_name: "snpe_dsp" accelerator_desc: "HTP" model_file: { - model_path: "local:///mlperf_models/mobilenet_v4_htp.dlc" - model_checksum: "" + model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.1-qualcomm/mobilenet_v4_htp.dlc" + model_checksum: "56e5039260e20e5c2a0b54cc0fac8098" } } single_stream_expected_latency_ns: 500000 @@ -78,8 +78,8 @@ benchmark_setting { accelerator_desc: "HTP" batch_size: 12360 model_file: { - model_path: "local:///mlperf_models/mobilenet_v4_htp_batched_4.dlc" - model_checksum: "" + model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.1-qualcomm/mobilenet_v4_htp_batched_4.dlc" + model_checksum: "7863deea588936fe6e09565ed47dde95" } } delegate_selected: "SNPE_DSP" @@ -106,8 +106,8 @@ benchmark_setting { accelerator_name: "snpe_dsp" accelerator_desc: "HTP" model_file: { - model_path: "local:///mlperf_models/ssd_mobiledet_qat_htp.dlc" - model_checksum: "" + model_path: 
"https://github.com/mlcommons/mobile_models/releases/download/v4.1-qualcomm/ssd_mobiledet_qat_htp.dlc" + model_checksum: "49c6afbfefffb78269fe73a6ee1b4a85" } } delegate_selected: "SNPE_DSP" @@ -142,8 +142,8 @@ benchmark_setting { accelerator_name: "snpe_dsp" accelerator_desc: "DSP" model_file: { - model_path: "local:///mlperf_models/mobilebert_quantized_htp.dlc" - model_checksum: "" + model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.1-qualcomm/mobilebert_quantized_htp.dlc" + model_checksum: "96d947175f04950898a372890907dda1" } } delegate_selected: "SNPE_DSP" @@ -174,8 +174,8 @@ benchmark_setting { accelerator_name: "snpe_dsp" accelerator_desc: "DSP" model_file: { - model_path: "local:///mlperf_models/mobile_mosaic_htp.dlc" - model_checksum: "" + model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.1-qualcomm/mobile_mosaic_htp.dlc" + model_checksum: "3c0dfbacda053773d6afb34503d9991a" } } delegate_selected: "SNPE_DSP" @@ -210,8 +210,8 @@ benchmark_setting { accelerator_name: "snpe_dsp" accelerator_desc: "DSP" model_file: { - model_path: "local:///mlperf_models//snusr_htp.dlc" - model_checksum: "" + model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.1-qualcomm//snusr_htp.dlc" + model_checksum: "668da9816073d67972704e237137a50f" } } delegate_selected: "SNPE_DSP" diff --git a/mobile_back_qti/cpp/backend_qti/settings/qti_settings_sm4450.pbtxt b/mobile_back_qti/cpp/backend_qti/settings/qti_settings_sm4450.pbtxt index a81b4d359..328f8a429 100644 --- a/mobile_back_qti/cpp/backend_qti/settings/qti_settings_sm4450.pbtxt +++ b/mobile_back_qti/cpp/backend_qti/settings/qti_settings_sm4450.pbtxt @@ -40,8 +40,8 @@ benchmark_setting { accelerator_name: "snpe_cpu" accelerator_desc: "CPU" model_file: { - model_path: "local:///mlperf_models/mobilenet_v4_htp.dlc" - model_checksum: "" + model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.1-qualcomm/mobilenet_v4_htp.dlc" + 
model_checksum: "56e5039260e20e5c2a0b54cc0fac8098" } } delegate_selected: "SNPE_CPU" @@ -77,8 +77,8 @@ benchmark_setting { accelerator_desc: "CPU" batch_size: 12360 model_file: { - model_path: "local:///mlperf_models/mobilenet_v4_htp_batched_4.dlc" - model_checksum: "" + model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.1-qualcomm/mobilenet_v4_htp_batched_4.dlc" + model_checksum: "7863deea588936fe6e09565ed47dde95" } } delegate_selected: "SNPE_CPU" @@ -109,8 +109,8 @@ benchmark_setting { accelerator_name: "snpe_cpu" accelerator_desc: "CPU" model_file: { - model_path: "local:///mlperf_models/ssd_mobiledet_qat_htp.dlc" - model_checksum: "" + model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.1-qualcomm/ssd_mobiledet_qat_htp.dlc" + model_checksum: "49c6afbfefffb78269fe73a6ee1b4a85" } } delegate_selected: "SNPE_CPU" @@ -145,8 +145,8 @@ benchmark_setting { accelerator_name: "snpe_cpu" accelerator_desc: "CPU" model_file: { - model_path: "local:///mlperf_models/mobilebert_quantized_htp.dlc" - model_checksum: "" + model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.1-qualcomm/mobilebert_quantized_htp.dlc" + model_checksum: "96d947175f04950898a372890907dda1" } } delegate_selected: "SNPE_CPU" @@ -181,8 +181,8 @@ benchmark_setting { value: "true" } model_file: { - model_path: "local:///mlperf_models/mobile_mosaic_htp.dlc" - model_checksum: "" + model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.1-qualcomm/mobile_mosaic_htp.dlc" + model_checksum: "3c0dfbacda053773d6afb34503d9991a" } } delegate_selected: "SNPE_CPU" @@ -225,8 +225,8 @@ benchmark_setting { accelerator_name: "snpe_cpu" accelerator_desc: "CPU" model_file: { - model_path: "local:///mlperf_models/snusr_htp.dlc" - model_checksum: "" + model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.1-qualcomm/snusr_htp.dlc" + model_checksum: "668da9816073d67972704e237137a50f" } } delegate_selected: 
"SNPE_CPU" diff --git a/mobile_back_qti/cpp/backend_qti/settings/qti_settings_sm7550.pbtxt b/mobile_back_qti/cpp/backend_qti/settings/qti_settings_sm7550.pbtxt index 934cff8a2..7691804b8 100644 --- a/mobile_back_qti/cpp/backend_qti/settings/qti_settings_sm7550.pbtxt +++ b/mobile_back_qti/cpp/backend_qti/settings/qti_settings_sm7550.pbtxt @@ -40,8 +40,8 @@ benchmark_setting { accelerator_name: "snpe_dsp" accelerator_desc: "HTP" model_file: { - model_path: "local:///mlperf_models/mobilenet_v4_htp.dlc" - model_checksum: "" + model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.1-qualcomm/mobilenet_v4_htp.dlc" + model_checksum: "56e5039260e20e5c2a0b54cc0fac8098" } } single_stream_expected_latency_ns: 50000 @@ -78,8 +78,8 @@ benchmark_setting { accelerator_desc: "HTP" batch_size: 12360 model_file: { - model_path: "local:///mlperf_models/mobilenet_v4_htp_batched_4_O2.dlc" - model_checksum: "" + model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.1-qualcomm/mobilenet_v4_htp_batched_4_O2.dlc" + model_checksum: "80ba82f2a628ab712d812d06524d2bd8" } } delegate_selected: "SNPE_DSP" @@ -110,8 +110,8 @@ benchmark_setting { accelerator_name: "snpe_dsp" accelerator_desc: "HTP" model_file: { - model_path: "local:///mlperf_models/ssd_mobiledet_qat_htp.dlc" - model_checksum: "" + model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.1-qualcomm/ssd_mobiledet_qat_htp.dlc" + model_checksum: "49c6afbfefffb78269fe73a6ee1b4a85" } } delegate_selected: "SNPE_DSP" @@ -146,8 +146,8 @@ benchmark_setting { accelerator_name: "snpe_dsp" accelerator_desc: "DSP" model_file: { - model_path: "local:///mlperf_models/mobilebert_quantized_htp.dlc" - model_checksum: "" + model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.1-qualcomm/mobilebert_quantized_htp.dlc" + model_checksum: "96d947175f04950898a372890907dda1" } } delegate_selected: "SNPE_DSP" @@ -178,8 +178,8 @@ benchmark_setting { accelerator_name: 
"snpe_dsp" accelerator_desc: "DSP" model_file: { - model_path: "local:///mlperf_models/mobile_mosaic_htp.dlc" - model_checksum: "" + model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.1-qualcomm/mobile_mosaic_htp.dlc" + model_checksum: "3c0dfbacda053773d6afb34503d9991a" } } delegate_selected: "SNPE_DSP" @@ -214,8 +214,8 @@ benchmark_setting { accelerator_name: "snpe_dsp" accelerator_desc: "DSP" model_file: { - model_path: "local:///mlperf_models//snusr_htp.dlc" - model_checksum: "" + model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.1-qualcomm//snusr_htp.dlc" + model_checksum: "668da9816073d67972704e237137a50f" } } delegate_selected: "SNPE_DSP" diff --git a/mobile_back_qti/cpp/backend_qti/settings/qti_settings_sm8635.pbtxt b/mobile_back_qti/cpp/backend_qti/settings/qti_settings_sm8635.pbtxt index 35a5dc4f8..d0793c4ba 100644 --- a/mobile_back_qti/cpp/backend_qti/settings/qti_settings_sm8635.pbtxt +++ b/mobile_back_qti/cpp/backend_qti/settings/qti_settings_sm8635.pbtxt @@ -40,8 +40,8 @@ benchmark_setting { accelerator_name: "snpe_dsp" accelerator_desc: "HTP" model_file: { - model_path: "local:///mlperf_models/mobilenet_v4_htp.dlc" - model_checksum: "" + model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.1-qualcomm/mobilenet_v4_htp.dlc" + model_checksum: "56e5039260e20e5c2a0b54cc0fac8098" } } single_stream_expected_latency_ns: 50000 @@ -78,8 +78,8 @@ benchmark_setting { accelerator_desc: "HTP" batch_size: 12360 model_file: { - model_path: "local:///mlperf_models/mobilenet_v4_htp_batched_4.dlc" - model_checksum: "" + model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.1-qualcomm/mobilenet_v4_htp_batched_4.dlc" + model_checksum: "7863deea588936fe6e09565ed47dde95" } } delegate_selected: "SNPE_DSP" @@ -110,8 +110,8 @@ benchmark_setting { accelerator_name: "snpe_dsp" accelerator_desc: "HTP" model_file: { - model_path: "local:///mlperf_models/ssd_mobiledet_qat_htp.dlc" - 
model_checksum: "" + model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.1-qualcomm/ssd_mobiledet_qat_htp.dlc" + model_checksum: "49c6afbfefffb78269fe73a6ee1b4a85" } } delegate_selected: "SNPE_DSP" @@ -146,8 +146,8 @@ benchmark_setting { accelerator_name: "snpe_dsp" accelerator_desc: "DSP" model_file: { - model_path: "local:///mlperf_models/mobilebert_quantized_htp.dlc" - model_checksum: "" + model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.1-qualcomm/mobilebert_quantized_htp.dlc" + model_checksum: "96d947175f04950898a372890907dda1" } } delegate_selected: "SNPE_DSP" @@ -178,8 +178,8 @@ benchmark_setting { accelerator_name: "snpe_dsp" accelerator_desc: "DSP" model_file: { - model_path: "local:///mlperf_models/mobile_mosaic_htp.dlc" - model_checksum: "" + model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.1-qualcomm/mobile_mosaic_htp.dlc" + model_checksum: "3c0dfbacda053773d6afb34503d9991a" } } delegate_selected: "SNPE_DSP" @@ -210,8 +210,8 @@ benchmark_setting { accelerator_name: "snpe_dsp" accelerator_desc: "DSP" model_file: { - model_path: "local:///mlperf_models/snusr_htp.dlc" - model_checksum: "" + model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.1-qualcomm/snusr_htp.dlc" + model_checksum: "668da9816073d67972704e237137a50f" } } delegate_selected: "SNPE_DSP" diff --git a/mobile_back_qti/cpp/backend_qti/settings/qti_settings_stablediffusion.pbtxt b/mobile_back_qti/cpp/backend_qti/settings/qti_settings_stablediffusion.pbtxt index a936689fb..7014deb42 100644 --- a/mobile_back_qti/cpp/backend_qti/settings/qti_settings_stablediffusion.pbtxt +++ b/mobile_back_qti/cpp/backend_qti/settings/qti_settings_stablediffusion.pbtxt @@ -18,28 +18,28 @@ benchmark_setting { accelerator_name: "snpe_dsp" accelerator_desc: "DSP" model_file: { - model_path: "local:///mlperf_models/stable_diffusion/betas.bin" - model_checksum: "" + model_path: 
"https://github.com/mlcommons/mobile_models/releases/download/v4.1-qualcomm/betas.bin" + model_checksum: "09d2e4306d319caf1b34e6afb5c63c22" } model_file: { - model_path: "local:///mlperf_models/stable_diffusion/lambdas.bin" - model_checksum: "" + model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.1-qualcomm/lambdas.bin" + model_checksum: "c7179725ec31a6e2c7daf008a5e1ff23" } model_file: { - model_path: "local:///mlperf_models/stable_diffusion/sd_precompute_data.tar" - model_checksum: "" + model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.1-qualcomm/sd_precompute_data.tar" + model_checksum: "beb7fe2da40042fb585bb8cb95d86b4d" } model_file: { - model_path: "local:///mlperf_models/stable_diffusion/text_encoder.serialized.bin" - model_checksum: "" + model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.1-qualcomm/text_encoder.serialized.bin" + model_checksum: "6da7b95fa467e99af2b9f80c7afe3734" } model_file: { - model_path: "local:///mlperf_models/stable_diffusion/unet.serialized.bin" - model_checksum: "" + model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.1-qualcomm/unet.serialized.bin" + model_checksum: "3b504b92cbd788d713ca9cfc5b19d596" } model_file: { - model_path: "local:///mlperf_models/stable_diffusion/vae_decoder.serialized.bin" - model_checksum: "" + model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.1-qualcomm/vae_decoder.serialized.bin" + model_checksum: "c7762e64c2596abe7f16614709cc5482" } } delegate_selected: "QNN_DSP" diff --git a/mobile_back_tflite/cpp/backend_tflite/backend_settings/tflite_settings_android.pbtxt b/mobile_back_tflite/cpp/backend_tflite/backend_settings/tflite_settings_android.pbtxt index 0c18e5805..4aab5dbb3 100644 --- a/mobile_back_tflite/cpp/backend_tflite/backend_settings/tflite_settings_android.pbtxt +++ b/mobile_back_tflite/cpp/backend_tflite/backend_settings/tflite_settings_android.pbtxt @@ -215,20 +215,16 
@@ benchmark_setting { accelerator_name: "npu" accelerator_desc: "NPU" model_file: { - model_path: "local:///mlperf_models/stable-diffusion/decoder.tflite" - model_checksum: "491385ad873880ba1876e1d097fcc0e3" + model_path: "local:///mlperf_models/sd_decoder_dynamic.tflite" + model_checksum: "68acdb62f99e1dc2c7f5db8cdd0e007c" } model_file: { - model_path: "local:///mlperf_models/stable-diffusion/text_encoder.tflite" - model_checksum: "8985768b09fe31b805e66b6048da9125" + model_path: "local:///mlperf_models/sd_diffusion_model_dynamic.tflite" + model_checksum: "7cbdadf5282b71561ce5eda75e868c19" } model_file: { - model_path: "local:///mlperf_models/stable-diffusion/first_model.tflite" - model_checksum: "f0d6f45a2d702456a234c0a9b192816a" - } - model_file: { - model_path: "local:///mlperf_models/stable-diffusion/second_model.tflite" - model_checksum: "cea07208776347a8a5334106a09444fe" + model_path: "local:///mlperf_models/sd_text_encoder_dynamic.tflite" + model_checksum: "b64effb0360f9ea49a117cdaf8a2fbdc" } } delegate_selected: "NNAPI" diff --git a/mobile_back_tflite/cpp/backend_tflite/backend_settings/tflite_settings_apple_main.pbtxt b/mobile_back_tflite/cpp/backend_tflite/backend_settings/tflite_settings_apple_main.pbtxt index ff3a29f07..b424ae4d9 100644 --- a/mobile_back_tflite/cpp/backend_tflite/backend_settings/tflite_settings_apple_main.pbtxt +++ b/mobile_back_tflite/cpp/backend_tflite/backend_settings/tflite_settings_apple_main.pbtxt @@ -146,21 +146,21 @@ benchmark_setting { delegate_selected: "Core ML" } -benchmark_setting { - benchmark_id: "stable_diffusion" - framework: "TFLite" - delegate_choice: { - delegate_name: "Metal" - accelerator_name: "gpu" - accelerator_desc: "GPU" - model_file: { - model_path: "local:///path/to/stable_diffusion.tflite" - model_checksum: "" - } - } - delegate_selected: "Metal" - custom_setting { - id: "pipeline" - value: "StableDiffusionPipeline" - } -} +#benchmark_setting { +# benchmark_id: "stable_diffusion" +# framework: "TFLite" +# 
delegate_choice: { +# delegate_name: "Metal" +# accelerator_name: "gpu" +# accelerator_desc: "GPU" +# model_file: { +# model_path: "local:///path/to/stable_diffusion.tflite" +# model_checksum: "" +# } +# } +# delegate_selected: "Metal" +# custom_setting { +# id: "pipeline" +# value: "StableDiffusionPipeline" +# } +#} From fad67651ced01591d5369d1ba8147f8045ce7577 Mon Sep 17 00:00:00 2001 From: Anh Date: Wed, 16 Oct 2024 16:45:25 +0700 Subject: [PATCH 09/18] Update tflite_settings_mtk_mt6989.pbtxt --- .../tflite_settings_mtk_mt6989.pbtxt | 20 +++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/mobile_back_tflite/cpp/backend_tflite/neuron/backend_settings/tflite_settings_mtk_mt6989.pbtxt b/mobile_back_tflite/cpp/backend_tflite/neuron/backend_settings/tflite_settings_mtk_mt6989.pbtxt index dc4c7708f..a20fe58d7 100644 --- a/mobile_back_tflite/cpp/backend_tflite/neuron/backend_settings/tflite_settings_mtk_mt6989.pbtxt +++ b/mobile_back_tflite/cpp/backend_tflite/neuron/backend_settings/tflite_settings_mtk_mt6989.pbtxt @@ -63,8 +63,8 @@ benchmark_setting { accelerator_name: "neuron-mdla" accelerator_desc: "MediaTek NN accelerator via the Neuron Delegate" model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.0-mediatek/MobileNetV4-Conv-Large-int8-ptq.dla" - model_checksum: "07055309718ff681d8cd2c00f5e4b5db" + model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.1-mediatek/MobileNetV4-Conv-Large-int8-ptq.dla" + model_checksum: "ff9e3705d4a62c4b78e2597156bb7599" } } delegate_choice: { @@ -173,8 +173,8 @@ benchmark_setting { accelerator_name: "neuron" accelerator_desc: "MediaTek NN accelerator + CPU via the Neuron Delegate" model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.0-mediatek/mobiledet_qat.dla" - model_checksum: "14b9572f121caa093cd8bf690fde997c" + model_path: 
"https://github.com/mlcommons/mobile_models/releases/download/v4.1-mediatek/mobiledet_qat.dla" + model_checksum: "97cc339e205dfe5503d7dc256b12f472" } } delegate_choice: { @@ -216,8 +216,8 @@ benchmark_setting { accelerator_name: "neuron-no-ahwb" accelerator_desc: "MediaTek NN accelerator + VPU via the Neuron Delegate" model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.0-mediatek/mobilebert_int8_384_nnapi.dla" - model_checksum: "8c6ce45cc49bbf8bb26609bfb219164a" + model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.1-mediatek/mobilebert_int8_384_nnapi.dla" + model_checksum: "2c81eebd87e3f620373897cc56dbc3e7" } batch_size: 1 } @@ -260,8 +260,8 @@ benchmark_setting { accelerator_name: "neuron" accelerator_desc: "NPU" model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.0-mediatek/mobile_segmenter_r4_quant_argmax_uint8.dla" - model_checksum: "fe62a283e6da531647da15b3f26e680a" + model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.1-mediatek/mobile_segmenter_r4_quant_argmax_uint8.dla" + model_checksum: "105fa48160803201dedec155445dd4e9" } } delegate_choice: { @@ -302,8 +302,8 @@ benchmark_setting { accelerator_name: "neuron" accelerator_desc: "MediaTek NN accelerator + CPU via the Neuron Delegate" model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.0-mediatek/edsr_f32b5_full_qint8.dla" - model_checksum: "fcd91d276036be666153c663c03fb69e" + model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.1-mediatek/edsr_f32b5_full_qint8.dla" + model_checksum: "cc40f7f0e97cf34c8586883fd3357354" } } delegate_choice: { From f74b27c640fc7d1403efaaeaf273c084a5a2bcb3 Mon Sep 17 00:00:00 2001 From: Koan-Sin Tan Date: Tue, 17 Dec 2024 13:59:24 +0800 Subject: [PATCH 10/18] enable stable diffusion in Pixel backend (#936) * hacks to enable stable diffusion on Pixel backend * buildifer and clang-format 
* remove unnecessary line * adapt to submission-v4.1 --- mobile_back_pixel/cpp/backend_tflite/BUILD | 16 +- .../pixel_single_model_pipeline.cc | 422 ++++++++++++++++++ .../settings/tflite_settings_pixel6.pbtxt | 27 ++ .../cpp/backend_tflite/tflite_pixel.cc | 391 +++------------- 4 files changed, 528 insertions(+), 328 deletions(-) create mode 100644 mobile_back_pixel/cpp/backend_tflite/pixel_single_model_pipeline.cc diff --git a/mobile_back_pixel/cpp/backend_tflite/BUILD b/mobile_back_pixel/cpp/backend_tflite/BUILD index dd416856b..54947f346 100644 --- a/mobile_back_pixel/cpp/backend_tflite/BUILD +++ b/mobile_back_pixel/cpp/backend_tflite/BUILD @@ -51,11 +51,22 @@ cc_library( cc_library( name = "tflite_pixel", srcs = [ + "pixel_single_model_pipeline.cc", "tflite_pixel.cc", + "//mobile_back_tflite/cpp/backend_tflite:sd_utils.cc", + "//mobile_back_tflite/cpp/backend_tflite:stable_diffusion_invoker.cc", + "//mobile_back_tflite/cpp/backend_tflite:stable_diffusion_pipeline.cc", ], hdrs = [ "tflite_settings_pixel.h", "thread_pool.h", + "//mobile_back_tflite/cpp/backend_tflite:pipeline.h", + "//mobile_back_tflite/cpp/backend_tflite:sd_utils.h", + "//mobile_back_tflite/cpp/backend_tflite:single_model_pipeline.h", + "//mobile_back_tflite/cpp/backend_tflite:stable_diffusion_invoker.h", + "//mobile_back_tflite/cpp/backend_tflite:stable_diffusion_pipeline.h", + "//mobile_back_tflite/cpp/backend_tflite:thread_pool.h", + "//mobile_back_tflite/cpp/backend_tflite:utils.h", ], copts = tflite_copts() + select({ "//flutter/android/commonlibs:use_asan": [ @@ -64,11 +75,14 @@ cc_library( "-O1", "-fno-omit-frame-pointer", ], - "//conditions:default": [], + "//conditions:default": [ + "-Imobile_back_tflite/cpp/backend_tflite", + ], }), deps = [ ":pixel_settings", ":resize_bilinear_op", + "//flutter/cpp:utils", "//flutter/cpp/c:headers", "@org_tensorflow//tensorflow/core:tflite_portable_logging", "@org_tensorflow//tensorflow/lite/c:c_api", diff --git 
a/mobile_back_pixel/cpp/backend_tflite/pixel_single_model_pipeline.cc b/mobile_back_pixel/cpp/backend_tflite/pixel_single_model_pipeline.cc new file mode 100644 index 000000000..1dc201cfb --- /dev/null +++ b/mobile_back_pixel/cpp/backend_tflite/pixel_single_model_pipeline.cc @@ -0,0 +1,422 @@ +/* Copyright 2021 The MLPerf Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include +#include +#include +#include +#include + +#include "flutter/cpp/c/backend_c.h" +#include "flutter/cpp/c/type.h" +#include "tensorflow/lite/c/c_api.h" +#include "tensorflow/lite/c/c_api_experimental.h" +#include "tensorflow/lite/c/common.h" +#if __ANDROID__ +#include + +#include "tensorflow/core/platform/logging.h" +#include "tensorflow/lite/delegates/gpu/delegate.h" +#include "tensorflow/lite/delegates/nnapi/nnapi_delegate.h" +#endif +#include "resize_argmax_op.h" +#include "single_model_pipeline.h" +#include "stable_diffusion_pipeline.h" +#include "tflite_settings_pixel.h" +#include "thread_pool.h" + +#define N_OFFLINE_INTERPRETERS 8 + +struct TFLiteBackendData { + const char* name = "TFLite-pixel"; + const char* vendor = "Google"; + const char* accelerator = "CPU"; + TfLiteModel* model{nullptr}; + std::vector options{}; + std::vector interpreter{}; + int32_t shards_num = 1; + uint32_t real_batch_size = 1; + std::unique_ptr executer; + int32_t original_tensor_size = 0; +}; + +static bool backendExists 
= false; + +static constexpr const char* const kDelegateCpu = "CPU"; + +inline mlperf_data_t::Type TfType2Type(TfLiteType type) { + switch (type) { + case kTfLiteFloat32: + return mlperf_data_t::Float32; + case kTfLiteUInt8: + return mlperf_data_t::Uint8; + case kTfLiteInt8: + return mlperf_data_t::Int8; + case kTfLiteFloat16: + return mlperf_data_t::Float16; + case kTfLiteInt32: + return mlperf_data_t::Int32; + case kTfLiteInt64: + return mlperf_data_t::Int64; + default: + printf("TfLiteType %d not supported\n", type); + return mlperf_data_t::Float32; + } +} + +size_t TFLiteNumElements(const TfLiteTensor* tensor) { + size_t result = 1; + for (int i = 0; i < TfLiteTensorNumDims(tensor); ++i) { + result *= TfLiteTensorDim(tensor, i); + } + return result; +} + +#if __ANDROID__ +bool is_emulator() { + char ro_build_characteristics[PROP_VALUE_MAX + 1]; + if (__system_property_get("ro.build.characteristics", + ro_build_characteristics)) { + char* ptr; + ptr = strstr(ro_build_characteristics, "emulator"); + if (ptr) return true; + } + return false; +} +#endif + +// Create a new backend and return the pointer to it. +mlperf_backend_ptr_t SingleModelPipeline::backend_create( + const char* model_path, mlperf_backend_configuration_t* configs, + const char* native_lib_path) { + // Verify only one instance of the backend exists at any time + if (backendExists) { + printf("Error: Only one backend instance should exist at a time\n"); + return nullptr; + } + + TFLiteBackendData* backend_data = new TFLiteBackendData(); + + backendExists = true; + + // Load the model. 
+ backend_data->model = TfLiteModelCreateFromFile(model_path); + if (!backend_data->model) { + printf("Failed to load model: %s", model_path); + mlperf_backend_delete(backend_data); + return nullptr; + } + + if (configs->batch_size > 1) { + backend_data->shards_num = N_OFFLINE_INTERPRETERS; + + if ((configs->batch_size % backend_data->shards_num) != 0) { + printf("Batch size is not dividable by shards_num: %d %% %d != 0\n", + configs->batch_size, backend_data->shards_num); + mlperf_backend_delete(backend_data); + return nullptr; + } + + backend_data->real_batch_size = + configs->batch_size / backend_data->shards_num; + } + + backend_data->executer = + std::unique_ptr(new Threadpool(backend_data->shards_num)); + + // Create interpreter options function. + auto create_option = [&](TfLiteInterpreterOptions*& option_ptr) -> void { + option_ptr = TfLiteInterpreterOptionsCreate(); + TfLiteInterpreterOptionsAddCustomOp(option_ptr, "ResizeArgmax", + Register_ResizeArgmax(), 1, 999); + TfLiteDelegate* delegate = nullptr; + + for (int i = 0; i < configs->count; ++i) { + if (strcmp(configs->keys[i], "num_threads") == 0) { + TfLiteInterpreterOptionsSetNumThreads(option_ptr, + atoi(configs->values[i])); + } + } + +#if __ANDROID__ + if (strcmp(configs->delegate_selected, kDelegateCpu) == 0) { + backend_data->accelerator = "CPU"; + } else if (!is_emulator() && + ((strcmp(configs->accelerator, "gpu_f16") == 0) || + (strcmp(configs->accelerator, "gpu") == 0))) { + backend_data->accelerator = "GPU"; + auto options = TfLiteGpuDelegateOptionsV2Default(); + if (strcmp(configs->accelerator, "gpu_f16") == 0) + options.inference_priority1 = TFLITE_GPU_INFERENCE_PRIORITY_MIN_LATENCY; + delegate = TfLiteGpuDelegateV2Create(&options); + } else if (strcmp(configs->accelerator, "tpu") == 0) { + backend_data->accelerator = "EdgeTPU"; + auto options = tflite::StatefulNnApiDelegate::Options(); + options.allow_fp16 = true; + options.disallow_nnapi_cpu = true; + options.accelerator_name = 
"google-edgetpu"; + delegate = new tflite::StatefulNnApiDelegate(options); + } + if (delegate != nullptr) { + TfLiteInterpreterOptionsAddDelegate(option_ptr, delegate); + } +#endif + }; + + backend_data->options.resize(backend_data->shards_num); + backend_data->interpreter.resize(backend_data->shards_num); + + for (int k = 0; k < backend_data->shards_num; k++) { + // Create Backend Option + create_option(backend_data->options[k]); + + // Create the interpreter. + backend_data->interpreter[k] = + TfLiteInterpreterCreate(backend_data->model, backend_data->options[k]); + if (!backend_data->interpreter[k]) { + printf("Fallback to a vanilla interpreter\n"); + backend_data->interpreter[k] = TfLiteInterpreterCreate( + backend_data->model, TfLiteInterpreterOptionsCreate()); + if (!backend_data->interpreter[k]) { + printf("Failed to create the interpreter\n"); + mlperf_backend_delete(backend_data); + return nullptr; + } + } + } + + const int32_t input_tensor_count = + TfLiteInterpreterGetInputTensorCount(backend_data->interpreter[0]); + + for (int shard_index = 0; shard_index < backend_data->shards_num; + shard_index++) { + TfLiteInterpreter*& shard = backend_data->interpreter[shard_index]; + + for (int input_index = 0; input_index < input_tensor_count; input_index++) { + TfLiteTensor* tensor = + TfLiteInterpreterGetInputTensor(shard, input_index); + + backend_data->original_tensor_size = tensor->bytes; + + if (backend_data->real_batch_size != tensor->dims->data[0]) { + std::vector dims; + dims.resize(tensor->dims->size); + dims[0] = backend_data->real_batch_size; + for (int i = 1; i < tensor->dims->size; i++) { + dims[i] = tensor->dims->data[i]; + } + if (TfLiteInterpreterResizeInputTensor(shard, input_index, dims.data(), + tensor->dims->size) != + kTfLiteOk) { + printf("Failed to resize input\n"); + mlperf_backend_delete(backend_data); + return nullptr; + } + } + } + + if (TfLiteInterpreterAllocateTensors(shard) != kTfLiteOk) { + printf("Failed to allocate tensors\n"); + 
mlperf_backend_delete(backend_data); + return nullptr; + } + } + + return backend_data; +} + +// Vendor name who create this backend. +const char* SingleModelPipeline::backend_vendor_name( + mlperf_backend_ptr_t backend_ptr) { + TFLiteBackendData* backend_data = (TFLiteBackendData*)backend_ptr; + return backend_data->vendor; +} + +// TODO: Return the name of the accelerator. +const char* SingleModelPipeline::backend_accelerator_name( + mlperf_backend_ptr_t backend_ptr) { + TFLiteBackendData* backend_data = (TFLiteBackendData*)backend_ptr; + return backend_data->accelerator; +} + +// Return the name of this backend. +const char* SingleModelPipeline::backend_name( + mlperf_backend_ptr_t backend_ptr) { + TFLiteBackendData* backend_data = (TFLiteBackendData*)backend_ptr; + return backend_data->name; +} + +// Destroy the backend pointer and its data. +void SingleModelPipeline::backend_delete(mlperf_backend_ptr_t backend_ptr) { + TFLiteBackendData* backend_data = (TFLiteBackendData*)backend_ptr; + TfLiteModelDelete(backend_data->model); + for (int i = 0; i < backend_data->shards_num; i++) { + TfLiteInterpreterOptionsDelete(backend_data->options[i]); + TfLiteInterpreterDelete(backend_data->interpreter[i]); + } + delete backend_data; + backendExists = false; +} + +// Run the inference for a sample. 
+mlperf_status_t SingleModelPipeline::backend_issue_query( + mlperf_backend_ptr_t backend_ptr) { + TFLiteBackendData* backend_data = (TFLiteBackendData*)backend_ptr; + auto task = [&backend_data](int index) -> TfLiteStatus { + return TfLiteInterpreterInvoke(backend_data->interpreter[index]); + }; + + std::vector> f; + f.resize(backend_data->shards_num); + // dispatch workers for shards + for (int k = 1; k < backend_data->shards_num; k++) { + f[k] = backend_data->executer->submit(task, k); + } + // main thread for the first shard + if (task(0) != kTfLiteOk) { + printf("Failed to run the inference\n"); + return MLPERF_FAILURE; + } + // sync and get result of workers + for (int k = 1; k < backend_data->shards_num; k++) { + if (f[k].get() != kTfLiteOk) { + printf("Failed to run the inference\n"); + return MLPERF_FAILURE; + } + } + return MLPERF_SUCCESS; +} + +// Flush the staged queries immediately. +mlperf_status_t SingleModelPipeline::backend_flush_queries( + mlperf_backend_ptr_t backend_ptr) { + return MLPERF_SUCCESS; +} + +// Return the number of inputs of the model. +int32_t SingleModelPipeline::backend_get_input_count( + mlperf_backend_ptr_t backend_ptr) { + TFLiteBackendData* backend_data = (TFLiteBackendData*)backend_ptr; + return TfLiteInterpreterGetInputTensorCount(backend_data->interpreter[0]); +} + +// Return the type of the ith input. +mlperf_data_t SingleModelPipeline::backend_get_input_type( + mlperf_backend_ptr_t backend_ptr, int32_t i) { + TFLiteBackendData* backend_data = (TFLiteBackendData*)backend_ptr; + const TfLiteTensor* tensor = + TfLiteInterpreterGetInputTensor(backend_data->interpreter[0], i); + mlperf_data_t type; + type.type = TfType2Type(TfLiteTensorType(tensor)); + type.size = TFLiteNumElements(tensor); + type.size /= backend_data->real_batch_size; + return type; +} + +// Set the data for ith input. 
+mlperf_status_t SingleModelPipeline::backend_set_input( + mlperf_backend_ptr_t backend_ptr, int32_t batch_index, int32_t i, + void* data) { + cpu_set_t cpuset; + CPU_ZERO(&cpuset); + CPU_SET(6, &cpuset); + CPU_SET(7, &cpuset); + sched_setaffinity(0, sizeof(cpu_set_t), &cpuset); + + TFLiteBackendData* backend_data = (TFLiteBackendData*)backend_ptr; + + const int shard_index = batch_index / backend_data->real_batch_size; + TfLiteTensor* tensor = TfLiteInterpreterGetInputTensor( + backend_data->interpreter[shard_index], i); + const int data_offset = backend_data->original_tensor_size * + (batch_index % backend_data->real_batch_size); + memcpy(tensor->data.raw + data_offset, data, + backend_data->original_tensor_size); + + return MLPERF_SUCCESS; +} + +// Return the number of outputs for the model. +int32_t SingleModelPipeline::backend_get_output_count( + mlperf_backend_ptr_t backend_ptr) { + TFLiteBackendData* backend_data = (TFLiteBackendData*)backend_ptr; + return TfLiteInterpreterGetOutputTensorCount(backend_data->interpreter[0]); +} + +// Return the type of ith output. +mlperf_data_t SingleModelPipeline::backend_get_output_type( + mlperf_backend_ptr_t backend_ptr, int32_t i) { + TFLiteBackendData* backend_data = (TFLiteBackendData*)backend_ptr; + const TfLiteTensor* tensor = + TfLiteInterpreterGetOutputTensor(backend_data->interpreter[0], i); + mlperf_data_t type; + type.type = TfType2Type(TfLiteTensorType(tensor)); + type.size = TFLiteNumElements(tensor); + type.size /= backend_data->real_batch_size; + return type; +} + +// Get the data from ith output. 
+mlperf_status_t SingleModelPipeline::backend_get_output( + mlperf_backend_ptr_t backend_ptr, uint32_t batch_index, int32_t i, + void** data) { + TFLiteBackendData* backend_data = (TFLiteBackendData*)backend_ptr; + const int shard_index = batch_index / backend_data->real_batch_size; + + const TfLiteTensor* output_tensor = TfLiteInterpreterGetOutputTensor( + backend_data->interpreter[shard_index], i); + batch_index %= backend_data->real_batch_size; + + int non_batch_size = 1; + for (int i = 1; i < output_tensor->dims->size; i++) { + non_batch_size *= output_tensor->dims->data[i]; + } + + switch (output_tensor->type) { + case kTfLiteFloat32: + *data = (output_tensor->data.f + (batch_index * non_batch_size)); + break; + case kTfLiteUInt8: + *data = (output_tensor->data.uint8 + (batch_index * non_batch_size)); + break; + case kTfLiteInt8: + *data = (output_tensor->data.int8 + (batch_index * non_batch_size)); + break; + case kTfLiteFloat16: + *data = (output_tensor->data.f16 + (batch_index * non_batch_size)); + break; + case kTfLiteInt32: + *data = (output_tensor->data.i32 + (batch_index * non_batch_size)); + break; + case kTfLiteInt64: + *data = (output_tensor->data.i64 + (batch_index * non_batch_size)); + break; + default: + printf("Data type not yet supported\n"); + return MLPERF_FAILURE; + } + return MLPERF_SUCCESS; +} + +void SingleModelPipeline::backend_convert_inputs( + mlperf_backend_ptr_t backend_ptr, int bytes, int width, int height, + uint8_t* data) {} + +void* SingleModelPipeline::backend_get_buffer(size_t n) { + return ::operator new(n); +} + +void SingleModelPipeline::backend_release_buffer(void* p) { + ::operator delete(p); +} diff --git a/mobile_back_pixel/cpp/backend_tflite/settings/tflite_settings_pixel6.pbtxt b/mobile_back_pixel/cpp/backend_tflite/settings/tflite_settings_pixel6.pbtxt index 22db41771..4b80a06b1 100644 --- a/mobile_back_pixel/cpp/backend_tflite/settings/tflite_settings_pixel6.pbtxt +++ 
b/mobile_back_pixel/cpp/backend_tflite/settings/tflite_settings_pixel6.pbtxt @@ -207,3 +207,30 @@ benchmark_setting { } delegate_selected: "NNAPI" } + +benchmark_setting { + benchmark_id: "stable_diffusion" + framework: "TFLite" + delegate_choice: { + delegate_name: "NNAPI" + accelerator_name: "npu" + accelerator_desc: "NPU" + model_file: { + model_path: "local:///mlperf_models/sd_decoder_dynamic.tflite" + model_checksum: "68acdb62f99e1dc2c7f5db8cdd0e007c" + } + model_file: { + model_path: "local:///mlperf_models/sd_diffusion_model_dynamic.tflite" + model_checksum: "7cbdadf5282b71561ce5eda75e868c19" + } + model_file: { + model_path: "local:///mlperf_models/sd_text_encoder_dynamic.tflite" + model_checksum: "b64effb0360f9ea49a117cdaf8a2fbdc" + } + } + delegate_selected: "NNAPI" + custom_setting { + id: "pipeline" + value: "StableDiffusionPipeline" + } +} diff --git a/mobile_back_pixel/cpp/backend_tflite/tflite_pixel.cc b/mobile_back_pixel/cpp/backend_tflite/tflite_pixel.cc index d8d1cfa1f..476b7ae60 100644 --- a/mobile_back_pixel/cpp/backend_tflite/tflite_pixel.cc +++ b/mobile_back_pixel/cpp/backend_tflite/tflite_pixel.cc @@ -1,90 +1,46 @@ -/* Copyright 2021 The MLPerf Authors. All Rights Reserved. - +/* Copyright 2024 The MLPerf Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
==============================================================================*/ -#include -#include -#include -#include -#include +#include "single_model_pipeline.h" +#include "stable_diffusion_pipeline.h" +#include "tensorflow/core/platform/logging.h" +#include "tflite_settings_pixel.h" -#include "flutter/cpp/c/backend_c.h" -#include "flutter/cpp/c/type.h" -#include "tensorflow/lite/c/c_api.h" -#include "tensorflow/lite/c/c_api_experimental.h" -#include "tensorflow/lite/c/common.h" #if __ANDROID__ #include - -#include "tensorflow/core/platform/logging.h" -#include "tensorflow/lite/delegates/gpu/delegate.h" -#include "tensorflow/lite/delegates/nnapi/nnapi_delegate.h" #endif -#include "resize_argmax_op.h" -#include "tflite_settings_pixel.h" -#include "thread_pool.h" - -#define N_OFFLINE_INTERPRETERS 8 - -struct TFLiteBackendData { - const char* name = "TFLite-pixel"; - const char* vendor = "Google"; - const char* accelerator = "CPU"; - TfLiteModel* model{nullptr}; - std::vector options{}; - std::vector interpreter{}; - int32_t shards_num = 1; - uint32_t real_batch_size = 1; - std::unique_ptr executer; - int32_t original_tensor_size = 0; -}; -static bool backendExists = false; +#ifdef __cplusplus +extern "C" { +#endif // __cplusplus -static constexpr const char* const kDelegateCpu = "CPU"; +std::unique_ptr pipeline; -inline mlperf_data_t::Type TfType2Type(TfLiteType type) { - switch (type) { - case kTfLiteFloat32: - return mlperf_data_t::Float32; - case kTfLiteUInt8: - return mlperf_data_t::Uint8; - case kTfLiteInt8: - return mlperf_data_t::Int8; - case kTfLiteFloat16: - return mlperf_data_t::Float16; - case kTfLiteInt32: - return mlperf_data_t::Int32; - case kTfLiteInt64: - return mlperf_data_t::Int64; - default: - printf("TfLiteType %d not supported\n", type); - return mlperf_data_t::Float32; +void init_pipeline(const char *pipeline_type) { + bool sd_pipeline = (strcmp(pipeline_type, "StableDiffusionPipeline") == 0); + if (sd_pipeline) { + LOG(INFO) << 
"Initializing StableDiffusionPipeline"; + pipeline = std::make_unique(); + } else { + LOG(INFO) << "Initializing SingleModelPipeline"; + pipeline = std::make_unique(); } } -size_t TFLiteNumElements(const TfLiteTensor* tensor) { - size_t result = 1; - for (int i = 0; i < TfLiteTensorNumDims(tensor); ++i) { - result *= TfLiteTensorDim(tensor, i); - } - return result; -} +void reset_pipeline() { pipeline.reset(); } -// TFLite is the standard backend for all hardwares. -bool mlperf_backend_matches_hardware(const char** not_allowed_message, - const char** settings, - const mlperf_device_info_t* device_info) { +// TFLite is the standard backend for all hardware. +bool mlperf_backend_matches_hardware(const char **not_allowed_message, + const char **settings, + const mlperf_device_info_t *device_info) { *not_allowed_message = nullptr; *settings = tflite_settings.c_str(); @@ -102,321 +58,102 @@ bool mlperf_backend_matches_hardware(const char** not_allowed_message, return false; } -#if __ANDROID__ -bool is_emulator() { - char ro_build_characteristics[PROP_VALUE_MAX + 1]; - if (__system_property_get("ro.build.characteristics", - ro_build_characteristics)) { - char* ptr; - ptr = strstr(ro_build_characteristics, "emulator"); - if (ptr) return true; - } - return false; -} -#endif - // Create a new backend and return the pointer to it. mlperf_backend_ptr_t mlperf_backend_create( - const char* model_path, mlperf_backend_configuration_t* configs, - const char* native_lib_path) { - // Verify only one instance of the backend exists at any time - if (backendExists) { - printf("Error: Only one backend instance should exist at a time\n"); - return nullptr; - } - - TFLiteBackendData* backend_data = new TFLiteBackendData(); - - backendExists = true; - - // Load the model. 
- backend_data->model = TfLiteModelCreateFromFile(model_path); - if (!backend_data->model) { - printf("Failed to load model: %s", model_path); - mlperf_backend_delete(backend_data); - return nullptr; - } - - if (configs->batch_size > 1) { - backend_data->shards_num = N_OFFLINE_INTERPRETERS; - - if ((configs->batch_size % backend_data->shards_num) != 0) { - printf("Batch size is not dividable by shards_num: %d %% %d != 0\n", - configs->batch_size, backend_data->shards_num); - mlperf_backend_delete(backend_data); - return nullptr; - } - - backend_data->real_batch_size = - configs->batch_size / backend_data->shards_num; - } - - backend_data->executer = - std::unique_ptr(new Threadpool(backend_data->shards_num)); - - // Create interpreter options function. - auto create_option = [&](TfLiteInterpreterOptions*& option_ptr) -> void { - option_ptr = TfLiteInterpreterOptionsCreate(); - TfLiteInterpreterOptionsAddCustomOp(option_ptr, "ResizeArgmax", - Register_ResizeArgmax(), 1, 999); - TfLiteDelegate* delegate = nullptr; - - for (int i = 0; i < configs->count; ++i) { - if (strcmp(configs->keys[i], "num_threads") == 0) { - TfLiteInterpreterOptionsSetNumThreads(option_ptr, - atoi(configs->values[i])); - } - } - -#if __ANDROID__ - if (strcmp(configs->delegate_selected, kDelegateCpu) == 0) { - backend_data->accelerator = "CPU"; - } else if (!is_emulator() && - ((strcmp(configs->accelerator, "gpu_f16") == 0) || - (strcmp(configs->accelerator, "gpu") == 0))) { - backend_data->accelerator = "GPU"; - auto options = TfLiteGpuDelegateOptionsV2Default(); - if (strcmp(configs->accelerator, "gpu_f16") == 0) - options.inference_priority1 = TFLITE_GPU_INFERENCE_PRIORITY_MIN_LATENCY; - delegate = TfLiteGpuDelegateV2Create(&options); - } else if (strcmp(configs->accelerator, "tpu") == 0) { - backend_data->accelerator = "EdgeTPU"; - auto options = tflite::StatefulNnApiDelegate::Options(); - options.allow_fp16 = true; - options.disallow_nnapi_cpu = true; - options.accelerator_name = 
"google-edgetpu"; - delegate = new tflite::StatefulNnApiDelegate(options); - } - if (delegate != nullptr) { - TfLiteInterpreterOptionsAddDelegate(option_ptr, delegate); - } -#endif - }; - - backend_data->options.resize(backend_data->shards_num); - backend_data->interpreter.resize(backend_data->shards_num); - - for (int k = 0; k < backend_data->shards_num; k++) { - // Create Backend Option - create_option(backend_data->options[k]); - - // Create the interpreter. - backend_data->interpreter[k] = - TfLiteInterpreterCreate(backend_data->model, backend_data->options[k]); - if (!backend_data->interpreter[k]) { - printf("Fallback to a vanilla interpreter\n"); - backend_data->interpreter[k] = TfLiteInterpreterCreate( - backend_data->model, TfLiteInterpreterOptionsCreate()); - if (!backend_data->interpreter[k]) { - printf("Failed to create the interpreter\n"); - mlperf_backend_delete(backend_data); - return nullptr; - } - } - } - - const int32_t input_tensor_count = - TfLiteInterpreterGetInputTensorCount(backend_data->interpreter[0]); - - for (int shard_index = 0; shard_index < backend_data->shards_num; - shard_index++) { - TfLiteInterpreter*& shard = backend_data->interpreter[shard_index]; - - for (int input_index = 0; input_index < input_tensor_count; input_index++) { - TfLiteTensor* tensor = - TfLiteInterpreterGetInputTensor(shard, input_index); - - backend_data->original_tensor_size = tensor->bytes; - - if (backend_data->real_batch_size != tensor->dims->data[0]) { - std::vector dims; - dims.resize(tensor->dims->size); - dims[0] = backend_data->real_batch_size; - for (int i = 1; i < tensor->dims->size; i++) { - dims[i] = tensor->dims->data[i]; - } - if (TfLiteInterpreterResizeInputTensor(shard, input_index, dims.data(), - tensor->dims->size) != - kTfLiteOk) { - printf("Failed to resize input\n"); - mlperf_backend_delete(backend_data); - return nullptr; - } - } - } - - if (TfLiteInterpreterAllocateTensors(shard) != kTfLiteOk) { - printf("Failed to allocate tensors\n"); - 
mlperf_backend_delete(backend_data); - return nullptr; + const char *model_path, mlperf_backend_configuration_t *configs, + const char *native_lib_path) { + const char *pipeline_type = ""; + for (int i = 0; i < configs->count; ++i) { + if (strcmp(configs->keys[i], "pipeline") == 0) { + pipeline_type = configs->values[i]; + break; } } - - return backend_data; + init_pipeline(pipeline_type); + return pipeline->backend_create(model_path, configs, native_lib_path); } // Vendor name who create this backend. -const char* mlperf_backend_vendor_name(mlperf_backend_ptr_t backend_ptr) { - TFLiteBackendData* backend_data = (TFLiteBackendData*)backend_ptr; - return backend_data->vendor; +const char *mlperf_backend_vendor_name(mlperf_backend_ptr_t backend_ptr) { + return pipeline->backend_vendor_name(backend_ptr); } // TODO: Return the name of the accelerator. -const char* mlperf_backend_accelerator_name(mlperf_backend_ptr_t backend_ptr) { - TFLiteBackendData* backend_data = (TFLiteBackendData*)backend_ptr; - return backend_data->accelerator; +const char *mlperf_backend_accelerator_name(mlperf_backend_ptr_t backend_ptr) { + return pipeline->backend_accelerator_name(backend_ptr); } // Return the name of this backend. -const char* mlperf_backend_name(mlperf_backend_ptr_t backend_ptr) { - TFLiteBackendData* backend_data = (TFLiteBackendData*)backend_ptr; - return backend_data->name; +const char *mlperf_backend_name(mlperf_backend_ptr_t backend_ptr) { + return pipeline->backend_name(backend_ptr); } // Destroy the backend pointer and its data. 
void mlperf_backend_delete(mlperf_backend_ptr_t backend_ptr) { - TFLiteBackendData* backend_data = (TFLiteBackendData*)backend_ptr; - TfLiteModelDelete(backend_data->model); - for (int i = 0; i < backend_data->shards_num; i++) { - TfLiteInterpreterOptionsDelete(backend_data->options[i]); - TfLiteInterpreterDelete(backend_data->interpreter[i]); - } - delete backend_data; - backendExists = false; + pipeline->backend_delete(backend_ptr); + reset_pipeline(); } // Run the inference for a sample. mlperf_status_t mlperf_backend_issue_query(mlperf_backend_ptr_t backend_ptr) { - TFLiteBackendData* backend_data = (TFLiteBackendData*)backend_ptr; - auto task = [&backend_data](int index) -> TfLiteStatus { - return TfLiteInterpreterInvoke(backend_data->interpreter[index]); - }; - - std::vector> f; - f.resize(backend_data->shards_num); - // dispatch workers for shards - for (int k = 1; k < backend_data->shards_num; k++) { - f[k] = backend_data->executer->submit(task, k); - } - // main thread for the first shard - if (task(0) != kTfLiteOk) { - printf("Failed to run the inference\n"); - return MLPERF_FAILURE; - } - // sync and get result of workers - for (int k = 1; k < backend_data->shards_num; k++) { - if (f[k].get() != kTfLiteOk) { - printf("Failed to run the inference\n"); - return MLPERF_FAILURE; - } - } - return MLPERF_SUCCESS; + return pipeline->backend_issue_query(backend_ptr); } // Flush the staged queries immediately. mlperf_status_t mlperf_backend_flush_queries(mlperf_backend_ptr_t backend_ptr) { - return MLPERF_SUCCESS; + return pipeline->backend_flush_queries(backend_ptr); } // Return the number of inputs of the model. int32_t mlperf_backend_get_input_count(mlperf_backend_ptr_t backend_ptr) { - TFLiteBackendData* backend_data = (TFLiteBackendData*)backend_ptr; - return TfLiteInterpreterGetInputTensorCount(backend_data->interpreter[0]); + return pipeline->backend_get_input_count(backend_ptr); } // Return the type of the ith input. 
mlperf_data_t mlperf_backend_get_input_type(mlperf_backend_ptr_t backend_ptr, int32_t i) { - TFLiteBackendData* backend_data = (TFLiteBackendData*)backend_ptr; - const TfLiteTensor* tensor = - TfLiteInterpreterGetInputTensor(backend_data->interpreter[0], i); - mlperf_data_t type; - type.type = TfType2Type(TfLiteTensorType(tensor)); - type.size = TFLiteNumElements(tensor); - type.size /= backend_data->real_batch_size; - return type; + return pipeline->backend_get_input_type(backend_ptr, i); } // Set the data for ith input. mlperf_status_t mlperf_backend_set_input(mlperf_backend_ptr_t backend_ptr, int32_t batch_index, int32_t i, - void* data) { - cpu_set_t cpuset; - CPU_ZERO(&cpuset); - CPU_SET(6, &cpuset); - CPU_SET(7, &cpuset); - sched_setaffinity(0, sizeof(cpu_set_t), &cpuset); - - TFLiteBackendData* backend_data = (TFLiteBackendData*)backend_ptr; - - const int shard_index = batch_index / backend_data->real_batch_size; - TfLiteTensor* tensor = TfLiteInterpreterGetInputTensor( - backend_data->interpreter[shard_index], i); - const int data_offset = backend_data->original_tensor_size * - (batch_index % backend_data->real_batch_size); - memcpy(tensor->data.raw + data_offset, data, - backend_data->original_tensor_size); - - return MLPERF_SUCCESS; + void *data) { + return pipeline->backend_set_input(backend_ptr, batch_index, i, data); } // Return the number of outputs for the model. int32_t mlperf_backend_get_output_count(mlperf_backend_ptr_t backend_ptr) { - TFLiteBackendData* backend_data = (TFLiteBackendData*)backend_ptr; - return TfLiteInterpreterGetOutputTensorCount(backend_data->interpreter[0]); + return pipeline->backend_get_output_count(backend_ptr); } // Return the type of ith output. 
mlperf_data_t mlperf_backend_get_output_type(mlperf_backend_ptr_t backend_ptr, int32_t i) { - TFLiteBackendData* backend_data = (TFLiteBackendData*)backend_ptr; - const TfLiteTensor* tensor = - TfLiteInterpreterGetOutputTensor(backend_data->interpreter[0], i); - mlperf_data_t type; - type.type = TfType2Type(TfLiteTensorType(tensor)); - type.size = TFLiteNumElements(tensor); - type.size /= backend_data->real_batch_size; - return type; + return pipeline->backend_get_output_type(backend_ptr, i); } // Get the data from ith output. mlperf_status_t mlperf_backend_get_output(mlperf_backend_ptr_t backend_ptr, uint32_t batch_index, int32_t i, - void** data) { - TFLiteBackendData* backend_data = (TFLiteBackendData*)backend_ptr; - const int shard_index = batch_index / backend_data->real_batch_size; + void **data) { + return pipeline->backend_get_output(backend_ptr, batch_index, i, data); +} - const TfLiteTensor* output_tensor = TfLiteInterpreterGetOutputTensor( - backend_data->interpreter[shard_index], i); - batch_index %= backend_data->real_batch_size; +void mlperf_backend_convert_inputs(mlperf_backend_ptr_t backend_ptr, int bytes, + int width, int height, uint8_t *data) { + return pipeline->backend_convert_inputs(backend_ptr, bytes, width, height, + data); +} - int non_batch_size = 1; - for (int i = 1; i < output_tensor->dims->size; i++) { - non_batch_size *= output_tensor->dims->data[i]; - } +void *mlperf_backend_get_buffer(size_t n) { + return pipeline->backend_get_buffer(n); +} - switch (output_tensor->type) { - case kTfLiteFloat32: - *data = (output_tensor->data.f + (batch_index * non_batch_size)); - break; - case kTfLiteUInt8: - *data = (output_tensor->data.uint8 + (batch_index * non_batch_size)); - break; - case kTfLiteInt8: - *data = (output_tensor->data.int8 + (batch_index * non_batch_size)); - break; - case kTfLiteFloat16: - *data = (output_tensor->data.f16 + (batch_index * non_batch_size)); - break; - case kTfLiteInt32: - *data = (output_tensor->data.i32 + 
(batch_index * non_batch_size)); - break; - case kTfLiteInt64: - *data = (output_tensor->data.i64 + (batch_index * non_batch_size)); - break; - default: - printf("Data type not yet supported\n"); - return MLPERF_FAILURE; - } - return MLPERF_SUCCESS; +void mlperf_backend_release_buffer(void *p) { + return pipeline->backend_release_buffer(p); +} + +#ifdef __cplusplus } +#endif // __cplusplus From aab26977c0ae26d9683f8e3fcb7baf2ed809fa11 Mon Sep 17 00:00:00 2001 From: Anh Date: Tue, 17 Dec 2024 14:00:13 +0700 Subject: [PATCH 11/18] feat: add icon and description for Stable Diffusion benchmark (#917) * Add icon for stable_diffusion task * Add description for stable_diffusion task * Sort the order of task based on BenchmarkId.allIds * Fix ios-build-test.yml * Fix ios-build-test.yml --- .github/workflows/ios-build-test.yml | 2 +- flutter/assets/icons/ic_task_stable_diffusion.svg | 12 ++++++++++++ .../assets/icons/ic_task_stable_diffusion_white.svg | 12 ++++++++++++ flutter/integration_test/expected_throughput.dart | 2 +- flutter/lib/app_constants.dart | 5 +++-- flutter/lib/benchmark/benchmark.dart | 6 +++++- flutter/lib/benchmark/info.dart | 6 ++++++ flutter/lib/l10n/app_en.arb | 3 +++ flutter/lib/ui/icons.dart | 10 ++++------ 9 files changed, 47 insertions(+), 11 deletions(-) create mode 100644 flutter/assets/icons/ic_task_stable_diffusion.svg create mode 100644 flutter/assets/icons/ic_task_stable_diffusion_white.svg diff --git a/.github/workflows/ios-build-test.yml b/.github/workflows/ios-build-test.yml index 2d2b03c6e..4dbc67bef 100644 --- a/.github/workflows/ios-build-test.yml +++ b/.github/workflows/ios-build-test.yml @@ -10,7 +10,7 @@ jobs: build: name: Build and test iOS app # https://github.com/actions/runner-images/blob/main/images/macos/macos-12-Readme.md - runs-on: macos-12 + runs-on: macos-13 timeout-minutes: 120 env: PERF_TEST: true diff --git a/flutter/assets/icons/ic_task_stable_diffusion.svg b/flutter/assets/icons/ic_task_stable_diffusion.svg new file 
mode 100644 index 000000000..cb8745b58 --- /dev/null +++ b/flutter/assets/icons/ic_task_stable_diffusion.svg @@ -0,0 +1,12 @@ + + + + + + + + + + + + diff --git a/flutter/assets/icons/ic_task_stable_diffusion_white.svg b/flutter/assets/icons/ic_task_stable_diffusion_white.svg new file mode 100644 index 000000000..61eb0a374 --- /dev/null +++ b/flutter/assets/icons/ic_task_stable_diffusion_white.svg @@ -0,0 +1,12 @@ + + + + + + + + + + + + diff --git a/flutter/integration_test/expected_throughput.dart b/flutter/integration_test/expected_throughput.dart index 40f9e83d5..50cdf97e8 100644 --- a/flutter/integration_test/expected_throughput.dart +++ b/flutter/integration_test/expected_throughput.dart @@ -26,7 +26,7 @@ const _kS22Ultra = 'SM-S908U1'; // Galaxy S22 Ultra const _kDN2103 = 'DN2103'; // OnePlus DN2103 // iOS -const _kIphoneOnGitHubAction = 'iPhone15,3'; +const _kIphoneOnGitHubAction = 'iPhone16,2'; const _kIphoneOnMacbookM1 = 'iPhone14,7'; const Map> _imageClassificationV2 = { diff --git a/flutter/lib/app_constants.dart b/flutter/lib/app_constants.dart index 5e4611750..e83e6c11e 100644 --- a/flutter/lib/app_constants.dart +++ b/flutter/lib/app_constants.dart @@ -24,14 +24,15 @@ class BenchmarkId { static const imageClassificationOfflineV2 = 'image_classification_offline_v2'; static const stableDiffusion = 'stable_diffusion'; + // The sort order of this list will be used in the UI static const allIds = [ + imageClassificationV2, objectDetection, imageSegmentationV2, naturalLanguageProcessing, superResolution, - imageClassificationV2, - imageClassificationOfflineV2, stableDiffusion, + imageClassificationOfflineV2, ]; } diff --git a/flutter/lib/benchmark/benchmark.dart b/flutter/lib/benchmark/benchmark.dart index 1ac2d00bd..41cca8c00 100644 --- a/flutter/lib/benchmark/benchmark.dart +++ b/flutter/lib/benchmark/benchmark.dart @@ -130,7 +130,11 @@ class BenchmarkStore { required List backendConfig, required Map taskSelection, }) { - for (final task in 
appConfig.task) { + // sort the order of task based on BenchmarkId.allIds + final List sortedTasks = List.from(appConfig.task) + ..sort((a, b) => + BenchmarkId.allIds.indexOf(a.id) - BenchmarkId.allIds.indexOf(b.id)); + for (final task in sortedTasks) { final backendSettings = backendConfig .singleWhereOrNull((setting) => setting.benchmarkId == task.id); if (backendSettings == null) { diff --git a/flutter/lib/benchmark/info.dart b/flutter/lib/benchmark/info.dart index 856555b4e..d2d027cdf 100644 --- a/flutter/lib/benchmark/info.dart +++ b/flutter/lib/benchmark/info.dart @@ -63,6 +63,12 @@ class BenchmarkInfo { detailsTitle: stringResources.benchInfoSuperResolution, detailsContent: stringResources.benchInfoSuperResolutionDesc, ); + case (BenchmarkId.stableDiffusion): + return BenchmarkLocalizationInfo( + name: stringResources.benchNameStableDiffusion, + detailsTitle: stringResources.benchInfoStableDiffusion, + detailsContent: stringResources.benchInfoStableDiffusionDesc, + ); default: throw 'unhandled task id: ${task.id}'; } diff --git a/flutter/lib/l10n/app_en.arb b/flutter/lib/l10n/app_en.arb index 8eb04d97c..0b147d55f 100644 --- a/flutter/lib/l10n/app_en.arb +++ b/flutter/lib/l10n/app_en.arb @@ -102,17 +102,20 @@ "benchNameLanguageProcessing": "Language Processing", "benchNameImageClassificationOffline": "Image Classification (offline)", "benchNameSuperResolution": "Super Resolution", + "benchNameStableDiffusion": "Stable Diffusion", "benchInfoImageClassification": "Image Classification", "benchInfoObjectDetection": "Object detection", "benchInfoImageSegmentation": "Image Segmentation", "benchInfoLanguageProcessing": "Language Processing", "benchInfoSuperResolution": "Super Resolution", + "benchInfoStableDiffusion": "Stable Diffusion", "benchInfoImageClassificationDesc": "Image classification picks the best label to describe an input image and is commonly used for photo search and text extraction. 
The MobileNetEdgeTPU reference model is evaluated on the ImageNet 2012 validation dataset and requires a minimum accuracy of 74.66% (98% of FP32 accuracy of 76.19%) Top-1 accuracy (For Performance measurements, App uses a different dataset).\n\nThe MobileNetEdgeTPU network is a descendent of the MobileNet-v2 family that is optimized for low-latency and mobile accelerators. The MobileNetEdgeTPU model architecture is based on convolutional layers with inverted residuals and linear bottlenecks, similar to MobileNet v2, but is optimized by introducing fused inverted bottleneck convolutions to improve hardware utilization, and removing hard-swish and squeeze-and-excite blocks.\n\nThe offline variant of image classification has no latency constraints and typically uses batched inference and has higher throughput.", "benchInfoImageClassificationV2Desc": "Image classification picks the best label to describe an input image and is commonly used for photo search and text extraction.\n\nThe MobileNetV4-Conv-L model boasts an impressive 83% accuracy with the ImageNet dataset, versus 76% accuracy for the prior standard, MobileNetEdgeTPU. MobileNetV4-Conv-L is designed to perform well across a range of mobile processor types, from CPUs and GPUs to neural accelerators. The MLPerf Mobile working group worked closely with the MobileNetV4 team in order to ensure optimized performance. This combination of an improved model architecture and collaborative optimization has proven quite potent. Although MobileNetV4-Conv-L executes six times the number of mathematical operations of its predecessor, MobileNetEdgeTPU, benchmark execution times have only increased by a factor of roughly 4.6.\n\nThe offline variant of image classification has no latency constraints and typically uses batched inference and has higher throughput.", "benchInfoObjectDetectionDesc": "Object detection draws bounding boxes around recognized objects in an input image, assigning each one a label. 
This is a common approach for identifying objects in photos, and automotive safety. Since v1.0, our reference model has been updated to MobileDets (from v0.7 model, Single Shot Detector with a MobileNet-v2 feature extractor operating). MobileDets are trained on the COCO 2017 validation dataset. The MobileDets Object Detection task is evaluated on the COCO 2017 dataset with an input image resolution of 320x320. It requires a minimum mean Average Precision (mAP) of 27.075 (95% of FP32 mAP of 28.5%), which is significantly higher than that of the previous model.\n\nMobileDets are searched for object detection. A key feature of MobileDets is that the search space includes both inverted bottleneck blocks and regular convolution operations to help improve the accuracy-latency trade-off on several hardware accelerators.", "benchInfoImageSegmentationDesc": "Semantic image segmentation partitions an input image into labeled objects at pixel granularity, and is used for complex image manipulation such as red-eye reduction as well as automotive and medical applications. The reference model is the MOSAIC network paired with a tailored feature extraction backbone. It operates on 512x512 resolution input images from the ADE20K validation set and requires a minimum mean Intersection Over Union (mIoU) value of 57.36% (96% of FP32 mIoU of 59.75%), significantly higher than the previous segmentation model (MobileNetv2-Deeplabv3+).\n\nMOSAIC employs a simple asymmetric encoder-decoder structure which consists of an efficient multi-scale context encoder and a light-weight hybrid decoder to recover spatial details from aggregated information with multiple lateral connections between the two. The feature extractor is a variant of MobileNet Multi-Hardware, which is a network built and optimized with neural architecture search. 
It is further enhanced for image segmentation by reducing the output stride, adding dilated convolutions at the end stage, and halving the feature channels.", "benchInfoLanguageProcessingDesc": "Question Answering finds the best answer to an input question based on a body of text, and is commonly employed in applications such as virtual assistants and chatbots. The reference model, MobileBERT, is evaluated on the Stanford Question Answering Dataset (SQUAD) v1.1 Dev-mini. The task requires a minimum F1-score of 87.4% (93% of FP32 F1-score of 93.08%).\n\nMobileBERT is a streamlined, mobile-optimized version of the larger BERT_LARGE network. It features bottleneck structures and a carefully designed balance between self-attention and feed-forward networks. While BERT is task-agnostic and can be applied to various downstream natural language processing tasks, the MobileBERT variant used in MLPerf is specifically fine-tuned for question answering.", "benchInfoSuperResolutionDesc": "Image Super Resolution (SR) upscales a lower resolution input into a higher resolution output image, enhancing the quality and detail. It is a common task in many mobile applications such as digital zoom. The reference model, EDSR F32B5, is a lightweight member of the Enhanced Deep Super Resolution (EDSR) family that is trained for 2X super resolution on the DIV2K dataset with bicubic downsampling and tested on the OpenSR test-set which comprises 25 selected 1920x1080 HDR images. The benchmark requires a minimum accuracy of 33 dB Peak Signal to Noise Ratio (PSNR) relative to a 33.58 dB accuracy with FP32.\n\nThe EDSR family of models demonstrated excellent performance by winning a super resolution challenge at CVPR 2017. The EDSR F32B5 reference model features five EDSR blocks, each with 32 feature maps. The EDSR block is a simple residual block consisting of a residual connection on one branch and a convolution-ReLU-convolution on the other branch. 
The final upsampling layer is a depth-to-space operator, which facilitates the x2 super resolution process.", + "benchInfoStableDiffusionDesc": "The Text to Image Gen AI benchmark adopts Stable Diffusion v1.5 for generating images from text prompts. It is a latent diffusion model. The benchmarked Stable Diffusion v1.5 refers to a specific configuration of the model architecture that uses a downsampling-factor 8 autoencoder with an 860M UNet,123M CLIP ViT-L/14 text encoder for the diffusion model, and VAE Decoder of 49.5M parameters. The model was trained on 595k steps at resolution of 512x512, which enables it to generate high quality images. We refer you to https://huggingface.co/benjamin-paine/stable-diffusion-v1-5 for more information. The benchmark runs 20 denoising steps for inference, and uses a precalculated time embedding of size 1x1280. Reference models can be found here https://github.com/mlcommons/mobile_open/releases.\n\nFor latency benchmarking, we benchmark end to end, excluding the time embedding calculation and the tokenizer. 
For accuracy calculations, the app adopts the CLIP metric for text-to-image consistency, and further evaluation of the generated images using this Image Quality Aesthetic Assessment metric https://github.com/idealo/image-quality-assessment/tree/master?tab=readme-ov-file", "resourceErrorMessage": "Some resources failed to load.\nIf you didn't change config from default you can try clearing the cache.\nIf you use a custom configuration file ensure that it has correct structure or switch back to default config.", "resourceErrorSelectTaskFile": "Update task configuration", diff --git a/flutter/lib/ui/icons.dart b/flutter/lib/ui/icons.dart index cadd279c5..524813430 100644 --- a/flutter/lib/ui/icons.dart +++ b/flutter/lib/ui/icons.dart @@ -26,10 +26,8 @@ class AppIcons { _pSvg('ic_task_image_classification_offline.svg'); static final SvgPicture superResolution = _pSvg('ic_task_super_resolution.svg'); - - // TODO (anhappdev): update icon static final SvgPicture stableDiffusion = - _pSvg('ic_task_super_resolution.svg'); + _pSvg('ic_task_stable_diffusion.svg'); static final SvgPicture imageClassificationWhite = _pSvg('ic_task_image_classification_white.svg'); @@ -43,10 +41,8 @@ class AppIcons { _pSvg('ic_task_image_classification_offline_white.svg'); static final SvgPicture superResolutionWhite = _pSvg('ic_task_super_resolution_white.svg'); - - // TODO (anhappdev): update icon static final SvgPicture stableDiffusionWhite = - _pSvg('ic_task_super_resolution_white.svg'); + _pSvg('ic_task_stable_diffusion_white.svg'); static final SvgPicture arrow = _pSvg('ic_arrow.svg'); @@ -71,6 +67,7 @@ class BenchmarkIcons { BenchmarkId.imageSegmentationV2: AppIcons.imageSegmentation, BenchmarkId.naturalLanguageProcessing: AppIcons.languageProcessing, BenchmarkId.superResolution: AppIcons.superResolution, + BenchmarkId.stableDiffusion: AppIcons.stableDiffusion, BenchmarkId.imageClassificationOfflineV2: AppIcons.imageClassificationOffline, }; @@ -81,6 +78,7 @@ class BenchmarkIcons { 
BenchmarkId.imageSegmentationV2: AppIcons.imageSegmentationWhite, BenchmarkId.naturalLanguageProcessing: AppIcons.languageProcessingWhite, BenchmarkId.superResolution: AppIcons.superResolutionWhite, + BenchmarkId.stableDiffusion: AppIcons.stableDiffusionWhite, BenchmarkId.imageClassificationOfflineV2: AppIcons.imageClassificationOfflineWhite, }; From 48654bdff2bef8e5061056950bccbe9d04a28398 Mon Sep 17 00:00:00 2001 From: RSMNYS Date: Tue, 14 Jan 2025 09:02:25 +0200 Subject: [PATCH 12/18] use time step embedding from file (#928) * feat: pass task-specific config to backend (#922) * Add TaskConfig.CustomConfig and pass them to backend * Add CustomConfig for main.cc * Use seed and num_steps from CustomConfig for TFLite backend * Replace std::cout with LOG(INFO) * Format files * feat: add ConvertOutputs() API (#927) * Add ConvertOutputs() API * Add ConvertOutputs() for mobile_back_tflite * Set minimum macos version * Set minimum macos version to 13.1 * Update _kIphoneOnGitHubAction * feat: timestamp-embedding-parser (WIP) * disabled bitcode to be able compile with new XCode * chore: formatting * refactor: use custom setting in Core ML backend to detect NCHW input. (#924) * Add GetConfigValue() * Add custom setting data-format for Core ML * Use GetConfigValue() to get stable_diffusion_seed and stable_diffusion_num_steps * fix: resolve crash due to permission denied on Android Play Store version (#930) * Set android:extractNativeLibs="true" * Set android.bundle.enableUncompressedNativeLibs=false * chore: increase Android minSdkVersion from 21 to 30 (#859) Increase minSdkVersion to 30 * feat: finalized SD pipeline to use embedding from the binary file. 
* refactor: updated embedding_utils to parse pkl file * chore: linting * fix: fixed lint issue in neuron * chore: BUILD cleanup * chore: cleanup * chore: ignore .fvm * chore: updated model paths and checksums for stable diffusion benchmark: tflite_settings_android.pbtxt * chore: reverse timesteps and embeddings to support descending order of the timesteps and embeddings * chore: fixed formatting * chore: added links to the sd models and timestep embeddings file * chore: add the proper name for the embedding_timesteps file * chore: added missed declaration for backend_convert_outputs * chore: clang formatting * chore: added missed files * chore: fixed build file for the pixel backend * chore: bazel formatting * fix: added missed interface implementation for pixel * chore: clang formatting --------- Co-authored-by: Anh --- .bazelrc | 1 - .gitignore | 1 + WORKSPACE | 4 +- flutter/android/app/build.gradle | 2 +- .../android/app/src/main/AndroidManifest.xml | 3 +- flutter/android/gradle.properties | 1 + flutter/assets/tasks.pbtxt | 8 ++ flutter/cpp/backend.h | 4 + flutter/cpp/backends/external.cc | 4 +- flutter/cpp/backends/external.h | 12 +++ flutter/cpp/binary/main.cc | 10 +- flutter/cpp/c/backend_c.h | 2 + flutter/cpp/flutter/BUILD | 1 + flutter/cpp/proto/mlperf_task.proto | 14 ++- flutter/cpp/proto/test.cc | 4 +- flutter/cpp/utils.cc | 91 +++++++++++++++++-- flutter/cpp/utils.h | 9 +- flutter/lib/benchmark/benchmark.dart | 5 + mobile_back_apple/cpp/backend_coreml/BUILD | 1 + .../cpp/backend_coreml/coreml_settings.pbtxt | 34 ++++++- mobile_back_apple/cpp/backend_coreml/main.cc | 5 +- mobile_back_pixel/cpp/backend_tflite/BUILD | 2 + .../pixel_single_model_pipeline.cc | 4 + .../cpp/backend_dummy/ios/BUILD | 2 + mobile_back_tflite/cpp/backend_tflite/BUILD | 14 +++ .../tflite_settings_android.pbtxt | 12 ++- .../cpp/backend_tflite/embedding_utils.cc | 70 ++++++++++++++ .../cpp/backend_tflite/embedding_utils.h | 40 ++++++++ .../cpp/backend_tflite/ios/BUILD | 1 + 
.../cpp/backend_tflite/neuron/BUILD | 2 + .../cpp/backend_tflite/pipeline.h | 7 +- .../cpp/backend_tflite/sd_utils.cc | 1 - .../backend_tflite/single_model_pipeline.cc | 4 + .../backend_tflite/single_model_pipeline.h | 3 + .../stable_diffusion_invoker.cc | 38 ++++++-- .../backend_tflite/stable_diffusion_invoker.h | 2 +- .../stable_diffusion_pipeline.cc | 30 ++++++ .../stable_diffusion_pipeline.h | 3 + .../cpp/backend_tflite/tflite_c.cc | 6 ++ 39 files changed, 416 insertions(+), 41 deletions(-) create mode 100644 mobile_back_tflite/cpp/backend_tflite/embedding_utils.cc create mode 100644 mobile_back_tflite/cpp/backend_tflite/embedding_utils.h diff --git a/.bazelrc b/.bazelrc index 5788b7d3b..a2ee67b8c 100644 --- a/.bazelrc +++ b/.bazelrc @@ -43,7 +43,6 @@ build:android_x86_64 --fat_apk_cpu=x86_64 # iOS configs build:ios --apple_platform_type=ios -build:ios --apple_bitcode=embedded --copt=-fembed-bitcode build:ios --copt=-Wno-c++11-narrowing build:ios --cxxopt=-fobjc-arc diff --git a/.gitignore b/.gitignore index 8a325c3db..a7be10a01 100644 --- a/.gitignore +++ b/.gitignore @@ -4,6 +4,7 @@ .ijwb .idea .vscode +.fvm /bazel-* /output /output_logs diff --git a/WORKSPACE b/WORKSPACE index 49e0b0c63..dbcbc5c2f 100644 --- a/WORKSPACE +++ b/WORKSPACE @@ -1,7 +1,7 @@ workspace(name = "mlperf_app") -load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive") load("@bazel_tools//tools/build_defs/repo:git.bzl", "new_git_repository") +load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive") http_archive( name = "bazel_skylib", @@ -49,11 +49,11 @@ http_archive( ], ) -load("@rules_python//python:repositories.bzl", "python_register_toolchains") load( "@org_tensorflow//tensorflow/tools/toolchains/python:python_repo.bzl", "python_repository", ) +load("@rules_python//python:repositories.bzl", "python_register_toolchains") python_repository(name = "python_version_repo") diff --git a/flutter/android/app/build.gradle b/flutter/android/app/build.gradle index 
a0dd55f62..92d02ddd3 100644 --- a/flutter/android/app/build.gradle +++ b/flutter/android/app/build.gradle @@ -42,7 +42,7 @@ android { defaultConfig { applicationId "org.mlcommons.android.mlperfbench" - minSdkVersion 21 + minSdkVersion 30 targetSdkVersion 34 versionCode flutterVersionCode.toInteger() versionName flutterVersionName diff --git a/flutter/android/app/src/main/AndroidManifest.xml b/flutter/android/app/src/main/AndroidManifest.xml index 79cfd7873..67a178c84 100644 --- a/flutter/android/app/src/main/AndroidManifest.xml +++ b/flutter/android/app/src/main/AndroidManifest.xml @@ -11,7 +11,8 @@ android:maxSdkVersion="29"/> + android:icon="@mipmap/ic_launcher" + android:extractNativeLibs="true"> ( CheckSymbol("mlperf_backend_convert_inputs")); - + // Backends may need to change the format of the outputs (e.g. channel order) + convert_outputs = reinterpret_cast( + CheckSymbol("mlperf_backend_convert_outputs")); // If both functions are defined, then update if (get_buffer && release_buffer) { LOG(INFO) << "Using backend allocator"; diff --git a/flutter/cpp/backends/external.h b/flutter/cpp/backends/external.h index 7a2ef9b8d..12e17357e 100644 --- a/flutter/cpp/backends/external.h +++ b/flutter/cpp/backends/external.h @@ -67,6 +67,8 @@ struct BackendFunctions { mlperf_backend_ptr_t, uint32_t, int32_t, void**)>::type; using ConvertInputsPtr = std::add_pointer::type; + using ConvertOutputsPtr = std::add_pointer::type; // Required functions. 
BackendMatchesPtr match{nullptr}; @@ -91,6 +93,7 @@ struct BackendFunctions { AllocatorMgr::GetBufferFn get_buffer{nullptr}; AllocatorMgr::ReleaseBufferFn release_buffer{nullptr}; ConvertInputsPtr convert_inputs{nullptr}; + ConvertOutputsPtr convert_outputs{nullptr}; bool isLoaded() { return isloaded; } @@ -210,6 +213,15 @@ class ExternalBackend : public Backend { } } + // Optional function to do output data re-formatting + void ConvertOutputs(int bytes, int width, int height, + uint8_t* data) override { + if (backend_functions_.convert_outputs) { + backend_functions_.convert_outputs(backend_ptr_, bytes, width, height, + data); + } + } + private: std::string backend_name_; std::string vendor_; diff --git a/flutter/cpp/binary/main.cc b/flutter/cpp/binary/main.cc index f2f3e51c9..89f5758ad 100644 --- a/flutter/cpp/binary/main.cc +++ b/flutter/cpp/binary/main.cc @@ -132,7 +132,7 @@ int Main(int argc, char *argv[]) { command_line += " " + backend_name + " " + benchmark_id; // Command Line Flags for mlperf. - std::string mode, scenario = "SingleStream", output_dir; + std::string mode, scenario = "SingleStream", output_dir, custom_config; int min_query_count = 100, min_duration_ms = 100, max_duration_ms = 10 * 60 * 1000, single_stream_expected_latency_ns = 1000000; @@ -157,8 +157,9 @@ int Main(int argc, char *argv[]) { "A hint used by the loadgen to pre-generate " "enough samples to meet the minimum test duration."), Flag::CreateFlag("output_dir", &output_dir, - "The output directory of mlperf.", Flag::kRequired)}); - + "The output directory of mlperf.", Flag::kRequired), + Flag::CreateFlag("custom_config", &custom_config, + "Custom config in form key1:val1,key2:val2.")}); // Command Line Flags for backend. 
std::unique_ptr backend; std::unique_ptr dataset; @@ -207,9 +208,8 @@ int Main(int argc, char *argv[]) { } } } - SettingList setting_list = - createSettingList(backend_setting, benchmark_id); + CreateSettingList(backend_setting, custom_config, benchmark_id); ExternalBackend *external_backend = new ExternalBackend( model_file_path, lib_path, setting_list, native_lib_path); diff --git a/flutter/cpp/c/backend_c.h b/flutter/cpp/c/backend_c.h index dd863501b..47d3c9bb5 100644 --- a/flutter/cpp/c/backend_c.h +++ b/flutter/cpp/c/backend_c.h @@ -82,6 +82,8 @@ mlperf_status_t mlperf_backend_get_output(mlperf_backend_ptr_t backend_ptr, // Optional functions void mlperf_backend_convert_inputs(mlperf_backend_ptr_t backend_ptr, int bytes, int width, int height, uint8_t* data); +void mlperf_backend_convert_outputs(mlperf_backend_ptr_t backend_ptr, int bytes, + int width, int height, uint8_t* data); #ifdef __cplusplus } diff --git a/flutter/cpp/flutter/BUILD b/flutter/cpp/flutter/BUILD index 0ac2f1b7e..eb5ddb103 100644 --- a/flutter/cpp/flutter/BUILD +++ b/flutter/cpp/flutter/BUILD @@ -55,6 +55,7 @@ apple_xcframework( }, minimum_os_versions = { "ios": "13.1", + "macos": "13.1", }, deps = [ "//flutter/cpp/flutter:bridge", diff --git a/flutter/cpp/proto/mlperf_task.proto b/flutter/cpp/proto/mlperf_task.proto index 53e545e15..5c1012490 100644 --- a/flutter/cpp/proto/mlperf_task.proto +++ b/flutter/cpp/proto/mlperf_task.proto @@ -31,7 +31,7 @@ message MLPerfConfig { // Config of the mlperf tasks. // A task is basically a combination of models and a dataset. // -// Next ID: 11 +// Next ID: 12 message TaskConfig { // Must be unique in one task file. 
Ex: image_classification // used to match backend settings @@ -52,6 +52,7 @@ message TaskConfig { required string scenario = 7; required DatasetConfig datasets = 8; required ModelConfig model = 9; + repeated CustomConfig custom_config = 11; } // Datasets for a task @@ -107,3 +108,14 @@ message ModelConfig { // Number of detection classes if applicable optional int32 num_classes = 6; } + +// CustomConfig are task specific configuration. +// The TaskConfig.CustomConfig will be converted to +// BenchmarkSetting.CustomSetting and passed to the backend. +// To avoid name collision, the id should be prefixed with TaskConfig.id. +message CustomConfig { + // Id of this config. + required string id = 1; + // Value of this config. + required string value = 2; +} diff --git a/flutter/cpp/proto/test.cc b/flutter/cpp/proto/test.cc index 07d4fd3fd..6a66969eb 100644 --- a/flutter/cpp/proto/test.cc +++ b/flutter/cpp/proto/test.cc @@ -139,9 +139,11 @@ int test_proto() { std::list benchmarks; benchmarks.push_back("image_classification"); benchmarks.push_back("image_classification_offline"); + std::string custom_config = "key1:val1,key2:val2"; for (auto benchmark_id : benchmarks) { // Convert to SettingList - SettingList setting_list = createSettingList(backend_setting, benchmark_id); + SettingList setting_list = + CreateSettingList(backend_setting, custom_config, benchmark_id); std::cout << "SettingList for " << benchmark_id << ":\n"; dumpSettingList(setting_list); diff --git a/flutter/cpp/utils.cc b/flutter/cpp/utils.cc index 965426f4d..126e9edf5 100644 --- a/flutter/cpp/utils.cc +++ b/flutter/cpp/utils.cc @@ -125,27 +125,106 @@ mlperf_backend_configuration_t CppToCSettings(const SettingList &settings) { return c_settings; } -SettingList createSettingList(const BackendSetting &backend_setting, - std::string benchmark_id) { +// Split the string by a given delimiter +std::vector _splitString(const std::string &str, char delimiter) { + std::vector tokens; + std::stringstream ss(str); 
+ std::string token; + while (std::getline(ss, token, delimiter)) { + tokens.push_back(token); + } + return tokens; +} + +// Parse the key:value string list +std::unordered_map _parseKeyValueList( + const std::string &input) { + std::unordered_map keyValueMap; + std::vector pairs = _splitString(input, ','); // Split by comma + + for (const std::string &pair : pairs) { + std::vector keyValue = + _splitString(pair, ':'); // Split by colon + if (keyValue.size() == 2) { + keyValueMap[keyValue[0]] = keyValue[1]; + } else { + LOG(ERROR) << "Invalid key:value pair: " << pair; + } + } + return keyValueMap; +} + +// Create the setting list for backend +SettingList CreateSettingList(const BackendSetting &backend_setting, + const std::string &custom_config, + const std::string &benchmark_id) { SettingList setting_list; int setting_index = 0; - - for (auto setting : backend_setting.common_setting()) { + for (const auto &setting : backend_setting.common_setting()) { setting_list.add_setting(); (*setting_list.mutable_setting(setting_index)) = setting; setting_index++; } // Copy the benchmark specific settings - setting_index = 0; - for (auto bm_setting : backend_setting.benchmark_setting()) { + for (const auto &bm_setting : backend_setting.benchmark_setting()) { if (bm_setting.benchmark_id() == benchmark_id) { setting_list.mutable_benchmark_setting()->CopyFrom(bm_setting); + + auto parsed = _parseKeyValueList(custom_config); + for (const auto &kv : parsed) { + CustomSetting custom_setting = CustomSetting(); + custom_setting.set_id(kv.first); + custom_setting.set_value(kv.second); + setting_list.mutable_benchmark_setting()->mutable_custom_setting()->Add( + std::move(custom_setting)); + } + break; } } LOG(INFO) << "setting_list:" << std::endl << setting_list.DebugString(); return setting_list; } +template +T GetConfigValue(mlperf_backend_configuration_t *configs, const char *key, + T defaultValue); + +template <> +int GetConfigValue(mlperf_backend_configuration_t *configs, + const 
char *key, int defaultValue) { + for (int i = 0; i < configs->count; ++i) { + if (strcmp(configs->keys[i], key) == 0) { + const char *valueStr = configs->values[i]; + char *endptr = nullptr; + errno = 0; + long value = + strtol(valueStr, &endptr, 10); // Base 10 for decimal conversion + if (errno == ERANGE || value < INT_MIN || value > INT_MAX) { + LOG(ERROR) << "Value out of range for int: " << valueStr; + return defaultValue; + } + if (endptr == valueStr || *endptr != '\0') { + LOG(ERROR) << "Invalid value for int: " << valueStr; + return defaultValue; + } + return static_cast(value); + } + } + return defaultValue; +} + +template <> +std::string GetConfigValue(mlperf_backend_configuration_t *configs, + const char *key, + std::string defaultValue) { + for (int i = 0; i < configs->count; ++i) { + if (strcmp(configs->keys[i], key) == 0) { + return std::string(configs->values[i]); + } + } + return defaultValue; +} + } // namespace mobile } // namespace mlperf diff --git a/flutter/cpp/utils.h b/flutter/cpp/utils.h index 08d7e86c8..c66022401 100644 --- a/flutter/cpp/utils.h +++ b/flutter/cpp/utils.h @@ -65,8 +65,13 @@ void DeleteBackendConfiguration(mlperf_backend_configuration_t *configs); mlperf_backend_configuration_t CppToCSettings(const SettingList &settings); -SettingList createSettingList(const BackendSetting &backend_setting, - std::string benchmark_id); +SettingList CreateSettingList(const BackendSetting &backend_setting, + const std::string &custom_config, + const std::string &benchmark_id); + +template +T GetConfigValue(mlperf_backend_configuration_t *configs, const char *key, + T defaultValue); } // namespace mobile } // namespace mlperf diff --git a/flutter/lib/benchmark/benchmark.dart b/flutter/lib/benchmark/benchmark.dart index 41cca8c00..c64c0bda3 100644 --- a/flutter/lib/benchmark/benchmark.dart +++ b/flutter/lib/benchmark/benchmark.dart @@ -92,6 +92,11 @@ class Benchmark { setting: commonSettings, benchmarkSetting: benchmarkSettings, ); + // Convert 
TaskConfig.CustomConfig to BenchmarkSetting.CustomSetting + final customConfigs = taskConfig.customConfig + .map((e) => pb.CustomSetting(id: e.id, value: e.value)) + .toList(); + benchmarkSettings.customSetting.addAll(customConfigs); final uris = selectedDelegate.modelFile.map((e) => e.modelPath).toList(); final modelDirName = selectedDelegate.delegateName.replaceAll(' ', '_'); final backendModelPath = diff --git a/mobile_back_apple/cpp/backend_coreml/BUILD b/mobile_back_apple/cpp/backend_coreml/BUILD index c574b8a9c..4e8acf07d 100644 --- a/mobile_back_apple/cpp/backend_coreml/BUILD +++ b/mobile_back_apple/cpp/backend_coreml/BUILD @@ -39,6 +39,7 @@ apple_xcframework( }, minimum_os_versions = { "ios": "13.1", + "macos": "13.1", }, deps = [ "//mobile_back_apple/cpp/backend_coreml:coreml_c", diff --git a/mobile_back_apple/cpp/backend_coreml/coreml_settings.pbtxt b/mobile_back_apple/cpp/backend_coreml/coreml_settings.pbtxt index 496d9c439..a607f61c2 100644 --- a/mobile_back_apple/cpp/backend_coreml/coreml_settings.pbtxt +++ b/mobile_back_apple/cpp/backend_coreml/coreml_settings.pbtxt @@ -22,7 +22,7 @@ benchmark_setting { model_checksum: "39483b20b878d46144ab4cfe9a3e5600" } } - delegate_choice: { + delegate_choice: { delegate_name: "CPU & ANE" accelerator_name: "cpu&ane" accelerator_desc: "CPU and Neural Engine" @@ -57,7 +57,7 @@ benchmark_setting { model_checksum: "39483b20b878d46144ab4cfe9a3e5600" } } - delegate_choice: { + delegate_choice: { delegate_name: "CPU & ANE" accelerator_name: "cpu&ane" accelerator_desc: "CPU and Neural Engine" @@ -81,6 +81,10 @@ benchmark_setting { model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.1-coreml/mobilenetv4_fp32_NCHW.mlpackage.zip" model_checksum: "164c504eb3e9af6c730c1765b8b81b32" } + custom_setting { + id: "data-format" + value: "NCHW" + } } delegate_choice: { delegate_name: "CPU & GPU" @@ -90,6 +94,10 @@ benchmark_setting { model_path: 
"https://github.com/mlcommons/mobile_models/releases/download/v4.1-coreml/mobilenetv4_fp32_NCHW.mlpackage.zip" model_checksum: "164c504eb3e9af6c730c1765b8b81b32" } + custom_setting { + id: "data-format" + value: "NCHW" + } } delegate_choice: { delegate_name: "CPU & ANE" @@ -99,6 +107,10 @@ benchmark_setting { model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.1-coreml/mobilenetv4_fp32_NCHW.mlpackage.zip" model_checksum: "164c504eb3e9af6c730c1765b8b81b32" } + custom_setting { + id: "data-format" + value: "NCHW" + } } delegate_selected: "CPU & GPU & ANE" } @@ -115,6 +127,10 @@ benchmark_setting { model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.1-coreml/mobilenetv4_fp32_NCHW.mlpackage.zip" model_checksum: "164c504eb3e9af6c730c1765b8b81b32" } + custom_setting { + id: "data-format" + value: "NCHW" + } } delegate_choice: { delegate_name: "CPU & GPU" @@ -125,6 +141,10 @@ benchmark_setting { model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.1-coreml/mobilenetv4_fp32_NCHW.mlpackage.zip" model_checksum: "164c504eb3e9af6c730c1765b8b81b32" } + custom_setting { + id: "data-format" + value: "NCHW" + } } delegate_choice: { delegate_name: "CPU & ANE" @@ -135,6 +155,10 @@ benchmark_setting { model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.1-coreml/mobilenetv4_fp32_NCHW.mlpackage.zip" model_checksum: "164c504eb3e9af6c730c1765b8b81b32" } + custom_setting { + id: "data-format" + value: "NCHW" + } } delegate_selected: "CPU & GPU & ANE" } @@ -160,7 +184,7 @@ benchmark_setting { model_checksum: "ef849fbf2132e205158f05ca42db25f4" } } - delegate_choice: { + delegate_choice: { delegate_name: "CPU & ANE" accelerator_name: "cpu&ane" accelerator_desc: "CPU and Neural Engine" @@ -217,7 +241,7 @@ benchmark_setting { model_checksum: "362d6b5bb1b8e10ae5b4e223f60d4d10" } } - delegate_choice: { + delegate_choice: { delegate_name: "CPU & ANE" accelerator_name: "cpu&ane" accelerator_desc: 
"CPU and Neural Engine" @@ -250,7 +274,7 @@ benchmark_setting { model_checksum: "62489706f20b0c2ae561fb2204eefb61" } } - delegate_choice: { + delegate_choice: { delegate_name: "CPU & ANE" accelerator_name: "cpu&ane" accelerator_desc: "CPU and Neural Engine" diff --git a/mobile_back_apple/cpp/backend_coreml/main.cc b/mobile_back_apple/cpp/backend_coreml/main.cc index a2246345f..af753d566 100644 --- a/mobile_back_apple/cpp/backend_coreml/main.cc +++ b/mobile_back_apple/cpp/backend_coreml/main.cc @@ -101,8 +101,9 @@ mlperf_backend_ptr_t mlperf_backend_create( CoreMLBackendData *backend_data = new CoreMLBackendData(); backendExists = true; - // quick hack for checking if model expects NCHW input. - if (strcasestr(model_path, "NCHW") != nullptr) { + std::string dataFormat = + mlperf::mobile::GetConfigValue(configs, "data-format", std::string("")); + if (dataFormat == "NCHW") { backend_data->expectNCHW = true; LOG(INFO) << "Will convert inputs from NHWC to NCHW!"; } diff --git a/mobile_back_pixel/cpp/backend_tflite/BUILD b/mobile_back_pixel/cpp/backend_tflite/BUILD index 54947f346..8b59fc413 100644 --- a/mobile_back_pixel/cpp/backend_tflite/BUILD +++ b/mobile_back_pixel/cpp/backend_tflite/BUILD @@ -53,6 +53,7 @@ cc_library( srcs = [ "pixel_single_model_pipeline.cc", "tflite_pixel.cc", + "//mobile_back_tflite/cpp/backend_tflite:embedding_utils.cc", "//mobile_back_tflite/cpp/backend_tflite:sd_utils.cc", "//mobile_back_tflite/cpp/backend_tflite:stable_diffusion_invoker.cc", "//mobile_back_tflite/cpp/backend_tflite:stable_diffusion_pipeline.cc", @@ -60,6 +61,7 @@ cc_library( hdrs = [ "tflite_settings_pixel.h", "thread_pool.h", + "//mobile_back_tflite/cpp/backend_tflite:embedding_utils.h", "//mobile_back_tflite/cpp/backend_tflite:pipeline.h", "//mobile_back_tflite/cpp/backend_tflite:sd_utils.h", "//mobile_back_tflite/cpp/backend_tflite:single_model_pipeline.h", diff --git a/mobile_back_pixel/cpp/backend_tflite/pixel_single_model_pipeline.cc 
b/mobile_back_pixel/cpp/backend_tflite/pixel_single_model_pipeline.cc index 1dc201cfb..1d44b411f 100644 --- a/mobile_back_pixel/cpp/backend_tflite/pixel_single_model_pipeline.cc +++ b/mobile_back_pixel/cpp/backend_tflite/pixel_single_model_pipeline.cc @@ -413,6 +413,10 @@ void SingleModelPipeline::backend_convert_inputs( mlperf_backend_ptr_t backend_ptr, int bytes, int width, int height, uint8_t* data) {} +void SingleModelPipeline::backend_convert_outputs( + mlperf_backend_ptr_t backend_ptr, int bytes, int width, int height, + uint8_t* data) {} + void* SingleModelPipeline::backend_get_buffer(size_t n) { return ::operator new(n); } diff --git a/mobile_back_tflite/cpp/backend_dummy/ios/BUILD b/mobile_back_tflite/cpp/backend_dummy/ios/BUILD index c7adadbca..a3229e059 100644 --- a/mobile_back_tflite/cpp/backend_dummy/ios/BUILD +++ b/mobile_back_tflite/cpp/backend_dummy/ios/BUILD @@ -29,6 +29,7 @@ apple_xcframework( }, minimum_os_versions = { "ios": "13.1", + "macos": "13.1", }, deps = [ "//mobile_back_tflite/cpp/backend_dummy:dummy_backend", @@ -49,6 +50,7 @@ apple_xcframework( }, minimum_os_versions = { "ios": "13.1", + "macos": "13.1", }, deps = [ "//mobile_back_tflite/cpp/backend_dummy:dummy_backend", diff --git a/mobile_back_tflite/cpp/backend_tflite/BUILD b/mobile_back_tflite/cpp/backend_tflite/BUILD index f4d1a69da..651e34eba 100644 --- a/mobile_back_tflite/cpp/backend_tflite/BUILD +++ b/mobile_back_tflite/cpp/backend_tflite/BUILD @@ -36,9 +36,20 @@ pbtxt2header( ], ) +cc_library( + name = "embedding_utils", + srcs = ["embedding_utils.cc"], + hdrs = ["embedding_utils.h"], + visibility = ["//visibility:public"], + deps = [ + "@org_tensorflow//tensorflow/core:tflite_portable_logging", + ], +) + cc_library( name = "tflite_c", srcs = [ + "embedding_utils.cc", "sd_utils.cc", "single_model_pipeline.cc", "stable_diffusion_invoker.cc", @@ -46,6 +57,7 @@ cc_library( "tflite_c.cc", ], hdrs = [ + "embedding_utils.h", "pipeline.h", "sd_utils.h", "single_model_pipeline.h", @@ 
-67,7 +79,9 @@ cc_library( "//conditions:default": [], }), deps = [ + ":embedding_utils", ":tflite_settings", + "//flutter/cpp:utils", "//flutter/cpp/c:headers", "@org_tensorflow//tensorflow/core:tflite_portable_logging", "@org_tensorflow//tensorflow/lite/c:c_api", diff --git a/mobile_back_tflite/cpp/backend_tflite/backend_settings/tflite_settings_android.pbtxt b/mobile_back_tflite/cpp/backend_tflite/backend_settings/tflite_settings_android.pbtxt index 4aab5dbb3..bfec84e5f 100644 --- a/mobile_back_tflite/cpp/backend_tflite/backend_settings/tflite_settings_android.pbtxt +++ b/mobile_back_tflite/cpp/backend_tflite/backend_settings/tflite_settings_android.pbtxt @@ -215,17 +215,21 @@ benchmark_setting { accelerator_name: "npu" accelerator_desc: "NPU" model_file: { - model_path: "local:///mlperf_models/sd_decoder_dynamic.tflite" + model_path: "https://github.com/RSMNYS/SD/releases/download/SD/sd_decoder_dynamic.tflite" model_checksum: "68acdb62f99e1dc2c7f5db8cdd0e007c" } model_file: { - model_path: "local:///mlperf_models/sd_diffusion_model_dynamic.tflite" - model_checksum: "7cbdadf5282b71561ce5eda75e868c19" + model_path: "https://github.com/RSMNYS/SD/releases/download/SD/sd_diffusion_model_dynamic.tflite" + model_checksum: "309e95f76ac8de01130942037a28aa8f" } model_file: { - model_path: "local:///mlperf_models/sd_text_encoder_dynamic.tflite" + model_path: "https://github.com/RSMNYS/SD/releases/download/SD/sd_text_encoder_dynamic.tflite" model_checksum: "b64effb0360f9ea49a117cdaf8a2fbdc" } + model_file: { + model_path: "https://github.com/RSMNYS/SD/releases/download/SD/timestep_embeddings_data.bin.ts" + model_checksum: "798b772155a69de5df44b304327bb3cc" + } } delegate_selected: "NNAPI" custom_setting { diff --git a/mobile_back_tflite/cpp/backend_tflite/embedding_utils.cc b/mobile_back_tflite/cpp/backend_tflite/embedding_utils.cc new file mode 100644 index 000000000..9f25eb4e3 --- /dev/null +++ b/mobile_back_tflite/cpp/backend_tflite/embedding_utils.cc @@ -0,0 +1,70 @@ 
+#include "embedding_utils.h" + +#include + +bool TsEmbeddingParser::parse_pickle(const std::string& filename) { + std::ifstream file(filename, std::ios::binary); + if (!file) { + std::cerr << "Failed to open file: " << filename << std::endl; + return false; + } + + // Read timesteps array + std::vector timesteps; + uint32_t num_timesteps; + file.read(reinterpret_cast(&num_timesteps), sizeof(uint32_t)); + timesteps.resize(num_timesteps); + file.read(reinterpret_cast(timesteps.data()), + num_timesteps * sizeof(int32_t)); + + // Read embeddings array + std::vector> embeddings(num_timesteps); + for (auto& emb : embeddings) { + emb.resize(EMBEDDING_DIM); + file.read(reinterpret_cast(emb.data()), + EMBEDDING_DIM * sizeof(float)); + } + + // Reverse both timesteps and embeddings before storing + std::reverse(timesteps.begin(), timesteps.end()); + std::reverse(embeddings.begin(), embeddings.end()); + + // Store in maps + timesteps_[num_timesteps] = std::move(timesteps); + embeddings_[num_timesteps] = std::move(embeddings); + + return true; +} + +std::vector TsEmbeddingParser::get_timestep_embedding( + int32_t steps, int32_t step_index) const { + auto emb_it = embeddings_.find(steps); + if (emb_it == embeddings_.end() || step_index >= emb_it->second.size()) { + return {}; + } + return emb_it->second[step_index]; +} + +std::vector TsEmbeddingParser::get_timesteps(int32_t steps) const { + auto ts_it = timesteps_.find(steps); + if (ts_it == timesteps_.end()) { + return {}; + } + return ts_it->second; +} + +bool EmbeddingManager::load_timestep_embeddings(const std::string& filename) { + ts_parser_ = std::make_unique(); + return ts_parser_->parse_pickle(filename); +} + +std::vector EmbeddingManager::get_timestep_embedding( + int32_t timestep, int num_steps) const { + if (!ts_parser_) return {}; + return ts_parser_->get_timestep_embedding(num_steps, timestep); +} + +std::vector EmbeddingManager::get_timesteps(int num_steps) const { + if (!ts_parser_) return {}; + return 
ts_parser_->get_timesteps(num_steps); +} \ No newline at end of file diff --git a/mobile_back_tflite/cpp/backend_tflite/embedding_utils.h b/mobile_back_tflite/cpp/backend_tflite/embedding_utils.h new file mode 100644 index 000000000..f543c6332 --- /dev/null +++ b/mobile_back_tflite/cpp/backend_tflite/embedding_utils.h @@ -0,0 +1,40 @@ +#ifndef EMBEDDING_UTILS_H_ +#define EMBEDDING_UTILS_H_ + +#include +#include +#include +#include +#include + +class TsEmbeddingParser { + public: + bool parse_pickle(const std::string& filename); + std::vector get_timestep_embedding(int32_t steps, + int32_t step_index) const; + std::vector get_timesteps(int32_t steps) const; + + private: + static constexpr size_t EMBEDDING_DIM = 1280; + std::map> timesteps_; + std::map>> embeddings_; +}; + +class EmbeddingManager { + public: + static EmbeddingManager& getInstance() { + static EmbeddingManager instance; + return instance; + } + + bool load_timestep_embeddings(const std::string& filename); + std::vector get_timestep_embedding(int32_t timestep, + int num_steps) const; + std::vector get_timesteps(int num_steps) const; + + private: + EmbeddingManager() = default; + std::unique_ptr ts_parser_; +}; + +#endif // EMBEDDING_UTILS_H_ \ No newline at end of file diff --git a/mobile_back_tflite/cpp/backend_tflite/ios/BUILD b/mobile_back_tflite/cpp/backend_tflite/ios/BUILD index f0a764a6d..74fa88aea 100644 --- a/mobile_back_tflite/cpp/backend_tflite/ios/BUILD +++ b/mobile_back_tflite/cpp/backend_tflite/ios/BUILD @@ -15,6 +15,7 @@ apple_xcframework( }, minimum_os_versions = { "ios": "13.1", + "macos": "13.1", }, deps = [ "//mobile_back_tflite/cpp/backend_tflite:tflite_c", diff --git a/mobile_back_tflite/cpp/backend_tflite/neuron/BUILD b/mobile_back_tflite/cpp/backend_tflite/neuron/BUILD index 118076fea..e9e8cf9a7 100644 --- a/mobile_back_tflite/cpp/backend_tflite/neuron/BUILD +++ b/mobile_back_tflite/cpp/backend_tflite/neuron/BUILD @@ -70,7 +70,9 @@ cc_library( local_defines = 
["MTK_TFLITE_NEURON_BACKEND"], deps = [ ":tflite_settings", + "//flutter/cpp:utils", "//flutter/cpp/c:headers", + "//mobile_back_tflite/cpp/backend_tflite:embedding_utils", "//mobile_back_tflite/cpp/backend_tflite:tflite_settings", "@org_tensorflow//tensorflow/core:tflite_portable_logging", "@org_tensorflow//tensorflow/lite/c:c_api", diff --git a/mobile_back_tflite/cpp/backend_tflite/pipeline.h b/mobile_back_tflite/cpp/backend_tflite/pipeline.h index 41a9822f2..4ab1b4f1c 100644 --- a/mobile_back_tflite/cpp/backend_tflite/pipeline.h +++ b/mobile_back_tflite/cpp/backend_tflite/pipeline.h @@ -72,11 +72,16 @@ class Pipeline { virtual mlperf_status_t backend_get_output(mlperf_backend_ptr_t backend_ptr, uint32_t batchIndex, int32_t i, void **data) = 0; - + // Optional function to convert the inputs virtual void backend_convert_inputs(mlperf_backend_ptr_t backend_ptr, int bytes, int width, int height, uint8_t *data) = 0; + // Optional function to convert the outputs + virtual void backend_convert_outputs(mlperf_backend_ptr_t backend_ptr, + int bytes, int width, int height, + uint8_t *data) = 0; + virtual void *backend_get_buffer(size_t n) = 0; virtual void backend_release_buffer(void *p) = 0; diff --git a/mobile_back_tflite/cpp/backend_tflite/sd_utils.cc b/mobile_back_tflite/cpp/backend_tflite/sd_utils.cc index 14aa858d9..c5901b66c 100644 --- a/mobile_back_tflite/cpp/backend_tflite/sd_utils.cc +++ b/mobile_back_tflite/cpp/backend_tflite/sd_utils.cc @@ -233,7 +233,6 @@ std::vector get_timestep_embedding(int timestep, int batch_size, int dim, embedding_cos.push_back(cosf(timestep * freq)); embedding_sin.push_back(sinf(timestep * freq)); } - std::vector embedding; for (int i = 0; i < batch_size; i++) { embedding.insert(embedding.end(), diff --git a/mobile_back_tflite/cpp/backend_tflite/single_model_pipeline.cc b/mobile_back_tflite/cpp/backend_tflite/single_model_pipeline.cc index 4dc30507b..ce1eb7a1d 100644 --- a/mobile_back_tflite/cpp/backend_tflite/single_model_pipeline.cc 
+++ b/mobile_back_tflite/cpp/backend_tflite/single_model_pipeline.cc @@ -635,6 +635,10 @@ void SingleModelPipeline::backend_convert_inputs( #endif } +void SingleModelPipeline::backend_convert_outputs( + mlperf_backend_ptr_t backend_ptr, int bytes, int width, int height, + uint8_t *data) {} + void *SingleModelPipeline::backend_get_buffer(size_t n) { #ifdef MTK_TFLITE_NEURON_BACKEND if (neuron_backend != nullptr) { diff --git a/mobile_back_tflite/cpp/backend_tflite/single_model_pipeline.h b/mobile_back_tflite/cpp/backend_tflite/single_model_pipeline.h index 30c639596..70d447588 100644 --- a/mobile_back_tflite/cpp/backend_tflite/single_model_pipeline.h +++ b/mobile_back_tflite/cpp/backend_tflite/single_model_pipeline.h @@ -64,6 +64,9 @@ class SingleModelPipeline : public Pipeline { void backend_convert_inputs(mlperf_backend_ptr_t backend_ptr, int bytes, int width, int height, uint8_t *data) override; + void backend_convert_outputs(mlperf_backend_ptr_t backend_ptr, int bytes, + int width, int height, uint8_t *data) override; + void *backend_get_buffer(size_t n) override; void backend_release_buffer(void *p) override; diff --git a/mobile_back_tflite/cpp/backend_tflite/stable_diffusion_invoker.cc b/mobile_back_tflite/cpp/backend_tflite/stable_diffusion_invoker.cc index 55ea8be07..8c3405739 100644 --- a/mobile_back_tflite/cpp/backend_tflite/stable_diffusion_invoker.cc +++ b/mobile_back_tflite/cpp/backend_tflite/stable_diffusion_invoker.cc @@ -4,6 +4,7 @@ #include #include +#include "embedding_utils.h" #include "sd_utils.h" #include "stable_diffusion_pipeline.h" #include "tensorflow/lite/c/c_api.h" @@ -24,15 +25,15 @@ StableDiffusionInvoker::StableDiffusionInvoker(SDBackendData* backend_data) : backend_data_(backend_data) {} std::vector StableDiffusionInvoker::invoke() { - std::cout << "Prompt encoding started" << std::endl; + LOG(INFO) << "Prompt encoding started"; auto encoded_text = encode_prompt(backend_data_->input_prompt_tokens); auto unconditional_encoded_text = 
encode_prompt(backend_data_->unconditional_tokens); - std::cout << "Diffusion process started" << std::endl; + LOG(INFO) << "Diffusion process started"; auto latent = diffusion_process(encoded_text, unconditional_encoded_text, backend_data_->num_steps, backend_data_->seed); - std::cout << "Image decoding started" << std::endl; + LOG(INFO) << "Image decoding started"; return decode_image(latent); } @@ -99,19 +100,43 @@ std::vector StableDiffusionInvoker::diffusion_process( const std::vector& unconditional_encoded_text, int num_steps, int seed) { float unconditional_guidance_scale = 7.5f; + auto noise = get_normal(64 * 64 * 4, seed); auto latent = noise; - auto timesteps = get_timesteps(1, 1000, 1000 / num_steps); + // Get pre-calculated timesteps and embeddings + auto& embedding_manager = EmbeddingManager::getInstance(); + auto timesteps = embedding_manager.get_timesteps(num_steps); + + if (timesteps.empty()) { + LOG(ERROR) << "Failed to get timesteps for " << num_steps << " steps"; + return std::vector(); + } + auto alphas_tuple = get_initial_alphas(timesteps); + auto alphas = std::get<0>(alphas_tuple); auto alphas_prev = std::get<1>(alphas_tuple); for (int i = timesteps.size() - 1; i >= 0; --i) { - std::cout << "Step " << timesteps.size() - 1 - i << "\n"; + LOG(INFO) << "Step " << timesteps.size() - 1 - i; + + std::cout << "\n=== Processing Step " << timesteps.size() - 1 - i + << " (timestamp: " << timesteps[i] << ") ===" << std::endl; auto latent_prev = latent; - auto t_emb = get_timestep_embedding(timesteps[i]); + + auto t_emb = embedding_manager.get_timestep_embedding(i, num_steps); + + if (t_emb.empty()) { + LOG(ERROR) << "Failed to get timestamp embedding for step " << i; + return std::vector(); + } + + if (t_emb.empty()) { + LOG(ERROR) << "Failed to get timestamp embedding for step " << i; + return std::vector(); + } auto unconditional_latent = diffusion_step(latent, t_emb, unconditional_encoded_text); @@ -132,6 +157,7 @@ std::vector 
StableDiffusionInvoker::diffusion_process( latent.assign(std::begin(l), std::end(l)); } + std::cout << "\nDiffusion process completed" << std::endl; return latent; } diff --git a/mobile_back_tflite/cpp/backend_tflite/stable_diffusion_invoker.h b/mobile_back_tflite/cpp/backend_tflite/stable_diffusion_invoker.h index ccbef1f9e..706589835 100644 --- a/mobile_back_tflite/cpp/backend_tflite/stable_diffusion_invoker.h +++ b/mobile_back_tflite/cpp/backend_tflite/stable_diffusion_invoker.h @@ -5,7 +5,7 @@ #include #include -#include "stable_diffusion_pipeline.h" // Include the backend data structure +#include "stable_diffusion_pipeline.h" #include "tensorflow/lite/interpreter.h" #include "tensorflow/lite/model_builder.h" diff --git a/mobile_back_tflite/cpp/backend_tflite/stable_diffusion_pipeline.cc b/mobile_back_tflite/cpp/backend_tflite/stable_diffusion_pipeline.cc index 52d20b570..de7ddba57 100644 --- a/mobile_back_tflite/cpp/backend_tflite/stable_diffusion_pipeline.cc +++ b/mobile_back_tflite/cpp/backend_tflite/stable_diffusion_pipeline.cc @@ -6,7 +6,9 @@ #include #include +#include "embedding_utils.h" #include "flutter/cpp/c/backend_c.h" +#include "flutter/cpp/utils.h" #include "stable_diffusion_invoker.h" #include "tensorflow/lite/c/c_api.h" #include "tensorflow/lite/c/common.h" @@ -58,12 +60,26 @@ mlperf_backend_ptr_t StableDiffusionPipeline::backend_create( // Verify only one instance of the backend exists at any time if (backendExists) { + LOG(ERROR) << "Backend already exists"; return nullptr; } SDBackendData* backend_data = new SDBackendData(); backendExists = true; + // Read seed and num_steps value from SD task settings + backend_data->seed = + mlperf::mobile::GetConfigValue(configs, "stable_diffusion_seed", 0); + if (backend_data->seed == 0) { + LOG(ERROR) << "Cannot get stable_diffusion_seed"; + return nullptr; + } + backend_data->num_steps = + mlperf::mobile::GetConfigValue(configs, "stable_diffusion_num_steps", 0); + if (backend_data->num_steps == 0) { + 
LOG(ERROR) << "Cannot get stable_diffusion_num_steps"; + return nullptr; + } // Load models from the provided directory path std::string text_encoder_path = std::string(model_path) + "/sd_text_encoder_dynamic.tflite"; @@ -95,6 +111,16 @@ mlperf_backend_ptr_t StableDiffusionPipeline::backend_create( return nullptr; } + std::string ts_embedding_path = + std::string(model_path) + "/timestep_embeddings_data.bin.ts"; + if (!EmbeddingManager::getInstance().load_timestep_embeddings( + ts_embedding_path)) { + LOG(ERROR) << "Failed to load timestep embeddings from " + << ts_embedding_path; + backend_delete(backend_data); + return nullptr; + } + return backend_data; } @@ -268,6 +294,10 @@ void StableDiffusionPipeline::backend_convert_inputs( mlperf_backend_ptr_t backend_ptr, int bytes, int width, int height, uint8_t* data) {} +void StableDiffusionPipeline::backend_convert_outputs( + mlperf_backend_ptr_t backend_ptr, int bytes, int width, int height, + uint8_t* data) {} + void* StableDiffusionPipeline::backend_get_buffer(size_t n) { return ::operator new(n); } diff --git a/mobile_back_tflite/cpp/backend_tflite/stable_diffusion_pipeline.h b/mobile_back_tflite/cpp/backend_tflite/stable_diffusion_pipeline.h index adf460530..17070a286 100644 --- a/mobile_back_tflite/cpp/backend_tflite/stable_diffusion_pipeline.h +++ b/mobile_back_tflite/cpp/backend_tflite/stable_diffusion_pipeline.h @@ -91,6 +91,9 @@ class StableDiffusionPipeline : public Pipeline { void backend_convert_inputs(mlperf_backend_ptr_t backend_ptr, int bytes, int width, int height, uint8_t *data) override; + void backend_convert_outputs(mlperf_backend_ptr_t backend_ptr, int bytes, + int width, int height, uint8_t *data) override; + void *backend_get_buffer(size_t n) override; void backend_release_buffer(void *p) override; diff --git a/mobile_back_tflite/cpp/backend_tflite/tflite_c.cc b/mobile_back_tflite/cpp/backend_tflite/tflite_c.cc index dced8bf1d..62a6a18bc 100644 --- 
a/mobile_back_tflite/cpp/backend_tflite/tflite_c.cc +++ b/mobile_back_tflite/cpp/backend_tflite/tflite_c.cc @@ -229,6 +229,12 @@ void mlperf_backend_convert_inputs(mlperf_backend_ptr_t backend_ptr, int bytes, data); } +void mlperf_backend_convert_outputs(mlperf_backend_ptr_t backend_ptr, int bytes, + int width, int height, uint8_t *data) { + return pipeline->backend_convert_outputs(backend_ptr, bytes, width, height, + data); +} + void *mlperf_backend_get_buffer(size_t n) { return pipeline->backend_get_buffer(n); } From f7fc2e81c62a2c89675d90b26f92150f6ee21942 Mon Sep 17 00:00:00 2001 From: Anh Date: Wed, 15 Jan 2025 15:01:58 +0700 Subject: [PATCH 13/18] Update model_path for stable_diffusion (#946) --- .../settings/tflite_settings_pixel6.pbtxt | 12 ++++++++---- .../backend_settings/tflite_settings_android.pbtxt | 8 ++++---- 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/mobile_back_pixel/cpp/backend_tflite/settings/tflite_settings_pixel6.pbtxt b/mobile_back_pixel/cpp/backend_tflite/settings/tflite_settings_pixel6.pbtxt index 4b80a06b1..6037eaa76 100644 --- a/mobile_back_pixel/cpp/backend_tflite/settings/tflite_settings_pixel6.pbtxt +++ b/mobile_back_pixel/cpp/backend_tflite/settings/tflite_settings_pixel6.pbtxt @@ -216,17 +216,21 @@ benchmark_setting { accelerator_name: "npu" accelerator_desc: "NPU" model_file: { - model_path: "local:///mlperf_models/sd_decoder_dynamic.tflite" + model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.1-tflite/sd_decoder_dynamic.tflite" model_checksum: "68acdb62f99e1dc2c7f5db8cdd0e007c" } model_file: { - model_path: "local:///mlperf_models/sd_diffusion_model_dynamic.tflite" - model_checksum: "7cbdadf5282b71561ce5eda75e868c19" + model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.1-tflite/sd_diffusion_model_dynamic.tflite" + model_checksum: "309e95f76ac8de01130942037a28aa8f" } model_file: { - model_path: "local:///mlperf_models/sd_text_encoder_dynamic.tflite" + model_path: 
"https://github.com/mlcommons/mobile_models/releases/download/v4.1-tflite/sd_text_encoder_dynamic.tflite" model_checksum: "b64effb0360f9ea49a117cdaf8a2fbdc" } + model_file: { + model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.1-tflite/timestep_embeddings_data.bin.ts" + model_checksum: "798b772155a69de5df44b304327bb3cc" + } } delegate_selected: "NNAPI" custom_setting { diff --git a/mobile_back_tflite/cpp/backend_tflite/backend_settings/tflite_settings_android.pbtxt b/mobile_back_tflite/cpp/backend_tflite/backend_settings/tflite_settings_android.pbtxt index bfec84e5f..fe8ebf64d 100644 --- a/mobile_back_tflite/cpp/backend_tflite/backend_settings/tflite_settings_android.pbtxt +++ b/mobile_back_tflite/cpp/backend_tflite/backend_settings/tflite_settings_android.pbtxt @@ -215,19 +215,19 @@ benchmark_setting { accelerator_name: "npu" accelerator_desc: "NPU" model_file: { - model_path: "https://github.com/RSMNYS/SD/releases/download/SD/sd_decoder_dynamic.tflite" + model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.1-tflite/sd_decoder_dynamic.tflite" model_checksum: "68acdb62f99e1dc2c7f5db8cdd0e007c" } model_file: { - model_path: "https://github.com/RSMNYS/SD/releases/download/SD/sd_diffusion_model_dynamic.tflite" + model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.1-tflite/sd_diffusion_model_dynamic.tflite" model_checksum: "309e95f76ac8de01130942037a28aa8f" } model_file: { - model_path: "https://github.com/RSMNYS/SD/releases/download/SD/sd_text_encoder_dynamic.tflite" + model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.1-tflite/sd_text_encoder_dynamic.tflite" model_checksum: "b64effb0360f9ea49a117cdaf8a2fbdc" } model_file: { - model_path: "https://github.com/RSMNYS/SD/releases/download/SD/timestep_embeddings_data.bin.ts" + model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.1-tflite/timestep_embeddings_data.bin.ts" model_checksum: 
"798b772155a69de5df44b304327bb3cc" } } From 46c839a114fc8a4e8c749b48ca7ba7c4ff05bb30 Mon Sep 17 00:00:00 2001 From: Anh Date: Wed, 15 Jan 2025 17:10:05 +0700 Subject: [PATCH 14/18] Use CloudFare link for qti model_path --- .../settings/qti_settings_default_gpu.pbtxt | 24 +++++++++---------- .../settings/qti_settings_gpufp16.pbtxt | 10 ++++---- .../settings/qti_settings_sd7cxg3.pbtxt | 12 +++++----- .../settings/qti_settings_sd7g1.pbtxt | 12 +++++----- .../settings/qti_settings_sd7pg2.pbtxt | 12 +++++----- .../settings/qti_settings_sd8cxg3.pbtxt | 12 +++++----- .../settings/qti_settings_sd8g1.pbtxt | 12 +++++----- .../settings/qti_settings_sd8g2.pbtxt | 12 +++++----- .../settings/qti_settings_sd8g3.pbtxt | 12 +++++----- .../settings/qti_settings_sd8pg1.pbtxt | 12 +++++----- .../settings/qti_settings_sdm778.pbtxt | 12 +++++----- .../settings/qti_settings_sdm888.pbtxt | 12 +++++----- .../settings/qti_settings_sm4450.pbtxt | 12 +++++----- .../settings/qti_settings_sm7550.pbtxt | 12 +++++----- .../settings/qti_settings_sm8635.pbtxt | 12 +++++----- .../qti_settings_stablediffusion.pbtxt | 12 +++++----- 16 files changed, 101 insertions(+), 101 deletions(-) diff --git a/mobile_back_qti/cpp/backend_qti/settings/qti_settings_default_gpu.pbtxt b/mobile_back_qti/cpp/backend_qti/settings/qti_settings_default_gpu.pbtxt index 37e41b7cc..7ad7a249a 100644 --- a/mobile_back_qti/cpp/backend_qti/settings/qti_settings_default_gpu.pbtxt +++ b/mobile_back_qti/cpp/backend_qti/settings/qti_settings_default_gpu.pbtxt @@ -32,7 +32,7 @@ benchmark_setting { accelerator_name: "snpe_gpu" accelerator_desc: "GPU" model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.1-qualcomm/mobilenet_v4_htp.dlc" + model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_1/qualcomm/mobilenet_v4_htp.dlc" model_checksum: "56e5039260e20e5c2a0b54cc0fac8098" } } @@ -43,7 +43,7 @@ benchmark_setting { accelerator_desc: "CPU" batch_size: 128 model_file: { - 
model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.1-qualcomm/mobilenet_v4_htp.dlc" + model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_1/qualcomm/mobilenet_v4_htp.dlc" model_checksum: "56e5039260e20e5c2a0b54cc0fac8098" } } @@ -68,7 +68,7 @@ benchmark_setting { accelerator_desc: "GPU" batch_size: 12360 model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.1-qualcomm/mobilenet_v4_htp_batched_4.dlc" + model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_1/qualcomm/mobilenet_v4_htp_batched_4.dlc" model_checksum: "7863deea588936fe6e09565ed47dde95" } } @@ -79,7 +79,7 @@ benchmark_setting { accelerator_desc: "CPU" batch_size: 128 model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.1-qualcomm/mobilenet_v4_htp_batched_4.dlc" + model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_1/qualcomm/mobilenet_v4_htp_batched_4.dlc" model_checksum: "7863deea588936fe6e09565ed47dde95" } } @@ -103,7 +103,7 @@ benchmark_setting { accelerator_name: "snpe_gpu" accelerator_desc: "GPU" model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.1-qualcomm/ssd_mobiledet_qat_htp.dlc" + model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_1/qualcomm/ssd_mobiledet_qat_htp.dlc" model_checksum: "49c6afbfefffb78269fe73a6ee1b4a85" } } @@ -113,7 +113,7 @@ benchmark_setting { accelerator_name: "snpe_cpu" accelerator_desc: "CPU" model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.1-qualcomm/ssd_mobiledet_qat_htp.dlc" + model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_1/qualcomm/ssd_mobiledet_qat_htp.dlc" model_checksum: "49c6afbfefffb78269fe73a6ee1b4a85" } } @@ -141,7 +141,7 @@ benchmark_setting { accelerator_name: "snpe_gpu" accelerator_desc: "GPU" model_file: { - model_path: 
"https://github.com/mlcommons/mobile_models/releases/download/v4.1-qualcomm/mobilebert_quantized_htp.dlc" + model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_1/qualcomm/mobilebert_quantized_htp.dlc" model_checksum: "96d947175f04950898a372890907dda1" } } @@ -151,7 +151,7 @@ benchmark_setting { accelerator_name: "snpe_cpu" accelerator_desc: "CPU" model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.1-qualcomm/mobilebert_quantized_htp.dlc" + model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_1/qualcomm/mobilebert_quantized_htp.dlc" model_checksum: "96d947175f04950898a372890907dda1" } } @@ -179,7 +179,7 @@ benchmark_setting { accelerator_name: "snpe_gpu" accelerator_desc: "GPU" model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.1-qualcomm/mobile_mosaic_htp.dlc" + model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_1/qualcomm/mobile_mosaic_htp.dlc" model_checksum: "3c0dfbacda053773d6afb34503d9991a" } } @@ -189,7 +189,7 @@ benchmark_setting { accelerator_name: "snpe_cpu" accelerator_desc: "CPU" model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.1-qualcomm/mobile_mosaic_htp.dlc" + model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_1/qualcomm/mobile_mosaic_htp.dlc" model_checksum: "3c0dfbacda053773d6afb34503d9991a" } } @@ -217,7 +217,7 @@ benchmark_setting { accelerator_name: "snpe_gpu" accelerator_desc: "GPU" model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.1-qualcomm/snusr_htp.dlc" + model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_1/qualcomm/snusr_htp.dlc" model_checksum: "668da9816073d67972704e237137a50f" } } @@ -227,7 +227,7 @@ benchmark_setting { accelerator_name: "snpe_cpu" accelerator_desc: "CPU" model_file: { - model_path: 
"https://github.com/mlcommons/mobile_models/releases/download/v4.1-qualcomm/snusr_htp.dlc" + model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_1/qualcomm/snusr_htp.dlc" model_checksum: "668da9816073d67972704e237137a50f" } } diff --git a/mobile_back_qti/cpp/backend_qti/settings/qti_settings_gpufp16.pbtxt b/mobile_back_qti/cpp/backend_qti/settings/qti_settings_gpufp16.pbtxt index 5f4328a4b..da75dd996 100644 --- a/mobile_back_qti/cpp/backend_qti/settings/qti_settings_gpufp16.pbtxt +++ b/mobile_back_qti/cpp/backend_qti/settings/qti_settings_gpufp16.pbtxt @@ -32,7 +32,7 @@ benchmark_setting { accelerator_name: "snpe_gpu_fp16" accelerator_desc: "GPU_FP16" model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.1-qualcomm/mobilenet_v4_htp.dlc" + model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_1/qualcomm/mobilenet_v4_htp.dlc" model_checksum: "56e5039260e20e5c2a0b54cc0fac8098" } } @@ -58,7 +58,7 @@ benchmark_setting { accelerator_desc: "GPU_FP16" batch_size: 12360 model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.1-qualcomm/mobilenet_v4_htp_batched_4.dlc" + model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_1/qualcomm/mobilenet_v4_htp_batched_4.dlc" model_checksum: "7863deea588936fe6e09565ed47dde95" } } @@ -94,7 +94,7 @@ benchmark_setting { accelerator_name: "snpe_gpu_fp16" accelerator_desc: "GPU_FP16" model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.1-qualcomm/mobilebert_quantized_htp.dlc" + model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_1/qualcomm/mobilebert_quantized_htp.dlc" model_checksum: "96d947175f04950898a372890907dda1" } } @@ -122,7 +122,7 @@ benchmark_setting { accelerator_name: "snpe_gpu_fp16" accelerator_desc: "GPU_FP16" model_file: { - model_path: 
"https://github.com/mlcommons/mobile_models/releases/download/v4.1-qualcomm/mobile_mosaic_htp.dlc" + model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_1/qualcomm/mobile_mosaic_htp.dlc" model_checksum: "3c0dfbacda053773d6afb34503d9991a" } } @@ -157,7 +157,7 @@ benchmark_setting { accelerator_name: "snpe_gpu_fp16" accelerator_desc: "GPU_FP16" model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.1-qualcomm/snusr_htp.dlc" + model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_1/qualcomm/snusr_htp.dlc" model_checksum: "668da9816073d67972704e237137a50f" } } diff --git a/mobile_back_qti/cpp/backend_qti/settings/qti_settings_sd7cxg3.pbtxt b/mobile_back_qti/cpp/backend_qti/settings/qti_settings_sd7cxg3.pbtxt index 10c926482..e217f39a9 100644 --- a/mobile_back_qti/cpp/backend_qti/settings/qti_settings_sd7cxg3.pbtxt +++ b/mobile_back_qti/cpp/backend_qti/settings/qti_settings_sd7cxg3.pbtxt @@ -40,7 +40,7 @@ benchmark_setting { accelerator_name: "snpe_dsp" accelerator_desc: "HTP" model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.1-qualcomm/mobilenet_v4_htp.dlc" + model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_1/qualcomm/mobilenet_v4_htp.dlc" model_checksum: "56e5039260e20e5c2a0b54cc0fac8098" } } @@ -78,7 +78,7 @@ benchmark_setting { accelerator_desc: "HTP" batch_size: 12288 model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.1-qualcomm/mobilenet_v4_htp_batched_4.dlc" + model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_1/qualcomm/mobilenet_v4_htp_batched_4.dlc" model_checksum: "7863deea588936fe6e09565ed47dde95" } } @@ -107,7 +107,7 @@ benchmark_setting { accelerator_name: "snpe_dsp" accelerator_desc: "HTP" model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.1-qualcomm/ssd_mobiledet_qat_htp.dlc" + model_path: 
"https://mobile.mlcommons-storage.org/app-resources/models/v4_1/qualcomm/ssd_mobiledet_qat_htp.dlc" model_checksum: "49c6afbfefffb78269fe73a6ee1b4a85" } } @@ -139,7 +139,7 @@ benchmark_setting { accelerator_name: "snpe_dsp" accelerator_desc: "DSP" model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.1-qualcomm/mobilebert_quantized_htp.dlc" + model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_1/qualcomm/mobilebert_quantized_htp.dlc" model_checksum: "96d947175f04950898a372890907dda1" } } @@ -171,7 +171,7 @@ benchmark_setting { accelerator_name: "snpe_dsp" accelerator_desc: "DSP" model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.1-qualcomm/mobile_mosaic_htp.dlc" + model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_1/qualcomm/mobile_mosaic_htp.dlc" model_checksum: "3c0dfbacda053773d6afb34503d9991a" } } @@ -199,7 +199,7 @@ benchmark_setting { accelerator_name: "snpe_dsp" accelerator_desc: "DSP" model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.1-qualcomm//snusr_htp.dlc" + model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_1/qualcomm//snusr_htp.dlc" model_checksum: "668da9816073d67972704e237137a50f" } } diff --git a/mobile_back_qti/cpp/backend_qti/settings/qti_settings_sd7g1.pbtxt b/mobile_back_qti/cpp/backend_qti/settings/qti_settings_sd7g1.pbtxt index 03169e306..928b6eea0 100644 --- a/mobile_back_qti/cpp/backend_qti/settings/qti_settings_sd7g1.pbtxt +++ b/mobile_back_qti/cpp/backend_qti/settings/qti_settings_sd7g1.pbtxt @@ -40,7 +40,7 @@ benchmark_setting { accelerator_name: "snpe_dsp" accelerator_desc: "HTP" model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.1-qualcomm/mobilenet_v4_htp.dlc" + model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_1/qualcomm/mobilenet_v4_htp.dlc" model_checksum: 
"56e5039260e20e5c2a0b54cc0fac8098" } } @@ -77,7 +77,7 @@ benchmark_setting { accelerator_desc: "HTP" batch_size: 12288 model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.1-qualcomm/mobilenet_v4_htp_batched_4.dlc" + model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_1/qualcomm/mobilenet_v4_htp_batched_4.dlc" model_checksum: "7863deea588936fe6e09565ed47dde95" } } @@ -105,7 +105,7 @@ benchmark_setting { accelerator_name: "snpe_dsp" accelerator_desc: "HTP" model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.1-qualcomm/ssd_mobiledet_qat_htp.dlc" + model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_1/qualcomm/ssd_mobiledet_qat_htp.dlc" model_checksum: "49c6afbfefffb78269fe73a6ee1b4a85" } } @@ -141,7 +141,7 @@ benchmark_setting { accelerator_name: "snpe_dsp" accelerator_desc: "DSP" model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.1-qualcomm/mobilebert_quantized_htp.dlc" + model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_1/qualcomm/mobilebert_quantized_htp.dlc" model_checksum: "96d947175f04950898a372890907dda1" } } @@ -169,7 +169,7 @@ benchmark_setting { accelerator_name: "snpe_dsp" accelerator_desc: "DSP" model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.1-qualcomm/mobile_mosaic_htp.dlc" + model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_1/qualcomm/mobile_mosaic_htp.dlc" model_checksum: "3c0dfbacda053773d6afb34503d9991a" } } @@ -201,7 +201,7 @@ benchmark_setting { accelerator_name: "snpe_dsp" accelerator_desc: "DSP" model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.1-qualcomm//snusr_htp.dlc" + model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_1/qualcomm//snusr_htp.dlc" model_checksum: "668da9816073d67972704e237137a50f" } } diff --git 
a/mobile_back_qti/cpp/backend_qti/settings/qti_settings_sd7pg2.pbtxt b/mobile_back_qti/cpp/backend_qti/settings/qti_settings_sd7pg2.pbtxt index 5641d1899..71b9e6d29 100644 --- a/mobile_back_qti/cpp/backend_qti/settings/qti_settings_sd7pg2.pbtxt +++ b/mobile_back_qti/cpp/backend_qti/settings/qti_settings_sd7pg2.pbtxt @@ -40,7 +40,7 @@ benchmark_setting { accelerator_name: "snpe_dsp" accelerator_desc: "HTP" model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.1-qualcomm/mobilenet_v4_htp.dlc" + model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_1/qualcomm/mobilenet_v4_htp.dlc" model_checksum: "56e5039260e20e5c2a0b54cc0fac8098" } } @@ -78,7 +78,7 @@ benchmark_setting { accelerator_desc: "HTP" batch_size: 12360 model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.1-qualcomm/mobilenet_v4_htp_batched_4.dlc" + model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_1/qualcomm/mobilenet_v4_htp_batched_4.dlc" model_checksum: "7863deea588936fe6e09565ed47dde95" } } @@ -106,7 +106,7 @@ benchmark_setting { accelerator_name: "snpe_dsp" accelerator_desc: "HTP" model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.1-qualcomm/ssd_mobiledet_qat_htp.dlc" + model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_1/qualcomm/ssd_mobiledet_qat_htp.dlc" model_checksum: "49c6afbfefffb78269fe73a6ee1b4a85" } } @@ -142,7 +142,7 @@ benchmark_setting { accelerator_name: "snpe_dsp" accelerator_desc: "DSP" model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.1-qualcomm/mobilebert_quantized_htp.dlc" + model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_1/qualcomm/mobilebert_quantized_htp.dlc" model_checksum: "96d947175f04950898a372890907dda1" } } @@ -170,7 +170,7 @@ benchmark_setting { accelerator_name: "snpe_dsp" accelerator_desc: "DSP" model_file: { - 
model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.1-qualcomm/mobile_mosaic_htp.dlc" + model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_1/qualcomm/mobile_mosaic_htp.dlc" model_checksum: "3c0dfbacda053773d6afb34503d9991a" } } @@ -202,7 +202,7 @@ benchmark_setting { accelerator_name: "snpe_dsp" accelerator_desc: "DSP" model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.1-qualcomm//snusr_htp.dlc" + model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_1/qualcomm//snusr_htp.dlc" model_checksum: "668da9816073d67972704e237137a50f" } } diff --git a/mobile_back_qti/cpp/backend_qti/settings/qti_settings_sd8cxg3.pbtxt b/mobile_back_qti/cpp/backend_qti/settings/qti_settings_sd8cxg3.pbtxt index c558d3fe9..3514f8ec1 100644 --- a/mobile_back_qti/cpp/backend_qti/settings/qti_settings_sd8cxg3.pbtxt +++ b/mobile_back_qti/cpp/backend_qti/settings/qti_settings_sd8cxg3.pbtxt @@ -44,7 +44,7 @@ benchmark_setting { accelerator_name: "snpe_dsp" accelerator_desc: "HTP" model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.1-qualcomm/mobilenet_v4_htp.dlc" + model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_1/qualcomm/mobilenet_v4_htp.dlc" model_checksum: "56e5039260e20e5c2a0b54cc0fac8098" } } @@ -82,7 +82,7 @@ benchmark_setting { accelerator_desc: "HTP" batch_size: 12360 model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.1-qualcomm/mobilenet_v4_htp_batched_4.dlc" + model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_1/qualcomm/mobilenet_v4_htp_batched_4.dlc" model_checksum: "7863deea588936fe6e09565ed47dde95" } } @@ -110,7 +110,7 @@ benchmark_setting { accelerator_name: "snpe_dsp" accelerator_desc: "HTP" model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.1-qualcomm/ssd_mobiledet_qat_htp.dlc" + model_path: 
"https://mobile.mlcommons-storage.org/app-resources/models/v4_1/qualcomm/ssd_mobiledet_qat_htp.dlc" model_checksum: "49c6afbfefffb78269fe73a6ee1b4a85" } } @@ -142,7 +142,7 @@ benchmark_setting { accelerator_name: "snpe_dsp" accelerator_desc: "DSP" model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.1-qualcomm/mobilebert_quantized_htp.dlc" + model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_1/qualcomm/mobilebert_quantized_htp.dlc" model_checksum: "96d947175f04950898a372890907dda1" } } @@ -174,7 +174,7 @@ benchmark_setting { accelerator_name: "snpe_dsp" accelerator_desc: "DSP" model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.1-qualcomm/mobile_mosaic_htp.dlc" + model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_1/qualcomm/mobile_mosaic_htp.dlc" model_checksum: "3c0dfbacda053773d6afb34503d9991a" } } @@ -202,7 +202,7 @@ benchmark_setting { accelerator_name: "snpe_dsp" accelerator_desc: "DSP" model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.1-qualcomm//snusr_htp.dlc" + model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_1/qualcomm//snusr_htp.dlc" model_checksum: "668da9816073d67972704e237137a50f" } } diff --git a/mobile_back_qti/cpp/backend_qti/settings/qti_settings_sd8g1.pbtxt b/mobile_back_qti/cpp/backend_qti/settings/qti_settings_sd8g1.pbtxt index d746a50c8..824bf9dbf 100644 --- a/mobile_back_qti/cpp/backend_qti/settings/qti_settings_sd8g1.pbtxt +++ b/mobile_back_qti/cpp/backend_qti/settings/qti_settings_sd8g1.pbtxt @@ -40,7 +40,7 @@ benchmark_setting { accelerator_name: "snpe_dsp" accelerator_desc: "HTP" model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.1-qualcomm/mobilenet_v4_htp.dlc" + model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_1/qualcomm/mobilenet_v4_htp.dlc" model_checksum: 
"56e5039260e20e5c2a0b54cc0fac8098" } } @@ -78,7 +78,7 @@ benchmark_setting { accelerator_desc: "HTP" batch_size: 12360 model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.1-qualcomm/mobilenet_v4_htp_batched_4.dlc" + model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_1/qualcomm/mobilenet_v4_htp_batched_4.dlc" model_checksum: "7863deea588936fe6e09565ed47dde95" } } @@ -106,7 +106,7 @@ benchmark_setting { accelerator_name: "snpe_dsp" accelerator_desc: "HTP" model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.1-qualcomm/ssd_mobiledet_qat_htp.dlc" + model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_1/qualcomm/ssd_mobiledet_qat_htp.dlc" model_checksum: "49c6afbfefffb78269fe73a6ee1b4a85" } } @@ -142,7 +142,7 @@ benchmark_setting { accelerator_name: "snpe_dsp" accelerator_desc: "DSP" model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.1-qualcomm/mobilebert_quantized_htp.dlc" + model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_1/qualcomm/mobilebert_quantized_htp.dlc" model_checksum: "96d947175f04950898a372890907dda1" } } @@ -170,7 +170,7 @@ benchmark_setting { accelerator_name: "snpe_dsp" accelerator_desc: "DSP" model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.1-qualcomm/mobile_mosaic_htp.dlc" + model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_1/qualcomm/mobile_mosaic_htp.dlc" model_checksum: "3c0dfbacda053773d6afb34503d9991a" } } @@ -202,7 +202,7 @@ benchmark_setting { accelerator_name: "snpe_dsp" accelerator_desc: "DSP" model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.1-qualcomm//snusr_htp.dlc" + model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_1/qualcomm//snusr_htp.dlc" model_checksum: "668da9816073d67972704e237137a50f" } } diff --git 
a/mobile_back_qti/cpp/backend_qti/settings/qti_settings_sd8g2.pbtxt b/mobile_back_qti/cpp/backend_qti/settings/qti_settings_sd8g2.pbtxt index 3eae2f7da..5c19b71e5 100644 --- a/mobile_back_qti/cpp/backend_qti/settings/qti_settings_sd8g2.pbtxt +++ b/mobile_back_qti/cpp/backend_qti/settings/qti_settings_sd8g2.pbtxt @@ -40,7 +40,7 @@ benchmark_setting { accelerator_name: "snpe_dsp" accelerator_desc: "HTP" model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.1-qualcomm/mobilenet_v4_htp.dlc" + model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_1/qualcomm/mobilenet_v4_htp.dlc" model_checksum: "56e5039260e20e5c2a0b54cc0fac8098" } } @@ -78,7 +78,7 @@ benchmark_setting { accelerator_desc: "HTP" batch_size: 12360 model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.1-qualcomm/mobilenet_v4_htp_batched_4.dlc" + model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_1/qualcomm/mobilenet_v4_htp_batched_4.dlc" model_checksum: "7863deea588936fe6e09565ed47dde95" } } @@ -106,7 +106,7 @@ benchmark_setting { accelerator_name: "snpe_dsp" accelerator_desc: "HTP" model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.1-qualcomm/ssd_mobiledet_qat_htp.dlc" + model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_1/qualcomm/ssd_mobiledet_qat_htp.dlc" model_checksum: "49c6afbfefffb78269fe73a6ee1b4a85" } } @@ -142,7 +142,7 @@ benchmark_setting { accelerator_name: "snpe_dsp" accelerator_desc: "DSP" model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.1-qualcomm/mobilebert_quantized_htp.dlc" + model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_1/qualcomm/mobilebert_quantized_htp.dlc" model_checksum: "96d947175f04950898a372890907dda1" } } @@ -170,7 +170,7 @@ benchmark_setting { accelerator_name: "snpe_dsp" accelerator_desc: "DSP" model_file: { - model_path: 
"https://github.com/mlcommons/mobile_models/releases/download/v4.1-qualcomm/mobile_mosaic_htp.dlc" + model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_1/qualcomm/mobile_mosaic_htp.dlc" model_checksum: "3c0dfbacda053773d6afb34503d9991a" } } @@ -202,7 +202,7 @@ benchmark_setting { accelerator_name: "snpe_dsp" accelerator_desc: "DSP" model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.1-qualcomm//snusr_htp.dlc" + model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_1/qualcomm//snusr_htp.dlc" model_checksum: "668da9816073d67972704e237137a50f" } } diff --git a/mobile_back_qti/cpp/backend_qti/settings/qti_settings_sd8g3.pbtxt b/mobile_back_qti/cpp/backend_qti/settings/qti_settings_sd8g3.pbtxt index 173631035..ec0db6041 100644 --- a/mobile_back_qti/cpp/backend_qti/settings/qti_settings_sd8g3.pbtxt +++ b/mobile_back_qti/cpp/backend_qti/settings/qti_settings_sd8g3.pbtxt @@ -40,7 +40,7 @@ benchmark_setting { accelerator_name: "snpe_dsp" accelerator_desc: "HTP" model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.1-qualcomm/mobilenet_v4_htp.dlc" + model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_1/qualcomm/mobilenet_v4_htp.dlc" model_checksum: "56e5039260e20e5c2a0b54cc0fac8098" } } @@ -78,7 +78,7 @@ benchmark_setting { accelerator_desc: "HTP" batch_size: 12360 model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.1-qualcomm/mobilenet_v4_htp_batched_4.dlc" + model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_1/qualcomm/mobilenet_v4_htp_batched_4.dlc" model_checksum: "7863deea588936fe6e09565ed47dde95" } } @@ -106,7 +106,7 @@ benchmark_setting { accelerator_name: "snpe_dsp" accelerator_desc: "HTP" model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.1-qualcomm/ssd_mobiledet_qat_htp.dlc" + model_path: 
"https://mobile.mlcommons-storage.org/app-resources/models/v4_1/qualcomm/ssd_mobiledet_qat_htp.dlc" model_checksum: "" } } @@ -143,7 +143,7 @@ benchmark_setting { accelerator_name: "snpe_dsp" accelerator_desc: "DSP" model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.1-qualcomm/mobilebert_quantized_htp_O2.dlc" + model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_1/qualcomm/mobilebert_quantized_htp_O2.dlc" model_checksum: "f8631dbd69819438d6b317c204fa80d7" } } @@ -175,7 +175,7 @@ benchmark_setting { accelerator_name: "snpe_dsp" accelerator_desc: "DSP" model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.1-qualcomm/mobile_mosaic_htp.dlc" + model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_1/qualcomm/mobile_mosaic_htp.dlc" model_checksum: "3c0dfbacda053773d6afb34503d9991a" } } @@ -211,7 +211,7 @@ benchmark_setting { accelerator_name: "snpe_dsp" accelerator_desc: "DSP" model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.1-qualcomm/snusr_htp_O2.dlc" + model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_1/qualcomm/snusr_htp_O2.dlc" model_checksum: "76b33f02ebfa6294a0e973aaf91116fa" } } diff --git a/mobile_back_qti/cpp/backend_qti/settings/qti_settings_sd8pg1.pbtxt b/mobile_back_qti/cpp/backend_qti/settings/qti_settings_sd8pg1.pbtxt index 86775bbfb..58609c39d 100644 --- a/mobile_back_qti/cpp/backend_qti/settings/qti_settings_sd8pg1.pbtxt +++ b/mobile_back_qti/cpp/backend_qti/settings/qti_settings_sd8pg1.pbtxt @@ -40,7 +40,7 @@ benchmark_setting { accelerator_name: "snpe_dsp" accelerator_desc: "HTP" model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.1-qualcomm/mobilenet_v4_htp.dlc" + model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_1/qualcomm/mobilenet_v4_htp.dlc" model_checksum: "56e5039260e20e5c2a0b54cc0fac8098" } } 
@@ -78,7 +78,7 @@ benchmark_setting { accelerator_desc: "HTP" batch_size: 12360 model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.1-qualcomm/mobilenet_v4_htp_batched_4.dlc" + model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_1/qualcomm/mobilenet_v4_htp_batched_4.dlc" model_checksum: "7863deea588936fe6e09565ed47dde95" } } @@ -106,7 +106,7 @@ benchmark_setting { accelerator_name: "snpe_dsp" accelerator_desc: "HTP" model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.1-qualcomm/ssd_mobiledet_qat_htp.dlc" + model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_1/qualcomm/ssd_mobiledet_qat_htp.dlc" model_checksum: "49c6afbfefffb78269fe73a6ee1b4a85" } } @@ -142,7 +142,7 @@ benchmark_setting { accelerator_name: "snpe_dsp" accelerator_desc: "DSP" model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.1-qualcomm/mobilebert_quantized_htp.dlc" + model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_1/qualcomm/mobilebert_quantized_htp.dlc" model_checksum: "96d947175f04950898a372890907dda1" } } @@ -174,7 +174,7 @@ benchmark_setting { accelerator_name: "snpe_dsp" accelerator_desc: "DSP" model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.1-qualcomm/mobile_mosaic_htp.dlc" + model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_1/qualcomm/mobile_mosaic_htp.dlc" model_checksum: "3c0dfbacda053773d6afb34503d9991a" } } @@ -210,7 +210,7 @@ benchmark_setting { accelerator_name: "snpe_dsp" accelerator_desc: "DSP" model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.1-qualcomm//snusr_htp.dlc" + model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_1/qualcomm//snusr_htp.dlc" model_checksum: "668da9816073d67972704e237137a50f" } } diff --git 
a/mobile_back_qti/cpp/backend_qti/settings/qti_settings_sdm778.pbtxt b/mobile_back_qti/cpp/backend_qti/settings/qti_settings_sdm778.pbtxt index 2652a864b..e280e0158 100644 --- a/mobile_back_qti/cpp/backend_qti/settings/qti_settings_sdm778.pbtxt +++ b/mobile_back_qti/cpp/backend_qti/settings/qti_settings_sdm778.pbtxt @@ -40,7 +40,7 @@ benchmark_setting { accelerator_name: "snpe_dsp" accelerator_desc: "HTP" model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.1-qualcomm/mobilenet_v4_htp.dlc" + model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_1/qualcomm/mobilenet_v4_htp.dlc" model_checksum: "56e5039260e20e5c2a0b54cc0fac8098" } } @@ -78,7 +78,7 @@ benchmark_setting { accelerator_desc: "HTP" batch_size: 12360 model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.1-qualcomm/mobilenet_v4_htp_batched_4.dlc" + model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_1/qualcomm/mobilenet_v4_htp_batched_4.dlc" model_checksum: "7863deea588936fe6e09565ed47dde95" } } @@ -106,7 +106,7 @@ benchmark_setting { accelerator_name: "snpe_dsp" accelerator_desc: "HTP" model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.1-qualcomm/ssd_mobiledet_qat_htp.dlc" + model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_1/qualcomm/ssd_mobiledet_qat_htp.dlc" model_checksum: "49c6afbfefffb78269fe73a6ee1b4a85" } } @@ -142,7 +142,7 @@ benchmark_setting { accelerator_name: "snpe_dsp" accelerator_desc: "DSP" model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.1-qualcomm/mobilebert_quantized_htp.dlc" + model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_1/qualcomm/mobilebert_quantized_htp.dlc" model_checksum: "96d947175f04950898a372890907dda1" } } @@ -174,7 +174,7 @@ benchmark_setting { accelerator_name: "snpe_dsp" accelerator_desc: "DSP" model_file: { - 
model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.1-qualcomm/mobile_mosaic_htp.dlc" + model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_1/qualcomm/mobile_mosaic_htp.dlc" model_checksum: "3c0dfbacda053773d6afb34503d9991a" } } @@ -210,7 +210,7 @@ benchmark_setting { accelerator_name: "snpe_dsp" accelerator_desc: "DSP" model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.1-qualcomm//snusr_htp.dlc" + model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_1/qualcomm//snusr_htp.dlc" model_checksum: "668da9816073d67972704e237137a50f" } } diff --git a/mobile_back_qti/cpp/backend_qti/settings/qti_settings_sdm888.pbtxt b/mobile_back_qti/cpp/backend_qti/settings/qti_settings_sdm888.pbtxt index 5fa0435d9..5aa7db3cf 100644 --- a/mobile_back_qti/cpp/backend_qti/settings/qti_settings_sdm888.pbtxt +++ b/mobile_back_qti/cpp/backend_qti/settings/qti_settings_sdm888.pbtxt @@ -40,7 +40,7 @@ benchmark_setting { accelerator_name: "snpe_dsp" accelerator_desc: "HTP" model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.1-qualcomm/mobilenet_v4_htp.dlc" + model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_1/qualcomm/mobilenet_v4_htp.dlc" model_checksum: "56e5039260e20e5c2a0b54cc0fac8098" } } @@ -78,7 +78,7 @@ benchmark_setting { accelerator_desc: "HTP" batch_size: 12360 model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.1-qualcomm/mobilenet_v4_htp_batched_4.dlc" + model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_1/qualcomm/mobilenet_v4_htp_batched_4.dlc" model_checksum: "7863deea588936fe6e09565ed47dde95" } } @@ -106,7 +106,7 @@ benchmark_setting { accelerator_name: "snpe_dsp" accelerator_desc: "HTP" model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.1-qualcomm/ssd_mobiledet_qat_htp.dlc" + model_path: 
"https://mobile.mlcommons-storage.org/app-resources/models/v4_1/qualcomm/ssd_mobiledet_qat_htp.dlc" model_checksum: "49c6afbfefffb78269fe73a6ee1b4a85" } } @@ -142,7 +142,7 @@ benchmark_setting { accelerator_name: "snpe_dsp" accelerator_desc: "DSP" model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.1-qualcomm/mobilebert_quantized_htp.dlc" + model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_1/qualcomm/mobilebert_quantized_htp.dlc" model_checksum: "96d947175f04950898a372890907dda1" } } @@ -174,7 +174,7 @@ benchmark_setting { accelerator_name: "snpe_dsp" accelerator_desc: "DSP" model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.1-qualcomm/mobile_mosaic_htp.dlc" + model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_1/qualcomm/mobile_mosaic_htp.dlc" model_checksum: "3c0dfbacda053773d6afb34503d9991a" } } @@ -210,7 +210,7 @@ benchmark_setting { accelerator_name: "snpe_dsp" accelerator_desc: "DSP" model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.1-qualcomm//snusr_htp.dlc" + model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_1/qualcomm//snusr_htp.dlc" model_checksum: "668da9816073d67972704e237137a50f" } } diff --git a/mobile_back_qti/cpp/backend_qti/settings/qti_settings_sm4450.pbtxt b/mobile_back_qti/cpp/backend_qti/settings/qti_settings_sm4450.pbtxt index 328f8a429..61dbf92cd 100644 --- a/mobile_back_qti/cpp/backend_qti/settings/qti_settings_sm4450.pbtxt +++ b/mobile_back_qti/cpp/backend_qti/settings/qti_settings_sm4450.pbtxt @@ -40,7 +40,7 @@ benchmark_setting { accelerator_name: "snpe_cpu" accelerator_desc: "CPU" model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.1-qualcomm/mobilenet_v4_htp.dlc" + model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_1/qualcomm/mobilenet_v4_htp.dlc" model_checksum: 
"56e5039260e20e5c2a0b54cc0fac8098" } } @@ -77,7 +77,7 @@ benchmark_setting { accelerator_desc: "CPU" batch_size: 12360 model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.1-qualcomm/mobilenet_v4_htp_batched_4.dlc" + model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_1/qualcomm/mobilenet_v4_htp_batched_4.dlc" model_checksum: "7863deea588936fe6e09565ed47dde95" } } @@ -109,7 +109,7 @@ benchmark_setting { accelerator_name: "snpe_cpu" accelerator_desc: "CPU" model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.1-qualcomm/ssd_mobiledet_qat_htp.dlc" + model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_1/qualcomm/ssd_mobiledet_qat_htp.dlc" model_checksum: "49c6afbfefffb78269fe73a6ee1b4a85" } } @@ -145,7 +145,7 @@ benchmark_setting { accelerator_name: "snpe_cpu" accelerator_desc: "CPU" model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.1-qualcomm/mobilebert_quantized_htp.dlc" + model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_1/qualcomm/mobilebert_quantized_htp.dlc" model_checksum: "96d947175f04950898a372890907dda1" } } @@ -181,7 +181,7 @@ benchmark_setting { value: "true" } model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.1-qualcomm/mobile_mosaic_htp.dlc" + model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_1/qualcomm/mobile_mosaic_htp.dlc" model_checksum: "3c0dfbacda053773d6afb34503d9991a" } } @@ -225,7 +225,7 @@ benchmark_setting { accelerator_name: "snpe_cpu" accelerator_desc: "CPU" model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.1-qualcomm/snusr_htp.dlc" + model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_1/qualcomm/snusr_htp.dlc" model_checksum: "668da9816073d67972704e237137a50f" } } diff --git 
a/mobile_back_qti/cpp/backend_qti/settings/qti_settings_sm7550.pbtxt b/mobile_back_qti/cpp/backend_qti/settings/qti_settings_sm7550.pbtxt index 7691804b8..57e24295f 100644 --- a/mobile_back_qti/cpp/backend_qti/settings/qti_settings_sm7550.pbtxt +++ b/mobile_back_qti/cpp/backend_qti/settings/qti_settings_sm7550.pbtxt @@ -40,7 +40,7 @@ benchmark_setting { accelerator_name: "snpe_dsp" accelerator_desc: "HTP" model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.1-qualcomm/mobilenet_v4_htp.dlc" + model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_1/qualcomm/mobilenet_v4_htp.dlc" model_checksum: "56e5039260e20e5c2a0b54cc0fac8098" } } @@ -78,7 +78,7 @@ benchmark_setting { accelerator_desc: "HTP" batch_size: 12360 model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.1-qualcomm/mobilenet_v4_htp_batched_4_O2.dlc" + model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_1/qualcomm/mobilenet_v4_htp_batched_4_O2.dlc" model_checksum: "80ba82f2a628ab712d812d06524d2bd8" } } @@ -110,7 +110,7 @@ benchmark_setting { accelerator_name: "snpe_dsp" accelerator_desc: "HTP" model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.1-qualcomm/ssd_mobiledet_qat_htp.dlc" + model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_1/qualcomm/ssd_mobiledet_qat_htp.dlc" model_checksum: "49c6afbfefffb78269fe73a6ee1b4a85" } } @@ -146,7 +146,7 @@ benchmark_setting { accelerator_name: "snpe_dsp" accelerator_desc: "DSP" model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.1-qualcomm/mobilebert_quantized_htp.dlc" + model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_1/qualcomm/mobilebert_quantized_htp.dlc" model_checksum: "96d947175f04950898a372890907dda1" } } @@ -178,7 +178,7 @@ benchmark_setting { accelerator_name: "snpe_dsp" accelerator_desc: "DSP" model_file: { - 
model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.1-qualcomm/mobile_mosaic_htp.dlc" + model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_1/qualcomm/mobile_mosaic_htp.dlc" model_checksum: "3c0dfbacda053773d6afb34503d9991a" } } @@ -214,7 +214,7 @@ benchmark_setting { accelerator_name: "snpe_dsp" accelerator_desc: "DSP" model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.1-qualcomm//snusr_htp.dlc" + model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_1/qualcomm//snusr_htp.dlc" model_checksum: "668da9816073d67972704e237137a50f" } } diff --git a/mobile_back_qti/cpp/backend_qti/settings/qti_settings_sm8635.pbtxt b/mobile_back_qti/cpp/backend_qti/settings/qti_settings_sm8635.pbtxt index d0793c4ba..7571b8942 100644 --- a/mobile_back_qti/cpp/backend_qti/settings/qti_settings_sm8635.pbtxt +++ b/mobile_back_qti/cpp/backend_qti/settings/qti_settings_sm8635.pbtxt @@ -40,7 +40,7 @@ benchmark_setting { accelerator_name: "snpe_dsp" accelerator_desc: "HTP" model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.1-qualcomm/mobilenet_v4_htp.dlc" + model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_1/qualcomm/mobilenet_v4_htp.dlc" model_checksum: "56e5039260e20e5c2a0b54cc0fac8098" } } @@ -78,7 +78,7 @@ benchmark_setting { accelerator_desc: "HTP" batch_size: 12360 model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.1-qualcomm/mobilenet_v4_htp_batched_4.dlc" + model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_1/qualcomm/mobilenet_v4_htp_batched_4.dlc" model_checksum: "7863deea588936fe6e09565ed47dde95" } } @@ -110,7 +110,7 @@ benchmark_setting { accelerator_name: "snpe_dsp" accelerator_desc: "HTP" model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.1-qualcomm/ssd_mobiledet_qat_htp.dlc" + model_path: 
"https://mobile.mlcommons-storage.org/app-resources/models/v4_1/qualcomm/ssd_mobiledet_qat_htp.dlc" model_checksum: "49c6afbfefffb78269fe73a6ee1b4a85" } } @@ -146,7 +146,7 @@ benchmark_setting { accelerator_name: "snpe_dsp" accelerator_desc: "DSP" model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.1-qualcomm/mobilebert_quantized_htp.dlc" + model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_1/qualcomm/mobilebert_quantized_htp.dlc" model_checksum: "96d947175f04950898a372890907dda1" } } @@ -178,7 +178,7 @@ benchmark_setting { accelerator_name: "snpe_dsp" accelerator_desc: "DSP" model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.1-qualcomm/mobile_mosaic_htp.dlc" + model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_1/qualcomm/mobile_mosaic_htp.dlc" model_checksum: "3c0dfbacda053773d6afb34503d9991a" } } @@ -210,7 +210,7 @@ benchmark_setting { accelerator_name: "snpe_dsp" accelerator_desc: "DSP" model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.1-qualcomm/snusr_htp.dlc" + model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_1/qualcomm/snusr_htp.dlc" model_checksum: "668da9816073d67972704e237137a50f" } } diff --git a/mobile_back_qti/cpp/backend_qti/settings/qti_settings_stablediffusion.pbtxt b/mobile_back_qti/cpp/backend_qti/settings/qti_settings_stablediffusion.pbtxt index 7014deb42..9430e5cd1 100644 --- a/mobile_back_qti/cpp/backend_qti/settings/qti_settings_stablediffusion.pbtxt +++ b/mobile_back_qti/cpp/backend_qti/settings/qti_settings_stablediffusion.pbtxt @@ -18,27 +18,27 @@ benchmark_setting { accelerator_name: "snpe_dsp" accelerator_desc: "DSP" model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.1-qualcomm/betas.bin" + model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_1/qualcomm/betas.bin" model_checksum: 
"09d2e4306d319caf1b34e6afb5c63c22" } model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.1-qualcomm/lambdas.bin" + model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_1/qualcomm/lambdas.bin" model_checksum: "c7179725ec31a6e2c7daf008a5e1ff23" } model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.1-qualcomm/sd_precompute_data.tar" + model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_1/qualcomm/sd_precompute_data.tar" model_checksum: "beb7fe2da40042fb585bb8cb95d86b4d" } model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.1-qualcomm/text_encoder.serialized.bin" + model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_1/qualcomm/text_encoder.serialized.bin" model_checksum: "6da7b95fa467e99af2b9f80c7afe3734" } model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.1-qualcomm/unet.serialized.bin" + model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_1/qualcomm/unet.serialized.bin" model_checksum: "3b504b92cbd788d713ca9cfc5b19d596" } model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.1-qualcomm/vae_decoder.serialized.bin" + model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_1/qualcomm/vae_decoder.serialized.bin" model_checksum: "c7762e64c2596abe7f16614709cc5482" } } From 6648c56ca84ff778b24bd2c9214e10472930a800 Mon Sep 17 00:00:00 2001 From: Anh Date: Wed, 15 Jan 2025 17:14:03 +0700 Subject: [PATCH 15/18] Use CloudFare link for tflite model_path --- .../backend_tflite/settings/tflite_settings_pixel6.pbtxt | 8 ++++---- .../backend_settings/tflite_settings_android.pbtxt | 8 ++++---- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/mobile_back_pixel/cpp/backend_tflite/settings/tflite_settings_pixel6.pbtxt 
b/mobile_back_pixel/cpp/backend_tflite/settings/tflite_settings_pixel6.pbtxt index b2e3f4ae7..23cc5eff7 100644 --- a/mobile_back_pixel/cpp/backend_tflite/settings/tflite_settings_pixel6.pbtxt +++ b/mobile_back_pixel/cpp/backend_tflite/settings/tflite_settings_pixel6.pbtxt @@ -216,19 +216,19 @@ benchmark_setting { accelerator_name: "npu" accelerator_desc: "NPU" model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.1-tflite/sd_decoder_dynamic.tflite" + model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_1/tflite/sd_decoder_dynamic.tflite" model_checksum: "68acdb62f99e1dc2c7f5db8cdd0e007c" } model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.1-tflite/sd_diffusion_model_dynamic.tflite" + model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_1/tflite/sd_diffusion_model_dynamic.tflite" model_checksum: "309e95f76ac8de01130942037a28aa8f" } model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.1-tflite/sd_text_encoder_dynamic.tflite" + model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_1/tflite/sd_text_encoder_dynamic.tflite" model_checksum: "b64effb0360f9ea49a117cdaf8a2fbdc" } model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.1-tflite/timestep_embeddings_data.bin.ts" + model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_1/tflite/timestep_embeddings_data.bin.ts" model_checksum: "798b772155a69de5df44b304327bb3cc" } } diff --git a/mobile_back_tflite/cpp/backend_tflite/backend_settings/tflite_settings_android.pbtxt b/mobile_back_tflite/cpp/backend_tflite/backend_settings/tflite_settings_android.pbtxt index c6b27399e..a94aa8749 100644 --- a/mobile_back_tflite/cpp/backend_tflite/backend_settings/tflite_settings_android.pbtxt +++ b/mobile_back_tflite/cpp/backend_tflite/backend_settings/tflite_settings_android.pbtxt @@ -215,19 +215,19 
@@ benchmark_setting { accelerator_name: "npu" accelerator_desc: "NPU" model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.1-tflite/sd_decoder_dynamic.tflite" + model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_1/tflite/sd_decoder_dynamic.tflite" model_checksum: "68acdb62f99e1dc2c7f5db8cdd0e007c" } model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.1-tflite/sd_diffusion_model_dynamic.tflite" + model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_1/tflite/sd_diffusion_model_dynamic.tflite" model_checksum: "309e95f76ac8de01130942037a28aa8f" } model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.1-tflite/sd_text_encoder_dynamic.tflite" + model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_1/tflite/sd_text_encoder_dynamic.tflite" model_checksum: "b64effb0360f9ea49a117cdaf8a2fbdc" } model_file: { - model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.1-tflite/timestep_embeddings_data.bin.ts" + model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_1/tflite/timestep_embeddings_data.bin.ts" model_checksum: "798b772155a69de5df44b304327bb3cc" } } From dcd283f7f1ad3044483d70ee61048952bde16e65 Mon Sep 17 00:00:00 2001 From: Anh Date: Wed, 15 Jan 2025 17:29:58 +0700 Subject: [PATCH 16/18] Fix build error --- flutter/lib/benchmark/benchmark.dart | 1 + .../cpp/backend_tflite/stable_diffusion_invoker.cc | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/flutter/lib/benchmark/benchmark.dart b/flutter/lib/benchmark/benchmark.dart index 7e0b98a1f..38b1867f3 100644 --- a/flutter/lib/benchmark/benchmark.dart +++ b/flutter/lib/benchmark/benchmark.dart @@ -1,5 +1,6 @@ import 'package:collection/collection.dart'; +import 'package:mlperfbench/app_constants.dart'; import 'package:mlperfbench/backend/bridge/run_settings.dart'; import 
'package:mlperfbench/backend/loadgen_info.dart'; import 'package:mlperfbench/benchmark/info.dart'; diff --git a/mobile_back_tflite/cpp/backend_tflite/stable_diffusion_invoker.cc b/mobile_back_tflite/cpp/backend_tflite/stable_diffusion_invoker.cc index 9d76f7b5d..fd944d7f1 100644 --- a/mobile_back_tflite/cpp/backend_tflite/stable_diffusion_invoker.cc +++ b/mobile_back_tflite/cpp/backend_tflite/stable_diffusion_invoker.cc @@ -154,7 +154,7 @@ std::vector StableDiffusionInvoker::diffusion_process( latent.assign(std::begin(l), std::end(l)); } - std::cout << "\nDiffusion process completed" << std::endl; + LOG(INFO) << "Diffusion process completed!"; return latent; } From 67f24a442e659ff54ad9fd59b1889c19e3cf799c Mon Sep 17 00:00:00 2001 From: Anh Date: Wed, 15 Jan 2025 20:09:48 +0700 Subject: [PATCH 17/18] Add missing model_checksum --- .../cpp/backend_qti/settings/qti_settings_sd8g3.pbtxt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mobile_back_qti/cpp/backend_qti/settings/qti_settings_sd8g3.pbtxt b/mobile_back_qti/cpp/backend_qti/settings/qti_settings_sd8g3.pbtxt index ec0db6041..1794863d9 100644 --- a/mobile_back_qti/cpp/backend_qti/settings/qti_settings_sd8g3.pbtxt +++ b/mobile_back_qti/cpp/backend_qti/settings/qti_settings_sd8g3.pbtxt @@ -107,7 +107,7 @@ benchmark_setting { accelerator_desc: "HTP" model_file: { model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_1/qualcomm/ssd_mobiledet_qat_htp.dlc" - model_checksum: "" + model_checksum: "49c6afbfefffb78269fe73a6ee1b4a85" } } single_stream_expected_latency_ns: 500000 From ef6e4ebe27b799b0ecefbcd1f440f182af656d33 Mon Sep 17 00:00:00 2001 From: Anh Date: Thu, 16 Jan 2025 07:21:47 +0700 Subject: [PATCH 18/18] Update expected_throughput --- flutter/integration_test/expected_throughput.dart | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/flutter/integration_test/expected_throughput.dart b/flutter/integration_test/expected_throughput.dart index 
f2e7e40d8..f3925faa7 100644 --- a/flutter/integration_test/expected_throughput.dart +++ b/flutter/integration_test/expected_throughput.dart @@ -88,11 +88,11 @@ const Map> _objectDetection = { }, _kQtiBackend: { _kS22Ultra: Interval(min: 700, max: 1400), - _kS24Ultra: Interval(min: 1800, max: 2500), + _kS24Ultra: Interval(min: 1800, max: 2700), }, _kMediatekBackend: { _kDN2103: Interval(min: 120, max: 210), - _kS10Plus: Interval(min: 1200, max: 1800) + _kS10Plus: Interval(min: 1200, max: 2000) }, _kSamsungBackend: { _kS24: Interval(min: 1400, max: 2400), @@ -123,7 +123,7 @@ const Map> _imageSegmentationV2 = { }, _kMediatekBackend: { _kDN2103: Interval(min: 45, max: 70), - _kS10Plus: Interval(min: 800, max: 1400) + _kS10Plus: Interval(min: 800, max: 1500) }, _kSamsungBackend: { _kS24: Interval(min: 800, max: 1500), @@ -150,7 +150,7 @@ const Map> _naturalLanguageProcessing = { }, _kQtiBackend: { _kS22Ultra: Interval(min: 100, max: 200), - _kS24Ultra: Interval(min: 250, max: 450), + _kS24Ultra: Interval(min: 250, max: 460), }, _kMediatekBackend: { _kDN2103: Interval(min: 1, max: 6), @@ -185,7 +185,7 @@ const Map> _superResolution = { }, _kMediatekBackend: { _kDN2103: Interval(min: 5, max: 15), - _kS10Plus: Interval(min: 150, max: 280) + _kS10Plus: Interval(min: 150, max: 300) }, _kSamsungBackend: { _kS24: Interval(min: 90, max: 180), @@ -244,7 +244,7 @@ const Map> _imageClassificationOfflineV2 = { }, _kQtiBackend: { _kS22Ultra: Interval(min: 250, max: 450), - _kS24Ultra: Interval(min: 900, max: 1600), + _kS24Ultra: Interval(min: 900, max: 1700), }, _kMediatekBackend: { _kDN2103: Interval(min: 4.5, max: 90),