From 48654bdff2bef8e5061056950bccbe9d04a28398 Mon Sep 17 00:00:00 2001 From: RSMNYS Date: Tue, 14 Jan 2025 09:02:25 +0200 Subject: [PATCH] use time step embedding from file (#928) * feat: pass task-specific config to backend (#922) * Add TaskConfig.CustomConfig and pass them to backend * Add CustomConfig for main.cc * Use seed and num_steps from CustomConfig for TFLite backend * Replace std::cout with LOG(INFO) * Format files * feat: add ConvertOutputs() API (#927) * Add ConvertOutputs() API * Add ConvertOutputs() for mobile_back_tflite * Set minimum macos version * Set minimum macos version to 13.1 * Update _kIphoneOnGitHubAction * feat: timestamp-embedding-parser (WIP) * disabled bitcode to be able to compile with new Xcode * chore: formatting * refactor: use custom setting in Core ML backend to detect NCHW input. (#924) * Add GetConfigValue() * Add custom setting data-format for Core ML * Use GetConfigValue() to get stable_diffusion_seed and stable_diffusion_num_steps * fix: resolve crash due to permission denied on Android Play Store version (#930) * Set android:extractNativeLibs="true" * Set android.bundle.enableUncompressedNativeLibs=false * chore: increase Android minSdkVersion from 21 to 30 (#859) Increase minSdkVersion to 30 * feat: finalized SD pipeline to use embedding from the binary file. 
* refactor: updated embedding_utils to parse pkl file * chore: linting * fix: fixed lint issue in neuron * chore: BUILD cleanup * chore: cleanup * chore: ignore .fvm * chore: updated model paths and checksums for stable diffusion benchmark: tflite_settings_android.pbtxt * chore: reverse timesteps and embeddings to support descending order of the timesteps and embeddings * chore: fixed formatting * chore: added links to the sd models and timestep embeddings file * chore: add the proper name for the embedding_timesteps file * chore: added missed declaration for backend_convert_outputs * chore: clang formatting * chore: added missed files * chore: fixed build file for the pixel backend * chore: bazel formatting * fix: added missed interface implementation for pixel * chore: clang formatting --------- Co-authored-by: Anh --- .bazelrc | 1 - .gitignore | 1 + WORKSPACE | 4 +- flutter/android/app/build.gradle | 2 +- .../android/app/src/main/AndroidManifest.xml | 3 +- flutter/android/gradle.properties | 1 + flutter/assets/tasks.pbtxt | 8 ++ flutter/cpp/backend.h | 4 + flutter/cpp/backends/external.cc | 4 +- flutter/cpp/backends/external.h | 12 +++ flutter/cpp/binary/main.cc | 10 +- flutter/cpp/c/backend_c.h | 2 + flutter/cpp/flutter/BUILD | 1 + flutter/cpp/proto/mlperf_task.proto | 14 ++- flutter/cpp/proto/test.cc | 4 +- flutter/cpp/utils.cc | 91 +++++++++++++++++-- flutter/cpp/utils.h | 9 +- flutter/lib/benchmark/benchmark.dart | 5 + mobile_back_apple/cpp/backend_coreml/BUILD | 1 + .../cpp/backend_coreml/coreml_settings.pbtxt | 34 ++++++- mobile_back_apple/cpp/backend_coreml/main.cc | 5 +- mobile_back_pixel/cpp/backend_tflite/BUILD | 2 + .../pixel_single_model_pipeline.cc | 4 + .../cpp/backend_dummy/ios/BUILD | 2 + mobile_back_tflite/cpp/backend_tflite/BUILD | 14 +++ .../tflite_settings_android.pbtxt | 12 ++- .../cpp/backend_tflite/embedding_utils.cc | 70 ++++++++++++++ .../cpp/backend_tflite/embedding_utils.h | 40 ++++++++ .../cpp/backend_tflite/ios/BUILD | 1 + 
.../cpp/backend_tflite/neuron/BUILD | 2 + .../cpp/backend_tflite/pipeline.h | 7 +- .../cpp/backend_tflite/sd_utils.cc | 1 - .../backend_tflite/single_model_pipeline.cc | 4 + .../backend_tflite/single_model_pipeline.h | 3 + .../stable_diffusion_invoker.cc | 38 ++++++-- .../backend_tflite/stable_diffusion_invoker.h | 2 +- .../stable_diffusion_pipeline.cc | 30 ++++++ .../stable_diffusion_pipeline.h | 3 + .../cpp/backend_tflite/tflite_c.cc | 6 ++ 39 files changed, 416 insertions(+), 41 deletions(-) create mode 100644 mobile_back_tflite/cpp/backend_tflite/embedding_utils.cc create mode 100644 mobile_back_tflite/cpp/backend_tflite/embedding_utils.h diff --git a/.bazelrc b/.bazelrc index 5788b7d3b..a2ee67b8c 100644 --- a/.bazelrc +++ b/.bazelrc @@ -43,7 +43,6 @@ build:android_x86_64 --fat_apk_cpu=x86_64 # iOS configs build:ios --apple_platform_type=ios -build:ios --apple_bitcode=embedded --copt=-fembed-bitcode build:ios --copt=-Wno-c++11-narrowing build:ios --cxxopt=-fobjc-arc diff --git a/.gitignore b/.gitignore index 8a325c3db..a7be10a01 100644 --- a/.gitignore +++ b/.gitignore @@ -4,6 +4,7 @@ .ijwb .idea .vscode +.fvm /bazel-* /output /output_logs diff --git a/WORKSPACE b/WORKSPACE index 49e0b0c63..dbcbc5c2f 100644 --- a/WORKSPACE +++ b/WORKSPACE @@ -1,7 +1,7 @@ workspace(name = "mlperf_app") -load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive") load("@bazel_tools//tools/build_defs/repo:git.bzl", "new_git_repository") +load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive") http_archive( name = "bazel_skylib", @@ -49,11 +49,11 @@ http_archive( ], ) -load("@rules_python//python:repositories.bzl", "python_register_toolchains") load( "@org_tensorflow//tensorflow/tools/toolchains/python:python_repo.bzl", "python_repository", ) +load("@rules_python//python:repositories.bzl", "python_register_toolchains") python_repository(name = "python_version_repo") diff --git a/flutter/android/app/build.gradle b/flutter/android/app/build.gradle index 
a0dd55f62..92d02ddd3 100644 --- a/flutter/android/app/build.gradle +++ b/flutter/android/app/build.gradle @@ -42,7 +42,7 @@ android { defaultConfig { applicationId "org.mlcommons.android.mlperfbench" - minSdkVersion 21 + minSdkVersion 30 targetSdkVersion 34 versionCode flutterVersionCode.toInteger() versionName flutterVersionName diff --git a/flutter/android/app/src/main/AndroidManifest.xml b/flutter/android/app/src/main/AndroidManifest.xml index 79cfd7873..67a178c84 100644 --- a/flutter/android/app/src/main/AndroidManifest.xml +++ b/flutter/android/app/src/main/AndroidManifest.xml @@ -11,7 +11,8 @@ android:maxSdkVersion="29"/> + android:icon="@mipmap/ic_launcher" + android:extractNativeLibs="true"> ( CheckSymbol("mlperf_backend_convert_inputs")); - + // Backends may need to change the format of the outputs (e.g. channel order) + convert_outputs = reinterpret_cast( + CheckSymbol("mlperf_backend_convert_outputs")); // If both functions are defined, then update if (get_buffer && release_buffer) { LOG(INFO) << "Using backend allocator"; diff --git a/flutter/cpp/backends/external.h b/flutter/cpp/backends/external.h index 7a2ef9b8d..12e17357e 100644 --- a/flutter/cpp/backends/external.h +++ b/flutter/cpp/backends/external.h @@ -67,6 +67,8 @@ struct BackendFunctions { mlperf_backend_ptr_t, uint32_t, int32_t, void**)>::type; using ConvertInputsPtr = std::add_pointer::type; + using ConvertOutputsPtr = std::add_pointer::type; // Required functions. 
BackendMatchesPtr match{nullptr}; @@ -91,6 +93,7 @@ struct BackendFunctions { AllocatorMgr::GetBufferFn get_buffer{nullptr}; AllocatorMgr::ReleaseBufferFn release_buffer{nullptr}; ConvertInputsPtr convert_inputs{nullptr}; + ConvertOutputsPtr convert_outputs{nullptr}; bool isLoaded() { return isloaded; } @@ -210,6 +213,15 @@ class ExternalBackend : public Backend { } } + // Optional function to do output data re-formatting + void ConvertOutputs(int bytes, int width, int height, + uint8_t* data) override { + if (backend_functions_.convert_outputs) { + backend_functions_.convert_outputs(backend_ptr_, bytes, width, height, + data); + } + } + private: std::string backend_name_; std::string vendor_; diff --git a/flutter/cpp/binary/main.cc b/flutter/cpp/binary/main.cc index f2f3e51c9..89f5758ad 100644 --- a/flutter/cpp/binary/main.cc +++ b/flutter/cpp/binary/main.cc @@ -132,7 +132,7 @@ int Main(int argc, char *argv[]) { command_line += " " + backend_name + " " + benchmark_id; // Command Line Flags for mlperf. - std::string mode, scenario = "SingleStream", output_dir; + std::string mode, scenario = "SingleStream", output_dir, custom_config; int min_query_count = 100, min_duration_ms = 100, max_duration_ms = 10 * 60 * 1000, single_stream_expected_latency_ns = 1000000; @@ -157,8 +157,9 @@ int Main(int argc, char *argv[]) { "A hint used by the loadgen to pre-generate " "enough samples to meet the minimum test duration."), Flag::CreateFlag("output_dir", &output_dir, - "The output directory of mlperf.", Flag::kRequired)}); - + "The output directory of mlperf.", Flag::kRequired), + Flag::CreateFlag("custom_config", &custom_config, + "Custom config in form key1:val1,key2:val2.")}); // Command Line Flags for backend. 
std::unique_ptr backend; std::unique_ptr dataset; @@ -207,9 +208,8 @@ int Main(int argc, char *argv[]) { } } } - SettingList setting_list = - createSettingList(backend_setting, benchmark_id); + CreateSettingList(backend_setting, custom_config, benchmark_id); ExternalBackend *external_backend = new ExternalBackend( model_file_path, lib_path, setting_list, native_lib_path); diff --git a/flutter/cpp/c/backend_c.h b/flutter/cpp/c/backend_c.h index dd863501b..47d3c9bb5 100644 --- a/flutter/cpp/c/backend_c.h +++ b/flutter/cpp/c/backend_c.h @@ -82,6 +82,8 @@ mlperf_status_t mlperf_backend_get_output(mlperf_backend_ptr_t backend_ptr, // Optional functions void mlperf_backend_convert_inputs(mlperf_backend_ptr_t backend_ptr, int bytes, int width, int height, uint8_t* data); +void mlperf_backend_convert_outputs(mlperf_backend_ptr_t backend_ptr, int bytes, + int width, int height, uint8_t* data); #ifdef __cplusplus } diff --git a/flutter/cpp/flutter/BUILD b/flutter/cpp/flutter/BUILD index 0ac2f1b7e..eb5ddb103 100644 --- a/flutter/cpp/flutter/BUILD +++ b/flutter/cpp/flutter/BUILD @@ -55,6 +55,7 @@ apple_xcframework( }, minimum_os_versions = { "ios": "13.1", + "macos": "13.1", }, deps = [ "//flutter/cpp/flutter:bridge", diff --git a/flutter/cpp/proto/mlperf_task.proto b/flutter/cpp/proto/mlperf_task.proto index 53e545e15..5c1012490 100644 --- a/flutter/cpp/proto/mlperf_task.proto +++ b/flutter/cpp/proto/mlperf_task.proto @@ -31,7 +31,7 @@ message MLPerfConfig { // Config of the mlperf tasks. // A task is basically a combination of models and a dataset. // -// Next ID: 11 +// Next ID: 12 message TaskConfig { // Must be unique in one task file. 
Ex: image_classification // used to match backend settings @@ -52,6 +52,7 @@ message TaskConfig { required string scenario = 7; required DatasetConfig datasets = 8; required ModelConfig model = 9; + repeated CustomConfig custom_config = 11; } // Datasets for a task @@ -107,3 +108,14 @@ message ModelConfig { // Number of detection classes if applicable optional int32 num_classes = 6; } + +// CustomConfig are task specific configuration. +// The TaskConfig.CustomConfig will be converted to +// BenchmarkSetting.CustomSetting and passed to the backend. +// To avoid name collision, the id should be prefixed with TaskConfig.id. +message CustomConfig { + // Id of this config. + required string id = 1; + // Value of this config. + required string value = 2; +} diff --git a/flutter/cpp/proto/test.cc b/flutter/cpp/proto/test.cc index 07d4fd3fd..6a66969eb 100644 --- a/flutter/cpp/proto/test.cc +++ b/flutter/cpp/proto/test.cc @@ -139,9 +139,11 @@ int test_proto() { std::list benchmarks; benchmarks.push_back("image_classification"); benchmarks.push_back("image_classification_offline"); + std::string custom_config = "key1:val1,key2:val2"; for (auto benchmark_id : benchmarks) { // Convert to SettingList - SettingList setting_list = createSettingList(backend_setting, benchmark_id); + SettingList setting_list = + CreateSettingList(backend_setting, custom_config, benchmark_id); std::cout << "SettingList for " << benchmark_id << ":\n"; dumpSettingList(setting_list); diff --git a/flutter/cpp/utils.cc b/flutter/cpp/utils.cc index 965426f4d..126e9edf5 100644 --- a/flutter/cpp/utils.cc +++ b/flutter/cpp/utils.cc @@ -125,27 +125,106 @@ mlperf_backend_configuration_t CppToCSettings(const SettingList &settings) { return c_settings; } -SettingList createSettingList(const BackendSetting &backend_setting, - std::string benchmark_id) { +// Split the string by a given delimiter +std::vector _splitString(const std::string &str, char delimiter) { + std::vector tokens; + std::stringstream ss(str); 
+ std::string token; + while (std::getline(ss, token, delimiter)) { + tokens.push_back(token); + } + return tokens; +} + +// Parse the key:value string list +std::unordered_map _parseKeyValueList( + const std::string &input) { + std::unordered_map keyValueMap; + std::vector pairs = _splitString(input, ','); // Split by comma + + for (const std::string &pair : pairs) { + std::vector keyValue = + _splitString(pair, ':'); // Split by colon + if (keyValue.size() == 2) { + keyValueMap[keyValue[0]] = keyValue[1]; + } else { + LOG(ERROR) << "Invalid key:value pair: " << pair; + } + } + return keyValueMap; +} + +// Create the setting list for backend +SettingList CreateSettingList(const BackendSetting &backend_setting, + const std::string &custom_config, + const std::string &benchmark_id) { SettingList setting_list; int setting_index = 0; - - for (auto setting : backend_setting.common_setting()) { + for (const auto &setting : backend_setting.common_setting()) { setting_list.add_setting(); (*setting_list.mutable_setting(setting_index)) = setting; setting_index++; } // Copy the benchmark specific settings - setting_index = 0; - for (auto bm_setting : backend_setting.benchmark_setting()) { + for (const auto &bm_setting : backend_setting.benchmark_setting()) { if (bm_setting.benchmark_id() == benchmark_id) { setting_list.mutable_benchmark_setting()->CopyFrom(bm_setting); + + auto parsed = _parseKeyValueList(custom_config); + for (const auto &kv : parsed) { + CustomSetting custom_setting = CustomSetting(); + custom_setting.set_id(kv.first); + custom_setting.set_value(kv.second); + setting_list.mutable_benchmark_setting()->mutable_custom_setting()->Add( + std::move(custom_setting)); + } + break; } } LOG(INFO) << "setting_list:" << std::endl << setting_list.DebugString(); return setting_list; } +template +T GetConfigValue(mlperf_backend_configuration_t *configs, const char *key, + T defaultValue); + +template <> +int GetConfigValue(mlperf_backend_configuration_t *configs, + const 
char *key, int defaultValue) { + for (int i = 0; i < configs->count; ++i) { + if (strcmp(configs->keys[i], key) == 0) { + const char *valueStr = configs->values[i]; + char *endptr = nullptr; + errno = 0; + long value = + strtol(valueStr, &endptr, 10); // Base 10 for decimal conversion + if (errno == ERANGE || value < INT_MIN || value > INT_MAX) { + LOG(ERROR) << "Value out of range for int: " << valueStr; + return defaultValue; + } + if (endptr == valueStr || *endptr != '\0') { + LOG(ERROR) << "Invalid value for int: " << valueStr; + return defaultValue; + } + return static_cast(value); + } + } + return defaultValue; +} + +template <> +std::string GetConfigValue(mlperf_backend_configuration_t *configs, + const char *key, + std::string defaultValue) { + for (int i = 0; i < configs->count; ++i) { + if (strcmp(configs->keys[i], key) == 0) { + return std::string(configs->values[i]); + } + } + return defaultValue; +} + } // namespace mobile } // namespace mlperf diff --git a/flutter/cpp/utils.h b/flutter/cpp/utils.h index 08d7e86c8..c66022401 100644 --- a/flutter/cpp/utils.h +++ b/flutter/cpp/utils.h @@ -65,8 +65,13 @@ void DeleteBackendConfiguration(mlperf_backend_configuration_t *configs); mlperf_backend_configuration_t CppToCSettings(const SettingList &settings); -SettingList createSettingList(const BackendSetting &backend_setting, - std::string benchmark_id); +SettingList CreateSettingList(const BackendSetting &backend_setting, + const std::string &custom_config, + const std::string &benchmark_id); + +template +T GetConfigValue(mlperf_backend_configuration_t *configs, const char *key, + T defaultValue); } // namespace mobile } // namespace mlperf diff --git a/flutter/lib/benchmark/benchmark.dart b/flutter/lib/benchmark/benchmark.dart index 41cca8c00..c64c0bda3 100644 --- a/flutter/lib/benchmark/benchmark.dart +++ b/flutter/lib/benchmark/benchmark.dart @@ -92,6 +92,11 @@ class Benchmark { setting: commonSettings, benchmarkSetting: benchmarkSettings, ); + // Convert 
TaskConfig.CustomConfig to BenchmarkSetting.CustomSetting + final customConfigs = taskConfig.customConfig + .map((e) => pb.CustomSetting(id: e.id, value: e.value)) + .toList(); + benchmarkSettings.customSetting.addAll(customConfigs); final uris = selectedDelegate.modelFile.map((e) => e.modelPath).toList(); final modelDirName = selectedDelegate.delegateName.replaceAll(' ', '_'); final backendModelPath = diff --git a/mobile_back_apple/cpp/backend_coreml/BUILD b/mobile_back_apple/cpp/backend_coreml/BUILD index c574b8a9c..4e8acf07d 100644 --- a/mobile_back_apple/cpp/backend_coreml/BUILD +++ b/mobile_back_apple/cpp/backend_coreml/BUILD @@ -39,6 +39,7 @@ apple_xcframework( }, minimum_os_versions = { "ios": "13.1", + "macos": "13.1", }, deps = [ "//mobile_back_apple/cpp/backend_coreml:coreml_c", diff --git a/mobile_back_apple/cpp/backend_coreml/coreml_settings.pbtxt b/mobile_back_apple/cpp/backend_coreml/coreml_settings.pbtxt index 496d9c439..a607f61c2 100644 --- a/mobile_back_apple/cpp/backend_coreml/coreml_settings.pbtxt +++ b/mobile_back_apple/cpp/backend_coreml/coreml_settings.pbtxt @@ -22,7 +22,7 @@ benchmark_setting { model_checksum: "39483b20b878d46144ab4cfe9a3e5600" } } - delegate_choice: { + delegate_choice: { delegate_name: "CPU & ANE" accelerator_name: "cpu&ane" accelerator_desc: "CPU and Neural Engine" @@ -57,7 +57,7 @@ benchmark_setting { model_checksum: "39483b20b878d46144ab4cfe9a3e5600" } } - delegate_choice: { + delegate_choice: { delegate_name: "CPU & ANE" accelerator_name: "cpu&ane" accelerator_desc: "CPU and Neural Engine" @@ -81,6 +81,10 @@ benchmark_setting { model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.1-coreml/mobilenetv4_fp32_NCHW.mlpackage.zip" model_checksum: "164c504eb3e9af6c730c1765b8b81b32" } + custom_setting { + id: "data-format" + value: "NCHW" + } } delegate_choice: { delegate_name: "CPU & GPU" @@ -90,6 +94,10 @@ benchmark_setting { model_path: 
"https://github.com/mlcommons/mobile_models/releases/download/v4.1-coreml/mobilenetv4_fp32_NCHW.mlpackage.zip" model_checksum: "164c504eb3e9af6c730c1765b8b81b32" } + custom_setting { + id: "data-format" + value: "NCHW" + } } delegate_choice: { delegate_name: "CPU & ANE" @@ -99,6 +107,10 @@ benchmark_setting { model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.1-coreml/mobilenetv4_fp32_NCHW.mlpackage.zip" model_checksum: "164c504eb3e9af6c730c1765b8b81b32" } + custom_setting { + id: "data-format" + value: "NCHW" + } } delegate_selected: "CPU & GPU & ANE" } @@ -115,6 +127,10 @@ benchmark_setting { model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.1-coreml/mobilenetv4_fp32_NCHW.mlpackage.zip" model_checksum: "164c504eb3e9af6c730c1765b8b81b32" } + custom_setting { + id: "data-format" + value: "NCHW" + } } delegate_choice: { delegate_name: "CPU & GPU" @@ -125,6 +141,10 @@ benchmark_setting { model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.1-coreml/mobilenetv4_fp32_NCHW.mlpackage.zip" model_checksum: "164c504eb3e9af6c730c1765b8b81b32" } + custom_setting { + id: "data-format" + value: "NCHW" + } } delegate_choice: { delegate_name: "CPU & ANE" @@ -135,6 +155,10 @@ benchmark_setting { model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.1-coreml/mobilenetv4_fp32_NCHW.mlpackage.zip" model_checksum: "164c504eb3e9af6c730c1765b8b81b32" } + custom_setting { + id: "data-format" + value: "NCHW" + } } delegate_selected: "CPU & GPU & ANE" } @@ -160,7 +184,7 @@ benchmark_setting { model_checksum: "ef849fbf2132e205158f05ca42db25f4" } } - delegate_choice: { + delegate_choice: { delegate_name: "CPU & ANE" accelerator_name: "cpu&ane" accelerator_desc: "CPU and Neural Engine" @@ -217,7 +241,7 @@ benchmark_setting { model_checksum: "362d6b5bb1b8e10ae5b4e223f60d4d10" } } - delegate_choice: { + delegate_choice: { delegate_name: "CPU & ANE" accelerator_name: "cpu&ane" accelerator_desc: 
"CPU and Neural Engine" @@ -250,7 +274,7 @@ benchmark_setting { model_checksum: "62489706f20b0c2ae561fb2204eefb61" } } - delegate_choice: { + delegate_choice: { delegate_name: "CPU & ANE" accelerator_name: "cpu&ane" accelerator_desc: "CPU and Neural Engine" diff --git a/mobile_back_apple/cpp/backend_coreml/main.cc b/mobile_back_apple/cpp/backend_coreml/main.cc index a2246345f..af753d566 100644 --- a/mobile_back_apple/cpp/backend_coreml/main.cc +++ b/mobile_back_apple/cpp/backend_coreml/main.cc @@ -101,8 +101,9 @@ mlperf_backend_ptr_t mlperf_backend_create( CoreMLBackendData *backend_data = new CoreMLBackendData(); backendExists = true; - // quick hack for checking if model expects NCHW input. - if (strcasestr(model_path, "NCHW") != nullptr) { + std::string dataFormat = + mlperf::mobile::GetConfigValue(configs, "data-format", std::string("")); + if (dataFormat == "NCHW") { backend_data->expectNCHW = true; LOG(INFO) << "Will convert inputs from NHWC to NCHW!"; } diff --git a/mobile_back_pixel/cpp/backend_tflite/BUILD b/mobile_back_pixel/cpp/backend_tflite/BUILD index 54947f346..8b59fc413 100644 --- a/mobile_back_pixel/cpp/backend_tflite/BUILD +++ b/mobile_back_pixel/cpp/backend_tflite/BUILD @@ -53,6 +53,7 @@ cc_library( srcs = [ "pixel_single_model_pipeline.cc", "tflite_pixel.cc", + "//mobile_back_tflite/cpp/backend_tflite:embedding_utils.cc", "//mobile_back_tflite/cpp/backend_tflite:sd_utils.cc", "//mobile_back_tflite/cpp/backend_tflite:stable_diffusion_invoker.cc", "//mobile_back_tflite/cpp/backend_tflite:stable_diffusion_pipeline.cc", @@ -60,6 +61,7 @@ cc_library( hdrs = [ "tflite_settings_pixel.h", "thread_pool.h", + "//mobile_back_tflite/cpp/backend_tflite:embedding_utils.h", "//mobile_back_tflite/cpp/backend_tflite:pipeline.h", "//mobile_back_tflite/cpp/backend_tflite:sd_utils.h", "//mobile_back_tflite/cpp/backend_tflite:single_model_pipeline.h", diff --git a/mobile_back_pixel/cpp/backend_tflite/pixel_single_model_pipeline.cc 
b/mobile_back_pixel/cpp/backend_tflite/pixel_single_model_pipeline.cc index 1dc201cfb..1d44b411f 100644 --- a/mobile_back_pixel/cpp/backend_tflite/pixel_single_model_pipeline.cc +++ b/mobile_back_pixel/cpp/backend_tflite/pixel_single_model_pipeline.cc @@ -413,6 +413,10 @@ void SingleModelPipeline::backend_convert_inputs( mlperf_backend_ptr_t backend_ptr, int bytes, int width, int height, uint8_t* data) {} +void SingleModelPipeline::backend_convert_outputs( + mlperf_backend_ptr_t backend_ptr, int bytes, int width, int height, + uint8_t* data) {} + void* SingleModelPipeline::backend_get_buffer(size_t n) { return ::operator new(n); } diff --git a/mobile_back_tflite/cpp/backend_dummy/ios/BUILD b/mobile_back_tflite/cpp/backend_dummy/ios/BUILD index c7adadbca..a3229e059 100644 --- a/mobile_back_tflite/cpp/backend_dummy/ios/BUILD +++ b/mobile_back_tflite/cpp/backend_dummy/ios/BUILD @@ -29,6 +29,7 @@ apple_xcframework( }, minimum_os_versions = { "ios": "13.1", + "macos": "13.1", }, deps = [ "//mobile_back_tflite/cpp/backend_dummy:dummy_backend", @@ -49,6 +50,7 @@ apple_xcframework( }, minimum_os_versions = { "ios": "13.1", + "macos": "13.1", }, deps = [ "//mobile_back_tflite/cpp/backend_dummy:dummy_backend", diff --git a/mobile_back_tflite/cpp/backend_tflite/BUILD b/mobile_back_tflite/cpp/backend_tflite/BUILD index f4d1a69da..651e34eba 100644 --- a/mobile_back_tflite/cpp/backend_tflite/BUILD +++ b/mobile_back_tflite/cpp/backend_tflite/BUILD @@ -36,9 +36,20 @@ pbtxt2header( ], ) +cc_library( + name = "embedding_utils", + srcs = ["embedding_utils.cc"], + hdrs = ["embedding_utils.h"], + visibility = ["//visibility:public"], + deps = [ + "@org_tensorflow//tensorflow/core:tflite_portable_logging", + ], +) + cc_library( name = "tflite_c", srcs = [ + "embedding_utils.cc", "sd_utils.cc", "single_model_pipeline.cc", "stable_diffusion_invoker.cc", @@ -46,6 +57,7 @@ cc_library( "tflite_c.cc", ], hdrs = [ + "embedding_utils.h", "pipeline.h", "sd_utils.h", "single_model_pipeline.h", @@ 
-67,7 +79,9 @@ cc_library( "//conditions:default": [], }), deps = [ + ":embedding_utils", ":tflite_settings", + "//flutter/cpp:utils", "//flutter/cpp/c:headers", "@org_tensorflow//tensorflow/core:tflite_portable_logging", "@org_tensorflow//tensorflow/lite/c:c_api", diff --git a/mobile_back_tflite/cpp/backend_tflite/backend_settings/tflite_settings_android.pbtxt b/mobile_back_tflite/cpp/backend_tflite/backend_settings/tflite_settings_android.pbtxt index 4aab5dbb3..bfec84e5f 100644 --- a/mobile_back_tflite/cpp/backend_tflite/backend_settings/tflite_settings_android.pbtxt +++ b/mobile_back_tflite/cpp/backend_tflite/backend_settings/tflite_settings_android.pbtxt @@ -215,17 +215,21 @@ benchmark_setting { accelerator_name: "npu" accelerator_desc: "NPU" model_file: { - model_path: "local:///mlperf_models/sd_decoder_dynamic.tflite" + model_path: "https://github.com/RSMNYS/SD/releases/download/SD/sd_decoder_dynamic.tflite" model_checksum: "68acdb62f99e1dc2c7f5db8cdd0e007c" } model_file: { - model_path: "local:///mlperf_models/sd_diffusion_model_dynamic.tflite" - model_checksum: "7cbdadf5282b71561ce5eda75e868c19" + model_path: "https://github.com/RSMNYS/SD/releases/download/SD/sd_diffusion_model_dynamic.tflite" + model_checksum: "309e95f76ac8de01130942037a28aa8f" } model_file: { - model_path: "local:///mlperf_models/sd_text_encoder_dynamic.tflite" + model_path: "https://github.com/RSMNYS/SD/releases/download/SD/sd_text_encoder_dynamic.tflite" model_checksum: "b64effb0360f9ea49a117cdaf8a2fbdc" } + model_file: { + model_path: "https://github.com/RSMNYS/SD/releases/download/SD/timestep_embeddings_data.bin.ts" + model_checksum: "798b772155a69de5df44b304327bb3cc" + } } delegate_selected: "NNAPI" custom_setting { diff --git a/mobile_back_tflite/cpp/backend_tflite/embedding_utils.cc b/mobile_back_tflite/cpp/backend_tflite/embedding_utils.cc new file mode 100644 index 000000000..9f25eb4e3 --- /dev/null +++ b/mobile_back_tflite/cpp/backend_tflite/embedding_utils.cc @@ -0,0 +1,70 @@ 
+#include "embedding_utils.h" + +#include + +bool TsEmbeddingParser::parse_pickle(const std::string& filename) { + std::ifstream file(filename, std::ios::binary); + if (!file) { + std::cerr << "Failed to open file: " << filename << std::endl; + return false; + } + + // Read timesteps array + std::vector timesteps; + uint32_t num_timesteps; + file.read(reinterpret_cast(&num_timesteps), sizeof(uint32_t)); + timesteps.resize(num_timesteps); + file.read(reinterpret_cast(timesteps.data()), + num_timesteps * sizeof(int32_t)); + + // Read embeddings array + std::vector> embeddings(num_timesteps); + for (auto& emb : embeddings) { + emb.resize(EMBEDDING_DIM); + file.read(reinterpret_cast(emb.data()), + EMBEDDING_DIM * sizeof(float)); + } + + // Reverse both timesteps and embeddings before storing + std::reverse(timesteps.begin(), timesteps.end()); + std::reverse(embeddings.begin(), embeddings.end()); + + // Store in maps + timesteps_[num_timesteps] = std::move(timesteps); + embeddings_[num_timesteps] = std::move(embeddings); + + return true; +} + +std::vector TsEmbeddingParser::get_timestep_embedding( + int32_t steps, int32_t step_index) const { + auto emb_it = embeddings_.find(steps); + if (emb_it == embeddings_.end() || step_index >= emb_it->second.size()) { + return {}; + } + return emb_it->second[step_index]; +} + +std::vector TsEmbeddingParser::get_timesteps(int32_t steps) const { + auto ts_it = timesteps_.find(steps); + if (ts_it == timesteps_.end()) { + return {}; + } + return ts_it->second; +} + +bool EmbeddingManager::load_timestep_embeddings(const std::string& filename) { + ts_parser_ = std::make_unique(); + return ts_parser_->parse_pickle(filename); +} + +std::vector EmbeddingManager::get_timestep_embedding( + int32_t timestep, int num_steps) const { + if (!ts_parser_) return {}; + return ts_parser_->get_timestep_embedding(num_steps, timestep); +} + +std::vector EmbeddingManager::get_timesteps(int num_steps) const { + if (!ts_parser_) return {}; + return 
ts_parser_->get_timesteps(num_steps); +} \ No newline at end of file diff --git a/mobile_back_tflite/cpp/backend_tflite/embedding_utils.h b/mobile_back_tflite/cpp/backend_tflite/embedding_utils.h new file mode 100644 index 000000000..f543c6332 --- /dev/null +++ b/mobile_back_tflite/cpp/backend_tflite/embedding_utils.h @@ -0,0 +1,40 @@ +#ifndef EMBEDDING_UTILS_H_ +#define EMBEDDING_UTILS_H_ + +#include +#include +#include +#include +#include + +class TsEmbeddingParser { + public: + bool parse_pickle(const std::string& filename); + std::vector get_timestep_embedding(int32_t steps, + int32_t step_index) const; + std::vector get_timesteps(int32_t steps) const; + + private: + static constexpr size_t EMBEDDING_DIM = 1280; + std::map> timesteps_; + std::map>> embeddings_; +}; + +class EmbeddingManager { + public: + static EmbeddingManager& getInstance() { + static EmbeddingManager instance; + return instance; + } + + bool load_timestep_embeddings(const std::string& filename); + std::vector get_timestep_embedding(int32_t timestep, + int num_steps) const; + std::vector get_timesteps(int num_steps) const; + + private: + EmbeddingManager() = default; + std::unique_ptr ts_parser_; +}; + +#endif // EMBEDDING_UTILS_H_ \ No newline at end of file diff --git a/mobile_back_tflite/cpp/backend_tflite/ios/BUILD b/mobile_back_tflite/cpp/backend_tflite/ios/BUILD index f0a764a6d..74fa88aea 100644 --- a/mobile_back_tflite/cpp/backend_tflite/ios/BUILD +++ b/mobile_back_tflite/cpp/backend_tflite/ios/BUILD @@ -15,6 +15,7 @@ apple_xcframework( }, minimum_os_versions = { "ios": "13.1", + "macos": "13.1", }, deps = [ "//mobile_back_tflite/cpp/backend_tflite:tflite_c", diff --git a/mobile_back_tflite/cpp/backend_tflite/neuron/BUILD b/mobile_back_tflite/cpp/backend_tflite/neuron/BUILD index 118076fea..e9e8cf9a7 100644 --- a/mobile_back_tflite/cpp/backend_tflite/neuron/BUILD +++ b/mobile_back_tflite/cpp/backend_tflite/neuron/BUILD @@ -70,7 +70,9 @@ cc_library( local_defines = 
["MTK_TFLITE_NEURON_BACKEND"], deps = [ ":tflite_settings", + "//flutter/cpp:utils", "//flutter/cpp/c:headers", + "//mobile_back_tflite/cpp/backend_tflite:embedding_utils", "//mobile_back_tflite/cpp/backend_tflite:tflite_settings", "@org_tensorflow//tensorflow/core:tflite_portable_logging", "@org_tensorflow//tensorflow/lite/c:c_api", diff --git a/mobile_back_tflite/cpp/backend_tflite/pipeline.h b/mobile_back_tflite/cpp/backend_tflite/pipeline.h index 41a9822f2..4ab1b4f1c 100644 --- a/mobile_back_tflite/cpp/backend_tflite/pipeline.h +++ b/mobile_back_tflite/cpp/backend_tflite/pipeline.h @@ -72,11 +72,16 @@ class Pipeline { virtual mlperf_status_t backend_get_output(mlperf_backend_ptr_t backend_ptr, uint32_t batchIndex, int32_t i, void **data) = 0; - + // Optional function to convert the inputs virtual void backend_convert_inputs(mlperf_backend_ptr_t backend_ptr, int bytes, int width, int height, uint8_t *data) = 0; + // Optional function to convert the outputs + virtual void backend_convert_outputs(mlperf_backend_ptr_t backend_ptr, + int bytes, int width, int height, + uint8_t *data) = 0; + virtual void *backend_get_buffer(size_t n) = 0; virtual void backend_release_buffer(void *p) = 0; diff --git a/mobile_back_tflite/cpp/backend_tflite/sd_utils.cc b/mobile_back_tflite/cpp/backend_tflite/sd_utils.cc index 14aa858d9..c5901b66c 100644 --- a/mobile_back_tflite/cpp/backend_tflite/sd_utils.cc +++ b/mobile_back_tflite/cpp/backend_tflite/sd_utils.cc @@ -233,7 +233,6 @@ std::vector get_timestep_embedding(int timestep, int batch_size, int dim, embedding_cos.push_back(cosf(timestep * freq)); embedding_sin.push_back(sinf(timestep * freq)); } - std::vector embedding; for (int i = 0; i < batch_size; i++) { embedding.insert(embedding.end(), diff --git a/mobile_back_tflite/cpp/backend_tflite/single_model_pipeline.cc b/mobile_back_tflite/cpp/backend_tflite/single_model_pipeline.cc index 4dc30507b..ce1eb7a1d 100644 --- a/mobile_back_tflite/cpp/backend_tflite/single_model_pipeline.cc 
+++ b/mobile_back_tflite/cpp/backend_tflite/single_model_pipeline.cc @@ -635,6 +635,10 @@ void SingleModelPipeline::backend_convert_inputs( #endif } +void SingleModelPipeline::backend_convert_outputs( + mlperf_backend_ptr_t backend_ptr, int bytes, int width, int height, + uint8_t *data) {} + void *SingleModelPipeline::backend_get_buffer(size_t n) { #ifdef MTK_TFLITE_NEURON_BACKEND if (neuron_backend != nullptr) { diff --git a/mobile_back_tflite/cpp/backend_tflite/single_model_pipeline.h b/mobile_back_tflite/cpp/backend_tflite/single_model_pipeline.h index 30c639596..70d447588 100644 --- a/mobile_back_tflite/cpp/backend_tflite/single_model_pipeline.h +++ b/mobile_back_tflite/cpp/backend_tflite/single_model_pipeline.h @@ -64,6 +64,9 @@ class SingleModelPipeline : public Pipeline { void backend_convert_inputs(mlperf_backend_ptr_t backend_ptr, int bytes, int width, int height, uint8_t *data) override; + void backend_convert_outputs(mlperf_backend_ptr_t backend_ptr, int bytes, + int width, int height, uint8_t *data) override; + void *backend_get_buffer(size_t n) override; void backend_release_buffer(void *p) override; diff --git a/mobile_back_tflite/cpp/backend_tflite/stable_diffusion_invoker.cc b/mobile_back_tflite/cpp/backend_tflite/stable_diffusion_invoker.cc index 55ea8be07..8c3405739 100644 --- a/mobile_back_tflite/cpp/backend_tflite/stable_diffusion_invoker.cc +++ b/mobile_back_tflite/cpp/backend_tflite/stable_diffusion_invoker.cc @@ -4,6 +4,7 @@ #include #include +#include "embedding_utils.h" #include "sd_utils.h" #include "stable_diffusion_pipeline.h" #include "tensorflow/lite/c/c_api.h" @@ -24,15 +25,15 @@ StableDiffusionInvoker::StableDiffusionInvoker(SDBackendData* backend_data) : backend_data_(backend_data) {} std::vector StableDiffusionInvoker::invoke() { - std::cout << "Prompt encoding started" << std::endl; + LOG(INFO) << "Prompt encoding started"; auto encoded_text = encode_prompt(backend_data_->input_prompt_tokens); auto unconditional_encoded_text = 
        encode_prompt(backend_data_->unconditional_tokens);
-  std::cout << "Diffusion process started" << std::endl;
+  LOG(INFO) << "Diffusion process started";
   auto latent = diffusion_process(encoded_text, unconditional_encoded_text,
                                   backend_data_->num_steps, backend_data_->seed);
-  std::cout << "Image decoding started" << std::endl;
+  LOG(INFO) << "Image decoding started";
   return decode_image(latent);
 }
 
@@ -99,19 +100,38 @@ std::vector StableDiffusionInvoker::diffusion_process(
     const std::vector& unconditional_encoded_text, int num_steps, int seed) {
   float unconditional_guidance_scale = 7.5f;
+
   auto noise = get_normal(64 * 64 * 4, seed);
   auto latent = noise;
-  auto timesteps = get_timesteps(1, 1000, 1000 / num_steps);
+  // Get pre-calculated timesteps and embeddings
+  auto& embedding_manager = EmbeddingManager::getInstance();
+  auto timesteps = embedding_manager.get_timesteps(num_steps);
+
+  if (timesteps.empty()) {
+    LOG(ERROR) << "Failed to get timesteps for " << num_steps << " steps";
+    return std::vector();
+  }
+
   auto alphas_tuple = get_initial_alphas(timesteps);
+
   auto alphas = std::get<0>(alphas_tuple);
   auto alphas_prev = std::get<1>(alphas_tuple);
 
   for (int i = timesteps.size() - 1; i >= 0; --i) {
-    std::cout << "Step " << timesteps.size() - 1 - i << "\n";
+    LOG(INFO) << "Step " << timesteps.size() - 1 - i;
+
+    std::cout << "\n=== Processing Step " << timesteps.size() - 1 - i
+              << " (timestamp: " << timesteps[i] << ") ===" << std::endl;
 
     auto latent_prev = latent;
-    auto t_emb = get_timestep_embedding(timesteps[i]);
+
+    auto t_emb = embedding_manager.get_timestep_embedding(i, num_steps);
+
+    if (t_emb.empty()) {
+      LOG(ERROR) << "Failed to get timestamp embedding for step " << i;
+      return std::vector();
+    }
 
     auto unconditional_latent =
         diffusion_step(latent, t_emb, unconditional_encoded_text);
@@ -132,6 +157,7 @@ std::vector 
StableDiffusionInvoker::diffusion_process( latent.assign(std::begin(l), std::end(l)); } + std::cout << "\nDiffusion process completed" << std::endl; return latent; } diff --git a/mobile_back_tflite/cpp/backend_tflite/stable_diffusion_invoker.h b/mobile_back_tflite/cpp/backend_tflite/stable_diffusion_invoker.h index ccbef1f9e..706589835 100644 --- a/mobile_back_tflite/cpp/backend_tflite/stable_diffusion_invoker.h +++ b/mobile_back_tflite/cpp/backend_tflite/stable_diffusion_invoker.h @@ -5,7 +5,7 @@ #include #include -#include "stable_diffusion_pipeline.h" // Include the backend data structure +#include "stable_diffusion_pipeline.h" #include "tensorflow/lite/interpreter.h" #include "tensorflow/lite/model_builder.h" diff --git a/mobile_back_tflite/cpp/backend_tflite/stable_diffusion_pipeline.cc b/mobile_back_tflite/cpp/backend_tflite/stable_diffusion_pipeline.cc index 52d20b570..de7ddba57 100644 --- a/mobile_back_tflite/cpp/backend_tflite/stable_diffusion_pipeline.cc +++ b/mobile_back_tflite/cpp/backend_tflite/stable_diffusion_pipeline.cc @@ -6,7 +6,9 @@ #include #include +#include "embedding_utils.h" #include "flutter/cpp/c/backend_c.h" +#include "flutter/cpp/utils.h" #include "stable_diffusion_invoker.h" #include "tensorflow/lite/c/c_api.h" #include "tensorflow/lite/c/common.h" @@ -58,12 +60,26 @@ mlperf_backend_ptr_t StableDiffusionPipeline::backend_create( // Verify only one instance of the backend exists at any time if (backendExists) { + LOG(ERROR) << "Backend already exists"; return nullptr; } SDBackendData* backend_data = new SDBackendData(); backendExists = true; + // Read seed and num_steps value from SD task settings + backend_data->seed = + mlperf::mobile::GetConfigValue(configs, "stable_diffusion_seed", 0); + if (backend_data->seed == 0) { + LOG(ERROR) << "Cannot get stable_diffusion_seed"; + return nullptr; + } + backend_data->num_steps = + mlperf::mobile::GetConfigValue(configs, "stable_diffusion_num_steps", 0); + if (backend_data->num_steps == 0) { + 
LOG(ERROR) << "Cannot get stable_diffusion_num_steps"; + return nullptr; + } // Load models from the provided directory path std::string text_encoder_path = std::string(model_path) + "/sd_text_encoder_dynamic.tflite"; @@ -95,6 +111,16 @@ mlperf_backend_ptr_t StableDiffusionPipeline::backend_create( return nullptr; } + std::string ts_embedding_path = + std::string(model_path) + "/timestep_embeddings_data.bin.ts"; + if (!EmbeddingManager::getInstance().load_timestep_embeddings( + ts_embedding_path)) { + LOG(ERROR) << "Failed to load timestep embeddings from " + << ts_embedding_path; + backend_delete(backend_data); + return nullptr; + } + return backend_data; } @@ -268,6 +294,10 @@ void StableDiffusionPipeline::backend_convert_inputs( mlperf_backend_ptr_t backend_ptr, int bytes, int width, int height, uint8_t* data) {} +void StableDiffusionPipeline::backend_convert_outputs( + mlperf_backend_ptr_t backend_ptr, int bytes, int width, int height, + uint8_t* data) {} + void* StableDiffusionPipeline::backend_get_buffer(size_t n) { return ::operator new(n); } diff --git a/mobile_back_tflite/cpp/backend_tflite/stable_diffusion_pipeline.h b/mobile_back_tflite/cpp/backend_tflite/stable_diffusion_pipeline.h index adf460530..17070a286 100644 --- a/mobile_back_tflite/cpp/backend_tflite/stable_diffusion_pipeline.h +++ b/mobile_back_tflite/cpp/backend_tflite/stable_diffusion_pipeline.h @@ -91,6 +91,9 @@ class StableDiffusionPipeline : public Pipeline { void backend_convert_inputs(mlperf_backend_ptr_t backend_ptr, int bytes, int width, int height, uint8_t *data) override; + void backend_convert_outputs(mlperf_backend_ptr_t backend_ptr, int bytes, + int width, int height, uint8_t *data) override; + void *backend_get_buffer(size_t n) override; void backend_release_buffer(void *p) override; diff --git a/mobile_back_tflite/cpp/backend_tflite/tflite_c.cc b/mobile_back_tflite/cpp/backend_tflite/tflite_c.cc index dced8bf1d..62a6a18bc 100644 --- 
a/mobile_back_tflite/cpp/backend_tflite/tflite_c.cc +++ b/mobile_back_tflite/cpp/backend_tflite/tflite_c.cc @@ -229,6 +229,12 @@ void mlperf_backend_convert_inputs(mlperf_backend_ptr_t backend_ptr, int bytes, data); } +void mlperf_backend_convert_outputs(mlperf_backend_ptr_t backend_ptr, int bytes, + int width, int height, uint8_t *data) { + return pipeline->backend_convert_outputs(backend_ptr, bytes, width, height, + data); +} + void *mlperf_backend_get_buffer(size_t n) { return pipeline->backend_get_buffer(n); }