From 48654bdff2bef8e5061056950bccbe9d04a28398 Mon Sep 17 00:00:00 2001 From: RSMNYS Date: Tue, 14 Jan 2025 09:02:25 +0200 Subject: [PATCH] use time step embedding from file (#928) * feat: pass task-specific config to backend (#922) * Add TaskConfig.CustomConfig and pass them to backend * Add CustomConfig for main.cc * Use seed and num_steps from CustomConfig for TFLite backend * Replace std::cout with LOG(INFO) * Format files * feat: add ConvertOutputs() API (#927) * Add ConvertOutputs() API * Add ConvertOutputs() for mobile_back_tflite * Set minimum macos version * Set minimum macos version to 13.1 * Update _kIphoneOnGitHubAction * feat: timestamp-embedding-parser (WIP) * disabled bitcode to be able to compile with new Xcode * chore: formatting * refactor: use custom setting in Core ML backend to detect NCHW input. (#924) * Add GetConfigValue() * Add custom setting data-format for Core ML * Use GetConfigValue() to get stable_diffusion_seed and stable_diffusion_num_steps * fix: resolve crash due to permission denied on Android Play Store version (#930) * Set android:extractNativeLibs="true" * Set android.bundle.enableUncompressedNativeLibs=false * chore: increase Android minSdkVersion from 21 to 30 (#859) Increase minSdkVersion to 30 * feat: finalized SD pipeline to use embedding from the binary file. 
* refactor: updated embedding_utils to parse pkl file * chore: linting * fix: fixed lint issue in neuron * chore: BUILD cleanup * chore: cleanup * chore: ignore .fvm * chore: updated model paths and checksums for stable diffusion benchmark: tflite_settings_android.pbtxt * chore: reverse timesteps and embeddings to support descending order of the timesteps and embeddings * chore: fixed formatting * chore: added links to the sd models and timestep embeddings file * chore: add the proper name for the embedding_timesteps file * chore: added missed declaration for backend_convert_outputs * chore: clang formatting * chore: added missed files * chore: fixed build file for the pixel backend * chore: bazel formatting * fix: added missed interface implementation for pixel * chore: clang formatting --------- Co-authored-by: Anh --- .bazelrc | 1 - .gitignore | 1 + WORKSPACE | 4 +- flutter/android/app/build.gradle | 2 +- .../android/app/src/main/AndroidManifest.xml | 3 +- flutter/android/gradle.properties | 1 + flutter/assets/tasks.pbtxt | 8 ++ flutter/cpp/backend.h | 4 + flutter/cpp/backends/external.cc | 4 +- flutter/cpp/backends/external.h | 12 +++ flutter/cpp/binary/main.cc | 10 +- flutter/cpp/c/backend_c.h | 2 + flutter/cpp/flutter/BUILD | 1 + flutter/cpp/proto/mlperf_task.proto | 14 ++- flutter/cpp/proto/test.cc | 4 +- flutter/cpp/utils.cc | 91 +++++++++++++++++-- flutter/cpp/utils.h | 9 +- flutter/lib/benchmark/benchmark.dart | 5 + mobile_back_apple/cpp/backend_coreml/BUILD | 1 + .../cpp/backend_coreml/coreml_settings.pbtxt | 34 ++++++- mobile_back_apple/cpp/backend_coreml/main.cc | 5 +- mobile_back_pixel/cpp/backend_tflite/BUILD | 2 + .../pixel_single_model_pipeline.cc | 4 + .../cpp/backend_dummy/ios/BUILD | 2 + mobile_back_tflite/cpp/backend_tflite/BUILD | 14 +++ .../tflite_settings_android.pbtxt | 12 ++- .../cpp/backend_tflite/embedding_utils.cc | 70 ++++++++++++++ .../cpp/backend_tflite/embedding_utils.h | 40 ++++++++ .../cpp/backend_tflite/ios/BUILD | 1 + 
.../cpp/backend_tflite/neuron/BUILD | 2 + .../cpp/backend_tflite/pipeline.h | 7 +- .../cpp/backend_tflite/sd_utils.cc | 1 - .../backend_tflite/single_model_pipeline.cc | 4 + .../backend_tflite/single_model_pipeline.h | 3 + .../stable_diffusion_invoker.cc | 38 ++++++-- .../backend_tflite/stable_diffusion_invoker.h | 2 +- .../stable_diffusion_pipeline.cc | 30 ++++++ .../stable_diffusion_pipeline.h | 3 + .../cpp/backend_tflite/tflite_c.cc | 6 ++ 39 files changed, 416 insertions(+), 41 deletions(-) create mode 100644 mobile_back_tflite/cpp/backend_tflite/embedding_utils.cc create mode 100644 mobile_back_tflite/cpp/backend_tflite/embedding_utils.h diff --git a/.bazelrc b/.bazelrc index 5788b7d3b..a2ee67b8c 100644 --- a/.bazelrc +++ b/.bazelrc @@ -43,7 +43,6 @@ build:android_x86_64 --fat_apk_cpu=x86_64 # iOS configs build:ios --apple_platform_type=ios -build:ios --apple_bitcode=embedded --copt=-fembed-bitcode build:ios --copt=-Wno-c++11-narrowing build:ios --cxxopt=-fobjc-arc diff --git a/.gitignore b/.gitignore index 8a325c3db..a7be10a01 100644 --- a/.gitignore +++ b/.gitignore @@ -4,6 +4,7 @@ .ijwb .idea .vscode +.fvm /bazel-* /output /output_logs diff --git a/WORKSPACE b/WORKSPACE index 49e0b0c63..dbcbc5c2f 100644 --- a/WORKSPACE +++ b/WORKSPACE @@ -1,7 +1,7 @@ workspace(name = "mlperf_app") -load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive") load("@bazel_tools//tools/build_defs/repo:git.bzl", "new_git_repository") +load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive") http_archive( name = "bazel_skylib", @@ -49,11 +49,11 @@ http_archive( ], ) -load("@rules_python//python:repositories.bzl", "python_register_toolchains") load( "@org_tensorflow//tensorflow/tools/toolchains/python:python_repo.bzl", "python_repository", ) +load("@rules_python//python:repositories.bzl", "python_register_toolchains") python_repository(name = "python_version_repo") diff --git a/flutter/android/app/build.gradle b/flutter/android/app/build.gradle index 
a0dd55f62..92d02ddd3 100644 --- a/flutter/android/app/build.gradle +++ b/flutter/android/app/build.gradle @@ -42,7 +42,7 @@ android { defaultConfig { applicationId "org.mlcommons.android.mlperfbench" - minSdkVersion 21 + minSdkVersion 30 targetSdkVersion 34 versionCode flutterVersionCode.toInteger() versionName flutterVersionName diff --git a/flutter/android/app/src/main/AndroidManifest.xml b/flutter/android/app/src/main/AndroidManifest.xml index 79cfd7873..67a178c84 100644 --- a/flutter/android/app/src/main/AndroidManifest.xml +++ b/flutter/android/app/src/main/AndroidManifest.xml @@ -11,7 +11,8 @@ android:maxSdkVersion="29"/> + android:icon="@mipmap/ic_launcher" + android:extractNativeLibs="true"> ( CheckSymbol("mlperf_backend_convert_inputs")); - + // Backends may need to change the format of the outputs (e.g. channel order) + convert_outputs = reinterpret_cast( + CheckSymbol("mlperf_backend_convert_outputs")); // If both functions are defined, then update if (get_buffer && release_buffer) { LOG(INFO) << "Using backend allocator"; diff --git a/flutter/cpp/backends/external.h b/flutter/cpp/backends/external.h index 7a2ef9b8d..12e17357e 100644 --- a/flutter/cpp/backends/external.h +++ b/flutter/cpp/backends/external.h @@ -67,6 +67,8 @@ struct BackendFunctions { mlperf_backend_ptr_t, uint32_t, int32_t, void**)>::type; using ConvertInputsPtr = std::add_pointer::type; + using ConvertOutputsPtr = std::add_pointer::type; // Required functions. 
BackendMatchesPtr match{nullptr}; @@ -91,6 +93,7 @@ struct BackendFunctions { AllocatorMgr::GetBufferFn get_buffer{nullptr}; AllocatorMgr::ReleaseBufferFn release_buffer{nullptr}; ConvertInputsPtr convert_inputs{nullptr}; + ConvertOutputsPtr convert_outputs{nullptr}; bool isLoaded() { return isloaded; } @@ -210,6 +213,15 @@ class ExternalBackend : public Backend { } } + // Optional function to do output data re-formatting + void ConvertOutputs(int bytes, int width, int height, + uint8_t* data) override { + if (backend_functions_.convert_outputs) { + backend_functions_.convert_outputs(backend_ptr_, bytes, width, height, + data); + } + } + private: std::string backend_name_; std::string vendor_; diff --git a/flutter/cpp/binary/main.cc b/flutter/cpp/binary/main.cc index f2f3e51c9..89f5758ad 100644 --- a/flutter/cpp/binary/main.cc +++ b/flutter/cpp/binary/main.cc @@ -132,7 +132,7 @@ int Main(int argc, char *argv[]) { command_line += " " + backend_name + " " + benchmark_id; // Command Line Flags for mlperf. - std::string mode, scenario = "SingleStream", output_dir; + std::string mode, scenario = "SingleStream", output_dir, custom_config; int min_query_count = 100, min_duration_ms = 100, max_duration_ms = 10 * 60 * 1000, single_stream_expected_latency_ns = 1000000; @@ -157,8 +157,9 @@ int Main(int argc, char *argv[]) { "A hint used by the loadgen to pre-generate " "enough samples to meet the minimum test duration."), Flag::CreateFlag("output_dir", &output_dir, - "The output directory of mlperf.", Flag::kRequired)}); - + "The output directory of mlperf.", Flag::kRequired), + Flag::CreateFlag("custom_config", &custom_config, + "Custom config in form key1:val1,key2:val2.")}); // Command Line Flags for backend. 
std::unique_ptr backend; std::unique_ptr dataset; @@ -207,9 +208,8 @@ int Main(int argc, char *argv[]) { } } } - SettingList setting_list = - createSettingList(backend_setting, benchmark_id); + CreateSettingList(backend_setting, custom_config, benchmark_id); ExternalBackend *external_backend = new ExternalBackend( model_file_path, lib_path, setting_list, native_lib_path); diff --git a/flutter/cpp/c/backend_c.h b/flutter/cpp/c/backend_c.h index dd863501b..47d3c9bb5 100644 --- a/flutter/cpp/c/backend_c.h +++ b/flutter/cpp/c/backend_c.h @@ -82,6 +82,8 @@ mlperf_status_t mlperf_backend_get_output(mlperf_backend_ptr_t backend_ptr, // Optional functions void mlperf_backend_convert_inputs(mlperf_backend_ptr_t backend_ptr, int bytes, int width, int height, uint8_t* data); +void mlperf_backend_convert_outputs(mlperf_backend_ptr_t backend_ptr, int bytes, + int width, int height, uint8_t* data); #ifdef __cplusplus } diff --git a/flutter/cpp/flutter/BUILD b/flutter/cpp/flutter/BUILD index 0ac2f1b7e..eb5ddb103 100644 --- a/flutter/cpp/flutter/BUILD +++ b/flutter/cpp/flutter/BUILD @@ -55,6 +55,7 @@ apple_xcframework( }, minimum_os_versions = { "ios": "13.1", + "macos": "13.1", }, deps = [ "//flutter/cpp/flutter:bridge", diff --git a/flutter/cpp/proto/mlperf_task.proto b/flutter/cpp/proto/mlperf_task.proto index 53e545e15..5c1012490 100644 --- a/flutter/cpp/proto/mlperf_task.proto +++ b/flutter/cpp/proto/mlperf_task.proto @@ -31,7 +31,7 @@ message MLPerfConfig { // Config of the mlperf tasks. // A task is basically a combination of models and a dataset. // -// Next ID: 11 +// Next ID: 12 message TaskConfig { // Must be unique in one task file. 
Ex: image_classification // used to match backend settings @@ -52,6 +52,7 @@ message TaskConfig { required string scenario = 7; required DatasetConfig datasets = 8; required ModelConfig model = 9; + repeated CustomConfig custom_config = 11; } // Datasets for a task @@ -107,3 +108,14 @@ message ModelConfig { // Number of detection classes if applicable optional int32 num_classes = 6; } + +// CustomConfig are task specific configuration. +// The TaskConfig.CustomConfig will be converted to +// BenchmarkSetting.CustomSetting and passed to the backend. +// To avoid name collision, the id should be prefixed with TaskConfig.id. +message CustomConfig { + // Id of this config. + required string id = 1; + // Value of this config. + required string value = 2; +} diff --git a/flutter/cpp/proto/test.cc b/flutter/cpp/proto/test.cc index 07d4fd3fd..6a66969eb 100644 --- a/flutter/cpp/proto/test.cc +++ b/flutter/cpp/proto/test.cc @@ -139,9 +139,11 @@ int test_proto() { std::list benchmarks; benchmarks.push_back("image_classification"); benchmarks.push_back("image_classification_offline"); + std::string custom_config = "key1:val1,key2:val2"; for (auto benchmark_id : benchmarks) { // Convert to SettingList - SettingList setting_list = createSettingList(backend_setting, benchmark_id); + SettingList setting_list = + CreateSettingList(backend_setting, custom_config, benchmark_id); std::cout << "SettingList for " << benchmark_id << ":\n"; dumpSettingList(setting_list); diff --git a/flutter/cpp/utils.cc b/flutter/cpp/utils.cc index 965426f4d..126e9edf5 100644 --- a/flutter/cpp/utils.cc +++ b/flutter/cpp/utils.cc @@ -125,27 +125,106 @@ mlperf_backend_configuration_t CppToCSettings(const SettingList &settings) { return c_settings; } -SettingList createSettingList(const BackendSetting &backend_setting, - std::string benchmark_id) { +// Split the string by a given delimiter +std::vector _splitString(const std::string &str, char delimiter) { + std::vector tokens; + std::stringstream ss(str); 
+ std::string token; + while (std::getline(ss, token, delimiter)) { + tokens.push_back(token); + } + return tokens; +} + +// Parse the key:value string list +std::unordered_map _parseKeyValueList( + const std::string &input) { + std::unordered_map keyValueMap; + std::vector pairs = _splitString(input, ','); // Split by comma + + for (const std::string &pair : pairs) { + std::vector keyValue = + _splitString(pair, ':'); // Split by colon + if (keyValue.size() == 2) { + keyValueMap[keyValue[0]] = keyValue[1]; + } else { + LOG(ERROR) << "Invalid key:value pair: " << pair; + } + } + return keyValueMap; +} + +// Create the setting list for backend +SettingList CreateSettingList(const BackendSetting &backend_setting, + const std::string &custom_config, + const std::string &benchmark_id) { SettingList setting_list; int setting_index = 0; - - for (auto setting : backend_setting.common_setting()) { + for (const auto &setting : backend_setting.common_setting()) { setting_list.add_setting(); (*setting_list.mutable_setting(setting_index)) = setting; setting_index++; } // Copy the benchmark specific settings - setting_index = 0; - for (auto bm_setting : backend_setting.benchmark_setting()) { + for (const auto &bm_setting : backend_setting.benchmark_setting()) { if (bm_setting.benchmark_id() == benchmark_id) { setting_list.mutable_benchmark_setting()->CopyFrom(bm_setting); + + auto parsed = _parseKeyValueList(custom_config); + for (const auto &kv : parsed) { + CustomSetting custom_setting = CustomSetting(); + custom_setting.set_id(kv.first); + custom_setting.set_value(kv.second); + setting_list.mutable_benchmark_setting()->mutable_custom_setting()->Add( + std::move(custom_setting)); + } + break; } } LOG(INFO) << "setting_list:" << std::endl << setting_list.DebugString(); return setting_list; } +template +T GetConfigValue(mlperf_backend_configuration_t *configs, const char *key, + T defaultValue); + +template <> +int GetConfigValue(mlperf_backend_configuration_t *configs, + const 
char *key, int defaultValue) { + for (int i = 0; i < configs->count; ++i) { + if (strcmp(configs->keys[i], key) == 0) { + const char *valueStr = configs->values[i]; + char *endptr = nullptr; + errno = 0; + long value = + strtol(valueStr, &endptr, 10); // Base 10 for decimal conversion + if (errno == ERANGE || value < INT_MIN || value > INT_MAX) { + LOG(ERROR) << "Value out of range for int: " << valueStr; + return defaultValue; + } + if (endptr == valueStr || *endptr != '\0') { + LOG(ERROR) << "Invalid value for int: " << valueStr; + return defaultValue; + } + return static_cast(value); + } + } + return defaultValue; +} + +template <> +std::string GetConfigValue(mlperf_backend_configuration_t *configs, + const char *key, + std::string defaultValue) { + for (int i = 0; i < configs->count; ++i) { + if (strcmp(configs->keys[i], key) == 0) { + return std::string(configs->values[i]); + } + } + return defaultValue; +} + } // namespace mobile } // namespace mlperf diff --git a/flutter/cpp/utils.h b/flutter/cpp/utils.h index 08d7e86c8..c66022401 100644 --- a/flutter/cpp/utils.h +++ b/flutter/cpp/utils.h @@ -65,8 +65,13 @@ void DeleteBackendConfiguration(mlperf_backend_configuration_t *configs); mlperf_backend_configuration_t CppToCSettings(const SettingList &settings); -SettingList createSettingList(const BackendSetting &backend_setting, - std::string benchmark_id); +SettingList CreateSettingList(const BackendSetting &backend_setting, + const std::string &custom_config, + const std::string &benchmark_id); + +template +T GetConfigValue(mlperf_backend_configuration_t *configs, const char *key, + T defaultValue); } // namespace mobile } // namespace mlperf diff --git a/flutter/lib/benchmark/benchmark.dart b/flutter/lib/benchmark/benchmark.dart index 41cca8c00..c64c0bda3 100644 --- a/flutter/lib/benchmark/benchmark.dart +++ b/flutter/lib/benchmark/benchmark.dart @@ -92,6 +92,11 @@ class Benchmark { setting: commonSettings, benchmarkSetting: benchmarkSettings, ); + // Convert 
TaskConfig.CustomConfig to BenchmarkSetting.CustomSetting + final customConfigs = taskConfig.customConfig + .map((e) => pb.CustomSetting(id: e.id, value: e.value)) + .toList(); + benchmarkSettings.customSetting.addAll(customConfigs); final uris = selectedDelegate.modelFile.map((e) => e.modelPath).toList(); final modelDirName = selectedDelegate.delegateName.replaceAll(' ', '_'); final backendModelPath = diff --git a/mobile_back_apple/cpp/backend_coreml/BUILD b/mobile_back_apple/cpp/backend_coreml/BUILD index c574b8a9c..4e8acf07d 100644 --- a/mobile_back_apple/cpp/backend_coreml/BUILD +++ b/mobile_back_apple/cpp/backend_coreml/BUILD @@ -39,6 +39,7 @@ apple_xcframework( }, minimum_os_versions = { "ios": "13.1", + "macos": "13.1", }, deps = [ "//mobile_back_apple/cpp/backend_coreml:coreml_c", diff --git a/mobile_back_apple/cpp/backend_coreml/coreml_settings.pbtxt b/mobile_back_apple/cpp/backend_coreml/coreml_settings.pbtxt index 496d9c439..a607f61c2 100644 --- a/mobile_back_apple/cpp/backend_coreml/coreml_settings.pbtxt +++ b/mobile_back_apple/cpp/backend_coreml/coreml_settings.pbtxt @@ -22,7 +22,7 @@ benchmark_setting { model_checksum: "39483b20b878d46144ab4cfe9a3e5600" } } - delegate_choice: { + delegate_choice: { delegate_name: "CPU & ANE" accelerator_name: "cpu&ane" accelerator_desc: "CPU and Neural Engine" @@ -57,7 +57,7 @@ benchmark_setting { model_checksum: "39483b20b878d46144ab4cfe9a3e5600" } } - delegate_choice: { + delegate_choice: { delegate_name: "CPU & ANE" accelerator_name: "cpu&ane" accelerator_desc: "CPU and Neural Engine" @@ -81,6 +81,10 @@ benchmark_setting { model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.1-coreml/mobilenetv4_fp32_NCHW.mlpackage.zip" model_checksum: "164c504eb3e9af6c730c1765b8b81b32" } + custom_setting { + id: "data-format" + value: "NCHW" + } } delegate_choice: { delegate_name: "CPU & GPU" @@ -90,6 +94,10 @@ benchmark_setting { model_path: 
"https://github.com/mlcommons/mobile_models/releases/download/v4.1-coreml/mobilenetv4_fp32_NCHW.mlpackage.zip" model_checksum: "164c504eb3e9af6c730c1765b8b81b32" } + custom_setting { + id: "data-format" + value: "NCHW" + } } delegate_choice: { delegate_name: "CPU & ANE" @@ -99,6 +107,10 @@ benchmark_setting { model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.1-coreml/mobilenetv4_fp32_NCHW.mlpackage.zip" model_checksum: "164c504eb3e9af6c730c1765b8b81b32" } + custom_setting { + id: "data-format" + value: "NCHW" + } } delegate_selected: "CPU & GPU & ANE" } @@ -115,6 +127,10 @@ benchmark_setting { model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.1-coreml/mobilenetv4_fp32_NCHW.mlpackage.zip" model_checksum: "164c504eb3e9af6c730c1765b8b81b32" } + custom_setting { + id: "data-format" + value: "NCHW" + } } delegate_choice: { delegate_name: "CPU & GPU" @@ -125,6 +141,10 @@ benchmark_setting { model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.1-coreml/mobilenetv4_fp32_NCHW.mlpackage.zip" model_checksum: "164c504eb3e9af6c730c1765b8b81b32" } + custom_setting { + id: "data-format" + value: "NCHW" + } } delegate_choice: { delegate_name: "CPU & ANE" @@ -135,6 +155,10 @@ benchmark_setting { model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.1-coreml/mobilenetv4_fp32_NCHW.mlpackage.zip" model_checksum: "164c504eb3e9af6c730c1765b8b81b32" } + custom_setting { + id: "data-format" + value: "NCHW" + } } delegate_selected: "CPU & GPU & ANE" } @@ -160,7 +184,7 @@ benchmark_setting { model_checksum: "ef849fbf2132e205158f05ca42db25f4" } } - delegate_choice: { + delegate_choice: { delegate_name: "CPU & ANE" accelerator_name: "cpu&ane" accelerator_desc: "CPU and Neural Engine" @@ -217,7 +241,7 @@ benchmark_setting { model_checksum: "362d6b5bb1b8e10ae5b4e223f60d4d10" } } - delegate_choice: { + delegate_choice: { delegate_name: "CPU & ANE" accelerator_name: "cpu&ane" accelerator_desc: 
"CPU and Neural Engine" @@ -250,7 +274,7 @@ benchmark_setting { model_checksum: "62489706f20b0c2ae561fb2204eefb61" } } - delegate_choice: { + delegate_choice: { delegate_name: "CPU & ANE" accelerator_name: "cpu&ane" accelerator_desc: "CPU and Neural Engine" diff --git a/mobile_back_apple/cpp/backend_coreml/main.cc b/mobile_back_apple/cpp/backend_coreml/main.cc index a2246345f..af753d566 100644 --- a/mobile_back_apple/cpp/backend_coreml/main.cc +++ b/mobile_back_apple/cpp/backend_coreml/main.cc @@ -101,8 +101,9 @@ mlperf_backend_ptr_t mlperf_backend_create( CoreMLBackendData *backend_data = new CoreMLBackendData(); backendExists = true; - // quick hack for checking if model expects NCHW input. - if (strcasestr(model_path, "NCHW") != nullptr) { + std::string dataFormat = + mlperf::mobile::GetConfigValue(configs, "data-format", std::string("")); + if (dataFormat == "NCHW") { backend_data->expectNCHW = true; LOG(INFO) << "Will convert inputs from NHWC to NCHW!"; } diff --git a/mobile_back_pixel/cpp/backend_tflite/BUILD b/mobile_back_pixel/cpp/backend_tflite/BUILD index 54947f346..8b59fc413 100644 --- a/mobile_back_pixel/cpp/backend_tflite/BUILD +++ b/mobile_back_pixel/cpp/backend_tflite/BUILD @@ -53,6 +53,7 @@ cc_library( srcs = [ "pixel_single_model_pipeline.cc", "tflite_pixel.cc", + "//mobile_back_tflite/cpp/backend_tflite:embedding_utils.cc", "//mobile_back_tflite/cpp/backend_tflite:sd_utils.cc", "//mobile_back_tflite/cpp/backend_tflite:stable_diffusion_invoker.cc", "//mobile_back_tflite/cpp/backend_tflite:stable_diffusion_pipeline.cc", @@ -60,6 +61,7 @@ cc_library( hdrs = [ "tflite_settings_pixel.h", "thread_pool.h", + "//mobile_back_tflite/cpp/backend_tflite:embedding_utils.h", "//mobile_back_tflite/cpp/backend_tflite:pipeline.h", "//mobile_back_tflite/cpp/backend_tflite:sd_utils.h", "//mobile_back_tflite/cpp/backend_tflite:single_model_pipeline.h", diff --git a/mobile_back_pixel/cpp/backend_tflite/pixel_single_model_pipeline.cc 
b/mobile_back_pixel/cpp/backend_tflite/pixel_single_model_pipeline.cc index 1dc201cfb..1d44b411f 100644 --- a/mobile_back_pixel/cpp/backend_tflite/pixel_single_model_pipeline.cc +++ b/mobile_back_pixel/cpp/backend_tflite/pixel_single_model_pipeline.cc @@ -413,6 +413,10 @@ void SingleModelPipeline::backend_convert_inputs( mlperf_backend_ptr_t backend_ptr, int bytes, int width, int height, uint8_t* data) {} +void SingleModelPipeline::backend_convert_outputs( + mlperf_backend_ptr_t backend_ptr, int bytes, int width, int height, + uint8_t* data) {} + void* SingleModelPipeline::backend_get_buffer(size_t n) { return ::operator new(n); } diff --git a/mobile_back_tflite/cpp/backend_dummy/ios/BUILD b/mobile_back_tflite/cpp/backend_dummy/ios/BUILD index c7adadbca..a3229e059 100644 --- a/mobile_back_tflite/cpp/backend_dummy/ios/BUILD +++ b/mobile_back_tflite/cpp/backend_dummy/ios/BUILD @@ -29,6 +29,7 @@ apple_xcframework( }, minimum_os_versions = { "ios": "13.1", + "macos": "13.1", }, deps = [ "//mobile_back_tflite/cpp/backend_dummy:dummy_backend", @@ -49,6 +50,7 @@ apple_xcframework( }, minimum_os_versions = { "ios": "13.1", + "macos": "13.1", }, deps = [ "//mobile_back_tflite/cpp/backend_dummy:dummy_backend", diff --git a/mobile_back_tflite/cpp/backend_tflite/BUILD b/mobile_back_tflite/cpp/backend_tflite/BUILD index f4d1a69da..651e34eba 100644 --- a/mobile_back_tflite/cpp/backend_tflite/BUILD +++ b/mobile_back_tflite/cpp/backend_tflite/BUILD @@ -36,9 +36,20 @@ pbtxt2header( ], ) +cc_library( + name = "embedding_utils", + srcs = ["embedding_utils.cc"], + hdrs = ["embedding_utils.h"], + visibility = ["//visibility:public"], + deps = [ + "@org_tensorflow//tensorflow/core:tflite_portable_logging", + ], +) + cc_library( name = "tflite_c", srcs = [ + "embedding_utils.cc", "sd_utils.cc", "single_model_pipeline.cc", "stable_diffusion_invoker.cc", @@ -46,6 +57,7 @@ cc_library( "tflite_c.cc", ], hdrs = [ + "embedding_utils.h", "pipeline.h", "sd_utils.h", "single_model_pipeline.h", @@ 
-67,7 +79,9 @@ cc_library( "//conditions:default": [], }), deps = [ + ":embedding_utils", ":tflite_settings", + "//flutter/cpp:utils", "//flutter/cpp/c:headers", "@org_tensorflow//tensorflow/core:tflite_portable_logging", "@org_tensorflow//tensorflow/lite/c:c_api", diff --git a/mobile_back_tflite/cpp/backend_tflite/backend_settings/tflite_settings_android.pbtxt b/mobile_back_tflite/cpp/backend_tflite/backend_settings/tflite_settings_android.pbtxt index 4aab5dbb3..bfec84e5f 100644 --- a/mobile_back_tflite/cpp/backend_tflite/backend_settings/tflite_settings_android.pbtxt +++ b/mobile_back_tflite/cpp/backend_tflite/backend_settings/tflite_settings_android.pbtxt @@ -215,17 +215,21 @@ benchmark_setting { accelerator_name: "npu" accelerator_desc: "NPU" model_file: { - model_path: "local:///mlperf_models/sd_decoder_dynamic.tflite" + model_path: "https://github.com/RSMNYS/SD/releases/download/SD/sd_decoder_dynamic.tflite" model_checksum: "68acdb62f99e1dc2c7f5db8cdd0e007c" } model_file: { - model_path: "local:///mlperf_models/sd_diffusion_model_dynamic.tflite" - model_checksum: "7cbdadf5282b71561ce5eda75e868c19" + model_path: "https://github.com/RSMNYS/SD/releases/download/SD/sd_diffusion_model_dynamic.tflite" + model_checksum: "309e95f76ac8de01130942037a28aa8f" } model_file: { - model_path: "local:///mlperf_models/sd_text_encoder_dynamic.tflite" + model_path: "https://github.com/RSMNYS/SD/releases/download/SD/sd_text_encoder_dynamic.tflite" model_checksum: "b64effb0360f9ea49a117cdaf8a2fbdc" } + model_file: { + model_path: "https://github.com/RSMNYS/SD/releases/download/SD/timestep_embeddings_data.bin.ts" + model_checksum: "798b772155a69de5df44b304327bb3cc" + } } delegate_selected: "NNAPI" custom_setting { diff --git a/mobile_back_tflite/cpp/backend_tflite/embedding_utils.cc b/mobile_back_tflite/cpp/backend_tflite/embedding_utils.cc new file mode 100644 index 000000000..9f25eb4e3 --- /dev/null +++ b/mobile_back_tflite/cpp/backend_tflite/embedding_utils.cc @@ -0,0 +1,70 @@ 
+#include "embedding_utils.h" + +#include + +bool TsEmbeddingParser::parse_pickle(const std::string& filename) { + std::ifstream file(filename, std::ios::binary); + if (!file) { + std::cerr << "Failed to open file: " << filename << std::endl; + return false; + } + + // Read timesteps array + std::vector timesteps; + uint32_t num_timesteps; + file.read(reinterpret_cast(&num_timesteps), sizeof(uint32_t)); + timesteps.resize(num_timesteps); + file.read(reinterpret_cast(timesteps.data()), + num_timesteps * sizeof(int32_t)); + + // Read embeddings array + std::vector> embeddings(num_timesteps); + for (auto& emb : embeddings) { + emb.resize(EMBEDDING_DIM); + file.read(reinterpret_cast(emb.data()), + EMBEDDING_DIM * sizeof(float)); + } + + // Reverse both timesteps and embeddings before storing + std::reverse(timesteps.begin(), timesteps.end()); + std::reverse(embeddings.begin(), embeddings.end()); + + // Store in maps + timesteps_[num_timesteps] = std::move(timesteps); + embeddings_[num_timesteps] = std::move(embeddings); + + return true; +} + +std::vector TsEmbeddingParser::get_timestep_embedding( + int32_t steps, int32_t step_index) const { + auto emb_it = embeddings_.find(steps); + if (emb_it == embeddings_.end() || step_index >= emb_it->second.size()) { + return {}; + } + return emb_it->second[step_index]; +} + +std::vector TsEmbeddingParser::get_timesteps(int32_t steps) const { + auto ts_it = timesteps_.find(steps); + if (ts_it == timesteps_.end()) { + return {}; + } + return ts_it->second; +} + +bool EmbeddingManager::load_timestep_embeddings(const std::string& filename) { + ts_parser_ = std::make_unique(); + return ts_parser_->parse_pickle(filename); +} + +std::vector EmbeddingManager::get_timestep_embedding( + int32_t timestep, int num_steps) const { + if (!ts_parser_) return {}; + return ts_parser_->get_timestep_embedding(num_steps, timestep); +} + +std::vector EmbeddingManager::get_timesteps(int num_steps) const { + if (!ts_parser_) return {}; + return 
ts_parser_->get_timesteps(num_steps); +} \ No newline at end of file diff --git a/mobile_back_tflite/cpp/backend_tflite/embedding_utils.h b/mobile_back_tflite/cpp/backend_tflite/embedding_utils.h new file mode 100644 index 000000000..f543c6332 --- /dev/null +++ b/mobile_back_tflite/cpp/backend_tflite/embedding_utils.h @@ -0,0 +1,40 @@ +#ifndef EMBEDDING_UTILS_H_ +#define EMBEDDING_UTILS_H_ + +#include +#include +#include +#include +#include + +class TsEmbeddingParser { + public: + bool parse_pickle(const std::string& filename); + std::vector get_timestep_embedding(int32_t steps, + int32_t step_index) const; + std::vector get_timesteps(int32_t steps) const; + + private: + static constexpr size_t EMBEDDING_DIM = 1280; + std::map> timesteps_; + std::map>> embeddings_; +}; + +class EmbeddingManager { + public: + static EmbeddingManager& getInstance() { + static EmbeddingManager instance; + return instance; + } + + bool load_timestep_embeddings(const std::string& filename); + std::vector get_timestep_embedding(int32_t timestep, + int num_steps) const; + std::vector get_timesteps(int num_steps) const; + + private: + EmbeddingManager() = default; + std::unique_ptr ts_parser_; +}; + +#endif // EMBEDDING_UTILS_H_ \ No newline at end of file diff --git a/mobile_back_tflite/cpp/backend_tflite/ios/BUILD b/mobile_back_tflite/cpp/backend_tflite/ios/BUILD index f0a764a6d..74fa88aea 100644 --- a/mobile_back_tflite/cpp/backend_tflite/ios/BUILD +++ b/mobile_back_tflite/cpp/backend_tflite/ios/BUILD @@ -15,6 +15,7 @@ apple_xcframework( }, minimum_os_versions = { "ios": "13.1", + "macos": "13.1", }, deps = [ "//mobile_back_tflite/cpp/backend_tflite:tflite_c", diff --git a/mobile_back_tflite/cpp/backend_tflite/neuron/BUILD b/mobile_back_tflite/cpp/backend_tflite/neuron/BUILD index 118076fea..e9e8cf9a7 100644 --- a/mobile_back_tflite/cpp/backend_tflite/neuron/BUILD +++ b/mobile_back_tflite/cpp/backend_tflite/neuron/BUILD @@ -70,7 +70,9 @@ cc_library( local_defines = 
["MTK_TFLITE_NEURON_BACKEND"], deps = [ ":tflite_settings", + "//flutter/cpp:utils", "//flutter/cpp/c:headers", + "//mobile_back_tflite/cpp/backend_tflite:embedding_utils", "//mobile_back_tflite/cpp/backend_tflite:tflite_settings", "@org_tensorflow//tensorflow/core:tflite_portable_logging", "@org_tensorflow//tensorflow/lite/c:c_api", diff --git a/mobile_back_tflite/cpp/backend_tflite/pipeline.h b/mobile_back_tflite/cpp/backend_tflite/pipeline.h index 41a9822f2..4ab1b4f1c 100644 --- a/mobile_back_tflite/cpp/backend_tflite/pipeline.h +++ b/mobile_back_tflite/cpp/backend_tflite/pipeline.h @@ -72,11 +72,16 @@ class Pipeline { virtual mlperf_status_t backend_get_output(mlperf_backend_ptr_t backend_ptr, uint32_t batchIndex, int32_t i, void **data) = 0; - + // Optional function to convert the inputs virtual void backend_convert_inputs(mlperf_backend_ptr_t backend_ptr, int bytes, int width, int height, uint8_t *data) = 0; + // Optional function to convert the outputs + virtual void backend_convert_outputs(mlperf_backend_ptr_t backend_ptr, + int bytes, int width, int height, + uint8_t *data) = 0; + virtual void *backend_get_buffer(size_t n) = 0; virtual void backend_release_buffer(void *p) = 0; diff --git a/mobile_back_tflite/cpp/backend_tflite/sd_utils.cc b/mobile_back_tflite/cpp/backend_tflite/sd_utils.cc index 14aa858d9..c5901b66c 100644 --- a/mobile_back_tflite/cpp/backend_tflite/sd_utils.cc +++ b/mobile_back_tflite/cpp/backend_tflite/sd_utils.cc @@ -233,7 +233,6 @@ std::vector get_timestep_embedding(int timestep, int batch_size, int dim, embedding_cos.push_back(cosf(timestep * freq)); embedding_sin.push_back(sinf(timestep * freq)); } - std::vector embedding; for (int i = 0; i < batch_size; i++) { embedding.insert(embedding.end(), diff --git a/mobile_back_tflite/cpp/backend_tflite/single_model_pipeline.cc b/mobile_back_tflite/cpp/backend_tflite/single_model_pipeline.cc index 4dc30507b..ce1eb7a1d 100644 --- a/mobile_back_tflite/cpp/backend_tflite/single_model_pipeline.cc 
+++ b/mobile_back_tflite/cpp/backend_tflite/single_model_pipeline.cc @@ -635,6 +635,10 @@ void SingleModelPipeline::backend_convert_inputs( #endif } +void SingleModelPipeline::backend_convert_outputs( + mlperf_backend_ptr_t backend_ptr, int bytes, int width, int height, + uint8_t *data) {} + void *SingleModelPipeline::backend_get_buffer(size_t n) { #ifdef MTK_TFLITE_NEURON_BACKEND if (neuron_backend != nullptr) { diff --git a/mobile_back_tflite/cpp/backend_tflite/single_model_pipeline.h b/mobile_back_tflite/cpp/backend_tflite/single_model_pipeline.h index 30c639596..70d447588 100644 --- a/mobile_back_tflite/cpp/backend_tflite/single_model_pipeline.h +++ b/mobile_back_tflite/cpp/backend_tflite/single_model_pipeline.h @@ -64,6 +64,9 @@ class SingleModelPipeline : public Pipeline { void backend_convert_inputs(mlperf_backend_ptr_t backend_ptr, int bytes, int width, int height, uint8_t *data) override; + void backend_convert_outputs(mlperf_backend_ptr_t backend_ptr, int bytes, + int width, int height, uint8_t *data) override; + void *backend_get_buffer(size_t n) override; void backend_release_buffer(void *p) override; diff --git a/mobile_back_tflite/cpp/backend_tflite/stable_diffusion_invoker.cc b/mobile_back_tflite/cpp/backend_tflite/stable_diffusion_invoker.cc index 55ea8be07..8c3405739 100644 --- a/mobile_back_tflite/cpp/backend_tflite/stable_diffusion_invoker.cc +++ b/mobile_back_tflite/cpp/backend_tflite/stable_diffusion_invoker.cc @@ -4,6 +4,7 @@ #include #include +#include "embedding_utils.h" #include "sd_utils.h" #include "stable_diffusion_pipeline.h" #include "tensorflow/lite/c/c_api.h" @@ -24,15 +25,15 @@ StableDiffusionInvoker::StableDiffusionInvoker(SDBackendData* backend_data) : backend_data_(backend_data) {} std::vector StableDiffusionInvoker::invoke() { - std::cout << "Prompt encoding started" << std::endl; + LOG(INFO) << "Prompt encoding started"; auto encoded_text = encode_prompt(backend_data_->input_prompt_tokens); auto unconditional_encoded_text = 
        encode_prompt(backend_data_->unconditional_tokens);
-  std::cout << "Diffusion process started" << std::endl;
+  LOG(INFO) << "Diffusion process started";
   auto latent = diffusion_process(encoded_text, unconditional_encoded_text,
                                   backend_data_->num_steps, backend_data_->seed);
-  std::cout << "Image decoding started" << std::endl;
+  LOG(INFO) << "Image decoding started";
   return decode_image(latent);
 }
 
@@ -99,19 +100,38 @@ std::vector StableDiffusionInvoker::diffusion_process(
     const std::vector& unconditional_encoded_text, int num_steps, int seed) {
   float unconditional_guidance_scale = 7.5f;
+
   auto noise = get_normal(64 * 64 * 4, seed);
   auto latent = noise;
-  auto timesteps = get_timesteps(1, 1000, 1000 / num_steps);
+  // Get pre-calculated timesteps and embeddings
+  auto& embedding_manager = EmbeddingManager::getInstance();
+  auto timesteps = embedding_manager.get_timesteps(num_steps);
+
+  if (timesteps.empty()) {
+    LOG(ERROR) << "Failed to get timesteps for " << num_steps << " steps";
+    return std::vector();
+  }
+
   auto alphas_tuple = get_initial_alphas(timesteps);
+
   auto alphas = std::get<0>(alphas_tuple);
   auto alphas_prev = std::get<1>(alphas_tuple);
 
   for (int i = timesteps.size() - 1; i >= 0; --i) {
-    std::cout << "Step " << timesteps.size() - 1 - i << "\n";
+    LOG(INFO) << "Step " << timesteps.size() - 1 - i;
+
+    std::cout << "\n=== Processing Step " << timesteps.size() - 1 - i
+              << " (timestamp: " << timesteps[i] << ") ===" << std::endl;
 
     auto latent_prev = latent;
-    auto t_emb = get_timestep_embedding(timesteps[i]);
+
+    auto t_emb = embedding_manager.get_timestep_embedding(i, num_steps);
+
+    if (t_emb.empty()) {
+      LOG(ERROR) << "Failed to get timestamp embedding for step " << i;
+      return std::vector();
+    }
 
     auto unconditional_latent =
         diffusion_step(latent, t_emb, unconditional_encoded_text);
@@ -132,6 +157,7 @@ std::vector 
StableDiffusionInvoker::diffusion_process( latent.assign(std::begin(l), std::end(l)); } + std::cout << "\nDiffusion process completed" << std::endl; return latent; } diff --git a/mobile_back_tflite/cpp/backend_tflite/stable_diffusion_invoker.h b/mobile_back_tflite/cpp/backend_tflite/stable_diffusion_invoker.h index ccbef1f9e..706589835 100644 --- a/mobile_back_tflite/cpp/backend_tflite/stable_diffusion_invoker.h +++ b/mobile_back_tflite/cpp/backend_tflite/stable_diffusion_invoker.h @@ -5,7 +5,7 @@ #include #include -#include "stable_diffusion_pipeline.h" // Include the backend data structure +#include "stable_diffusion_pipeline.h" #include "tensorflow/lite/interpreter.h" #include "tensorflow/lite/model_builder.h" diff --git a/mobile_back_tflite/cpp/backend_tflite/stable_diffusion_pipeline.cc b/mobile_back_tflite/cpp/backend_tflite/stable_diffusion_pipeline.cc index 52d20b570..de7ddba57 100644 --- a/mobile_back_tflite/cpp/backend_tflite/stable_diffusion_pipeline.cc +++ b/mobile_back_tflite/cpp/backend_tflite/stable_diffusion_pipeline.cc @@ -6,7 +6,9 @@ #include #include +#include "embedding_utils.h" #include "flutter/cpp/c/backend_c.h" +#include "flutter/cpp/utils.h" #include "stable_diffusion_invoker.h" #include "tensorflow/lite/c/c_api.h" #include "tensorflow/lite/c/common.h" @@ -58,12 +60,26 @@ mlperf_backend_ptr_t StableDiffusionPipeline::backend_create( // Verify only one instance of the backend exists at any time if (backendExists) { + LOG(ERROR) << "Backend already exists"; return nullptr; } SDBackendData* backend_data = new SDBackendData(); backendExists = true; + // Read seed and num_steps value from SD task settings + backend_data->seed = + mlperf::mobile::GetConfigValue(configs, "stable_diffusion_seed", 0); + if (backend_data->seed == 0) { + LOG(ERROR) << "Cannot get stable_diffusion_seed"; + return nullptr; + } + backend_data->num_steps = + mlperf::mobile::GetConfigValue(configs, "stable_diffusion_num_steps", 0); + if (backend_data->num_steps == 0) { + 
LOG(ERROR) << "Cannot get stable_diffusion_num_steps"; + return nullptr; + } // Load models from the provided directory path std::string text_encoder_path = std::string(model_path) + "/sd_text_encoder_dynamic.tflite"; @@ -95,6 +111,16 @@ mlperf_backend_ptr_t StableDiffusionPipeline::backend_create( return nullptr; } + std::string ts_embedding_path = + std::string(model_path) + "/timestep_embeddings_data.bin.ts"; + if (!EmbeddingManager::getInstance().load_timestep_embeddings( + ts_embedding_path)) { + LOG(ERROR) << "Failed to load timestep embeddings from " + << ts_embedding_path; + backend_delete(backend_data); + return nullptr; + } + return backend_data; } @@ -268,6 +294,10 @@ void StableDiffusionPipeline::backend_convert_inputs( mlperf_backend_ptr_t backend_ptr, int bytes, int width, int height, uint8_t* data) {} +void StableDiffusionPipeline::backend_convert_outputs( + mlperf_backend_ptr_t backend_ptr, int bytes, int width, int height, + uint8_t* data) {} + void* StableDiffusionPipeline::backend_get_buffer(size_t n) { return ::operator new(n); } diff --git a/mobile_back_tflite/cpp/backend_tflite/stable_diffusion_pipeline.h b/mobile_back_tflite/cpp/backend_tflite/stable_diffusion_pipeline.h index adf460530..17070a286 100644 --- a/mobile_back_tflite/cpp/backend_tflite/stable_diffusion_pipeline.h +++ b/mobile_back_tflite/cpp/backend_tflite/stable_diffusion_pipeline.h @@ -91,6 +91,9 @@ class StableDiffusionPipeline : public Pipeline { void backend_convert_inputs(mlperf_backend_ptr_t backend_ptr, int bytes, int width, int height, uint8_t *data) override; + void backend_convert_outputs(mlperf_backend_ptr_t backend_ptr, int bytes, + int width, int height, uint8_t *data) override; + void *backend_get_buffer(size_t n) override; void backend_release_buffer(void *p) override; diff --git a/mobile_back_tflite/cpp/backend_tflite/tflite_c.cc b/mobile_back_tflite/cpp/backend_tflite/tflite_c.cc index dced8bf1d..62a6a18bc 100644 --- 
a/mobile_back_tflite/cpp/backend_tflite/tflite_c.cc +++ b/mobile_back_tflite/cpp/backend_tflite/tflite_c.cc @@ -229,6 +229,12 @@ void mlperf_backend_convert_inputs(mlperf_backend_ptr_t backend_ptr, int bytes, data); } +void mlperf_backend_convert_outputs(mlperf_backend_ptr_t backend_ptr, int bytes, + int width, int height, uint8_t *data) { + return pipeline->backend_convert_outputs(backend_ptr, bytes, width, height, + data); +} + void *mlperf_backend_get_buffer(size_t n) { return pipeline->backend_get_buffer(n); }