From 05d0d93078548bf32ed7564e0b6152ffcd6d06c3 Mon Sep 17 00:00:00 2001 From: Guennadi Liakhovetski Date: Fri, 20 Dec 2024 14:47:20 +0100 Subject: [PATCH 1/5] build: add a Kconfig option for building cold module manifests To execute a non-performance critical part of SOF code base in DRAM we create an additional "cold" module, that will stay in DRAM next to the boot module. This patch only adds a Kconfig option and module manifests in TOML. Signed-off-by: Guennadi Liakhovetski --- Kconfig.sof | 8 ++++++++ src/ipc/ipc4/helper.c | 4 ++-- tools/rimage/config/lnl.toml.h | 14 ++++++++++++++ tools/rimage/config/mtl.toml.h | 14 ++++++++++++++ tools/rimage/config/ptl.toml.h | 14 ++++++++++++++ 5 files changed, 52 insertions(+), 2 deletions(-) diff --git a/Kconfig.sof b/Kconfig.sof index 0833201e817d..77dbe3de34fe 100644 --- a/Kconfig.sof +++ b/Kconfig.sof @@ -122,6 +122,14 @@ config COMPILER_INLINE_FUNCTION_OPTION help When enabled, -fno-inline-function option is not passed to compiler +config COLD_STORE_EXECUTE_DRAM + bool "Execute and use cold data and code in DRAM" + help + Non-performance critical data and code can be kept in DRAM to be + accessed and executed there without copying to SRAM. Select this + option to enable this feature to save SRAM and to speed up SRAM + copying of performance-critical data and code. 
+ rsource "src/Kconfig" # See zephyr/modules/Kconfig diff --git a/src/ipc/ipc4/helper.c b/src/ipc/ipc4/helper.c index eb08414b323f..b4a27cc0382c 100644 --- a/src/ipc/ipc4/helper.c +++ b/src/ipc/ipc4/helper.c @@ -966,9 +966,9 @@ const struct comp_driver *ipc4_get_comp_drv(uint32_t module_id) uint32_t lib_idx = LIB_MANAGER_GET_LIB_ID(module_id); if (lib_idx == 0) { - /* module_id 0 is used for base fw which is in entry 1 */ + /* module_id 0 is used for base fw which is in entry 1 or 2 */ if (!module_id) - entry_index = 1; + entry_index = 1 + IS_ENABLED(CONFIG_COLD_STORE_EXECUTE_DRAM); else entry_index = module_id; diff --git a/tools/rimage/config/lnl.toml.h b/tools/rimage/config/lnl.toml.h index 247a4975f9c8..6ff62c64e9db 100644 --- a/tools/rimage/config/lnl.toml.h +++ b/tools/rimage/config/lnl.toml.h @@ -12,6 +12,20 @@ index = __COUNTER__ +#if CONFIG_COLD_STORE_EXECUTE_DRAM + [[module.entry]] + name = "COLD" + uuid = "D406D134-C3C1-402C-8AEC-6821C0C2B0E6" + affinity_mask = "3" + instance_count = "1" + domain_types = "0" + load_type = "0" + module_type = "0" + auto_start = "0" + + index = __COUNTER__ +#endif + [[module.entry]] name = "BASEFW" uuid = "0E398C32-5ADE-BA4B-93B1-C50432280EE4" diff --git a/tools/rimage/config/mtl.toml.h b/tools/rimage/config/mtl.toml.h index b549c76c3658..bfdfb41fdaef 100644 --- a/tools/rimage/config/mtl.toml.h +++ b/tools/rimage/config/mtl.toml.h @@ -12,6 +12,20 @@ index = __COUNTER__ +#if CONFIG_COLD_STORE_EXECUTE_DRAM + [[module.entry]] + name = "COLD" + uuid = "D406D134-C3C1-402C-8AEC-6821C0C2B0E6" + affinity_mask = "3" + instance_count = "1" + domain_types = "0" + load_type = "0" + module_type = "0" + auto_start = "0" + + index = __COUNTER__ +#endif + [[module.entry]] name = "BASEFW" uuid = "0E398C32-5ADE-BA4B-93B1-C50432280EE4" diff --git a/tools/rimage/config/ptl.toml.h b/tools/rimage/config/ptl.toml.h index e83045b4ecee..c5643de0ab30 100644 --- a/tools/rimage/config/ptl.toml.h +++ b/tools/rimage/config/ptl.toml.h @@ -12,6 +12,20 @@ 
auto_start = "0" index = __COUNTER__ +#if CONFIG_COLD_STORE_EXECUTE_DRAM +[[module.entry]] +name = "COLD" +uuid = "D406D134-C3C1-402C-8AEC-6821C0C2B0E6" +affinity_mask = "3" +instance_count = "1" +domain_types = "0" +load_type = "0" +module_type = "0" +auto_start = "0" + +index = __COUNTER__ +#endif + [[module.entry]] name = "BASEFW" uuid = "0E398C32-5ADE-BA4B-93B1-C50432280EE4" From dfca68e6c35149ec1c900a4ad990490e43980b1e Mon Sep 17 00:00:00 2001 From: Guennadi Liakhovetski Date: Fri, 20 Dec 2024 14:55:31 +0100 Subject: [PATCH 2/5] build: make cold data and code support universal This extends LLEXT cold data and code support to the entire SOF code base. Using the same __cold and __cold_rodata qualifiers, any code or data can be marked to be used directly in DRAM without being copied to SRAM. Signed-off-by: Guennadi Liakhovetski --- posix/include/sof/lib/memory.h | 8 ++++++++ scripts/llext_link_helper.py | 8 ++++---- src/include/module/module/llext.h | 6 ------ src/platform/ace30/include/platform/lib/memory.h | 6 ++++++ src/platform/lunarlake/include/platform/lib/memory.h | 6 ++++++ src/platform/meteorlake/include/platform/lib/memory.h | 6 ++++++ xtos/include/sof/lib/memory.h | 8 ++++++++ zephyr/include/sof/lib/memory.h | 8 ++++++++ 8 files changed, 46 insertions(+), 10 deletions(-) diff --git a/posix/include/sof/lib/memory.h b/posix/include/sof/lib/memory.h index 8fcbc1f9adbe..9a784ec7f19d 100644 --- a/posix/include/sof/lib/memory.h +++ b/posix/include/sof/lib/memory.h @@ -10,4 +10,12 @@ #include +#ifndef __cold +#define __cold +#endif + +#ifndef __cold_rodata +#define __cold_rodata +#endif + #endif /* __SOF_LIB_MEMORY_H__ */ diff --git a/scripts/llext_link_helper.py b/scripts/llext_link_helper.py index 325d65a4e3de..c745b388cd03 100755 --- a/scripts/llext_link_helper.py +++ b/scripts/llext_link_helper.py @@ -136,10 +136,10 @@ def main(): # run at arbitrary memory locations. 
One of the use-cases is running # parts of the module directly in DRAM - sacrificing performance but # saving scarce SRAM. We achieve this by placing non-performance - # critical functions in a .text.dram ELF section. When compiling and - # linking such functions, an additional .literal.dram section is - # automatically created. Note, that for some reason the compiler also - # marks that section as executable. + # critical functions in a .cold ELF section. When compiling and linking + # such functions, an additional .cold.literal section is automatically + # created. Note, that for some reason the compiler also marks that + # section as executable. # This script links those sections at address 0. We could hard-code # section names, but so far we choose to only link .text the "original" # way and all other executable sections we link at 0. diff --git a/src/include/module/module/llext.h b/src/include/module/module/llext.h index f8477fe7d9b3..05b8f0ffb5f5 100644 --- a/src/include/module/module/llext.h +++ b/src/include/module/module/llext.h @@ -34,10 +34,4 @@ static const struct sof_module_api_build_info buildinfo __section(".mod_buildinf .api_version_number.full = SOF_MODULE_API_CURRENT_VERSION, \ } -#if CONFIG_LLEXT_TYPE_ELF_RELOCATABLE && defined(LL_EXTENSION_BUILD) -#define __cold __section(".text.dram") -#else -#define __cold -#endif - #endif diff --git a/src/platform/ace30/include/platform/lib/memory.h b/src/platform/ace30/include/platform/lib/memory.h index 6232b77af907..dab9d94c062a 100644 --- a/src/platform/ace30/include/platform/lib/memory.h +++ b/src/platform/ace30/include/platform/lib/memory.h @@ -58,6 +58,12 @@ */ #define HEAPMEM_SIZE 0xD0000 +#if CONFIG_COLD_STORE_EXECUTE_DRAM && \ + (CONFIG_LLEXT_TYPE_ELF_RELOCATABLE || !defined(LL_EXTENSION_BUILD)) +#define __cold __section(".cold") +#define __cold_rodata __section(".coldrodata") +#endif + #endif /* __PLATFORM_LIB_MEMORY_H__ */ #else diff --git a/src/platform/lunarlake/include/platform/lib/memory.h 
b/src/platform/lunarlake/include/platform/lib/memory.h index e23fd7556482..b5bda0ede464 100644 --- a/src/platform/lunarlake/include/platform/lib/memory.h +++ b/src/platform/lunarlake/include/platform/lib/memory.h @@ -58,6 +58,12 @@ */ #define HEAPMEM_SIZE 0xF0000 +#if CONFIG_COLD_STORE_EXECUTE_DRAM && \ + (CONFIG_LLEXT_TYPE_ELF_RELOCATABLE || !defined(LL_EXTENSION_BUILD)) +#define __cold __section(".cold") +#define __cold_rodata __section(".coldrodata") +#endif + #endif /* __PLATFORM_LIB_MEMORY_H__ */ #else diff --git a/src/platform/meteorlake/include/platform/lib/memory.h b/src/platform/meteorlake/include/platform/lib/memory.h index 77e8b7c1b25b..76b60f98fabb 100644 --- a/src/platform/meteorlake/include/platform/lib/memory.h +++ b/src/platform/meteorlake/include/platform/lib/memory.h @@ -58,6 +58,12 @@ */ #define HEAPMEM_SIZE 0xF0000 +#if CONFIG_COLD_STORE_EXECUTE_DRAM && \ + (CONFIG_LLEXT_TYPE_ELF_RELOCATABLE || !defined(LL_EXTENSION_BUILD)) +#define __cold __section(".cold") +#define __cold_rodata __section(".coldrodata") +#endif + #endif /* __PLATFORM_LIB_MEMORY_H__ */ #else diff --git a/xtos/include/sof/lib/memory.h b/xtos/include/sof/lib/memory.h index 8fcbc1f9adbe..9a784ec7f19d 100644 --- a/xtos/include/sof/lib/memory.h +++ b/xtos/include/sof/lib/memory.h @@ -10,4 +10,12 @@ #include +#ifndef __cold +#define __cold +#endif + +#ifndef __cold_rodata +#define __cold_rodata +#endif + #endif /* __SOF_LIB_MEMORY_H__ */ diff --git a/zephyr/include/sof/lib/memory.h b/zephyr/include/sof/lib/memory.h index 8cde3c0f589d..f58686a92981 100644 --- a/zephyr/include/sof/lib/memory.h +++ b/zephyr/include/sof/lib/memory.h @@ -8,4 +8,12 @@ #include +#ifndef __cold +#define __cold +#endif + +#ifndef __cold_rodata +#define __cold_rodata +#endif + #endif /* __SOF_LIB_MEMORY_H__ */ From 65f87f91d40ce00aa8db6e0966e8b16afa6e5398 Mon Sep 17 00:00:00 2001 From: Guennadi Liakhovetski Date: Mon, 6 Jan 2025 15:16:56 +0100 Subject: [PATCH 3/5] memory: move some code and data to DRAM A lot 
of code isn't performance-critical and can be used directly in DRAM, without being copied into scarce SRAM. This commit selects two functions as first candidates for that. Moving data to DRAM is more difficult. The largest data blobs are audio processing coefficients and they're usually used during audio processing, i.e. when performance is critical. A good candidate for such data relocation is the src component, which has many coefficient sets, of which only some are used at run-time. Follow up work will switch to keeping all src coefficients in DRAM and only copying used ones into dynamically allocated SRAM buffers. This commit only moves several conversion function selection arrays into DRAM. Those arrays are small so this won't free a lot of SRAM, but at least this will serve as the first test. Signed-off-by: Guennadi Liakhovetski --- src/audio/mfcc/mfcc.c | 2 +- src/audio/mixin_mixout/mixin_mixout_generic.c | 2 +- src/audio/mixin_mixout/mixin_mixout_hifi3.c | 2 +- src/audio/mixin_mixout/mixin_mixout_hifi5.c | 2 +- src/audio/volume/volume.c | 8 ++++++-- src/ipc/ipc4/handler.c | 2 +- src/ipc/ipc4/helper.c | 2 +- 7 files changed, 12 insertions(+), 8 deletions(-) diff --git a/src/audio/mfcc/mfcc.c b/src/audio/mfcc/mfcc.c index fd0e0223b533..964b4bae7d38 100644 --- a/src/audio/mfcc/mfcc.c +++ b/src/audio/mfcc/mfcc.c @@ -38,7 +38,7 @@ SOF_DEFINE_REG_UUID(mfcc); DECLARE_TR_CTX(mfcc_tr, SOF_UUID(mfcc_uuid), LOG_LEVEL_INFO); -const struct mfcc_func_map mfcc_fm[] = { +__cold_rodata const struct mfcc_func_map mfcc_fm[] = { #if CONFIG_FORMAT_S16LE {SOF_IPC_FRAME_S16_LE, mfcc_s16_default}, #endif /* CONFIG_FORMAT_S16LE */ diff --git a/src/audio/mixin_mixout/mixin_mixout_generic.c b/src/audio/mixin_mixout/mixin_mixout_generic.c index 69ba5f02a4bd..5443ee5a5f49 100644 --- a/src/audio/mixin_mixout/mixin_mixout_generic.c +++ b/src/audio/mixin_mixout/mixin_mixout_generic.c @@ -283,7 +283,7 @@ static void mix_s32_gain(struct cir_buf_ptr *sink, int32_t start_sample, int32_t } 
#endif /* CONFIG_FORMAT_S32LE */ -const struct mix_func_map mix_func_map[] = { +__cold_rodata const struct mix_func_map mix_func_map[] = { #if CONFIG_FORMAT_S16LE { SOF_IPC_FRAME_S16_LE, mix_s16, mix_s16_gain }, #endif diff --git a/src/audio/mixin_mixout/mixin_mixout_hifi3.c b/src/audio/mixin_mixout/mixin_mixout_hifi3.c index 5f0e786fba57..da22be3634c5 100644 --- a/src/audio/mixin_mixout/mixin_mixout_hifi3.c +++ b/src/audio/mixin_mixout/mixin_mixout_hifi3.c @@ -607,7 +607,7 @@ static void mix_s32_gain(struct cir_buf_ptr *sink, int32_t start_sample, int32_t #endif /* CONFIG_FORMAT_S32LE */ -const struct mix_func_map mix_func_map[] = { +__cold_rodata const struct mix_func_map mix_func_map[] = { #if CONFIG_FORMAT_S16LE { SOF_IPC_FRAME_S16_LE, mix_s16, mix_s16_gain }, #endif diff --git a/src/audio/mixin_mixout/mixin_mixout_hifi5.c b/src/audio/mixin_mixout/mixin_mixout_hifi5.c index 7c2942e76bd2..14bea3d98773 100644 --- a/src/audio/mixin_mixout/mixin_mixout_hifi5.c +++ b/src/audio/mixin_mixout/mixin_mixout_hifi5.c @@ -269,7 +269,7 @@ static void mix_s32(struct cir_buf_ptr *sink, int32_t start_sample, int32_t mixe #endif /* CONFIG_FORMAT_S32LE */ /* TODO: implement mixing functions with gain support!*/ -const struct mix_func_map mix_func_map[] = { +__cold_rodata const struct mix_func_map mix_func_map[] = { #if CONFIG_FORMAT_S16LE { SOF_IPC_FRAME_S16_LE, mix_s16, mix_s16 }, #endif diff --git a/src/audio/volume/volume.c b/src/audio/volume/volume.c index 05ea4274987e..b3cf42cf116a 100644 --- a/src/audio/volume/volume.c +++ b/src/audio/volume/volume.c @@ -193,8 +193,12 @@ static uint32_t vol_zc_get_s32(const struct audio_stream *source, #endif /* CONFIG_FORMAT_S32LE */ -/** \brief Map of formats with dedicated zc functions. */ -static const struct comp_zc_func_map zc_func_map[] = { +/** + * \brief Map of formats with dedicated zc functions. + * + * This is only used during @c .prepare() so it isn't performance-critical. 
+ */ +__cold_rodata static const struct comp_zc_func_map zc_func_map[] = { #if CONFIG_FORMAT_S16LE { SOF_IPC_FRAME_S16_LE, vol_zc_get_s16 }, #endif /* CONFIG_FORMAT_S16LE */ diff --git a/src/ipc/ipc4/handler.c b/src/ipc/ipc4/handler.c index 6645d6fd08e5..2098475d4b49 100644 --- a/src/ipc/ipc4/handler.c +++ b/src/ipc/ipc4/handler.c @@ -891,7 +891,7 @@ static int ipc4_process_glb_message(struct ipc4_message_request *ipc4) * delete module <-------> free component */ -static int ipc4_init_module_instance(struct ipc4_message_request *ipc4) +__cold static int ipc4_init_module_instance(struct ipc4_message_request *ipc4) { struct ipc4_module_init_instance module_init; struct comp_dev *dev; diff --git a/src/ipc/ipc4/helper.c b/src/ipc/ipc4/helper.c index b4a27cc0382c..0dfb4bc73fc2 100644 --- a/src/ipc/ipc4/helper.c +++ b/src/ipc/ipc4/helper.c @@ -102,7 +102,7 @@ static inline char *ipc4_get_comp_new_data(void) } #endif -struct comp_dev *comp_new_ipc4(struct ipc4_module_init_instance *module_init) +__cold struct comp_dev *comp_new_ipc4(struct ipc4_module_init_instance *module_init) { struct comp_ipc_config ipc_config; const struct comp_driver *drv; From 5bb2a669da1dc2e856e5d516ee404f663a15e5e2 Mon Sep 17 00:00:00 2001 From: Guennadi Liakhovetski Date: Fri, 20 Dec 2024 16:25:23 +0100 Subject: [PATCH 4/5] mtl: enable cold code and data Leave non-performance critical code and data in DRAM, saving SRAM and sacrificing some performance. 
Signed-off-by: Guennadi Liakhovetski --- app/boards/intel_adsp_ace15_mtpm.conf | 1 + 1 file changed, 1 insertion(+) diff --git a/app/boards/intel_adsp_ace15_mtpm.conf b/app/boards/intel_adsp_ace15_mtpm.conf index 5341d14e4ddd..52bdee4c0b6e 100644 --- a/app/boards/intel_adsp_ace15_mtpm.conf +++ b/app/boards/intel_adsp_ace15_mtpm.conf @@ -46,6 +46,7 @@ CONFIG_SOF_TELEMETRY_IO_PERFORMANCE_MEASUREMENTS=y CONFIG_SOF_TELEMETRY_PERFORMANCE_MEASUREMENTS=y CONFIG_ZEPHYR_NATIVE_DRIVERS=y CONFIG_ZEPHYR_DP_SCHEDULER=y +CONFIG_COLD_STORE_EXECUTE_DRAM=y # SOF / loadable modules CONFIG_INTEL_MODULES=y From ffcb3e4e9c7f5b770bb6ad98c77fa55d5748d9b7 Mon Sep 17 00:00:00 2001 From: Guennadi Liakhovetski Date: Thu, 9 Jan 2025 08:08:27 +0100 Subject: [PATCH 5/5] github: free disc space for LP64-WIP test The LP64-WIP test is also failing with insufficient disc space errors. Apply the same work-around as for other tests. Signed-off-by: Guennadi Liakhovetski --- .github/workflows/zephyr.yml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/.github/workflows/zephyr.yml b/.github/workflows/zephyr.yml index b53fc367f5bd..3fe3e8982d58 100644 --- a/.github/workflows/zephyr.yml +++ b/.github/workflows/zephyr.yml @@ -73,6 +73,11 @@ jobs: path: ./workspace/sof filter: 'tree:0' + - name: free space + run: | + sudo rm -rf /usr/share/dotnet + sudo rm -rf /opt/ghc + - name: west clones run: pip3 install west && cd workspace/sof/ && west init -l && west update --narrow --fetch-opt=--filter=tree:0