From 13e35eaface7fe0aef2c5970fefafb59bc0abdad Mon Sep 17 00:00:00 2001 From: Niklas Hauser Date: Tue, 21 Jan 2025 16:25:18 +0100 Subject: [PATCH 1/3] [cortex-m7] Enable I/D-Cache optionally --- src/modm/platform/core/cortex/module.lb | 26 ++++++++++++++++----- src/modm/platform/core/cortex/startup.c.in | 27 ++++++++++++---------- 2 files changed, 35 insertions(+), 18 deletions(-) diff --git a/src/modm/platform/core/cortex/module.lb b/src/modm/platform/core/cortex/module.lb index 577c2fe045..5e9a84d691 100644 --- a/src/modm/platform/core/cortex/module.lb +++ b/src/modm/platform/core/cortex/module.lb @@ -219,7 +219,20 @@ def prepare(module, options): maximum="64Ki", default="3Ki")) - if "f" in options[":target"].get_driver("core")["type"]: + core = options[":target"].get_driver("core")["type"] + if "m7" in core: + module.add_option( + BooleanOption( + name="enable_icache", + description="Enable Instruction-Cache", + default=True)) + module.add_option( + BooleanOption( + name="enable_dcache", + description="Enable Data-Cache", + default=True)) + + if "f" in core: module.add_option( EnumerationOption( name="float-abi", @@ -331,8 +344,7 @@ def validate(env): def build(env): env.substitutions = env.query("vector_table") core = env.substitutions["core"] - with_icache = "m7" in core - with_dcache = with_icache and not (env.has_module(":platform:dma") or env.has_module(":platform:bdma")) + enable_dcache = env.get("enable_dcache", False) and not (env.has_module(":platform:dma") or env.has_module(":platform:bdma")) env.substitutions.update({ "target": env[":target"].identifier, "with_fault_storage": env.has_module(":platform:fault"), @@ -341,12 +353,14 @@ def build(env): "with_fpu": env.get("float-abi", "soft") != "soft", "with_multicore": env.has_module(":platform:multicore"), "with_msplim": sum(c.isnumeric() for c in core) == 2, - "with_icache": with_icache, - "with_dcache": with_dcache, + "enable_icache": env.get("enable_icache", False), + "enable_dcache": enable_dcache, + "has_icache": env.has_option("enable_icache"), + "has_dcache": env.has_option("enable_dcache"), }) env.outbasepath = "modm/src/modm/platform/core" - if env.substitutions["with_icache"] and not env.substitutions["with_dcache"]: + if env.get("enable_dcache", False) and not enable_dcache: env.log.warning("Cortex-M7 D-Cache is disabled due to using DMA!") # startup script diff --git a/src/modm/platform/core/cortex/startup.c.in b/src/modm/platform/core/cortex/startup.c.in index 20c2346746..c9e638eed4 100644 --- a/src/modm/platform/core/cortex/startup.c.in +++ b/src/modm/platform/core/cortex/startup.c.in @@ -95,20 +95,18 @@ table_zero(const uint32_t *const start, const uint32_t *const end) // Called by Reset_Handler in reset_handler.s void __modm_startup(void) { - // Copy and zero all internal memory - table_copy(__table_copy_intern_start, __table_copy_intern_end); - table_zero(__table_zero_intern_start, __table_zero_intern_end); -%# -%% if with_icache - // Enable instruction cache +%% if enable_icache SCB_EnableICache(); - SCB_InvalidateICache(); +%% elif has_icache + SCB_DisableICache(); %% endif -%% if with_dcache - // Enable data cache with default WBWA policy - SCB_EnableDCache(); - SCB_CleanInvalidateDCache(); +%% if has_dcache + SCB_DisableDCache(); %% endif +%# + // Copy and zero all internal memory + table_copy(__table_copy_intern_start, __table_copy_intern_end); + table_zero(__table_zero_intern_start, __table_zero_intern_end); %# %% if core != "cortex-m0" // Set the vector table location @@ -129,7 +127,12 @@ void __modm_startup(void) // Initialize heap as implemented by the heap option __modm_initialize_memory(); - +%# +%% if enable_dcache + // Enable D-Cache with default WBWA policy *after* all memory operations + SCB_EnableDCache(); +%# +%% endif // Call all constructors of static objects table_call(__init_array_start, __init_array_end); From 5b4125f09a3802fa91b90339e64dbd0f1acc102e Mon Sep 17 00:00:00 2001 From: Niklas Hauser Date: Fri, 24 Jan 2025 16:42:18 +0100 Subject: [PATCH 2/3] [cortex-m] Fix too far branch in reset handler --- src/modm/platform/core/cortex/reset_handler.sx.in | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/modm/platform/core/cortex/reset_handler.sx.in b/src/modm/platform/core/cortex/reset_handler.sx.in index 4970285a1c..cacf68c848 100644 --- a/src/modm/platform/core/cortex/reset_handler.sx.in +++ b/src/modm/platform/core/cortex/reset_handler.sx.in @@ -14,7 +14,7 @@ .syntax unified .align 4 .type Reset_Handler, %function - .func Reset_Handler + .func Reset_Handler Reset_Handler: // Some bootloaders do not reset the stack pointer back to the VTOR entry ldr r0,=__main_stack_top @@ -32,5 +32,6 @@ Reset_Handler: %% endif bl __modm_initialize_platform bl modm_initialize_platform - b __modm_startup + ldr r0,=__modm_startup + bx r0 .endfunc From 6c4e0f5d84cff9456aea34114ec56ca052b2a4b4 Mon Sep 17 00:00:00 2001 From: Niklas Hauser Date: Fri, 24 Jan 2025 17:08:55 +0100 Subject: [PATCH 3/3] [cortex-m] Use fast 64-bit copy and zero tables --- src/modm/platform/core/cortex/linker.macros | 15 +++++---- src/modm/platform/core/cortex/module.lb | 7 +++-- src/modm/platform/core/cortex/startup.c.in | 34 ++++++++++----------- 3 files changed, 31 insertions(+), 25 deletions(-) diff --git a/src/modm/platform/core/cortex/linker.macros b/src/modm/platform/core/cortex/linker.macros index 35953f4bc7..2ccc3d93db 100644 --- a/src/modm/platform/core/cortex/linker.macros +++ b/src/modm/platform/core/cortex/linker.macros @@ -53,6 +53,7 @@ EXCEPTION_FRAME_SIZE = {{ exception_frame_size }}; __vector_table_rom_start = .; __vector_table_ram_load = .; KEEP(*(.vector_rom)) + . = ALIGN(8); __vector_table_rom_end = .; } >{{memory}} %% endmacro @@ -65,7 +66,7 @@ EXCEPTION_FRAME_SIZE = {{ exception_frame_size }}; { __vector_table_ram_start = .; KEEP(*(.vector_ram)) - . = ALIGN(4); + . = ALIGN(8); __vector_table_ram_end = .; } >{{memory}} %% endmacro @@ -144,10 +145,11 @@ EXCEPTION_FRAME_SIZE = {{ exception_frame_size }}; %# .{{section}} : { + . = ALIGN(8); __{{section}}_load = LOADADDR(.{{section}}); __{{section}}_start = .; *(.{{section}} .{{section}}.*) - . = ALIGN(4); + . = ALIGN(8); __{{section}}_end = .; } >{{memory}} %% endfor @@ -284,11 +286,11 @@ EXCEPTION_FRAME_SIZE = {{ exception_frame_size }}; %% do table_copy.append("data") .data : { - . = ALIGN(4); + . = ALIGN(8); __data_load = LOADADDR(.data); __data_start = .; *(.data .data.* .gnu.linkonce.d.*) - . = ALIGN(4); + . = ALIGN(8); __data_end = .; } >{{memory}} AT >{{rom}} %% do table_copy.extend(sections_data) @@ -299,7 +301,7 @@ EXCEPTION_FRAME_SIZE = {{ exception_frame_size }}; __{{section}}_load = LOADADDR(.{{section}}); __{{section}}_start = .; *(.{{section}} .{{section}}.*) - . = ALIGN(4); + . = ALIGN(8); __{{section}}_end = .; } >{{memory}} AT >{{rom}} %% endfor @@ -320,6 +322,7 @@ EXCEPTION_FRAME_SIZE = {{ exception_frame_size }}; . = ALIGN(4); __{{section}}_end = .; %% endfor + . = ALIGN(8); __bss_end = .; } >{{memory}} %# @@ -330,7 +333,7 @@ EXCEPTION_FRAME_SIZE = {{ exception_frame_size }}; { __{{section}}_start = . ; *(.{{section}} .{{section}}.*) - . = ALIGN(4); + . = ALIGN(8); __{{section}}_end = .; } >{{memory}} %% endfor diff --git a/src/modm/platform/core/cortex/module.lb b/src/modm/platform/core/cortex/module.lb index 5e9a84d691..e54730ec9c 100644 --- a/src/modm/platform/core/cortex/module.lb +++ b/src/modm/platform/core/cortex/module.lb @@ -365,10 +365,13 @@ def build(env): # startup script env.template("reset_handler.sx.in") - env.template("startup.c.in") + ops = env.template("startup.c.in") + # Prevent use of slow bytewise memcpy and memset in startup code + env.collect(":build:cflags", "-fno-builtin", operations=ops) + env.collect(":build:linkflags", "-nostartfiles") + env.template("vectors.c.in") env.template("vectors.hpp.in") - env.collect(":build:linkflags", "-nostartfiles") # dealing with runtime assertions if env.has_module(":architecture:assert"): diff --git a/src/modm/platform/core/cortex/startup.c.in b/src/modm/platform/core/cortex/startup.c.in index c9e638eed4..9c56a1f951 100644 --- a/src/modm/platform/core/cortex/startup.c.in +++ b/src/modm/platform/core/cortex/startup.c.in @@ -29,17 +29,17 @@ extern int main(void); // ---------------------------------------------------------------------------- // Linker section start and end pointers -extern const uint32_t __table_copy_intern_start[]; -extern const uint32_t __table_copy_intern_end[]; +extern const uint64_t __table_copy_intern_start[]; +extern const uint64_t __table_copy_intern_end[]; -extern const uint32_t __table_zero_intern_start[]; -extern const uint32_t __table_zero_intern_end[]; +extern const uint64_t __table_zero_intern_start[]; +extern const uint64_t __table_zero_intern_end[]; -extern const uint32_t __table_copy_extern_start[]; -extern const uint32_t __table_copy_extern_end[]; +extern const uint64_t __table_copy_extern_start[]; +extern const uint64_t __table_copy_extern_end[]; -extern const uint32_t __table_zero_extern_start[]; -extern const uint32_t __table_zero_extern_end[]; +extern const uint64_t __table_zero_extern_start[]; +extern const uint64_t __table_zero_extern_end[]; extern const uint32_t __vector_table_{{ vector_table_location }}_start[]; @@ -64,13 +64,13 @@ table_call(const FunctionPointer *const start, const FunctionPointer *const end) // Copies the section defined by a table of {loadaddr, dest start, dest end} static inline void -table_copy(const uint32_t *const start, const uint32_t *const end) +table_copy(const uint64_t *const start, const uint64_t *const end) { - uint32_t **table = (uint32_t **)start; - while(table < (uint32_t **)end) + uint64_t **table = (uint64_t **)start; + while(table < (uint64_t **)end) { - const uint32_t *src = table[0]; // load address - uint32_t *dest = table[1]; // destination start + const uint64_t *src = table[0]; // load address + uint64_t *dest = table[1]; // destination start while (dest < table[2]) // destination end *(dest++) = *(src++); table += 3; @@ -79,12 +79,12 @@ table_copy(const uint32_t *const start, const uint32_t *const end) // Zeros the section defined by a table of {start, end} static inline void -table_zero(const uint32_t *const start, const uint32_t *const end) +table_zero(const uint64_t *const start, const uint64_t *const end) { - uint32_t **table = (uint32_t **)start; - while(table < (uint32_t **)end) + uint64_t **table = (uint64_t **)start; + while(table < (uint64_t **)end) { - uint32_t *dest = table[0]; // destination start + uint64_t *dest = table[0]; // destination start while (dest < table[1]) // destination end *(dest++) = 0; table += 2;