Switch from tonccpy to ASM memcpy
RocketRobz committed Mar 16, 2024
1 parent 2046633 commit f283773
Showing 83 changed files with 2,612 additions and 1,847 deletions.
5 changes: 3 additions & 2 deletions hb/dldi/Makefile
@@ -26,11 +26,12 @@ INCLUDES := include ../common/include
#---------------------------------------------------------------------------------
# options for code generation
#---------------------------------------------------------------------------------
- ARCH := -mthumb -mthumb-interwork -march=armv4t
+ ARCH := -mthumb -march=armv4t

COMMON := -g -Wall -O2\
-mcpu=arm7tdmi -mtune=arm7tdmi -fomit-frame-pointer\
-ffast-math \
+ -flto \
$(ARCH)

# ADD -DDEBUG here for logging/debug
@@ -39,7 +40,7 @@ COMMON += $(INCLUDE) -DARM9 -fPIC
CFLAGS := $(COMMON) -std=gnu99
CXXFLAGS := $(CFLAGS) -fno-rtti -fno-exceptions

- ASFLAGS := -g $(ARCH) $(INCLUDE)
+ ASFLAGS := -g $(ARCH) -flto $(INCLUDE)
LDFLAGS = -nostartfiles -nostdlib -T ../dldi.ld -g $(ARCH) -Wl,-Map,$(TARGET).map

LIBS := -lnds9
116 changes: 116 additions & 0 deletions hb/dldi/include/aeabi.h
@@ -0,0 +1,116 @@
// SPDX-License-Identifier: Zlib
// SPDX-FileNotice: Modified from the original version by the BlocksDS project.
//
// Copyright (C) 2021-2023 agbabi contributors

#ifndef AEABI_H__
#define AEABI_H__

#ifdef __cplusplus
extern "C" {
#endif

#include <stddef.h>

/**
* Alias of __aeabi_memcpy4
* @param dest Destination address
* @param src Source address
* @param n Number of bytes to copy
*/
void __aeabi_memcpy8(void* __restrict__ dest, const void* __restrict__ src, size_t n) __attribute__((nonnull(1, 2)));

/**
* Copies n bytes from src to dest (forward)
* Assumes dest and src are 4-byte aligned
* @param dest Destination address
* @param src Source address
* @param n Number of bytes to copy
*/
void __aeabi_memcpy4(void* __restrict__ dest, const void* __restrict__ src, size_t n) __attribute__((nonnull(1, 2)));

/**
* Copies n bytes from src to dest (forward)
* @param dest Destination address
* @param src Source address
* @param n Number of bytes to copy
*/
void __aeabi_memcpy(void* __restrict__ dest, const void* __restrict__ src, size_t n) __attribute__((nonnull(1, 2)));

/**
* Alias of __aeabi_memmove4
* @param dest Destination address
* @param src Source address
* @param n Number of bytes to copy
*/
void __aeabi_memmove8(void* dest, const void* src, size_t n) __attribute__((nonnull(1, 2)));

/**
* Safely copies n bytes of src to dest
* Assumes dest and src are 4-byte aligned
* @param dest Destination address
* @param src Source address
* @param n Number of bytes to copy
*/
void __aeabi_memmove4(void* dest, const void* src, size_t n) __attribute__((nonnull(1, 2)));

/**
* Safely copies n bytes of src to dest
* @param dest Destination address
* @param src Source address
* @param n Number of bytes to copy
*/
void __aeabi_memmove(void* dest, const void* src, size_t n) __attribute__((nonnull(1, 2)));

/**
* Alias of __aeabi_memset4
* @param dest Destination address
* @param n Number of bytes to set
* @param c Value to set
*/
void __aeabi_memset8(void* dest, size_t n, int c) __attribute__((nonnull(1)));

/**
* Set n bytes of dest to (c & 0xff)
* Assumes dest is 4-byte aligned
* @param dest Destination address
* @param n Number of bytes to set
* @param c Value to set
*/
void __aeabi_memset4(void* dest, size_t n, int c) __attribute__((nonnull(1)));

/**
* Set n bytes of dest to (c & 0xff)
* @param dest Destination address
* @param n Number of bytes to set
* @param c Value to set
*/
void __aeabi_memset(void* dest, size_t n, int c) __attribute__((nonnull(1)));

/**
* Alias of __aeabi_memclr4
* @param dest Destination address
* @param n Number of bytes to clear
*/
void __aeabi_memclr8(void* dest, size_t n) __attribute__((nonnull(1)));

/**
* Clears n bytes of dest to 0
* Assumes dest is 4-byte aligned
* @param dest Destination address
* @param n Number of bytes to clear
*/
void __aeabi_memclr4(void* dest, size_t n) __attribute__((nonnull(1)));

/**
* Clears n bytes of dest to 0
* @param dest Destination address
* @param n Number of bytes to clear
*/
void __aeabi_memclr(void* dest, size_t n) __attribute__((nonnull(1)));

#ifdef __cplusplus
}
#endif

#endif // AEABI_H__
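
Since the header only declares the AEABI entry points, a short usage sketch may help clarify the calling contract; the buffers and function name below are illustrative, not part of the commit. The *4 and *8 variants assume word-aligned pointers, and the memset/memclr family takes the length before the fill value, unlike standard memset.

#include <stdint.h>
#include "aeabi.h"

static uint32_t src_buf[64];   // word aligned by virtue of the element type
static uint32_t dst_buf[64];

void copy_example(void)
{
    // Both pointers are word aligned, so the fast word-copy entry is safe.
    __aeabi_memcpy4(dst_buf, src_buf, sizeof(dst_buf));

    // Arbitrary alignment: the generic entry picks the right path itself.
    __aeabi_memcpy((uint8_t *)dst_buf + 1, (const uint8_t *)src_buf + 3, 16);

    // Note the AEABI argument order: (dest, n, c), not memset's (dest, c, n).
    __aeabi_memset4(dst_buf, sizeof(dst_buf), 0xFF);
    __aeabi_memclr4(dst_buf, sizeof(dst_buf));
}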
11 changes: 9 additions & 2 deletions retail/bootloader/include/asminc.h → hb/dldi/include/asminc.h
@@ -2,8 +2,8 @@
#define _ASMINC_H_

#if !__ASSEMBLER__
- #error This header file is only for use in assembly files!
- #endif // !__ASSEMBLER__
+ # error "This header file is only for use in assembly files!"
+ #endif


.macro BEGIN_ASM_FUNC name section=text
@@ -14,6 +14,13 @@
\name:
.endm

.macro BEGIN_ASM_FUNC_NO_SECTION name
.global \name
.type \name, %function
.align 2
\name:
.endm

#define ICACHE_SIZE 0x2000
#define DCACHE_SIZE 0x1000
#define CACHE_LINE_SIZE 32
39 changes: 39 additions & 0 deletions hb/dldi/include/macros.inc
@@ -0,0 +1,39 @@
// SPDX-License-Identifier: Zlib
// SPDX-FileNotice: Modified from the original version by the BlocksDS project.
//
// Copyright (C) 2021-2023 agbabi contributors
//
// ARM assembly support macros

@ Shift and test upper two bits, clobbering \reg
@ Use mi for first bit, cs for second bit
.macro joaobapt_test_lsl reg shift = #0
movs \reg, \reg, lsl \shift
.endm

@ Test lowest two bits, clobbering \reg
@ Use mi for low bit, cs for high bit
.macro joaobapt_test reg
joaobapt_test_lsl \reg, #31
.endm

@ Test lowest two bits of \src, result stored in \dst
@ Use mi for low bit, cs for high bit
.macro joaobapt_test_into dst, src
movs \dst, \src, lsl #31
.endm

@ Branches depending on lowest two bits, clobbering \reg
@ b_mi = low bit case, b_cs = high bit case
.macro joaobapt_switch reg, b_mi, b_cs
joaobapt_test \reg
bmi \b_mi
bcs \b_cs
.endm

@ Branches depending on alignment of \a and \b, clobbering \scratch
@ b_byte = off-by-byte case, b_half = off-by-half case
.macro align_switch a, b, scratch, b_byte, b_half
eor \scratch, \a, \b
joaobapt_switch \scratch, \b_byte, \b_half
.endm
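
The flag trick in these macros is compact, so a C-level sketch of the decision they encode may help; this is illustrative only and the names are hypothetical. A single MOVS ... LSL #31 drops bit 0 of the tested value into the N flag (taken by "mi") and bit 1 into the carry flag (taken by "cs"), which is how align_switch routes a copy to the byte, halfword or word path.

#include <stdint.h>

typedef enum { COPY_BYTES, COPY_HALVES, COPY_WORDS } copy_path_t;

// Equivalent of: eor scratch, a, b ; joaobapt_switch scratch, b_byte, b_half
static copy_path_t align_switch_sketch(uintptr_t a, uintptr_t b)
{
    uintptr_t diff = a ^ b;

    if (diff & 1)           // "mi" case: pointers differ in the byte bit
        return COPY_BYTES;  //   only a byte-by-byte copy can be aligned
    if (diff & 2)           // "cs" case: pointers differ in the halfword bit
        return COPY_HALVES; //   a halfword copy is the best co-alignment
    return COPY_WORDS;      // same word offset: fall through to the word path
}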
126 changes: 126 additions & 0 deletions hb/dldi/source/abi_memcpy.s
@@ -0,0 +1,126 @@
// SPDX-License-Identifier: Zlib
// SPDX-FileNotice: Modified from the original version by the BlocksDS project.
//
// Copyright (C) 2021-2023 agbabi contributors
//
// ABI:
// __aeabi_memcpy, __aeabi_memcpy4, __aeabi_memcpy8
// Standard:
// memcpy
// Support:
// __ndsabi_memcpy2, __ndsabi_memcpy1

#include "asminc.h"

#include "macros.inc"

.syntax unified

.arm



BEGIN_ASM_FUNC __aeabi_memcpy

@ >6-bytes is roughly the threshold when byte-by-byte copy is slower
cmp r2, #6
ble __ndsabi_memcpy1

align_switch r0, r1, r3, __ndsabi_memcpy1, .Lcopy_halves

@ Check if r0 (or r1) needs word aligning
rsbs r3, r0, #4
joaobapt_test r3

@ Copy byte head to align
ldrbmi r3, [r1], #1
strbmi r3, [r0], #1
submi r2, r2, #1
@ r0, r1 are now half aligned

@ Copy half head to align
ldrhcs r3, [r1], #2
strhcs r3, [r0], #2
subcs r2, r2, #2
@ r0, r1 are now word aligned


BEGIN_ASM_FUNC_NO_SECTION __aeabi_memcpy8
BEGIN_ASM_FUNC_NO_SECTION __aeabi_memcpy4

cmp r2, #32
blt .Lcopy_words

@ Word aligned, 32-byte copy
push {r4-r10}
.Lloop_32:
subs r2, r2, #32
ldmiage r1!, {r3-r10}
stmiage r0!, {r3-r10}
bgt .Lloop_32
pop {r4-r10}
bxeq lr

@ < 32 bytes remaining to be copied
add r2, r2, #32

.Lcopy_words:
cmp r2, #4
blt .Lcopy_halves
.Lloop_4:
subs r2, r2, #4
ldrge r3, [r1], #4
strge r3, [r0], #4
bgt .Lloop_4
bxeq lr

@ Copy byte & half tail
@ This test still works when r2 is negative
joaobapt_test r2
@ Copy half
ldrhcs r3, [r1], #2
strhcs r3, [r0], #2
@ Copy byte
ldrbmi r3, [r1]
strbmi r3, [r0]
bx lr

.Lcopy_halves:
@ Copy byte head to align
tst r0, #1
ldrbne r3, [r1], #1
strbne r3, [r0], #1
subne r2, r2, #1
@ r0, r1 are now half aligned


BEGIN_ASM_FUNC_NO_SECTION __ndsabi_memcpy2

subs r2, r2, #2
ldrhge r3, [r1], #2
strhge r3, [r0], #2
bgt __ndsabi_memcpy2
bxeq lr

@ Copy byte tail
adds r2, r2, #2
ldrbne r3, [r1]
strbne r3, [r0]
bx lr


BEGIN_ASM_FUNC_NO_SECTION __ndsabi_memcpy1

subs r2, r2, #1
ldrbge r3, [r1], #1
strbge r3, [r0], #1
bgt __ndsabi_memcpy1
bx lr


BEGIN_ASM_FUNC memcpy

push {r0, lr}
bl __aeabi_memcpy
pop {r0, lr}
bx lr
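
For readers who do not follow ARM assembly, a rough C equivalent of the strategy above may help; this is an illustrative sketch with a hypothetical name, not part of the commit. Small or byte-misaligned copies fall back to a byte loop, pairs that can only ever be half-aligned use a halfword loop, and everything else is aligned up to a word boundary and moved in 32-byte LDM/STM-style blocks followed by a word loop and a halfword/byte tail.

#include <stddef.h>
#include <stdint.h>

static void memcpy_sketch(uint8_t *d, const uint8_t *s, size_t n)
{
    uintptr_t diff = (uintptr_t)d ^ (uintptr_t)s;

    // Tiny or byte-misaligned copies: plain byte loop (__ndsabi_memcpy1 path).
    if (n <= 6 || (diff & 1)) {
        while (n--) *d++ = *s++;
        return;
    }

    // Same byte parity but off by a halfword: halfword loop (__ndsabi_memcpy2 path).
    if (diff & 2) {
        if ((uintptr_t)d & 1) { *d++ = *s++; n--; }       // byte head to align
        for (; n >= 2; d += 2, s += 2, n -= 2)
            *(uint16_t *)d = *(const uint16_t *)s;
        if (n) *d = *s;                                    // byte tail
        return;
    }

    // Word-co-alignable: align the head, then 32-byte blocks, words, tail.
    if ((uintptr_t)d & 1) { *d++ = *s++; n -= 1; }         // byte head
    if ((uintptr_t)d & 2) {                                // halfword head
        *(uint16_t *)d = *(const uint16_t *)s;
        d += 2; s += 2; n -= 2;
    }
    for (; n >= 32; d += 32, s += 32, n -= 32)             // ldmia/stmia r3-r10
        for (int i = 0; i < 8; i++)
            ((uint32_t *)d)[i] = ((const uint32_t *)s)[i];
    for (; n >= 4; d += 4, s += 4, n -= 4)                 // word loop
        *(uint32_t *)d = *(const uint32_t *)s;
    if (n & 2) {                                           // halfword tail
        *(uint16_t *)d = *(const uint16_t *)s;
        d += 2; s += 2;
    }
    if (n & 1)                                             // byte tail
        *d = *s;
}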