-
Notifications
You must be signed in to change notification settings - Fork 145
/
Copy pathbranch_table.S
57 lines (47 loc) · 1.7 KB
/
branch_table.S
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
#include "apple-linux-convergence.S"
.p2align 2
.text
/*  MyMemSet(unsigned char * b, unsigned char v, long l)
                             x0               w1      x2

    Fills l bytes starting at b with the low byte of v.

    In:     x0 = b, first byte to write
            w1 = v, only bits 0..7 are stored
            x2 = l, signed byte count
    Out:    nothing is loaded into a result register; x0 is left pointing
            one byte past the last byte written (or unchanged if l <= 0).
    Uses:   x3 = b + l (one past the end of the buffer)
            x4 = l % 8, then the number of stores to skip
            x5 = operand handed to MOD, then the computed branch target
            x6 = the unroll factor, 8
            The condition flags are modified. The macros from
            apple-linux-convergence.S (PUSH_P, MOD, LLD_ADDR, POP_P, ...)
            are assumed to touch only the registers passed to them -
            confirm against that file.

    The length is first checked against less than or equal to 0. If so,
    the body of the function is skipped.

    The loop is unrolled 8x. The length modulo 8 is turned into the number
    of store instructions to skip on the first pass, and the code branches
    that far beyond the first strb. A modulo of 0 is handled separately -
    it branches to the first strb so that all eight stores execute.

    Every store must be a byte store (strb). A word store (str w1) with a
    post-increment of 1 would write 4 bytes per step and run up to 3 bytes
    past the end of the buffer.

    This code can be dramatically improved by storing more than one byte
    at a time. You will have to figure out how to do this optimally in
    P6 - MemCpy.
*/
        GLABEL      MyMemSet
#if defined(__APPLE__)
_MyMemSet:
#else
MyMemSet:
#endif
        START_PROC
        PUSH_P      x29, x30
        mov         x29, sp

        cmp         x2, xzr             // l is signed, so a signed test is right here.
        ble         99f                 // Take branch if l is 0 or less.

        add         x3, x2, x0          // x3 = b + l, one beyond the buffer.
        mov         x6, 8               // Unroll factor: number of strb below.
        MOD         x2, x6, x4, x5      // x4 gets l % 8.
        cbz         x4, 10f             // Evenly divisible: run all 8 stores.

        sub         x4, x6, x4          // Stores to skip, e.g. l % 8 == 3 -> skip 5.
        LLD_ADDR    x5, 10f             // x5 = address of the first strb.
        add         x5, x5, x4, lsl 2   // Each A64 instruction is 4 bytes.
        br          x5                  // Enter the unrolled body part way in.

        // Unrolled body: each strb writes one byte then advances x0 by one.
10:     strb        w1, [x0], 1
        strb        w1, [x0], 1
        strb        w1, [x0], 1
        strb        w1, [x0], 1
        strb        w1, [x0], 1
        strb        w1, [x0], 1
        strb        w1, [x0], 1
        strb        w1, [x0], 1
        cmp         x3, x0              // Invariant here: (x3 - x0) % 8 == 0.
        bhi         10b                 // Addresses are unsigned: loop while end > x0.

99:     POP_P       x29, x30
        ret
        END_PROC