std: move memcmp, memcpy, memmove, memset to fxlibc

Lephe 2021-05-23 16:50:57 +02:00
parent 7e0ccc3f69
commit 6c12217777
Signed by: Lephenixnoir
GPG Key ID: 1BBA026E13FC0495
5 changed files with 0 additions and 373 deletions

CMakeLists.txt

@@ -67,10 +67,6 @@ set(SOURCES_COMMON
src/spu/spu.c
src/std/aprint.c
src/std/malloc.c
src/std/memcmp.s
src/std/memcpy.s
src/std/memmove.s
src/std/memset.s
src/std/print.c
src/std/string.c
src/std/string-ext.c

src/std/memcmp.s

@@ -1,114 +0,0 @@
.global _memcmp
.text
_memcmp:
tst r6, r6
bt .zero
/* When comparing less than 64 bytes, use the naive method */
mov #64, r0
cmp/ge r6, r0
bt _naive_memcmp
mov #4, r2
mov #3, r3
_memcmp_align_rhs:
/* 4-align the right-hand side */
mov.b @r4+, r0
mov.b @r5+, r1
cmp/eq r0, r1
bf/s .end
dt r6
tst r3, r5
bf _memcmp_align_rhs
/* If left-hand side is 4-aligned, use mov.l */
tst r3, r4
bt .aligned4
/* If unaligned but SH4, use movua.l */
mov.l .gint, r0
mov.l @r0, r0
tst #1, r0
bt .unaligned4
/* If left-hand side is 2-aligned, use mov.w and mov.l */
mov r4, r0
tst #1, r0
bt .aligned2
/* Otherwise use a naive comparison */
bra _naive_memcmp
nop
.aligned4:
/* Compare 4 bytes at a time until at most 4 bytes are left */
mov.l @r4+, r0
mov.l @r5+, r1
cmp/eq r0, r1
bf/s _fail
add #-4, r6
cmp/ge r6, r2
bf .aligned4
bra _naive_memcmp
nop
.unaligned4:
/* Compare 4 bytes at a time until at most 4 bytes are left. Since the
left-hand side is unaligned, read it with movua.l */
movua.l @r4+, r0
mov.l @r5+, r1
cmp/eq r0, r1
bf/s _fail
add #-4, r6
cmp/ge r6, r2
bf .unaligned4
bra _naive_memcmp
nop
.aligned2:
/* Read 4 bytes from r4 in two steps */
mov.w @r4+, r0
mov.l @r5+, r1
mov.w @r4+, r2
shll16 r0
or r2, r0
cmp/eq r0, r1
bf/s _fail
add #-4, r6
cmp/ge r6, r2
bf .aligned2
bra _naive_memcmp
nop
_fail:
/* Rewind 4 bytes to compare manually */
add #-4, r4
add #-4, r5
add #4, r6
_naive_memcmp:
mov.b @r4+, r0
mov.b @r5+, r1
cmp/eq r0, r1
bf/s .end
dt r6
bf _naive_memcmp
.end:
extu.b r0, r0
extu.b r1, r1
rts
sub r1, r0
.zero:
rts
mov #0, r0
.align 4
.gint:
.long _gint
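
The file above dispatches on alignment: small inputs go through a naive byte loop, larger ones are compared one 32-bit word at a time, with movua.l (SH4 only) or paired 16-bit loads covering a misaligned left-hand side. As a rough orientation for readers not fluent in SuperH assembly, here is a portable C sketch of that strategy; memcmp_sketch is a hypothetical name, and this is not the fxlibc code that replaces the file.

#include <stddef.h>
#include <stdint.h>

int memcmp_sketch(const void *lhs, const void *rhs, size_t n)
{
    const uint8_t *l = lhs, *r = rhs;

    if(n >= 64) {
        /* Byte-compare until the right-hand side is 4-aligned */
        while((uintptr_t)r & 3) {
            if(*l != *r) return *l - *r;
            l++, r++, n--;
        }
        /* If the left-hand side is also 4-aligned, compare 32-bit words
           while more than 4 bytes remain (the assembly adds movua.l and
           16-bit paths for a misaligned left-hand side) */
        if(!((uintptr_t)l & 3)) {
            while(n > 4 && *(const uint32_t *)l == *(const uint32_t *)r)
                l += 4, r += 4, n -= 4;
        }
    }
    /* Byte-compare what remains, including the word that differed */
    while(n--) {
        if(*l != *r) return *l - *r;
        l++, r++;
    }
    return 0;
}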

src/std/memcpy.s

@@ -1,125 +0,0 @@
.global _memcpy
.text
_memcpy:
tst r6, r6
bt .zero
mov r4, r3
mov #3, r2
/* When copying less than 64 bytes, use the naive method */
mov #64, r0
cmp/ge r6, r0
bt _naive_memcpy
_memcpy_align_dst:
/* 4-align the destination */
mov.b @r5+, r0
mov.b r0, @r4
add #1, r4
tst r2, r4
bf/s _memcpy_align_dst
dt r6
/* If source is 4-aligned, use mov.l */
tst r2, r5
bt/s .aligned4_32
mov #4, r2
/* If unaligned but SH4, use movua.l */
mov.l .gint, r0
mov.l @r0, r0
tst #1, r0
bt .unaligned4
/* If source is 2-aligned, use mov.w */
mov r5, r0
tst #1, r0
bt .aligned2
/* Otherwise use a naive copy */
bra _naive_memcpy
nop
.aligned4_32:
mov #36, r2
/* Copy 32 bytes at a time until at most 32 bytes are left */
mov.l @r5+, r0
mov.l @r5+, r1
mov.l @r5+, r7
mov.l r0, @r4
mov.l r1, @(4,r4)
mov.l r7, @(8,r4)
mov.l @r5+, r0
mov.l @r5+, r1
mov.l @r5+, r7
mov.l r0, @(12,r4)
mov.l r1, @(16,r4)
mov.l r7, @(20,r4)
mov.l @r5+, r0
mov.l @r5+, r1
add #-32, r6
mov.l r0, @(24,r4)
mov.l r1, @(28,r4)
cmp/ge r6, r2
bf/s .aligned4_32
add #32, r4
.aligned4_4:
mov #4, r2
/* Copy 4 bytes at a time until at most 4 bytes are left */
mov.l @r5+, r0
mov.l r0, @r4
add #-4, r6
cmp/ge r6, r2
bf/s .aligned4_4
add #4, r4
bra _naive_memcpy
nop
.unaligned4:
/* Copy 4 bytes but read with movua.l since source is unaligned */
movua.l @r5+, r0
mov.l r0, @r4
add #-4, r6
cmp/ge r6, r2
bf/s .unaligned4
add #4, r4
bra _naive_memcpy
nop
.aligned2:
mov.w @r5+, r0
mov.w r0, @r4
mov.w @r5+, r0
mov.w r0, @(2,r4)
add #-4, r6
cmp/ge r6, r2
bf/s .aligned2
add #4, r4
bra _naive_memcpy
nop
_naive_memcpy:
mov.b @r5+, r0
dt r6
mov.b r0, @r4
bf/s _naive_memcpy
add #1, r4
rts
mov r3, r0
.zero:
rts
mov r4, r0
.align 4
.gint:
.long _gint
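
The copy above aligns the destination with a byte loop, then moves 32 bytes per iteration (eight mov.l loads and stores), then 4 bytes, and finishes with a byte loop; 2-aligned and (on SH4) unaligned sources get their own paths. A rough portable C sketch of the main path follows, under the hypothetical name memcpy_sketch; it is not the fxlibc replacement.

#include <stddef.h>
#include <stdint.h>

void *memcpy_sketch(void *dst, const void *src, size_t n)
{
    uint8_t *d = dst;
    const uint8_t *s = src;

    if(n >= 64) {
        /* Byte-copy until the destination is 4-aligned */
        while((uintptr_t)d & 3) {
            *d++ = *s++;
            n--;
        }
        /* If the source is also 4-aligned, copy 32 bytes, then 4 bytes,
           per iteration (the assembly adds movua.l and 16-bit variants
           for other source alignments) */
        if(!((uintptr_t)s & 3)) {
            while(n >= 32) {
                uint32_t *dw = (uint32_t *)d;
                const uint32_t *sw = (const uint32_t *)s;
                dw[0] = sw[0]; dw[1] = sw[1]; dw[2] = sw[2]; dw[3] = sw[3];
                dw[4] = sw[4]; dw[5] = sw[5]; dw[6] = sw[6]; dw[7] = sw[7];
                d += 32, s += 32, n -= 32;
            }
            while(n >= 4) {
                *(uint32_t *)d = *(const uint32_t *)s;
                d += 4, s += 4, n -= 4;
            }
        }
    }
    /* Byte-copy the remainder */
    while(n--) *d++ = *s++;
    return dst;
}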

src/std/memmove.s

@@ -1,60 +0,0 @@
.global _memmove
.text
_memmove:
tst r6, r6
bt .zero
/* Simple optimization: if regions do not overlap, use memcpy() */
mov r4, r0
add r6, r0
cmp/ge r0, r5
bt _memmove_memcpy
mov r5, r0
add r6, r0
cmp/ge r0, r4
bt _memmove_memcpy
mov r4, r3
cmp/ge r4, r5
bf .backwards
.forwards:
/* If the destination starts before the source, copy forwards */
mov.b @r5+, r0
mov.b r0, @r4
dt r6
bf/s .forwards
add #1, r4
rts
mov r3, r0
.backwards:
/* Otherwise, copy backwards */
add r6, r4
add r6, r5
.backwards_loop:
add #-1, r5
mov.b @r5, r0
dt r6
bf/s .backwards_loop
mov.b r0, @-r4
rts
mov r3, r0
_memmove_memcpy:
mov.l .memcpy, r1
jmp @r1
nop
.zero:
rts
mov r4, r0
.align 4
.memcpy:
.long _memcpy
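
The logic above is short: disjoint regions are handed to memcpy(), overlapping regions are copied forwards when the destination starts before the source and backwards otherwise. A portable C sketch of the same decision, with memmove_sketch as a hypothetical name:

#include <stddef.h>
#include <stdint.h>
#include <string.h>

void *memmove_sketch(void *dst, const void *src, size_t n)
{
    uint8_t *d = dst;
    const uint8_t *s = src;

    if(n == 0) return dst;

    /* Disjoint regions: a plain memcpy() is safe */
    if(s + n <= d || d + n <= s)
        return memcpy(dst, src, n);

    if(d <= s) {
        /* Destination starts first: copy forwards */
        while(n--) *d++ = *s++;
    }
    else {
        /* Destination starts after the source: copy backwards */
        d += n, s += n;
        while(n--) *--d = *--s;
    }
    return dst;
}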

src/std/memset.s

@@ -1,70 +0,0 @@
.global _memset
.text
_memset:
tst r6, r6
bt .zero
/* We'll fill from the end */
mov r4, r3
add r6, r4
/* When setting less than 64 bytes, use the naive method */
mov #64, r0
cmp/ge r6, r0
bt _naive_memset
mov #3, r2
/* Make a 4-byte filler */
mov r5, r0
shll8 r5
or r5, r0
mov r0, r5
shll16 r5
or r5, r0
_memset_align:
/* 4-align the destination */
mov.b r0, @-r4
tst r2, r4
bf/s _memset_align
dt r6
mov #40, r2
.aligned4_32:
add #-32, r4
add #-32, r6
mov.l r0, @(28,r4)
mov.l r0, @(24,r4)
mov.l r0, @(20,r4)
mov.l r0, @(16,r4)
mov.l r0, @(12,r4)
mov.l r0, @(8,r4)
mov.l r0, @(4,r4)
cmp/ge r6, r2
bf/s .aligned4_32
mov.l r0, @r4
mov #8, r2
.aligned4_4:
mov.l r0, @-r4
cmp/ge r6, r2
bf/s .aligned4_4
add #-4, r6
_naive_memset:
/* Tight loop copying one byte at a time */
dt r6
bf/s _naive_memset
mov.b r5, @-r4
.end:
rts
mov r3, r0
.zero:
rts
mov r4, r0
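
The fill above works from the end of the buffer: a 4-byte pattern is built from the fill byte, the end pointer is 4-aligned with a byte loop, then the buffer is filled 32 bytes (eight mov.l stores), 4 bytes, and finally 1 byte at a time. A portable C sketch of that order of operations, with memset_sketch as a hypothetical name:

#include <stddef.h>
#include <stdint.h>

void *memset_sketch(void *dst, int c, size_t n)
{
    uint8_t *end = (uint8_t *)dst + n;
    uint8_t byte = (uint8_t)c;

    if(n >= 64) {
        /* Build a 4-byte filler from the fill byte */
        uint32_t pat = byte;
        pat |= pat << 8;
        pat |= pat << 16;

        /* Byte-fill backwards until the end pointer is 4-aligned */
        while((uintptr_t)end & 3) {
            *--end = byte;
            n--;
        }
        /* Fill 4 bytes at a time (the assembly unrolls this 8-fold) */
        while(n >= 4) {
            end -= 4;
            *(uint32_t *)end = pat;
            n -= 4;
        }
    }
    /* Byte-fill the remainder */
    while(n--) *--end = byte;
    return dst;
}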