string: use gint's optimized memcmp (DONE)

Lephenixnoir 2021-05-23 16:40:21 +02:00
parent a354e38ccf
commit 5cfd2a7d85
Signed by: Lephenixnoir
GPG Key ID: 1BBA026E13FC0495
2 changed files with 118 additions and 0 deletions
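
For reference, the new routine has to preserve the standard memcmp contract, which the naive fallback in the assembly below implements directly. A minimal portable version (not part of this commit; the name memcmp_reference is hypothetical) looks like this:

#include <stddef.h>

/* Reference semantics: compare bytes as unsigned char and return the
   difference of the first mismatching pair, or 0 if the ranges are equal. */
int memcmp_reference(const void *lhs, const void *rhs, size_t n)
{
    const unsigned char *l = lhs, *r = rhs;
    for (; n > 0; n--, l++, r++) {
        if (*l != *r)
            return (int)*l - (int)*r;
    }
    return 0;
}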

@@ -172,6 +172,7 @@ if(sh-generic IN_LIST TARGET_FOLDERS)
 src/libc/setjmp/target/sh-generic/setjmp.S
 src/libc/setjmp/target/sh-generic/longjmp.S
 src/libc/string/target/sh-generic/memchr.S
+src/libc/string/target/sh-generic/memcmp.S
 src/libc/string/target/sh-generic/memcpy.S
 src/libc/string/target/sh-generic/memset.S
 src/libc/string/target/sh-generic/strlen.S

src/libc/string/target/sh-generic/memcmp.S (new file)

@@ -0,0 +1,117 @@
#include <bits/asm/cpucap.h>
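
/* memcmp for SuperH: r4 = left-hand side, r5 = right-hand side, r6 = byte
   count (standard SH calling convention); the result is returned in r0 */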
.global _memcmp
.text
_memcmp:
tst r6, r6
bt .zero
/* When comparing 64 bytes or fewer, use the naive method */
mov #64, r0
cmp/ge r6, r0
bt _naive_memcmp
mov #4, r2
mov #3, r3
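/* r2 = 4: exit threshold for the word loops, r3 = 3: alignment mask */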
_memcmp_align_rhs:
/* 4-align the right-hand side */
mov.b @r4+, r0
mov.b @r5+, r1
cmp/eq r0, r1
bf/s .end
dt r6
tst r3, r5
bf _memcmp_align_rhs
/* If left-hand side is 4-aligned, use mov.l */
tst r3, r4
bt .aligned4
/* If unaligned but the CPU supports movua.l (SH4A), use it */
mov.l .___cpucap, r0
mov.l @r0, r0
tst #__CPUCAP_SH4ALDSP, r0
bf .unaligned4
/* If left-hand side is 2-aligned, use mov.w and mov.l */
mov r4, r0
tst #1, r0
bt .aligned2
/* Otherwise use a naive comparison */
bra _naive_memcmp
nop
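/* The three loops below differ only in how the left-hand side (r4) is read:
   mov.l when 4-aligned, movua.l when unaligned loads are supported, and a
   pair of mov.w when 2-aligned; the right-hand side always uses mov.l */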
.aligned4:
/* Compare 4 bytes at a time until at most 4 bytes are left */
mov.l @r4+, r0
mov.l @r5+, r1
cmp/eq r0, r1
bf/s _fail
add #-4, r6
cmp/ge r6, r2
bf .aligned4
bra _naive_memcmp
nop
.unaligned4:
/* Compare 4 bytes at a time until at most 4 bytes are left. The left-hand
   side is not 4-aligned, so read it with movua.l; the right-hand side is
   aligned and keeps using mov.l */
movua.l @r4+, r0
mov.l @r5+, r1
cmp/eq r0, r1
bf/s _fail
add #-4, r6
cmp/ge r6, r2
bf .unaligned4
bra _naive_memcmp
nop
.aligned2:
/* Read 4 bytes from r4 in two 16-bit steps. Use r7 as scratch so the loop
   bound in r2 is preserved, and zero-extend the low half before merging,
   since mov.w sign-extends */
mov.w @r4+, r0
mov.l @r5+, r1
mov.w @r4+, r7
shll16 r0
extu.w r7, r7
or r7, r0
cmp/eq r0, r1
bf/s _fail
add #-4, r6
cmp/ge r6, r2
bf .aligned2
bra _naive_memcmp
nop
_fail:
/* Rewind 4 bytes to compare manually */
add #-4, r4
add #-4, r5
add #4, r6
_naive_memcmp:
mov.b @r4+, r0
mov.b @r5+, r1
cmp/eq r0, r1
bf/s .end
dt r6
bf _naive_memcmp
.end:
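/* r0 and r1 hold the last bytes read (sign-extended by mov.b); zero-extend
   them so the result is the difference of the bytes as unsigned values */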
extu.b r0, r0
extu.b r1, r1
rts
sub r1, r0
.zero:
rts
mov #0, r0
.align 4
.___cpucap:
.long ___cpucap
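
For illustration only (not part of the commit; memcmp_sketch and byte_cmp are hypothetical names), the control flow above translates to roughly the following C, where the choice between mov.l, movua.l and mov.w pairs is left to memcpy:

#include <stddef.h>
#include <stdint.h>
#include <string.h>

static int byte_cmp(const unsigned char *l, const unsigned char *r, size_t n)
{
    /* Naive byte-by-byte comparison, same role as _naive_memcmp */
    for (; n > 0; n--, l++, r++) {
        if (*l != *r)
            return (int)*l - (int)*r;
    }
    return 0;
}

int memcmp_sketch(const void *lhs, const void *rhs, size_t n)
{
    const unsigned char *l = lhs, *r = rhs;

    /* Small sizes are not worth the alignment work */
    if (n <= 64)
        return byte_cmp(l, r, n);

    /* 4-align the right-hand side */
    while ((uintptr_t)r & 3) {
        if (*l != *r)
            return (int)*l - (int)*r;
        l++, r++, n--;
    }

    /* Compare 4 bytes at a time until at most 4 bytes are left */
    while (n > 4) {
        uint32_t a, b;
        memcpy(&a, l, 4);
        memcpy(&b, r, 4);
        if (a != b)
            return byte_cmp(l, r, 4);
        l += 4, r += 4, n -= 4;
    }

    /* Finish the tail byte by byte */
    return byte_cmp(l, r, n);
}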