118 lines
1.7 KiB
ArmAsm
118 lines
1.7 KiB
ArmAsm
#include <bits/asm/cpucap.h>
|
|
|
|
.global _memcmp
|
|
.text
|
|
|
|
/*
 * _memcmp - compare two byte ranges (SuperH, big-endian word assembly).
 * Presumably the C library's  int memcmp(const void *, const void *, size_t).
 *
 * In:    r4 = left-hand pointer
 *        r5 = right-hand pointer
 *        r6 = number of bytes to compare
 * Out:   r0 = 0 if r6 is 0 or all bytes match; otherwise the difference
 *             (left byte - right byte) of the first mismatching pair, both
 *             bytes being zero-extended first.
 * Uses:  r0, r1 (data), r2 (constant 4), r3 (mask 3), r7 (scratch), T bit.
 *        r4, r5 and r6 are modified.
 *
 * Delay slots: the instruction written right after bf/s, bra and rts is
 * executed before the branch takes effect.
 */
_memcmp:
	tst	r6, r6
	bt	.zero

	/* When comparing at most 64 bytes, use the naive method
	   (T is set when 64 >= r6) */
	mov	#64, r0
	cmp/ge	r6, r0
	bt	_naive_memcmp

	/* Loop constants: r2 = word size, r3 = 4-byte alignment mask. Both
	   must stay intact until the word loops below are done. */
	mov	#4, r2
	mov	#3, r3

_memcmp_align_rhs:
	/* 4-align the right-hand side. This compares at least one byte, so a
	   right-hand side that is already aligned consumes 4 bytes here. */
	mov.b	@r4+, r0
	mov.b	@r5+, r1
	cmp/eq	r0, r1
	bf/s	.end
	dt	r6			/* delay slot: one byte consumed */
	tst	r3, r5
	bf	_memcmp_align_rhs

	/* If left-hand side is 4-aligned, use mov.l */
	tst	r3, r4
	bt	.aligned4

	/* If unaligned but the __CPUCAP_SH4ALDSP capability bit is set in the
	   word at ___cpucap, use movua.l */
	mov.l	.___cpucap, r0
	mov.l	@r0, r0
	tst	#__CPUCAP_SH4ALDSP, r0
	bf	.unaligned4

	/* If left-hand side is 2-aligned, use mov.w and mov.l */
	mov	r4, r0
	tst	#1, r0
	bt	.aligned2

	/* Otherwise use a naive comparison */
	bra	_naive_memcmp
	nop

.aligned4:
	/* Compare 4 bytes at a time until at most 4 bytes are left */
	mov.l	@r4+, r0
	mov.l	@r5+, r1
	cmp/eq	r0, r1
	bf/s	_fail
	add	#-4, r6			/* delay slot: runs on both paths */
	cmp/ge	r6, r2			/* T = (4 >= r6) */
	bf	.aligned4

	bra	_naive_memcmp
	nop

.unaligned4:
	/* Compare 4 bytes at a time until at most 4 bytes are left. Since
	   left-hand side is unaligned, use movua.l */
	movua.l	@r4+, r0
	mov.l	@r5+, r1
	cmp/eq	r0, r1
	bf/s	_fail
	add	#-4, r6			/* delay slot: runs on both paths */
	cmp/ge	r6, r2			/* T = (4 >= r6) */
	bf	.unaligned4

	bra	_naive_memcmp
	nop

.aligned2:
	/* Read 4 bytes from r4 in two steps: high halfword into r0, low
	   halfword into r7. r2 must not be used as the temporary because it
	   holds the constant 4 tested at the bottom of this loop. */
	mov.w	@r4+, r0
	mov.l	@r5+, r1
	mov.w	@r4+, r7
	shll16	r0
	/* mov.w sign-extends; clear the upper half so that the OR cannot
	   corrupt the high halfword already placed in r0 */
	extu.w	r7, r7
	or	r7, r0
	cmp/eq	r0, r1
	bf/s	_fail
	add	#-4, r6			/* delay slot: runs on both paths */
	cmp/ge	r6, r2			/* T = (4 >= r6) */
	bf	.aligned2

	bra	_naive_memcmp
	nop

_fail:
	/* Rewind 4 bytes to compare manually; the byte loop below then locates
	   the exact mismatching byte inside the word */
	add	#-4, r4
	add	#-4, r5
	add	#4, r6

_naive_memcmp:
	/* Byte-by-byte comparison of the r6 remaining bytes (r6 != 0 here) */
	mov.b	@r4+, r0
	mov.b	@r5+, r1
	cmp/eq	r0, r1
	bf/s	.end
	dt	r6			/* delay slot: T = (r6 == 0) if no branch */
	bf	_naive_memcmp

.end:
	/* r0/r1 hold the last pair of bytes read: equal if the count ran out,
	   different on a mismatch. Return their unsigned difference. */
	extu.b	r0, r0
	extu.b	r1, r1
	rts
	sub	r1, r0			/* delay slot: r0 = left - right */

.zero:
	/* Nothing to compare */
	rts
	mov	#0, r0

	/* Literal read by the PC-relative mov.l above; it must be
	   longword-aligned */
	.align 4

.___cpucap:
	.long	___cpucap
|