string: add and test an optimized memchr (DONE)

This version works on both SH3 and SH4.
This commit is contained in:
Lephenixnoir 2021-05-23 14:30:35 +02:00
parent b96970e26d
commit d6f606fa5c
Signed by: Lephenixnoir
GPG Key ID: 1BBA026E13FC0495
3 changed files with 87 additions and 3 deletions

View File

@ -167,7 +167,8 @@ endif()
if(sh-generic IN_LIST TARGET_FOLDERS)
list(APPEND SOURCES
src/libc/setjmp/target/sh-generic/setjmp.S
src/libc/setjmp/target/sh-generic/longjmp.S)
src/libc/setjmp/target/sh-generic/longjmp.S
src/libc/string/target/sh-generic/memchr.S)
endif()
if(casiowin-fx IN_LIST TARGET_FOLDERS)

19
STATUS
View File

@ -14,6 +14,14 @@ taken from the C99 standard (ISO/IEC 9899:1999), section 7 ("Library").
address of the function can be taken; don't rely on the macro being defined,
as the user can remove it except in some special cases
String functions (mainly in <string.h>) can use 4-byte accesses, and in doing
so may read up to 3 bytes past the end of the string if it is not padded.
(malloc'd strings and literal strings are both padded, so this only concerns
stack-allocated and statically-allocated strings.) This allows important speed
optimizations. The extra access cannot trigger memory protection because no
valid memory lies less than 4 bytes before the end of any protection region,
so reading up to 3 bytes past valid data always stays inside the region. The
extra access might trigger the UBC (User Break Controller) in very specific
scenarios, but we don't really care.
# Status
In this file, every definition is classified in one of several implementation
@ -102,7 +110,7 @@ DONE: Function/symbol/macro is defined, builds, links, and is tested
7.21 <string.h>
7.21.2.1 memcpy: DONE
7.21.2.2 memmove: DONE
7.21.2.2 memmove: DONE (Unoptimized: byte-by-byte)
! 7.21.2.3 strcpy: TODO
! 7.21.2.4 strncpy: TODO
! 7.21.3.1 strcat: TODO
@ -112,7 +120,7 @@ DONE: Function/symbol/macro is defined, builds, links, and is tested
! 7.21.4.3 strcoll: TODO
! 7.21.4.4 strncmp: TODO
! 7.21.4.5 strxfrm: TODO
! 7.21.5.1 memchr: TODO
7.21.5.1 memchr: DONE
! 7.21.5.2 strchr: TODO
! 7.21.5.3 strcspn: TODO
! 7.21.5.4 strpbrk: TODO
@ -123,6 +131,13 @@ DONE: Function/symbol/macro is defined, builds, links, and is tested
7.21.6.1 memset: DONE
! 7.21.6.2 strerror: TODO
! 7.21.6.3 strlen: TODO
Extensions:
- strnlen: TODO
- strchrnul: TODO
- strcasecmp: TODO
- strncasecmp: TODO
- strdup: TODO
- strndup: TODO
7.22 <tgmath.h> => GCC

View File

@ -0,0 +1,68 @@
/*
 * void *memchr(const void *s, int c, size_t n)
 *
 * Returns the address of the first byte among the n bytes starting at s whose
 * value equals the low 8 bits of c, or 0 (NULL) if there is none.
 *
 * In:    r4 = s, r5 = c, r6 = n
 * Out:   r0 = address of the first match, or 0
 * Uses:  r1 (data just loaded), r2 (c replicated into all 4 bytes),
 *        r3 (scratch), r7 (longword counter), T bit.
 *        r5 and r6 are overwritten as well.
 *
 * Strategy:
 *  - n < 64: plain byte-by-byte scan.
 *  - n >= 64: test the first 3 bytes individually, round the pointer down to
 *    a 4-byte boundary, scan whole longwords with cmp/str, then finish the
 *    0..3 leftover bytes with the byte scan.
 *
 * Reminder: rts and bra are delayed branches; the instruction written right
 * after them executes before the branch takes effect.
 */
.global _memchr
.type _memchr, @function
_memchr:
/* r0 is the running read pointer; r4 keeps the original s */
mov r4, r0
/* mov.b sign-extends what it loads, so sign-extend c to compare like with like */
exts.b r5, r5
/* For small inputs, simply check bytes individually */
mov #64, r2
/* T = (64 > n), unsigned; .last then scans r6 = n bytes starting at r0 = s */
cmp/hi r6, r2
bt .last
.large: /* Make a 4-byte version of r5 for cmp/str */
/* r3 = 0x000000cc */
extu.b r5, r3
/* r2 = 0x0000cc00, then 0x0000cccc */
swap.b r3, r2
or r3, r2
/* r3 = 0xcccc0000, then r2 = 0xcccccccc */
swap.w r2, r3
or r3, r2
/* First check 3 bytes to ensure we don't skip bytes when aligning */
mov.b @r0+, r1
cmp/eq r1, r5
bt .end
mov.b @r0+, r1
cmp/eq r1, r5
bt .end
mov.b @r0+, r1
cmp/eq r1, r5
bt .end
/* Align to a 4-byte boundary */
/* r0 is s+3 here; clearing its low 2 bits gives an address in [s, s+3], so
   every byte before the new r0 has already been tested above (some bytes
   may simply be tested a second time by the longword loop) */
shlr2 r0
shll2 r0
/* r6 = (s + n) - r0 = number of bytes left to examine */
add r4, r6
sub r0, r6
/* r7 = number of whole longwords left; nonzero since n >= 64 on this path */
mov r6, r7
shlr2 r7
/* r6 = number of trailing bytes (0..3) after the last whole longword */
mov #3, r3
and r3, r6
/* Read longwords */
/* cmp/str sets T when any byte of r1 equals the same-position byte of r2 */
1: mov.l @r0+, r1
cmp/str r1, r2
bt .found
/* dt decrements r7 and sets T when it reaches 0 */
dt r7
bf 1b
.last: /* Don't read if there are no bytes left */
/* Without this check, r6 = 0 would make the dt below wrap around */
tst r6, r6
bt .none
/* Byte scan: examine r6 (>= 1) bytes starting at r0 */
2: mov.b @r0+, r1
cmp/eq r1, r5
bt .end
dt r6
bf 2b
/* No match: return 0; the mov sits in the delay slot of rts */
.none: rts
mov #0, r0
.found: /* Go back to find out which of the last 4 bytes is r5 */
/* Undo the post-increment of the mov.l, then rescan those 4 bytes with the
   byte loop; r6 = 4 is set in the delay slot of bra. cmp/str reported a
   match in this longword, so the byte loop exits through .end */
add #-4, r0
bra 2b
mov #4, r6
/* Match: r0 was post-incremented past the matching byte, so step back by one
   in the delay slot of rts */
.end: rts
add #-1, r0