From b69e0fd299f3d734cf5687e864f1b72b7284fca8 Mon Sep 17 00:00:00 2001
From: Lephenixnoir
Date: Sun, 23 May 2021 16:16:45 +0200
Subject: [PATCH] string: use gint's optimized memset (DONE)

---
 CMakeLists.txt                             |  1 +
 src/libc/string/memset.c                   | 11 ++--
 src/libc/string/target/sh-generic/memset.S | 70 ++++++++++++++++++++++
 3 files changed, 75 insertions(+), 7 deletions(-)
 create mode 100644 src/libc/string/target/sh-generic/memset.S

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 59be0bf..a24ad0a 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -172,6 +172,7 @@ if(sh-generic IN_LIST TARGET_FOLDERS)
     src/libc/setjmp/target/sh-generic/setjmp.S
     src/libc/setjmp/target/sh-generic/longjmp.S
     src/libc/string/target/sh-generic/memchr.S
+    src/libc/string/target/sh-generic/memset.S
     src/libc/string/target/sh-generic/strlen.S
     src/target/sh-generic/cpucap.c)
 endif()
diff --git a/src/libc/string/memset.c b/src/libc/string/memset.c
index 58cc851..c815a5a 100644
--- a/src/libc/string/memset.c
+++ b/src/libc/string/memset.c
@@ -1,16 +1,13 @@
 #include <stddef.h>
 #include <stdint.h>

-/*
-** The memset() function fills the first n bytes of the memory area pointed to
-** by s with the constant byte c.
-**
-** TODO: use DMA !
-** TODO: use DSP ?
-*/
+#ifndef __SUPPORT_ARCH_SH
+
 void *memset(void *s, int c, size_t n)
 {
 	while ((int)--n >= 0)
 		((uint8_t *) s)[n] = c;
 	return (s);
 }
+
+#endif /*__SUPPORT_ARCH_SH*/
diff --git a/src/libc/string/target/sh-generic/memset.S b/src/libc/string/target/sh-generic/memset.S
new file mode 100644
index 0000000..4aa6a8a
--- /dev/null
+++ b/src/libc/string/target/sh-generic/memset.S
@@ -0,0 +1,70 @@
+.global _memset
+.text
+
+_memset:
+	tst	r6, r6
+	bt	.zero
+
+	/* We'll fill from the end */
+	mov	r4, r3
+	add	r6, r4
+
+	/* When setting less than 64 bytes, use the naive method */
+	mov	#64, r0
+	cmp/ge	r6, r0
+	bt	_naive_memset
+
+	mov	#3, r2
+
+	/* Make a 4-byte filler in r0 using r1 as scratch; r5 must keep the
+	   byte value, because the aligned path falls through into
+	   _naive_memset for the 1-4 trailing bytes and stores r5 there */
+	mov	r5, r0
+	shll8	r0
+	or	r5, r0
+	mov	r0, r1
+	shll16	r1
+	or	r1, r0
+
+_memset_align:
+	/* 4-align the destination */
+	mov.b	r0, @-r4
+	tst	r2, r4
+	bf/s	_memset_align
+	dt	r6
+
+	mov	#40, r2
+
+.aligned4_32:
+	add	#-32, r4
+	add	#-32, r6
+	mov.l	r0, @(28,r4)
+	mov.l	r0, @(24,r4)
+	mov.l	r0, @(20,r4)
+	mov.l	r0, @(16,r4)
+	mov.l	r0, @(12,r4)
+	mov.l	r0, @(8,r4)
+	cmp/ge	r6, r2
+	bf/s	.aligned4_32
+	mov.l	r0, @(4,r4)
+
+	mov	#8, r2
+
+.aligned4_4:
+	mov.l	r0, @-r4
+	cmp/ge	r6, r2
+	bf/s	.aligned4_4
+	add	#-4, r6
+
+_naive_memset:
+	/* Tight loop copy one byte */
+	dt	r6
+	bf/s	_naive_memset
+	mov.b	r5, @-r4
+
+.end:
+	rts
+	mov	r3, r0
+
+.zero:
+	rts
+	mov	r4, r0