From df9073e6ff09a865025e6f511618e90d8d50357e Mon Sep 17 00:00:00 2001 From: Lephenixnoir Date: Mon, 24 May 2021 10:04:54 +0200 Subject: [PATCH] string: add and test str[n]casecmp and KMP str[case]str (DONE) --- CMakeLists.txt | 5 ++ STATUS | 13 ++--- include/string.h | 3 ++ src/libc/string/strcasecmp.c | 11 ++++ src/libc/string/strcasestr.c | 7 +++ src/libc/string/string_p.h | 17 +++++++ src/libc/string/strncasecmp.c | 12 +++++ src/libc/string/strstr.c | 7 +++ src/libc/string/strstr_base.c | 95 +++++++++++++++++++++++++++++++++++ 9 files changed, 164 insertions(+), 6 deletions(-) create mode 100644 src/libc/string/strcasecmp.c create mode 100644 src/libc/string/strcasestr.c create mode 100644 src/libc/string/string_p.h create mode 100644 src/libc/string/strncasecmp.c create mode 100644 src/libc/string/strstr.c create mode 100644 src/libc/string/strstr_base.c diff --git a/CMakeLists.txt b/CMakeLists.txt index 6b792ac..0a63ea9 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -135,6 +135,8 @@ set(SOURCES src/libc/string/memcpy.c src/libc/string/memmove.c src/libc/string/memset.c + src/libc/string/strcasecmp.c + src/libc/string/strcasestr.c src/libc/string/strcat.c src/libc/string/strchr.c src/libc/string/strchrnul.c @@ -145,6 +147,7 @@ set(SOURCES src/libc/string/strdup.c src/libc/string/strerror.c src/libc/string/strlen.c + src/libc/string/strncasecmp.c src/libc/string/strncat.c src/libc/string/strncmp.c src/libc/string/strncpy.c @@ -153,6 +156,8 @@ set(SOURCES src/libc/string/strpbrk.c src/libc/string/strrchr.c src/libc/string/strspn.c + src/libc/string/strstr.c + src/libc/string/strstr_base.c src/libc/string/strxfrm.c) if(vhex-generic IN_LIST TARGET_FOLDERS) diff --git a/STATUS b/STATUS index 6375e71..3f427b4 100644 --- a/STATUS +++ b/STATUS @@ -117,25 +117,26 @@ DONE: Function/symbol/macro is defined, builds, links, and is tested 7.21.3.2 strncat: DONE 7.21.4.1 memcmp: DONE 7.21.4.2 strcmp: DONE -! 
7.21.4.3 strcoll: TEST + 7.21.4.3 strcoll: DONE 7.21.4.4 strncmp: DONE -! 7.21.4.5 strxfrm: TEST + 7.21.4.5 strxfrm: DONE 7.21.5.1 memchr: DONE 7.21.5.2 strchr: DONE 7.21.5.3 strcspn: DONE 7.21.5.4 strpbrk: DONE 7.21.5.5 strrchr: DONE 7.21.5.6 strspn: DONE -! 7.21.5.7 strstr: TODO + 7.21.5.7 strstr: DONE ! 7.21.5.8 strtok: TODO 7.21.6.1 memset: DONE 7.21.6.2 strerror: DONE 7.21.6.3 strlen: DONE Extensions: -! - strnlen: TODO + - strnlen: DONE - strchrnul: DONE -! - strcasecmp: TODO -! - strncasecmp: TODO + - strcasestr: DONE + - strcasecmp: DONE + - strncasecmp: DONE - strdup: DONE - strndup: DONE diff --git a/include/string.h b/include/string.h index 3b3eab0..c721fb7 100644 --- a/include/string.h +++ b/include/string.h @@ -100,6 +100,9 @@ extern int strcasecmp(char const *__s1, char const *__s2); /* Compare at most __n characters of __s1 and __s2 case-insensitively. */ extern int strncasecmp(char const *__s1, char const *__s2, size_t __n); +/* Find __needle in __haystack, ignoring case. */ +extern char *strcasestr(char const *__haystack, char const *__needle); + /* Duplicate __s with malloc. 
/*
** Case-insensitive comparison (strcasecmp, strncasecmp) and substring search
** (strstr, strcasestr) for the string library. Each section below corresponds
** to one of the files added under src/libc/string/.
*/

#include <ctype.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>

/* ---- src/libc/string/string_p.h ---------------------------------------- */

#ifndef __STRING_P_H__
# define __STRING_P_H__

/*
** Search for __needle within __haystack using either brute-force or the Knuth-
** Morris-Pratt algorithm. Ignores case if __ignore_case == true. This is the
** base function for strstr and strcasestr.
**
** Returns a pointer to the first match, __haystack itself if __needle is
** empty, or NULL if there is no match.
*/
char *__strstr_base(
	char const *__haystack,
	char const *__needle,
	bool __ignore_case);

#endif /*__STRING_P_H__*/

/*
** Lower-case a character of a string. tolower() is only defined for EOF and
** values representable as unsigned char, so plain char (which may be signed)
** must be converted before the call.
*/
static int fold_case(char c)
{
	return tolower((unsigned char)c);
}

/* ---- src/libc/string/strcasecmp.c -------------------------------------- */

/*
** Compare s1 and s2, ignoring case. Returns a negative, zero or positive
** value if s1 is respectively less than, equal to or greater than s2. The
** sign is that of the difference between the first pair of *lower-cased*
** characters that differ, so that "a" sorts before "B".
*/
int strcasecmp(const char *s1, const char *s2)
{
	int c1, c2;

	do {
		c1 = fold_case(*s1++);
		c2 = fold_case(*s2++);
	} while (c1 != '\0' && c1 == c2);

	return c1 - c2;
}

/* ---- src/libc/string/strncasecmp.c ------------------------------------- */

/*
** Compare at most n characters of s1 and s2, ignoring case. Same return
** convention as strcasecmp; returns 0 when n == 0.
*/
int strncasecmp(const char *s1, const char *s2, size_t n)
{
	for (size_t i = 0; i < n; i++) {
		int c1 = fold_case(s1[i]);
		int c2 = fold_case(s2[i]);

		/* Stop at the first difference or at the end of both strings */
		if (c1 != c2 || c1 == '\0') {
			return c1 - c2;
		}
	}

	return 0;
}

/* ---- src/libc/string/strstr_base.c ------------------------------------- */

/*
** Quadratic-time brute-force algorithm. An empty needle matches at the start
** of the haystack, including when the haystack itself is empty.
*/
static char *strstr_bruteforce(char const *haystack, char const *needle,
	bool ignore_case)
{
	size_t len_needle = strlen(needle);

	for (;; haystack++) {
		int diff = ignore_case
			? strncasecmp(haystack, needle, len_needle)
			: strncmp(haystack, needle, len_needle);

		if (!diff) {
			return (char *)haystack;
		}
		/* Test the terminator last so that the final position is tried */
		if (!*haystack) {
			return NULL;
		}
	}
}

/* Character equality, optionally ignoring case. */
static bool chars_equal(char c1, char c2, bool ignore_case)
{
	if (ignore_case) {
		return fold_case(c1) == fold_case(c2);
	}
	return c1 == c2;
}

/*
** Linear-time Knuth-Morris-Pratt algorithm.
**
** needle must be non-empty and shorter than INT16_MAX characters, and T must
** have room for strlen(needle) entries; T is used as scratch space for the
** failure function.
*/
static char *strstr_kmp(char const *haystack, char const *needle, int16_t *T,
	bool ignore_case)
{
	/* Build the failure function */

	T[0] = -1;
	int cnd = 0;

	for (int pos = 1; needle[pos]; pos++, cnd++) {
		if (chars_equal(needle[pos], needle[cnd], ignore_case)) {
			T[pos] = T[cnd];
		}
		else {
			T[pos] = (int16_t)cnd;
			while (cnd >= 0
			       && !chars_equal(needle[pos], needle[cnd], ignore_case)) {
				cnd = T[cnd];
			}
		}
	}

	/* Perform linear-time matching */

	size_t i = 0; /* size_t: the haystack may be longer than INT_MAX */
	int pos = 0;

	while (haystack[i]) {
		if (chars_equal(needle[pos], haystack[i], ignore_case)) {
			pos++;
			i++;

			if (!needle[pos]) {
				return (char *)&haystack[i - (size_t)pos];
			}
		}
		else {
			pos = T[pos];
			/* No prefix can be reused: restart on the next character */
			if (pos < 0) {
				pos++;
				i++;
			}
		}
	}

	return NULL;
}

char *__strstr_base(char const *haystack, char const *needle,
	bool ignore_case)
{
	size_t len_needle = strlen(needle);
	int16_t *T = NULL;

	/*
	** An empty needle is found at the start of any haystack. Handle it
	** here: the KMP table needs at least one entry, and malloc(0) may
	** return a non-NULL pointer that must not be written to.
	*/
	if (len_needle == 0) {
		return (char *)haystack;
	}

	/*
	** Don't use the KMP algorithm for huge needles (INT16_MAX characters
	** or more, i.e. about 32 kiB) as indexes would not fit in an int16_t.
	** (In any case, if you need to search for such a huge pattern, don't
	** use strstr.)
	*/
	if (len_needle < INT16_MAX) {
		T = malloc(len_needle * sizeof *T);
	}

	/* Fall back to bruteforce if allocation fails */
	if (!T) {
		return strstr_bruteforce(haystack, needle, ignore_case);
	}

	char *match = strstr_kmp(haystack, needle, T, ignore_case);
	free(T);
	return match;
}

/* ---- src/libc/string/strstr.c ------------------------------------------ */

/* Find the first occurrence of needle in haystack (case-sensitive). */
char *strstr(char const *haystack, char const *needle)
{
	return __strstr_base(haystack, needle, false);
}

/* ---- src/libc/string/strcasestr.c -------------------------------------- */

/* Find the first occurrence of needle in haystack, ignoring case. */
char *strcasestr(char const *haystack, char const *needle)
{
	return __strstr_base(haystack, needle, true);
}