Browse Source

string: add and test str[n]casecmp and KMP str[case]str (DONE)

master
Lephenixnoir 8 months ago
parent
commit
df9073e6ff
Signed by: Lephenixnoir GPG Key ID: 1BBA026E13FC0495
  1. 5
      CMakeLists.txt
  2. 13
      STATUS
  3. 3
      include/string.h
  4. 11
      src/libc/string/strcasecmp.c
  5. 7
      src/libc/string/strcasestr.c
  6. 17
      src/libc/string/string_p.h
  7. 12
      src/libc/string/strncasecmp.c
  8. 7
      src/libc/string/strstr.c
  9. 95
      src/libc/string/strstr_base.c

5
CMakeLists.txt

@ -135,6 +135,8 @@ set(SOURCES
src/libc/string/memcpy.c
src/libc/string/memmove.c
src/libc/string/memset.c
src/libc/string/strcasecmp.c
src/libc/string/strcasestr.c
src/libc/string/strcat.c
src/libc/string/strchr.c
src/libc/string/strchrnul.c
@ -145,6 +147,7 @@ set(SOURCES
src/libc/string/strdup.c
src/libc/string/strerror.c
src/libc/string/strlen.c
src/libc/string/strncasecmp.c
src/libc/string/strncat.c
src/libc/string/strncmp.c
src/libc/string/strncpy.c
@ -153,6 +156,8 @@ set(SOURCES
src/libc/string/strpbrk.c
src/libc/string/strrchr.c
src/libc/string/strspn.c
src/libc/string/strstr.c
src/libc/string/strstr_base.c
src/libc/string/strxfrm.c)
if(vhex-generic IN_LIST TARGET_FOLDERS)

13
STATUS

@ -117,25 +117,26 @@ DONE: Function/symbol/macro is defined, builds, links, and is tested
7.21.3.2 strncat: DONE
7.21.4.1 memcmp: DONE
7.21.4.2 strcmp: DONE
! 7.21.4.3 strcoll: TEST
7.21.4.3 strcoll: DONE
7.21.4.4 strncmp: DONE
! 7.21.4.5 strxfrm: TEST
7.21.4.5 strxfrm: DONE
7.21.5.1 memchr: DONE
7.21.5.2 strchr: DONE
7.21.5.3 strcspn: DONE
7.21.5.4 strpbrk: DONE
7.21.5.5 strrchr: DONE
7.21.5.6 strspn: DONE
! 7.21.5.7 strstr: TODO
7.21.5.7 strstr: DONE
! 7.21.5.8 strtok: TODO
7.21.6.1 memset: DONE
7.21.6.2 strerror: DONE
7.21.6.3 strlen: DONE
Extensions:
! - strnlen: TODO
- strnlen: DONE
- strchrnul: DONE
! - strcasecmp: TODO
! - strncasecmp: TODO
- strcasestr: DONE
- strcasecmp: DONE
- strncasecmp: DONE
- strdup: DONE
- strndup: DONE

3
include/string.h

@ -100,6 +100,9 @@ extern int strcasecmp(char const *__s1, char const *__s2);
/* Compare at most __n characters of __s1 and __s2 case-insensitively. */
extern int strncasecmp(char const *__s1, char const *__s2, size_t __n);
/* Find __needle in __haystack, ignoring case. */
extern char *strcasestr(char const *__haystack, char const *__needle);
/* Duplicate __s with malloc. */
extern char *strdup(char const *__s);

11
src/libc/string/strcasecmp.c

@ -0,0 +1,11 @@
#include <string.h>
#include <ctype.h>
/*
** Compare s1 and s2 while ignoring case.
**
** Both strings are walked in lockstep; every byte is read as an unsigned char
** and folded with tolower() before being compared. The result is zero when
** the strings are equal up to case, and otherwise the difference between the
** first pair of folded bytes that differ (negative if s1 sorts first).
*/
int strcasecmp(const char *s1, const char *s2)
{
	for (;; s1++, s2++) {
		/* The cast keeps negative char values away from tolower() */
		int c1 = tolower((unsigned char)*s1);
		int c2 = tolower((unsigned char)*s2);

		/* Stop on the first folded difference or at the end of s1 */
		if (c1 != c2 || *s1 == '\0')
			return (c1 - c2);
	}
}

7
src/libc/string/strcasestr.c

@ -0,0 +1,7 @@
#include <string.h>
#include "string_p.h"
/*
** Case-insensitive substring search. All the work is delegated to
** __strstr_base() with case folding switched on.
*/
char *strcasestr(char const *haystack, char const *needle)
{
	bool const ignore_case = true;

	return __strstr_base(haystack, needle, ignore_case);
}

17
src/libc/string/string_p.h

@ -0,0 +1,17 @@
/* Private declarations shared by the strstr() and strcasestr() sources. */
#ifndef __STRING_P_H__
# define __STRING_P_H__
#include <string.h>
#include <stdbool.h>
/*
** Search for __needle within __haystack using either brute-force or the Knuth-
** Morris-Pratt algorithm. Ignores case if __ignore_case == true. This is the
** base function for strstr and strcasestr.
**
** The Knuth-Morris-Pratt search is used when the needle is shorter than
** INT16_MAX characters and its work table can be allocated with malloc();
** otherwise the brute-force search is used.
**
** Returns a pointer to the first match inside __haystack, or NULL when
** __needle does not occur in it.
*/
char *__strstr_base(
char const *__haystack,
char const *__needle,
bool __ignore_case);
#endif /*__STRING_P_H__*/

12
src/libc/string/strncasecmp.c

@ -0,0 +1,12 @@
#include <string.h>
#include <ctype.h>
/*
** Compare at most n characters of s1 and s2 while ignoring case.
**
** Every byte is read as an unsigned char and folded with tolower() before
** being compared. Returns zero when the first n characters (or the whole
** strings, if shorter) are equal up to case, and otherwise the difference
** between the first pair of folded bytes that differ.
*/
int strncasecmp(const char *s1, const char *s2, size_t n)
{
	for (size_t i = 0; i < n; i++) {
		/* The cast keeps negative char values away from tolower() */
		int c1 = tolower((unsigned char)s1[i]);
		int c2 = tolower((unsigned char)s2[i]);

		/* Stop on the first folded difference or at the end of s1 */
		if (c1 != c2 || s1[i] == '\0')
			return (c1 - c2);
	}
	/* n characters compared equal (this also covers n == 0) */
	return (0);
}

7
src/libc/string/strstr.c

@ -0,0 +1,7 @@
#include <string.h>
#include "string_p.h"
/*
** Case-sensitive substring search. All the work is delegated to
** __strstr_base() with case folding switched off.
*/
char *strstr(char const *haystack, char const *needle)
{
	bool const ignore_case = false;

	return __strstr_base(haystack, needle, ignore_case);
}

95
src/libc/string/strstr_base.c

@ -0,0 +1,95 @@
#include "string_p.h"
#include <string.h>
#include <stdlib.h>
#include <ctype.h>
/*
** Quadratic-time brute-force algorithm.
**
** Tries to match needle at every position of haystack, including the
** position of the terminating NUL so that an empty needle is found in an
** empty haystack. The comparison uses strncasecmp() when ignore_case is true
** and strncmp() otherwise.
**
** Returns a pointer to the first match, or NULL if there is none.
*/
static char *strstr_bruteforce(char const *haystack, char const *needle,
	bool ignore_case)
{
	size_t len_needle = strlen(needle);

	for(;; haystack++) {
		int diff = ignore_case
			? strncasecmp(haystack, needle, len_needle)
			: strncmp(haystack, needle, len_needle);
		if(!diff) return (char *)haystack;
		/* Test for the end only after comparing, so that the terminator
		   position itself is a candidate for an empty needle */
		if(!*haystack) return NULL;
	}
}
/*
** Compare two characters for the Knuth-Morris-Pratt search, folding case
** with tolower() when ignore_case is true. Characters are converted to
** unsigned char before folding so that tolower() never sees a negative value.
*/
static inline bool strstr_kmp_equal(char c1, char c2, bool ignore_case)
{
	if(!ignore_case) return c1 == c2;
	return tolower((unsigned char)c1) == tolower((unsigned char)c2);
}

/*
** Linear-time Knuth-Morris-Pratt algorithm.
**
** T is caller-provided scratch space for the failure function; entries
** T[0] .. T[strlen(needle)-1] are written, so it must hold at least
** strlen(needle) elements. needle must not be empty: T[0] is always written
** and needle[1] is always read.
**
** Returns a pointer to the first match of needle in haystack, or NULL if
** there is none.
*/
static char *strstr_kmp(char const *haystack, char const *needle, int16_t *T,
	bool ignore_case)
{
	/* Build the failure function */
	T[0] = -1;
	int cnd = 0;

	for(int pos = 1; needle[pos]; pos++, cnd++) {
		if(strstr_kmp_equal(needle[pos], needle[cnd], ignore_case)) {
			T[pos] = T[cnd];
		}
		else {
			T[pos] = cnd;
			while(cnd >= 0 && !strstr_kmp_equal(needle[pos],
				needle[cnd], ignore_case))
				cnd = T[cnd];
		}
	}

	/* Perform linear-time matching; i indexes haystack, pos indexes needle
	   (pos is signed because the failure function can yield -1) */
	size_t i = 0;
	int pos = 0;

	while(haystack[i]) {
		if(strstr_kmp_equal(needle[pos], haystack[i], ignore_case)) {
			pos++;
			i++;
			/* Whole needle consumed: the match starts pos characters
			   before the current haystack position */
			if(!needle[pos]) return (char *)haystack + (i - (size_t)pos);
		}
		else {
			pos = T[pos];
			/* -1 means no prefix can be reused: restart the needle at
			   the next haystack character */
			if(pos < 0) {
				pos++;
				i++;
			}
		}
	}

	return NULL;
}
/*
** Common implementation of strstr and strcasestr: find the first occurrence
** of needle in haystack, ignoring case if ignore_case is true.
**
** An empty needle matches at the start of haystack. Otherwise the search
** uses the Knuth-Morris-Pratt algorithm when its work table can be
** allocated, and the brute-force algorithm when it cannot.
**
** Returns a pointer to the match inside haystack, or NULL if there is none.
*/
char *__strstr_base(char const *haystack, char const *needle,
	bool ignore_case)
{
	size_t len_needle = strlen(needle);
	int16_t *T = NULL;

	/*
	** An empty needle is found at the very start of the haystack. Handling
	** it here also avoids a zero-sized table: malloc(0) may return a
	** non-NULL pointer, and the KMP setup always writes T[0] and reads
	** needle[1].
	*/
	if(len_needle == 0) return (char *)haystack;

	/*
	** Don't use the KMP algorithm for huge needles (INT16_MAX characters or
	** more, i.e. about 32 kiB) as indexes might not fit in an int16_t. (In
	** any case, if you need to search for such a huge pattern, don't use
	** strstr.)
	*/
	if(len_needle < INT16_MAX) {
		T = malloc(len_needle * sizeof *T);
	}

	/* Fall back to bruteforce if the table is unavailable */
	if(!T) return strstr_bruteforce(haystack, needle, ignore_case);

	char *match = strstr_kmp(haystack, needle, T, ignore_case);
	free(T);
	return match;
}
Loading…
Cancel
Save