From 5042236392c8c122b85cd973f881a624445182f1 Mon Sep 17 00:00:00 2001 From: Lephenixnoir Date: Thu, 20 May 2021 09:23:06 +0200 Subject: [PATCH] stdlib: reword strtoull into a generic strto_int This new function will support strtol, strtoul, and strtoll as well. --- CMakeLists.txt | 1 + src/libc/stdlib/stdlib_p.h | 31 ++++++++++++ src/libc/stdlib/strto_int.c | 96 +++++++++++++++++++++++++++++++++++++ src/libc/stdlib/strtoull.c | 61 ++--------------------- 4 files changed, 133 insertions(+), 56 deletions(-) create mode 100644 src/libc/stdlib/stdlib_p.h create mode 100644 src/libc/stdlib/strto_int.c diff --git a/CMakeLists.txt b/CMakeLists.txt index 41cf356..4895a50 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -106,6 +106,7 @@ set(SOURCES src/libc/stdlib/llabs.c src/libc/stdlib/lldiv.c src/libc/stdlib/reallocarray.c + src/libc/stdlib/strto_int.c src/libc/stdlib/strtoull.c # string src/libc/string/strchr.c diff --git a/src/libc/stdlib/stdlib_p.h b/src/libc/stdlib/stdlib_p.h new file mode 100644 index 0000000..ab3caba --- /dev/null +++ b/src/libc/stdlib/stdlib_p.h @@ -0,0 +1,31 @@ +#ifndef __STDLIB_P_H__ +# define __STDLIB_P_H__ + +#include <stdlib.h> +#include <stdbool.h> + +/* Parse an integer from a string. This is the base function for strtol, + strtoul, strtoll, and strtoull. + + This function does not set errno, and instead returns the error code + according to conversion rules. Setting errno is troublesome because it's a + global state that cannot be reverted and thus cannot be tested. + + If outl is non-NULL, strto_int produces a long or an unsigned long result + (depending on use_unsigned). Signedness only affects the range of values + that are considered to be ERANGE, and both results are stored in *outl. + Similarly, if outll is non-NULL, strto_int produces a long long or unsigned + long long result. + + On platforms where long is 32-bit, 64-bit operations are performed only if + outll is non-NULL. This is because multiplications with overflow can be + expensive. 
*/ +int strto_int( + char const * restrict __ptr, + char ** restrict __endptr, + int __base, + long *__outl, + long long *__outll, + bool __use_unsigned); + +#endif /*__STDLIB_P_H__*/ diff --git a/src/libc/stdlib/strto_int.c b/src/libc/stdlib/strto_int.c new file mode 100644 index 0000000..4b43124 --- /dev/null +++ b/src/libc/stdlib/strto_int.c @@ -0,0 +1,96 @@ +#include "stdlib_p.h" +#include <stdbool.h> +#include <ctype.h> +#include <errno.h> + +/* Parse an integer from a string. Base function for strtol, strtoul, strtoll, + and strtoull. This function: + -> Does not set errno, and instead returns the potential error code. Setting + errno would prevent these functions from calling each other as they all + have different ranges, resulting in undue ERANGE. + -> Can parse into both long and long long, depending on what pointer is + non-NULL. */ +int strto_int(char const * restrict ptr, char ** restrict endptr, int base, + long *outl, long long *outll, bool use_unsigned) +{ + /* Save the value of ptr in endptr now in case the format is invalid */ + if(endptr) *endptr = (char *)ptr; + + /* Skip initial whitespace */ + while(isspace(*ptr)) ptr++; + + /* Accept a sign character */ + bool negative = false; + if(*ptr == '-') negative = true; + if(*ptr == '-' || *ptr == '+') ptr++; + + /* Use unsigned variables as only these have defined overflow */ + unsigned long xl = 0; + unsigned long long xll = 0; + + int errno_value = 0; + bool valid = false; + + /* Read prefixes and determine base */ + if((base == 0 || base == 16) && ptr[0]=='0' && tolower(ptr[1])=='x') { + ptr += 2; + base = 16; + } + else if(base == 0 && ptr[0] == '0') { + ptr++; + base = 8; + } + else if(base == 0) { + base = 10; + } + + /* Read digits */ + while(1) { + int v = -1; + if(isdigit(*ptr)) v = *ptr - '0'; + if(islower(*ptr)) v = *ptr - 'a' + 10; + if(v == -1 || v >= base) break; + + /* The value is valid as long as there is at least one digit */ + valid = true; + + /* (x = base*x + v) but with overflow checks */ + + /* + ** TODO 
FIXME: There is a bug with overflows if the unsigned + ** value cannot be represented but the signed value can (which + ** is the case only for LONG_MIN and LLONG_MIN) + */ + if(outl) { + if(__builtin_umull_overflow(xl, base, &xl)) + errno_value = ERANGE; + if(__builtin_uaddl_overflow(xl, v, &xl)) + errno_value = ERANGE; + } + if(outll) { + if(__builtin_umulll_overflow(xll, base, &xll)) + errno_value = ERANGE; + if(__builtin_uaddll_overflow(xll, v, &xll)) + errno_value = ERANGE; + } + + ptr++; + } + + /* Handle the sign */ + if(negative) { + /* Only -0 can be represented as unsigned */ + if(outl && use_unsigned && xl != 0) + errno_value = ERANGE; + if(outll && use_unsigned && xll != 0) + errno_value = ERANGE; + + xl = -xl; + xll = -xll; + } + + if(outl) *outl = xl; + if(outll) *outll = xll; + if(endptr && valid) *endptr = (char *)ptr; + return errno_value; +} diff --git a/src/libc/stdlib/strtoull.c b/src/libc/stdlib/strtoull.c index dcf4ee5..791c73d 100644 --- a/src/libc/stdlib/strtoull.c +++ b/src/libc/stdlib/strtoull.c @@ -1,62 +1,11 @@ -#include -#include -#include +#include "stdlib_p.h" #include unsigned long long int strtoull(char const * restrict ptr, char ** restrict endptr, int base) { - /* Save the value of ptr in endptr now in case the format is invalid */ - if(endptr) *endptr = (char *)ptr; - - /* Skip initial whitespace */ - while(isspace(*ptr)) ptr++; - - /* Accept a sign character */ - bool negative = false; - if(*ptr == '-') negative = true; - if(*ptr == '-' || *ptr == '+') ptr++; - - unsigned long long x = 0; - bool valid = false; - - /* Read prefixes and determine base */ - if((base == 0 || base == 16) && ptr[0]=='0' && tolower(ptr[1])=='x') { - ptr += 2; - base = 16; - } - else if(base == 0 && ptr[0] == '0') { - ptr++; - base = 8; - } - else if(base == 0) { - base = 10; - } - - /* Read digits */ - while(1) { - int v = -1; - if(isdigit(*ptr)) v = *ptr - '0'; - if(islower(*ptr)) v = *ptr - 'a' + 10; - if(v == -1 || v >= base) break; - - /* The value 
is valid as long as there is at least one digit */ - valid = true; - - /* (x = base*x + v) but with overflow checks */ - if(__builtin_umulll_overflow(x, base, &x)) - errno = ERANGE; - if(__builtin_uaddll_overflow(x, v, &x)) - errno = ERANGE; - - ptr++; - } - - if(negative) { - if(x != 0) errno = ERANGE; - x = -x; - } - - if(endptr && valid) *endptr = (char *)ptr; - return x; + unsigned long long n = 0; + int err = strto_int(ptr, endptr, base, NULL, (long long *)&n, true); + if(err != 0) errno = err; + return n; }