From efb37cf7833f1a26a46fbbda398c9af541041665 Mon Sep 17 00:00:00 2001 From: Lephenixnoir Date: Fri, 21 May 2021 22:49:55 +0200 Subject: [PATCH] stdlib: add and test strtold (DONE) The method is rather naive - digits read as an integer, then multiplied by a power of 10 or 2. This does not always give exact results, but it's close enough for now. A stub support for long double larger than 64 bits is provided. --- CMakeLists.txt | 1 + STATUS | 3 +- src/libc/stdlib/strtold.c | 193 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 196 insertions(+), 1 deletion(-) create mode 100644 src/libc/stdlib/strtold.c diff --git a/CMakeLists.txt b/CMakeLists.txt index e049d27..fd97016 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -121,6 +121,7 @@ set(SOURCES src/libc/stdlib/reallocarray.c src/libc/stdlib/strto_int.c src/libc/stdlib/strtol.c + src/libc/stdlib/strtold.c src/libc/stdlib/strtoll.c src/libc/stdlib/strtoul.c src/libc/stdlib/strtoull.c diff --git a/STATUS b/STATUS index 7e6ef2a..a59c700 100644 --- a/STATUS +++ b/STATUS @@ -89,7 +89,8 @@ DONE: Function/symbol/macro is defined, builds, links, and is tested 7.20 ! 7.20.1.1 atof: TODO 7.20.1.2 atoi, atol, atoll: DONE -! 7.20.1.3 strtod, strtof, strtold: TODO +! 7.20.1.3 strtod, strtof: TODO + strtold: DONE 7.20.1.4 strtol, strtoul, strtoll, strtoull: DONE ! 7.20.2 Pseudo-random sequence generation functions: TODO ! 7.20.3 Memory management functions: TODO (check existing code first) diff --git a/src/libc/stdlib/strtold.c b/src/libc/stdlib/strtold.c new file mode 100644 index 0000000..e2a4aa5 --- /dev/null +++ b/src/libc/stdlib/strtold.c @@ -0,0 +1,193 @@ +#include +#include + +#include +#include +#include + +#include +#include +#include + +/* +** In the following conversions, the significant digits are represented in an +** integer and multiplied at the last moment by a suitable power of 10 (decimal +** representation) or 2 (hexadecimal representation). 
/*
** An integer of a suitable size needs to be used to hold the significant
** digits; that size is the size of the long double type.
**
** TODO: vhex-x86: Using 128-bit long double is untested!
*/
#if __SIZEOF_LONG_DOUBLE__ == 8
# define SIGNIFICAND_TYPE uint64_t
# define SIGNIFICAND_DIGITS 17
#elif __SIZEOF_LONG_DOUBLE__ <= 16
# define SIGNIFICAND_TYPE unsigned __int128
# define SIGNIFICAND_DIGITS 38
#else
# error long double larger than 128 bits is not supported
#endif

/*
** Number of hexadecimal digits kept in SIGNIFICAND_TYPE. One nibble is left
** unused; the kept digits still span more bits than LDBL_MANT_DIG, so the
** integer-to-long-double conversion performs the only rounding.
*/
#define SIGNIFICAND_HEX_DIGITS (2 * (int)sizeof(SIGNIFICAND_TYPE) - 1)

/*
** Explicit exponents stop accumulating once they reach this magnitude, which
** keeps the arithmetic on `long` free of overflow. Values this large always
** end up as 0 or infinity for any realistic input length.
*/
#define EXPONENT_CAP 100000000L

/* Largest power of 10 applied in one step by scale_pow10(). */
#if LDBL_MAX_10_EXP >= 4096
# define POW10_CHUNK 4096
#else
# define POW10_CHUNK 256
#endif

/* Basically strncasecmp. */
static int ncasecmp(char const *left, char const *right, size_t n)
{
	for(size_t i = 0; i < n; i++) {
		/* <ctype.h> functions require unsigned char values */
		int l = tolower((unsigned char)left[i]);
		int r = tolower((unsigned char)right[i]);
		if(l != r) return l - r;
	}
	return 0;
}

/* Value of digit character c in base 10 or 16, or -1 if c is not a digit. */
static int digit_value(int c, bool hexadecimal)
{
	if(c >= '0' && c <= '9') return c - '0';

	if(hexadecimal) {
		c = tolower(c);
		if(c >= 'a' && c <= 'f') return c - 'a' + 10;
	}
	return -1;
}

/*
** Parse digits and exponent into integers, in decimal or hexadecimal notation.
**
** -> In decimal notation, we keep up to SIGNIFICAND_DIGITS significant digits
**    (17 with a 64-bit integer, 38 with a 128-bit one) and round half-up on
**    the first discarded digit. The caller later scales by a power of 10.
**
** -> In hexadecimal notation, we keep up to SIGNIFICAND_HEX_DIGITS significant
**    digits; if any discarded digit is non-zero the lowest kept bit is set
**    (sticky bit) so that the conversion to long double rounds properly. The
**    caller later scales by a power of 2.
**
** Leading zeros are accepted but do not count as significant digits.
**
** @ptr0         Start of the digits on entry; first unparsed character on
**               success; unchanged on failure
** @digits       Set to the significant digits read as an integer
** @exponent     Set to the power of 10 (decimal) or 2 (hexadecimal) by which
**               *digits must be multiplied
** @hexadecimal  Whether to read hexadecimal digits and a 'p' exponent
** Returns true if at least one digit was found, false otherwise (in which
** case *digits and *exponent are 0).
*/
static bool parse_digits(char const **ptr0, SIGNIFICAND_TYPE *digits,
	long *exponent, bool hexadecimal)
{
	char const *p = *ptr0;

	int const max_digits =
		hexadecimal ? SIGNIFICAND_HEX_DIGITS : SIGNIFICAND_DIGITS;
	int const base = hexadecimal ? 16 : 10;

	/* TODO: locale: use a locale-aware decimal separator */
	int const dot_character = '.';
	int const exp_character = hexadecimal ? 'p' : 'e';

	SIGNIFICAND_TYPE value = 0;
	long scale = 0;
	int kept = 0;
	bool any_digit = false;
	bool dot_found = false;
	bool dropped = false;
	bool sticky = false;

	*digits = 0;
	*exponent = 0;

	for(;; p++) {
		int c = (unsigned char)*p;

		/* Allow only one dot in the string, stop at the second one */
		if(c == dot_character) {
			if(dot_found) break;
			dot_found = true;
			continue;
		}

		int v = digit_value(c, hexadecimal);
		if(v < 0) break;

		any_digit = true;

		/* Every digit after the dot shifts the value by one place */
		if(dot_found) scale--;

		/* Leading zeros carry no information; don't let them use up
		   the limited number of digits we can store */
		if(kept == 0 && v == 0) continue;

		if(kept < max_digits) {
			value = value * base + v;
			kept++;
			continue;
		}

		/* Digit is discarded: account for its place value only */
		scale++;

		if(hexadecimal) {
			if(v != 0) sticky = true;
		}
		else if(!dropped && v >= 5) {
			/* Round half-up on the first discarded digit */
			value++;
		}
		dropped = true;
	}

	/* Require at least one digit to be present; if not, the whole string
	   is considered invalid */
	if(!any_digit) return false;

	if(sticky) value |= 1;

	/* In hexadecimal, each character is worth 4 bits of exponent */
	if(hexadecimal) scale *= 4;

	/* Parse exponent: marker, optional sign, then at least one decimal
	   digit immediately after. Otherwise the 'e...' part is ignored. */
	if(tolower((unsigned char)*p) == exp_character) {
		char const *q = p + 1;
		bool exp_negative = (*q == '-');
		if(*q == '-' || *q == '+') q++;

		if(digit_value((unsigned char)*q, false) >= 0) {
			long e = 0;
			int d;
			while((d = digit_value((unsigned char)*q, false)) >= 0) {
				if(e < EXPONENT_CAP) e = e * 10 + d;
				q++;
			}
			scale += exp_negative ? -e : e;
			p = q;
		}
	}

	*ptr0 = p;
	*digits = value;
	*exponent = scale;
	return true;
}

/*
** Compute 10^n for 0 <= n <= POW10_CHUNK by multiplying entries of a table
** of 10^(2^k) constants, one per set bit of n.
*/
static long double pow10_small(unsigned int n)
{
	static long double const powers[] = {
		1e1L, 1e2L, 1e4L, 1e8L, 1e16L, 1e32L, 1e64L, 1e128L, 1e256L,
#if LDBL_MAX_10_EXP >= 4096
		1e512L, 1e1024L, 1e2048L, 1e4096L,
#endif
	};

	long double result = 1.0L;
	for(size_t bit = 0; n != 0; bit++, n >>= 1) {
		if(n & 1) result *= powers[bit];
	}
	return result;
}

/*
** Compute x * 10^exponent for x >= 0.
**
** Negative exponents divide by a positive power of 10 rather than multiplying
** by its inverse, so that short inputs like "0.3" involve a single rounding.
** The power is applied in chunks of at most POW10_CHUNK so that the factor
** itself never overflows; the loop stops as soon as x reaches 0 or infinity.
** A zero x is returned unchanged whatever the exponent.
*/
static long double scale_pow10(long double x, long exponent)
{
	bool shrink = (exponent < 0);
	unsigned long n = shrink
		? 0UL - (unsigned long)exponent
		: (unsigned long)exponent;

	while(n != 0 && x != 0.0L && x != HUGE_VALL) {
		unsigned int chunk =
			(n > POW10_CHUNK) ? POW10_CHUNK : (unsigned int)n;
		long double factor = pow10_small(chunk);

		x = shrink ? x / factor : x * factor;
		n -= chunk;
	}
	return x;
}

/*
** Clamp a binary exponent to a range that fits in an int for ldexpl(). The
** bound is large enough that any non-zero SIGNIFICAND_TYPE value scaled by a
** clamped exponent still overflows to infinity or underflows to zero.
*/
static int clamp_binary_exponent(long exponent)
{
	long const limit = (long)LDBL_MAX_EXP - (long)LDBL_MIN_EXP
		+ (long)LDBL_MANT_DIG + 8 * (long)sizeof(SIGNIFICAND_TYPE);

	if(exponent > limit) return (int)limit;
	if(exponent < -limit) return (int)-limit;
	return (int)exponent;
}

/*
** Convert the initial portion of the string to a long double.
**
** Accepted forms, after optional whitespace and an optional sign:
**   - "nan", optionally followed by "(n-char-sequence)"; the sequence may
**     contain digits, letters and underscores. It is skipped and the result
**     is always the default quiet NaN.
**   - "inf" or "infinity"
**   - "0x" followed by hexadecimal digits, optional dot, optional 'p' exponent
**   - decimal digits, optional dot, optional 'e' exponent
** Letters are matched case-insensitively.
**
** @ptr     String to convert
** @endptr  If not NULL, set to the first character after the converted text,
**          or to ptr itself when no conversion could be performed
** Returns the converted value, or zero if no conversion could be performed.
** errno is set to ERANGE when a numeric input produces HUGE_VALL.
*/
long double strtold(char const * restrict ptr, char ** restrict endptr)
{
	char const *cursor = ptr;

	/* Skip initial whitespace */
	while(isspace((unsigned char)*cursor)) cursor++;

	/* Read optional sign */
	bool negative = (*cursor == '-');
	if(*cursor == '-' || *cursor == '+') cursor++;

	/* Result variable */
	bool valid = true;
	long double x = 0.0L;

	/* NaN possibly with an argument */
	if(!ncasecmp(cursor, "nan", 3)) {
		x = __builtin_nanl("");
		cursor += 3;

		/* Only consume "(...)" if it is well-formed and closed; never
		   scan past the end of the string looking for ')' */
		if(*cursor == '(') {
			char const *p = cursor + 1;
			while(isalnum((unsigned char)*p) || *p == '_') p++;
			if(*p == ')') cursor = p + 1;
		}
	}
	/* Infinity */
	else if(!ncasecmp(cursor, "infinity", 8)) {
		x = __builtin_infl();
		cursor += 8;
	}
	else if(!ncasecmp(cursor, "inf", 3)) {
		x = __builtin_infl();
		cursor += 3;
	}
	else {
		SIGNIFICAND_TYPE digits = 0;
		long exponent = 0;

		bool hexadecimal = (cursor[0] == '0'
			&& tolower((unsigned char)cursor[1]) == 'x');

		if(hexadecimal) {
			char const *p = cursor + 2;
			if(parse_digits(&p, &digits, &exponent, true)) {
				cursor = p;
				/* Scaling by a power of 2 is exact */
				x = ldexpl((long double)digits,
					clamp_binary_exponent(exponent));
			}
			else {
				/* "0x" without hex digits: only the leading
				   "0" is a number, parse it as decimal */
				hexadecimal = false;
			}
		}

		if(!hexadecimal) {
			valid = parse_digits(&cursor, &digits, &exponent,
				false);
			x = scale_pow10((long double)digits, exponent);
		}

		/*
		** Detect overflow, somewhat. Implementation is not required to
		** set errno on underflow, which makes things much easier for
		** us as underflow gives 0 (7.20.1.3§10).
		*/
		if(x == HUGE_VALL) {
			errno = ERANGE;
		}
	}

	/* Apply sign; this method is allowed by 7.20.1.3§4.249 */
	if(negative) x = -x;

	/* Save the result pointer; the original pointer if format is invalid */
	if(endptr) *endptr = (char *)(valid ? cursor : ptr);

	return x;
}