From f5cca84ae8b504e943b3b877e9f3811488166379 Mon Sep 17 00:00:00 2001 From: Lephenixnoir Date: Fri, 21 May 2021 23:52:54 +0200 Subject: [PATCH] stdlib: add and test strtod, strtof and atof (DONE) This uses a generic function strto_fp similar to strto_int that is used for strtol and its derivatives. --- CMakeLists.txt | 4 + STATUS | 5 +- include/stdlib.h | 3 + src/libc/stdlib/atof.c | 6 + src/libc/stdlib/stdlib_p.h | 13 +++ src/libc/stdlib/strto_fp.c | 217 +++++++++++++++++++++++++++++++++++++ src/libc/stdlib/strtod.c | 10 ++ src/libc/stdlib/strtof.c | 10 ++ src/libc/stdlib/strtold.c | 193 +-------------------------------- 9 files changed, 270 insertions(+), 191 deletions(-) create mode 100644 src/libc/stdlib/atof.c create mode 100644 src/libc/stdlib/strto_fp.c create mode 100644 src/libc/stdlib/strtod.c create mode 100644 src/libc/stdlib/strtof.c diff --git a/CMakeLists.txt b/CMakeLists.txt index fd97016..b7ad657 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -109,6 +109,7 @@ set(SOURCES src/libc/stdio/printf.c # stdlib src/libc/stdlib/abs.c + src/libc/stdlib/atof.c src/libc/stdlib/atoi.c src/libc/stdlib/atol.c src/libc/stdlib/atoll.c @@ -119,7 +120,10 @@ set(SOURCES src/libc/stdlib/llabs.c src/libc/stdlib/lldiv.c src/libc/stdlib/reallocarray.c + src/libc/stdlib/strto_fp.c src/libc/stdlib/strto_int.c + src/libc/stdlib/strtod.c + src/libc/stdlib/strtof.c src/libc/stdlib/strtol.c src/libc/stdlib/strtold.c src/libc/stdlib/strtoll.c diff --git a/STATUS b/STATUS index a59c700..30945cc 100644 --- a/STATUS +++ b/STATUS @@ -87,10 +87,9 @@ DONE: Function/symbol/macro is defined, builds, links, and is tested ! 7.19.10 Error-handling functions: TODO 7.20 -! 7.20.1.1 atof: TODO + 7.20.1.1 atof: DONE 7.20.1.2 atoi, atol, atoll: DONE -! 7.20.1.3 strtod, strtof: TODO - strtold: DONE + 7.20.1.3 strtod, strtof, strtold: DONE 7.20.1.4 strtol, strtoul, strtoll, strtoull: DONE ! 7.20.2 Pseudo-random sequence generation functions: TODO ! 
7.20.3 Memory management functions: TODO (check existing code first) diff --git a/include/stdlib.h b/include/stdlib.h index 63e8d91..37ad1c8 100644 --- a/include/stdlib.h +++ b/include/stdlib.h @@ -76,6 +76,9 @@ extern long long int atoll(char const *__ptr); /* Numeric conversion functions. */ +/* ASCII to floating-point. */ +extern double atof(char const *__ptr); + /* Parse a long int from a string. */ extern long int strtol( char const * restrict __ptr, diff --git a/src/libc/stdlib/atof.c b/src/libc/stdlib/atof.c new file mode 100644 index 0000000..23917fc --- /dev/null +++ b/src/libc/stdlib/atof.c @@ -0,0 +1,6 @@ +#include + +double atof(char const *ptr) +{ + return (double)strtod(ptr, NULL); +} diff --git a/src/libc/stdlib/stdlib_p.h b/src/libc/stdlib/stdlib_p.h index f4a8288..8d28f12 100644 --- a/src/libc/stdlib/stdlib_p.h +++ b/src/libc/stdlib/stdlib_p.h @@ -28,4 +28,17 @@ int strto_int( long long *__outll, bool __use_unsigned); +/* Parse a floating-point value from a string. This is the base function for + strtod, strtof, and strtold. + + This function is similar to strto_int(). It returns the error code to set in + errno, and can produce one of three outputs depending on which of out, outf + and outl is set. */ +int strto_fp( + char const * restrict __ptr, + char ** restrict __endptr, + double *out, + float *outf, + long double *outl); + #endif /*__STDLIB_P_H__*/ diff --git a/src/libc/stdlib/strto_fp.c b/src/libc/stdlib/strto_fp.c new file mode 100644 index 0000000..b5522bb --- /dev/null +++ b/src/libc/stdlib/strto_fp.c @@ -0,0 +1,217 @@ +#include +#include + +#include +#include +#include + +#include +#include +#include + +/* +** In the following conversions, the significant digits are represented in an +** integer and multiplied at the last moment by a suitable power of 10 (decimal +** representation) or 2 (hexadecimal representation). An integer of a suitable +** size needs to be used; that size is the size of the long double type. 
+** +** TODO: vhex-x86: Using 128-bit long double is untested! +*/ +#if __SIZEOF_LONG_DOUBLE__ == 8 +# define SIGNIFICAND_TYPE uint64_t +# define SIGNIFICAND_DIGITS 17 +#elif __SIZEOF_LONG_DOUBLE__ <= 16 +# define SIGNIFICAND_TYPE unsigned __int128 +# define SIGNIFICAND_DIGITS 38 +#else +# error long double larger than 128 bits is not supported +#endif + +/* Basically strncasecmp. */ +static int ncasecmp(char const *left, char const *right, size_t n) +{ + for(size_t i = 0; i < n; i++) { + int diff = tolower(left[i]) - tolower(right[i]); + if(diff) return diff; + } + return 0; +} + +/* +** Parse digits and exponent into integers, in decimal or hexadecimal notation. +** +** -> In decimal notation, we read up to 17 (64-bit) or 38 (128-bit) digits, +** which is enough to fill the mantissa of a long double, and later multiply +** the digits by a power of 10. The main approximation is the power of 10. +** +** -> In hexadecimal notation, we read as many bits as the mantissa of a long +** double, then later multiply by a power of 2. There are no approximations. +*/ +static void parse_digits(char const * restrict *ptr0, bool *valid, + SIGNIFICAND_TYPE *digits, long *exponent, bool hexadecimal) +{ + char const *ptr = *ptr0; + bool dot_found = false; + int digits_found = 0; + + *digits = 0; + *exponent = 0; + + int max_digits = hexadecimal ? LDBL_MANT_DIG / 4 : SIGNIFICAND_DIGITS; + + /* TODO: locale: use a locale-aware decimal separator */ + int dot_character = '.'; + int exp_character = (hexadecimal ? 
'p' : 'e'); + + for(int i = 0; isdigit(*ptr) || (hexadecimal && isxdigit(*ptr)) + || *ptr == dot_character; i++, ptr++) { + + /* Allow only one dot in the string, stop at the second one */ + if(*ptr == dot_character && dot_found) break; + + if(*ptr == dot_character) { + dot_found = true; + continue; + } + + /* Count digits only until SIGNIFICAND_DIGITS */ + if(digits_found < max_digits) { + if(hexadecimal) { + int v = *ptr - '0'; + if(!isdigit(*ptr)) v = tolower(*ptr)-'a'+10; + *digits = (*digits << 4) + v; + } + else { + *digits = (*digits * 10) + (*ptr - '0'); + } + } + else (*exponent)++; + + if(dot_found) (*exponent)--; + + /* But also round at the first discarded one */ + if(digits_found == max_digits && *ptr >= '5') + (*digits)++; + + digits_found++; + } + + /* Require at least one digit to be present; if not, the whole string + is considered invalid */ + if(!digits_found) { + *valid = false; + return; + } + + /* In hexadecimal, each character is worth 4 bits of exponent */ + if(hexadecimal) (*exponent) *= 4; + + /* Parse exponent */ + if(tolower(*ptr) == exp_character) { + char *end; + long e = strtol(ptr + 1, &end, 10); + + /* If an integer cannot be parsed, ignore the 'e...' 
part */ + if(end != ptr + 1) { + ptr = end; + *exponent += e; + } + } + + *ptr0 = ptr; + *valid = true; +} + +int strto_fp(char const * restrict ptr, char ** restrict endptr, double *out, + float *outf, long double *outl) +{ + /* Save the value of ptr in endptr, in case format is invalid */ + if(endptr) *endptr = (char *)ptr; + + /* Skip initial whitespace */ + while(isspace(*ptr)) ptr++; + + /* Read optional sign */ + bool negative = false; + if(*ptr == '-') negative = true; + if(*ptr == '-' || *ptr == '+') ptr++; + + int errno_value = 0; + bool valid = true; + + /* Result variable */ + if(out) *out = 0.0; + if(outf) *outf = 0.0f; + if(outl) *outl = 0.0l; + + /* NaN possibly with an argument */ + if(!ncasecmp(ptr, "nan", 3)) { + char const *arg = ""; + ptr += 3; + if(ptr[0] == '(') { + arg = ptr + 1; + do ptr++; + while(ptr[-1] != ')'); + } + + if(out) *out = __builtin_nan(arg); + if(outf) *outf = __builtin_nanf(arg); + if(outl) *outl = __builtin_nanl(arg); + } + /* Infinity */ + else if(!ncasecmp(ptr, "infinity", 8)) { + if(out) *out = __builtin_inf(); + if(outf) *outf = __builtin_inff(); + if(outl) *outl = __builtin_infl(); + ptr += 8; + } + else if(!ncasecmp(ptr, "inf", 3)) { + if(out) *out = __builtin_inf(); + if(outf) *outf = __builtin_inff(); + if(outl) *outl = __builtin_infl(); + ptr += 3; + } + else { + SIGNIFICAND_TYPE digits = 0; + long e = 0; + + if(ptr[0] == '0' && tolower(ptr[1]) == 'x') { + ptr += 2; + parse_digits(&ptr, &valid, &digits, &e, true); + + if(out) *out = (double)digits * exp2(e); + if(outf) *outf = (float)digits * exp2f(e); + if(outl) *outl = (long double)digits * exp2l(e); + } + else { + parse_digits(&ptr, &valid, &digits, &e, false); + + if(out) *out = (double)digits * pow(10, e); + if(outf) *outf = (float)digits * powf(10, e); + if(outl) *outl = (long double)digits * powl(10, e); + } + + /* + ** Detect overflow, somewhat. 
Implementation is not required to + ** set errno on underflow, which makes things much easier for + ** us as underflow gives 0 (7.20.1.3§10). + */ + if((out && *out == HUGE_VAL) + || (outf && *outf == HUGE_VALF) + || (outl && *outl == HUGE_VALL)) { + errno_value = ERANGE; + } + } + + /* Apply sign; this method is allowed by 7.20.1.3§4.249 */ + if(negative) { + if(out) *out = -(*out); + if(outf) *outf = -(*outf); + if(outl) *outl = -(*outl); + } + + /* Save the result pointer */ + if(endptr && valid) *endptr = (char *)ptr; + + return errno_value; +} diff --git a/src/libc/stdlib/strtod.c b/src/libc/stdlib/strtod.c new file mode 100644 index 0000000..80dede8 --- /dev/null +++ b/src/libc/stdlib/strtod.c @@ -0,0 +1,10 @@ +#include "stdlib_p.h" +#include + +double strtod(char const * restrict ptr, char ** restrict endptr) +{ + double d = 0; + int err = strto_fp(ptr, endptr, &d, NULL, NULL); + if(err != 0) errno = err; + return d; +} diff --git a/src/libc/stdlib/strtof.c b/src/libc/stdlib/strtof.c new file mode 100644 index 0000000..476005d --- /dev/null +++ b/src/libc/stdlib/strtof.c @@ -0,0 +1,10 @@ +#include "stdlib_p.h" +#include + +float strtof(char const * restrict ptr, char ** restrict endptr) +{ + float f = 0; + int err = strto_fp(ptr, endptr, NULL, &f, NULL); + if(err != 0) errno = err; + return f; +} diff --git a/src/libc/stdlib/strtold.c b/src/libc/stdlib/strtold.c index e2a4aa5..bcb137c 100644 --- a/src/libc/stdlib/strtold.c +++ b/src/libc/stdlib/strtold.c @@ -1,193 +1,10 @@ -#include -#include - -#include -#include -#include - -#include +#include "stdlib_p.h" #include -#include - -/* -** In the following conversions, the significant digits are represented in an -** integer and multiplied at the last moment by a suitable power of 10 (decimal -** representation) or 2 (hexadecimal representation). An integer of a suitable -** size needs to be used; that size is the size of the long double type. -** -** TODO: vhex-x86: Using 128-bit long double is untested! 
-*/ -#if __SIZEOF_LONG_DOUBLE__ == 8 -# define SIGNIFICAND_TYPE uint64_t -# define SIGNIFICAND_DIGITS 17 -#elif __SIZEOF_LONG_DOUBLE__ <= 16 -# define SIGNIFICAND_TYPE unsigned __int128 -# define SIGNIFICAND_DIGITS 38 -#else -# error long double larger than 128 bits is not supported -#endif - -/* Basically strncasecmp. */ -static int ncasecmp(char const *left, char const *right, size_t n) -{ - for(size_t i = 0; i < n; i++) { - int diff = tolower(left[i]) - tolower(right[i]); - if(diff) return diff; - } - return 0; -} - -/* -** Parse digits and exponent into integers, in decimal or hexadecimal notation. -** -** -> In decimal notation; we read up to 19 (64-bit) or 38 (128-bit) digits, -* which is enough to fill the mantissa of a long double, and later multiply -** the digits by a power of 10. The main approximation is the power of 10. -** -** -> In hexadecimal notation, we read as many bits as the mantissa of a long -** double, then later multiply by a power of 2. There are no approximations. -*/ -static void parse_digits(char const * restrict *ptr0, bool *valid, - SIGNIFICAND_TYPE *digits, long *exponent, bool hexadecimal) -{ - char const *ptr = *ptr0; - bool dot_found = false; - int digits_found = 0; - - *digits = 0; - *exponent = 0; - - int max_digits = hexadecimal ? LDBL_MANT_DIG / 4 : SIGNIFICAND_DIGITS; - - /* TODO: locale: use a locale-aware decimal separator */ - int dot_character = '.'; - int exp_character = (hexadecimal ? 
'p' : 'e'); - - for(int i = 0; isdigit(*ptr) || (hexadecimal && isxdigit(*ptr)) - || *ptr == dot_character; i++, ptr++) { - - /* Allow only one dot in the string, stop at the second one */ - if(*ptr == dot_character && dot_found) break; - - if(*ptr == dot_character) { - dot_found = true; - continue; - } - - /* Count digits only until SIGNIFICAND_DIGITS */ - if(digits_found < max_digits) { - if(hexadecimal) { - int v = *ptr - '0'; - if(!isdigit(*ptr)) v = tolower(*ptr)-'a'+10; - *digits = (*digits << 4) + v; - } - else { - *digits = (*digits * 10) + (*ptr - '0'); - } - } - else (*exponent)++; - - if(dot_found) (*exponent)--; - - /* But also round at the first discarded one */ - if(digits_found == max_digits && *ptr >= '5') - (*digits)++; - - digits_found++; - } - - /* Require at least one digit to be present; if not, the whole string - is considered invalid */ - if(!digits_found) { - *valid = false; - return; - } - - /* In hexadecimal, each character is worth 4 bits of exponent */ - if(hexadecimal) (*exponent) *= 4; - - /* Parse exponent */ - if(tolower(*ptr) == exp_character) { - char *end; - long e = strtol(ptr + 1, &end, 10); - - /* If an integer cannot be parsed, ignore the 'e...' 
part */ - if(end != ptr + 1) { - ptr = end; - *exponent += e; - } - } - - *ptr0 = ptr; - *valid = true; -} long double strtold(char const * restrict ptr, char ** restrict endptr) { - /* Save the value of ptr in endptr, in case format is invalid */ - if(endptr) *endptr = (char *)ptr; - - /* Skip initial whitespace */ - while(isspace(*ptr)) ptr++; - - /* Read optional sign */ - bool negative = false; - if(*ptr == '-') negative = true; - if(*ptr == '-' || *ptr == '+') ptr++; - - /* Result variable */ - bool valid = true; - long double x = 0.0; - - /* NaN possibly with an argument */ - if(!ncasecmp(ptr, "nan", 3)) { - if(ptr[3] == '(') { - x = __builtin_nanl(ptr+4); - while(*ptr != ')') ptr++; - } - else { - x = __builtin_nanl(""); - ptr += 3; - } - } - /* Infinity */ - else if(!ncasecmp(ptr, "infinity", 8)) { - x = __builtin_infl(); - ptr += 8; - } - else if(!ncasecmp(ptr, "inf", 3)) { - x = __builtin_infl(); - ptr += 3; - } - else { - SIGNIFICAND_TYPE digits = 0; - long exponent = 0; - - if(ptr[0] == '0' && tolower(ptr[1]) == 'x') { - ptr += 2; - parse_digits(&ptr, &valid, &digits, &exponent, true); - x = (long double)digits * exp2(exponent); - } - else { - parse_digits(&ptr, &valid, &digits, &exponent, false); - x = (long double)digits * powl(10, exponent); - } - - /* - ** Detect overflow, somewhat. Implementation is not required to - ** set errno on underflow, which makes things much easier for - ** us as underflow gives 0 (7.20.1.3§10). - */ - if(x == HUGE_VALL) { - errno = ERANGE; - } - } - - /* Apply sign; this method is allowed by 7.20.1.3§4.249 */ - if(negative) x = -x; - - /* Save the result pointer */ - if(endptr && valid) *endptr = (char *)ptr; - - return x; + long double ld = 0; + int err = strto_fp(ptr, endptr, NULL, NULL, &ld); + if(err != 0) errno = err; + return ld; }