From 26e54af8e0b185596d1eb08a41590123d85c7fd1 Mon Sep 17 00:00:00 2001 From: Lephenixnoir Date: Mon, 22 Aug 2022 19:00:13 +0200 Subject: [PATCH] stdlib: scanf-friendly strto* functions --- CMakeLists.txt | 1 + include/fxlibc/printf.h | 22 +++---- src/stdio/scanf/scan.c | 37 +++++++++++ src/stdio/stdio_p.h | 45 +++++++++++++ src/stdlib/stdlib_p.h | 13 ++-- src/stdlib/strto_fp.c | 141 +++++++++++++++++++++++----------------- src/stdlib/strto_int.c | 47 +++++++------- src/stdlib/strtod.c | 14 +++- src/stdlib/strtof.c | 14 +++- src/stdlib/strtol.c | 14 +++- src/stdlib/strtold.c | 14 +++- src/stdlib/strtoll.c | 14 +++- src/stdlib/strtoul.c | 14 +++- src/stdlib/strtoull.c | 14 +++- 14 files changed, 288 insertions(+), 116 deletions(-) create mode 100644 src/stdio/scanf/scan.c create mode 100644 src/stdio/stdio_p.h diff --git a/CMakeLists.txt b/CMakeLists.txt index 71c6693..7333e3e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -140,6 +140,7 @@ set(SOURCES src/stdio/puts.c src/stdio/remove.c src/stdio/rewind.c + src/stdio/scanf/scan.c src/stdio/setbuf.c src/stdio/setvbuf.c src/stdio/snprintf.c diff --git a/include/fxlibc/printf.h b/include/fxlibc/printf.h index bac624e..a6b6c67 100644 --- a/include/fxlibc/printf.h +++ b/include/fxlibc/printf.h @@ -75,7 +75,6 @@ extern int __printf( va_list *__args); - /* Format extension API. */ struct __printf_format { @@ -84,12 +83,8 @@ struct __printf_format { /* How much significant characters of data, meaning varies. */ int16_t precision; - /* - ** Size specifier for integers (%o, %x, %i, %d, %u), is equal to the - ** sizeof() of the targeted type. Also used for %lc. - */ + /* Size of targeted integer type (%o, %x, %i, %d, %u), in bytes */ uint8_t size; - /* (#) Alternative form: base prefixes, decimal point. */ uint8_t alternative :1; /* ( ) Add a blank sign before nonnegative numbers. */ @@ -111,15 +106,14 @@ struct __printf_format { /* ** Type of format functions. -** -> __spec is the specifier letter (eg. 
"d" in "%d") -** -> __opts are the length, precision, sign, alignment, etc. options +** -> __out specifies the output and is used when generating text +** -> __fmt contains the format options and specifier letter ** -> __args is a pointer to the variable list of arguments to read from */ typedef void __printf_formatter_t( struct __printf_output *__out, - struct __printf_format *__opts, - va_list *__args -); + struct __printf_format *__fmt, + va_list *__args); /* ** Register a new format. @@ -127,10 +121,10 @@ typedef void __printf_formatter_t( ** The formatter designated by the specified lowercase or uppercase letter ** (eg 'p' or 'P') is registered. This functions allows overriding default ** formatters, but this is very much discouraged. Letters with special meaning -** in the standard cannot be changed. A formatted can be removed of disabled by +** in the standard cannot be changed. A formatter can be removed or disabled by ** registering NULL. ** -** Here are used characters in the C standard: +** Here are the characters used/reserved in the C standard: ** ** a: Hexadecimal floating-point A: Hexadecimal floating-point ** b: _ B: _ @@ -138,7 +132,7 @@ typedef void __printf_formatter_t( ** d: Decimal integer D: _ ** e: Exponent floating-point E: Exponent floating-point ** f: Floating-point F: Floating-point -** g: General floating-point G: General: floating-point +** g: General floating-point G: General floating-point ** h: short or char size H: _ ** i: Integer I: Locale-aware digits ** j: intmax_t size J: _ diff --git a/src/stdio/scanf/scan.c b/src/stdio/scanf/scan.c new file mode 100644 index 0000000..1bc93ee --- /dev/null +++ b/src/stdio/scanf/scan.c @@ -0,0 +1,37 @@ +#include +#include "../stdio_p.h" +#include "../../stdlib/stdlib_p.h" + +void __scanf_start(struct __scanf_input *__in) +{ + + if(__in->fp) + __in->buffer = fgetc(__in->fp); + else { + __in->buffer = *__in->str; + __in->str += (__in->buffer != 0); + } +} + +int __scanf_fetch(struct __scanf_input 
*__in) +{ + if(__in->fp) + return fgetc(__in->fp); + + int c = *__in->str; + if(c == 0) + return EOF; + __in->str++; + return c; +} + +void __scanf_end(struct __scanf_input *__in) +{ + if(__in->buffer == EOF) + return; + + if(__in->fp) + ungetc(__in->buffer, __in->fp); + else + __in->str--; +} diff --git a/src/stdio/stdio_p.h b/src/stdio/stdio_p.h new file mode 100644 index 0000000..150fc22 --- /dev/null +++ b/src/stdio/stdio_p.h @@ -0,0 +1,45 @@ +#ifndef __STDIO_P_H__ +# define __STDIO_P_H__ + +#include + +/* +** General utilities for scanf(); we expose them here as we use subfunctions of +** strto*() from <stdlib.h> to implement numerical specifiers. +*/ + +/* +** Input for scanf; exactly one of str and fp must be non-NULL. We include a +** single-character buffer for convenience for scanning functions to test the +** next character, which can be flushed back by ungetc(). +*/ +struct __scanf_input { + char const * __restrict__ str; + FILE *fp; + int buffer; +}; + +/* Initialize the input by feeding the buffer byte. */ +void __scanf_start(struct __scanf_input *__in); + +/* Fetch the next byte from the input and return it (don't call directly). */ +int __scanf_fetch(struct __scanf_input *__in); + +/* Read the next byte while maintaining the buffer. */ +static inline int __scanf_in(struct __scanf_input *__in) +{ + int c = __in->buffer; + __in->buffer = __scanf_fetch(__in); + return c; +} + +/* Peek the next byte without advancing. */ +static inline int __scanf_peek(struct __scanf_input *__in) +{ + return __in->buffer; +} + +/* Close the input by unsending the buffer once finished. */ +void __scanf_end(struct __scanf_input *__in); + +#endif /* __STDIO_P_H__ */ diff --git a/src/stdlib/stdlib_p.h b/src/stdlib/stdlib_p.h index 5831031..c368086 100644 --- a/src/stdlib/stdlib_p.h +++ b/src/stdlib/stdlib_p.h @@ -3,6 +3,7 @@ #include #include +#include "../stdio/stdio_p.h" /* ** Parse an integer from a string. This is the base function for strtol, @@ -23,8 +24,7 @@ ** expensive. 
*/ int __strto_int( - char const * restrict __ptr, - char ** restrict __endptr, + struct __scanf_input *__input, int __base, long *__outl, long long *__outll, @@ -39,10 +39,9 @@ int __strto_int( ** and outl is set. */ int __strto_fp( - char const * restrict __ptr, - char ** restrict __endptr, - double *__out, - float *__outf, - long double *__outl); + struct __scanf_input *__input, + double *__out, + float *__outf, + long double *__outl); #endif /*__STDLIB_P_H__*/ diff --git a/src/stdlib/strto_fp.c b/src/stdlib/strto_fp.c index d9b6cef..14e089e 100644 --- a/src/stdlib/strto_fp.c +++ b/src/stdlib/strto_fp.c @@ -1,3 +1,4 @@ +#include "stdlib_p.h" #include #include @@ -37,12 +38,11 @@ ** -> In hexadecimal notation, we read as many bits as the mantissa of a long ** double, then later multiply by a power of 2. There are no approximations. */ -static void parse_digits(char const * restrict *ptr0, bool *valid, +static bool parse_digits(struct __scanf_input *input, SIGNIFICAND_TYPE *digits, long *exponent, bool hexadecimal) { - char const *ptr = *ptr0; bool dot_found = false; - int digits_found = 0; + int digits_found=0, c=0; *digits = 0; *exponent = 0; @@ -53,13 +53,14 @@ static void parse_digits(char const * restrict *ptr0, bool *valid, int dot_character = '.'; int exp_character = (hexadecimal ? 
'p' : 'e'); - for(int i = 0; isdigit(*ptr) || (hexadecimal && isxdigit(*ptr)) - || *ptr == dot_character; i++, ptr++) { + for(int i = 0; true; i++) { + c = __scanf_peek(input); + if(!(isdigit(c) || + (hexadecimal && isxdigit(c)) || + (c == dot_character && !dot_found))) break; + __scanf_in(input); - /* Allow only one dot in the string, stop at the second one */ - if(*ptr == dot_character && dot_found) break; - - if(*ptr == dot_character) { + if(c == dot_character) { dot_found = true; continue; } @@ -67,12 +68,12 @@ static void parse_digits(char const * restrict *ptr0, bool *valid, /* Count digits only until SIGNIFICAND_DIGITS */ if(digits_found < max_digits) { if(hexadecimal) { - int v = *ptr - '0'; - if(!isdigit(*ptr)) v = tolower(*ptr)-'a'+10; + int v = c - '0'; + if(!isdigit(c)) v = tolower(c) - 'a' + 10; *digits = (*digits << 4) + v; } else { - *digits = (*digits * 10) + (*ptr - '0'); + *digits = (*digits * 10) + (c - '0'); } } else (*exponent)++; @@ -80,7 +81,7 @@ static void parse_digits(char const * restrict *ptr0, bool *valid, if(dot_found) (*exponent)--; /* But also round at the first discarded one */ - if(digits_found == max_digits && *ptr >= '5') + if(digits_found == max_digits && c >= '5') (*digits)++; digits_found++; @@ -88,46 +89,54 @@ static void parse_digits(char const * restrict *ptr0, bool *valid, /* Require at least one digit to be present; if not, the whole string is considered invalid */ - if(!digits_found) { - *valid = false; - return; - } + if(!digits_found) + return false; /* In hexadecimal, each character is worth 4 bits of exponent */ if(hexadecimal) (*exponent) *= 4; /* Parse exponent */ - if(tolower(*ptr) == exp_character) { - char *end; - long e = strtol(ptr + 1, &end, 10); + if(tolower(__scanf_peek(input)) == exp_character) { + /* Hack: Restore the str pointer if this fails (which we + cannot determine with a single lookahead) so that *endptr is + set correctly */ + struct __scanf_input backup = *input; - /* If an integer cannot be 
parsed, ignore the 'e...' part */ - if(end != ptr + 1) { - ptr = end; + __scanf_in(input); + long e = 0; + if(__strto_int(input, 10, &e, NULL, false) == 0) *exponent += e; - } + else + *input = backup; } - *ptr0 = ptr; - *valid = true; + return true; } -int __strto_fp(char const * restrict ptr, char ** restrict endptr, double *out, - float *outf, long double *outl) +static bool expect(struct __scanf_input *input, char const *sequence) { - /* Save the value of ptr in endptr, in case format is invalid */ - if(endptr) *endptr = (char *)ptr; + for(int i = 0; sequence[i]; i++) { + int c = __scanf_in(input); + if(tolower(c) != tolower(sequence[i])) + return false; + } + return true; +} +int __strto_fp(struct __scanf_input *input, double *out, float *outf, + long double *outl) +{ /* Skip initial whitespace */ - while(isspace(*ptr)) ptr++; + while(isspace(__scanf_peek(input))) __scanf_in(input); /* Read optional sign */ bool negative = false; - if(*ptr == '-') negative = true; - if(*ptr == '-' || *ptr == '+') ptr++; + int sign = __scanf_peek(input); + if(sign == '-') negative = true; + if(sign == '-' || sign == '+') __scanf_in(input); int errno_value = 0; - bool valid = true; + bool valid = false; /* Result variable */ if(out) *out = 0.0; @@ -135,47 +144,64 @@ int __strto_fp(char const * restrict ptr, char ** restrict endptr, double *out, if(outl) *outl = 0.0l; /* NaN possibly with an argument */ - if(!strncasecmp(ptr, "nan", 3)) { - char const *arg = ""; - ptr += 3; - if(ptr[0] == '(') { - arg = ptr + 1; - do ptr++; - while(ptr[-1] != ')'); + if(tolower(__scanf_peek(input)) == 'n') { + if(!expect(input, "nan")) + return EINVAL; + + /* Get the argument for up to 32 bytes */ + char arg[32]; + int i = 0; + + if(__scanf_peek(input) == '(') { + while(i < 31) { + int c = __scanf_in(input); + if(c == ')') break; + arg[i++] = c; + } + arg[i] = 0; } if(out) *out = __builtin_nan(arg); if(outf) *outf = __builtin_nanf(arg); if(outl) *outl = __builtin_nanl(arg); + valid = true; } - /* 
Infinity */ - else if(!strncasecmp(ptr, "infinity", 8)) { + else if(tolower(__scanf_peek(input)) == 'i') { + if(!expect(input, "inf")) + return EINVAL; + if(tolower(__scanf_peek(input)) == 'i' && + !expect(input, "inity")) + return EINVAL; if(out) *out = __builtin_inf(); if(outf) *outf = __builtin_inff(); if(outl) *outl = __builtin_infl(); - ptr += 8; - } - else if(!strncasecmp(ptr, "inf", 3)) { - if(out) *out = __builtin_inf(); - if(outf) *outf = __builtin_inff(); - if(outl) *outl = __builtin_infl(); - ptr += 3; + valid = true; } else { SIGNIFICAND_TYPE digits = 0; long e = 0; - if(ptr[0] == '0' && tolower(ptr[1]) == 'x') { - ptr += 2; - parse_digits(&ptr, &valid, &digits, &e, true); + /* Check for the 0x prefix. Skipping a 0 if we start with 0 but + not 0x isn't a problem. */ + bool hexa = false; + if(__scanf_peek(input) == '0') { + __scanf_in(input); + if(tolower(__scanf_peek(input)) == 'x') { + __scanf_in(input); + hexa = true; + } + /* Count the 0 as a digit */ + else valid = true; + } + if(hexa) { + valid |= parse_digits(input, &digits, &e, true); if(out) *out = (double)digits * exp2(e); if(outf) *outf = (float)digits * exp2f(e); if(outl) *outl = (long double)digits * exp2l(e); } else { - parse_digits(&ptr, &valid, &digits, &e, false); - + valid |= parse_digits(input, &digits, &e, false); if(out) *out = (double)digits * pow(10, e); if(outf) *outf = (float)digits * powf(10, e); if(outl) *outl = (long double)digits * powl(10, e); @@ -200,8 +226,5 @@ int __strto_fp(char const * restrict ptr, char ** restrict endptr, double *out, if(outl) *outl = -(*outl); } - /* Save the result pointer */ - if(endptr && valid) *endptr = (char *)ptr; - - return errno_value; + return valid ? 
errno_value : EINVAL; } diff --git a/src/stdlib/strto_int.c b/src/stdlib/strto_int.c index 8b6d298..6bbb20d 100644 --- a/src/stdlib/strto_int.c +++ b/src/stdlib/strto_int.c @@ -4,19 +4,17 @@ #include #include -int __strto_int(char const * restrict ptr, char ** restrict endptr, int base, - long *outl, long long *outll, bool use_unsigned) +int __strto_int(struct __scanf_input *input, int base, long *outl, + long long *outll, bool use_unsigned) { - /* Save the value of ptr in endptr now in case the format is invalid */ - if(endptr) *endptr = (char *)ptr; - /* Skip initial whitespace */ - while(isspace(*ptr)) ptr++; + while(isspace(__scanf_peek(input))) __scanf_in(input); /* Accept a sign character */ bool negative = false; - if(*ptr == '-') negative = true; - if(*ptr == '-' || *ptr == '+') ptr++; + int sign = __scanf_peek(input); + if(sign == '-') negative = true; + if(sign == '-' || sign == '+') __scanf_in(input); /* Use unsigned variables as only these have defined overflow */ unsigned long xl = 0; @@ -26,29 +24,34 @@ int __strto_int(char const * restrict ptr, char ** restrict endptr, int base, bool valid = false; /* Read prefixes and determine base */ - if((base == 0 || base == 16) && ptr[0]=='0' && tolower(ptr[1])=='x') { - ptr += 2; - base = 16; + if(__scanf_peek(input) == '0') { + __scanf_in(input); + if((base == 0 || base == 16) && + tolower(__scanf_peek(input)) == 'x') { + __scanf_in(input); + base = 16; + } + /* If we don't consume the x then count the 0 as a digit */ + else valid = true; + if(base == 0) + base = 8; } - else if(base == 0 && ptr[0] == '0') { - ptr++; - base = 8; - } - else if(base == 0) { + if(base == 0) base = 10; - } /* Read digits */ while(1) { int v = -1; - if(isdigit(*ptr)) v = *ptr - '0'; - if(islower(*ptr)) v = *ptr - 'a' + 10; + int c = __scanf_peek(input); + if(isdigit(c)) v = c - '0'; + if(islower(c)) v = c - 'a' + 10; if(v == -1 || v >= base) break; /* The value is valid as long as there is at least one digit */ valid = true; /* (x 
= base*x + v) but with overflow checks */ + /* TODO: strto_int: We might fail to represent [L]LONG_MIN */ if(outl) { if(__builtin_umull_overflow(xl, base, &xl)) errno_value = ERANGE; @@ -62,7 +65,7 @@ int __strto_int(char const * restrict ptr, char ** restrict endptr, int base, errno_value = ERANGE; } - ptr++; + __scanf_in(input); } /* Handle sign and range */ @@ -101,6 +104,6 @@ int __strto_int(char const * restrict ptr, char ** restrict endptr, int base, if(outl) *outl = xl; if(outll) *outll = xll; - if(endptr && valid) *endptr = (char *)ptr; - return errno_value; + + return valid ? errno_value : EINVAL; } diff --git a/src/stdlib/strtod.c b/src/stdlib/strtod.c index 80716fd..130233b 100644 --- a/src/stdlib/strtod.c +++ b/src/stdlib/strtod.c @@ -4,7 +4,17 @@ double strtod(char const * restrict ptr, char ** restrict endptr) { double d = 0; - int err = __strto_fp(ptr, endptr, &d, NULL, NULL); - if(err != 0) errno = err; + if(endptr) + *endptr = (char *)ptr; + + struct __scanf_input in = { .str = ptr, .fp = NULL }; + __scanf_start(&in); + int err = __strto_fp(&in, &d, NULL, NULL); + __scanf_end(&in); + + if(err != 0) + errno = err; + if(err != EINVAL && endptr) + *endptr = (char *)in.str; return d; } diff --git a/src/stdlib/strtof.c b/src/stdlib/strtof.c index 271aed0..38240ea 100644 --- a/src/stdlib/strtof.c +++ b/src/stdlib/strtof.c @@ -4,7 +4,17 @@ float strtof(char const * restrict ptr, char ** restrict endptr) { float f = 0; - int err = __strto_fp(ptr, endptr, NULL, &f, NULL); - if(err != 0) errno = err; + if(endptr) + *endptr = (char *)ptr; + + struct __scanf_input in = { .str = ptr, .fp = NULL }; + __scanf_start(&in); + int err = __strto_fp(&in, NULL, &f, NULL); + __scanf_end(&in); + + if(err != 0) + errno = err; + if(err != EINVAL && endptr) + *endptr = (char *)in.str; return f; } diff --git a/src/stdlib/strtol.c b/src/stdlib/strtol.c index b7da918..c0609b3 100644 --- a/src/stdlib/strtol.c +++ b/src/stdlib/strtol.c @@ -4,7 +4,17 @@ long int strtol(char const 
* restrict ptr, char ** restrict endptr, int base) { long n = 0; - int err = __strto_int(ptr, endptr, base, &n, NULL, false); - if(err != 0) errno = err; + if(endptr) + *endptr = (char *)ptr; + + struct __scanf_input in = { .str = ptr, .fp = NULL }; + __scanf_start(&in); + int err = __strto_int(&in, base, &n, NULL, false); + __scanf_end(&in); + + if(err != 0) + errno = err; + if(err != EINVAL && endptr) + *endptr = (char *)in.str; return n; } diff --git a/src/stdlib/strtold.c b/src/stdlib/strtold.c index fcb8474..bf6421b 100644 --- a/src/stdlib/strtold.c +++ b/src/stdlib/strtold.c @@ -4,7 +4,17 @@ long double strtold(char const * restrict ptr, char ** restrict endptr) { long double ld = 0; - int err = __strto_fp(ptr, endptr, NULL, NULL, &ld); - if(err != 0) errno = err; + if(endptr) + *endptr = (char *)ptr; + + struct __scanf_input in = { .str = ptr, .fp = NULL }; + __scanf_start(&in); + int err = __strto_fp(&in, NULL, NULL, &ld); + __scanf_end(&in); + + if(err != 0) + errno = err; + if(err != EINVAL && endptr) + *endptr = (char *)in.str; return ld; } diff --git a/src/stdlib/strtoll.c b/src/stdlib/strtoll.c index ad1c972..2defd3d 100644 --- a/src/stdlib/strtoll.c +++ b/src/stdlib/strtoll.c @@ -5,7 +5,17 @@ long long int strtoll(char const * restrict ptr, char ** restrict endptr, int base) { long long n = 0; - int err = __strto_int(ptr, endptr, base, NULL, &n, false); - if(err != 0) errno = err; + if(endptr) + *endptr = (char *)ptr; + + struct __scanf_input in = { .str = ptr, .fp = NULL }; + __scanf_start(&in); + int err = __strto_int(&in, base, NULL, &n, false); + __scanf_end(&in); + + if(err != 0) + errno = err; + if(err != EINVAL && endptr) + *endptr = (char *)in.str; return n; } diff --git a/src/stdlib/strtoul.c b/src/stdlib/strtoul.c index 2a6294b..a92c265 100644 --- a/src/stdlib/strtoul.c +++ b/src/stdlib/strtoul.c @@ -5,7 +5,17 @@ unsigned long int strtoul(char const * restrict ptr, char ** restrict endptr, int base) { unsigned long n = 0; - int err = 
__strto_int(ptr, endptr, base, (long *)&n, NULL, true); - if(err != 0) errno = err; + if(endptr) + *endptr = (char *)ptr; + + struct __scanf_input in = { .str = ptr, .fp = NULL }; + __scanf_start(&in); + int err = __strto_int(&in, base, (long *)&n, NULL, true); + __scanf_end(&in); + + if(err != 0) + errno = err; + if(err != EINVAL && endptr) + *endptr = (char *)in.str; return n; } diff --git a/src/stdlib/strtoull.c b/src/stdlib/strtoull.c index d643947..aad4bd3 100644 --- a/src/stdlib/strtoull.c +++ b/src/stdlib/strtoull.c @@ -5,7 +5,17 @@ unsigned long long int strtoull(char const * restrict ptr, char ** restrict endptr, int base) { unsigned long long n = 0; - int err = __strto_int(ptr, endptr, base, NULL, (long long *)&n, true); - if(err != 0) errno = err; + if(endptr) + *endptr = (char *)ptr; + + struct __scanf_input in = { .str = ptr, .fp = NULL }; + __scanf_start(&in); + int err = __strto_int(&in, base, NULL, (long long *)&n, true); + __scanf_end(&in); + + if(err != 0) + errno = err; + if(err != EINVAL && endptr) + *endptr = (char *)in.str; return n; }