diff --git a/src/stdio/scanf/scan.c b/src/stdio/scanf/scan.c index 82192fc..0bf137d 100644 --- a/src/stdio/scanf/scan.c +++ b/src/stdio/scanf/scan.c @@ -106,152 +106,220 @@ void __scanf_store_d(long double value, int size, va_list *args) // %ms and %m[set] are not implemented (with memory allocation while parsing a chain or a set of characters) +struct scanf_format { + /* Maximum field width */ + int field_width; + /* Size of the assigned (integer or floating-point) type, in bytes */ + uint8_t size; + /* Whether to skip assignment */ + bool skip; -/* Set of bytes allowed in a given set %[]. */ -static uint8_t bracket_set[32]; + /* Set of bytes allowed for bracket sets in %[] */ + uint8_t bracket_set[32]; +}; /* Allow/disallow the entire set */ -static void bracket_set_init(bool allow) +static void bracket_set_init(uint8_t *set, bool allow) { - memset(bracket_set, allow ? 0xff : 0x00, sizeof bracket_set); + memset(set, allow ? 0xff : 0x00, 32); } /* Allow/disallow a range of characters. Both ends are included. */ -static void bracket_set_range(uint8_t start, uint8_t end, bool allow) +static void bracket_set_range( + uint8_t *set, uint8_t start, uint8_t end, bool allow) { for(int u = start; u <= end; u++) { int byte = u >> 3; int bit = 1 << (u & 7); if(allow) - bracket_set[byte] |= bit; + set[byte] |= bit; else - bracket_set[byte] &= ~bit; + set[byte] &= ~bit; } } /* Check whether a byte is allowed by the bracket set. */ -static bool bracket_set_test(int c) +static bool bracket_set_test(uint8_t *set, int c) { - int byte = (c >> 3); - int bit = 1 << (c & 7); - return (c != EOF) && (bracket_set[byte] & bit); + return (c != EOF) && (set[c >> 3] & (1 << (c & 7))); } -/* return 0 if Ok or -1 if syntax err in the set format */ -static int bracket_set_parse(char const * __restrict__ format, int *pos ) +/* Parse a bracket set from a format string. Returns true on success. */ +static bool bracket_set_parse(uint8_t *set, char const *format, int *pos) { - int __sor = 0; - int __eor = 0; - bool __neg = false; + int last = 0; + bool allow = true; + bracket_set_init(set, false); - bracket_set_init(false); - - (*pos)++; - - // next will be a "negation" set - if (format[*pos] == '^' ) { - __neg = true; + /* '^' denotes a negated set */ + if(format[*pos] == '^') { + allow = false; (*pos)++; - bracket_set_init(true); - - // the char ']' is part of the set - if (format[*pos] == ']' ) { - bracket_set_range(']', ']', !__neg); - (*pos)++; - } - } - // the char ']' is included in the allowed set - else if (format[*pos] == ']' ) { - __neg = false; - // the char ']' is part of the set - if (format[*pos] == ']' ) { - bracket_set_range(']', ']', !__neg); + bracket_set_init(set, true); + } + /* ']' as the first character adds ']' to the set */ + if(format[*pos] == ']' ) { + bracket_set_range(set, ']', ']', allow); + (*pos)++; + } + + for(; format[*pos] && format[*pos] != ']'; (*pos)++) { + /* '-' as the last character, thus explicitly in the set */ + if(format[*pos] == '-' && format[*pos + 1] == ']') + bracket_set_range(set, '-', '-', allow); + /* '-' as denoting a range */ + else if(format[*pos] == '-') { (*pos)++; + bracket_set_range(set, last, format[*pos], allow); + } + /* Any other character */ + else { + last = format[*pos]; + bracket_set_range(set, last, last, allow); } } + return (format[*pos] == ']'); +} + +static int parse_fmt(char const *fmt, int *pos, struct scanf_format *opt) +{ + opt->field_width = INT_MAX; + opt->size = sizeof(int); + opt->skip = false; + + int width = 0; + char size_letter = 0; + while(1) { - // we find a '-' so need to check if we are considering a range or the char '-' only - if (format[*pos]=='-') { - // the char '-' is included in the allowed set - if (format[*pos+1]==']') { - bracket_set_range('-', '-', !__neg); - (*pos)++; - // we have now finished the reading of the set cause the following char is ']' - return 0; - } - // the char '-' indicates a range of char to be included into the set - else { - (*pos)++; - __eor = format[*pos]; - bracket_set_range( __sor, __eor, !__neg ); - } - } - // we find the char ']' so it means we reach the end of this set - else if (format[*pos]==']') return 0; - // if we reach the '\0' we have a syntax problem - else if (format[*pos]=='\0') return -1; - // we are considering one particular char and prepare for a potential range if we find the char '-' later on - else { - __sor = format[*pos]; - bracket_set_range(__sor, __sor, !__neg); + (*pos)++; + + switch(fmt[*pos]) { + case '*': + opt->skip = true; + break; + + case 'h': + opt->size = (size_letter=='h') ? sizeof(char) : sizeof(short); + size_letter = 'h'; + break; + case 'l': + opt->size = (size_letter=='l') ? sizeof(long long) : sizeof(long); + size_letter = 'l'; + break; + case 'L': + opt->size = sizeof(long double); + size_letter = 'L'; + break; + case 'j': + opt->size = sizeof(intmax_t); + break; + case 'z': + opt->size = sizeof(size_t); + break; + case 't': + opt->size = sizeof(ptrdiff_t); + break; + + case '0' ... '9': + width = width * 10 + (fmt[*pos] - '0'); + opt->field_width = width; + break; + + case '[': (*pos)++; + return bracket_set_parse(opt->bracket_set, fmt, pos) ? '[' : 0; + + case 'd': + case 'i': + case 'o': + case 'u': + case 'x': + case 'X': + case 'p': + case 's': + case 'n': + return fmt[*pos]; + + case 'a': + case 'A': + case 'e': + case 'E': + case 'f': + case 'F': + case 'g': + case 'G': + /* Adjust interpretation of no size / 'l' size */ + if(size_letter == 0) + opt->size = sizeof(float); + if(size_letter == 'l') + opt->size = sizeof(double); + return fmt[*pos]; + + case 'c': + if(opt->field_width == INT_MAX) + opt->field_width = 1; + return 'c'; + + default: + return 0; } } + return 0; } - - int __scanf( struct __scanf_input * __restrict__ in, char const * __restrict__ format, va_list *args) { - bool skip = false; - int MOD = sizeof(int); - in->bytes_read = 0; // we haven't started to read char from the input stream int validrets = 0; // to be incremented each time we successfully read and store an input as per the format int err = 0; // err control on __strto_xx( ) functions - int user_length = 0; // length defined by user with a %xx modifier int pos = 0; // current pos in the format string __scanf_start( in ); // TODO: No __scanf_end() in any of the "return validrets"!! - while( format[pos] != 0 ) { - user_length = 0; - MOD = sizeof(int); - skip = false; - - if( format[pos] == ' ' ) { + for(; format[pos]; pos++) { + if(format[pos] == ' ') { __purge_space(in); + continue; } - // we will have to manage a given format - else if( format[pos] == '%' ) { - - int readmaxlength = INT_MAX; - char size_letter = 0; - - // main loop - loopagain: - + else if(format[pos] != '%') { + // if the next char of the stream is corresponding, we validate the read and go to the following char + if(format[pos] == __scanf_peek( in )) { + __scanf_in( in ); + pos++; + continue; + } + else return validrets; // else we return the number of valid read + } + else if(format[pos + 1] == '%') { + if(__scanf_peek(in) != '%') return validrets; + else __scanf_in( in ); pos++; + continue; + } + /* Perform a conversion */ + else { + struct scanf_format opt; + int spec = parse_fmt(format, &pos, &opt); + if(spec == 0) + return validrets; - switch(format[pos]) { + switch(spec) { // we need to decrypt the corresponding scanf set of character case '[': { - err = bracket_set_parse( format, &pos ); - if (err!=0) return validrets; int currentlength = 0; // we need to assign the read char to the corresponding pointer - char *c = skip ? NULL : va_arg(*args, char *); - for(int u=0; ubytes_read; break; - // we are expecting the char '%' to be in the input stream, if not err and return - case '%': { - if (__scanf_peek(in) != '%') return validrets; - else __scanf_in( in ); - break; - } - - // the next read, even if valid, will not be stored - case '*': - skip = true; - goto loopagain; - - case 'h': - MOD = (size_letter == 'h') ? sizeof(char) : sizeof(short); - size_letter = 'h'; - goto loopagain; - case 'l': - MOD = (size_letter == 'l') ? sizeof(long long) : sizeof(long); - /* FP conversions will adjust to sizeof(double) later */ - size_letter = 'l'; - goto loopagain; - case 'L': - MOD = sizeof(long double); - size_letter = 'L'; - goto loopagain; - case 'j': - MOD = sizeof(intmax_t); - goto loopagain; - case 'z': - MOD = sizeof(size_t); - goto loopagain; - case 't': - MOD = sizeof(ptrdiff_t); - goto loopagain; - - case '0' ... '9': { - user_length = user_length * 10 + (int) ( format[pos] - '0' ); - readmaxlength = user_length; - goto loopagain; - break; - } - case 'd': case 'i': case 'o': @@ -328,11 +354,11 @@ int __scanf( long long int temp; err = __strto_int(in, base, NULL, &temp, use_unsigned, - readmaxlength); + opt.field_width); if (err == EOF && validrets == 0) return EOF; if (err != 0) return validrets; - if (skip) __scanf_store_i( temp, 0, args ); - else __scanf_store_i( temp, MOD, args ); + if(!opt.skip) + __scanf_store_i( temp, opt.size, args ); validrets++; break; } @@ -345,51 +371,42 @@ int __scanf( case 'F': case 'g': case 'G': { - /* Adjust interpretation of no size / 'l' size */ - if(size_letter == 0) - MOD = sizeof(float); - if(size_letter == 'l') - MOD = sizeof(double); - // read a double from the current input stream // and store in the corresponding arg as a char by reference long double temp; err = __strto_fp( in, NULL, NULL, &temp, - readmaxlength); + opt.field_width); if (err == EOF && validrets == 0) return EOF; if (err != 0) return validrets; - if (skip) __scanf_store_d( temp, 0, args ); - else __scanf_store_d( temp, MOD, args ); + if(!opt.skip) + __scanf_store_d( temp, opt.size, args ); validrets++; break; } case 'p': { long int temp; - if (!skip) { + if (!opt.skip) { void *p = (void *) va_arg( *args, void** ); // get the adress of the target pointer (void**) err = __strto_int( in, 0, p, NULL, true, - readmaxlength); + opt.field_width); } else err = __strto_int( in, 0, &temp, NULL, true, - readmaxlength); + opt.field_width); if (err == 0) validrets++; else return validrets; - skip = false; break; } case 'c': { - if(readmaxlength == INT_MAX) - readmaxlength = 1; - char *c = skip ? NULL : va_arg(*args, char *); + char *c = opt.skip ? NULL : va_arg(*args, char *); - for(int u = 0; u < readmaxlength; u++) { + for(int u = 0; u < opt.field_width; u++) { int temp = __scanf_in(in); if(temp==EOF) return EOF; else if(c) *c++ = temp; } - validrets += !skip; + validrets += !opt.skip; break; } @@ -398,8 +415,8 @@ int __scanf( int curstrlength = 0; __purge_space(in); - char *c = skip ? NULL : va_arg(*args, char *); - for(int u = 0; u < readmaxlength; u++) { + char *c = opt.skip ? NULL : va_arg(*args, char *); + for(int u = 0; u < opt.field_width; u++) { temp = __scanf_peek(in); if(temp==EOF && curstrlength==0) return validrets; if(isspace(temp) || ((temp==EOF && curstrlength!=0))) { @@ -420,16 +437,6 @@ int __scanf( } } } - // we are looking for a specific character in the input stream - else { - // if the next char of the stream is corresponding, we validate the read and go to the following char - if(format[pos] == __scanf_peek( in )) { - __scanf_in( in ); - pos++; - } - else return validrets; // else we return the number of valid read - } - pos++; } __scanf_end( in );