#include "../stdio_p.h"
#include "../../stdlib/stdlib_p.h"
#include <ctype.h>
#include <limits.h>
#include <stdarg.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Features of this implementation:
   - Specifiers:
     * Integers (%i, %d, %o, %u, %x, %X)
     * Floating-point numbers (%e, %f, %F, %g, %a)
     * Strings (%c, %s, %[])
     * Pointers (%p)
     * Total number of bytes read so far (%n)
   - Integer size modifiers: hh, h, l, ll, L, j, z, t. Supported for all
     integer conversions, i.e. %i, %d, %u, %o, %x, %X and %n.
   - Floating-point size modifiers: l, L for %a, %e, %f, %g.
   - Assignment suppression character '*'.
   - Maximum field width (but buggy for floating-point, see below).

   NOT SUPPORTED:
   - Wide characters: %lc, %ls.
   - q size modifier.
   - "'" (quote) specifier for locale-specific thousand separators.
   - String allocation: %mc, %ms.
   - Out-of-order assignments "%n$".
   - TODO: Maximum field width for floating-point is mostly untested and
     likely has bugs when the field ends in the middle of the number. */

/* Load the first look-ahead character into in->buffer, from the stream if
   in->fp is set, from the string in->str otherwise. String bytes are read as
   unsigned char so that they take the same values as fgetc() results and a
   0xFF byte cannot be mistaken for EOF. */
void __scanf_start(struct __scanf_input *in)
{
	if(in->fp) {
		in->buffer = fgetc(in->fp);
		return;
	}

	unsigned char first = (unsigned char)*in->str;
	in->buffer = first ? (int)first : EOF;
	/* Only step over a real character, never over the terminator */
	in->str += (in->buffer != EOF);
}

/* Read the next input character from the stream or the string. Returns EOF
   at the end of the stream or on the string's NUL terminator (which is not
   stepped over). */
int __scanf_fetch(struct __scanf_input *in)
{
	if(in->fp)
		return fgetc(in->fp);

	int c = (unsigned char)*in->str;
	if(c == 0)
		return EOF;

	in->str++;
	return c;
}

/* Give the pending look-ahead character back to the input: ungetc() for a
   stream, stepping the pointer back for a string. Nothing to undo when the
   look-ahead is EOF. */
void __scanf_end(struct __scanf_input *in)
{
	if(in->buffer == EOF)
		return;

	if(in->fp)
		ungetc(in->buffer, in->fp);
	else
		in->str--;
}

/* Consume input for as long as the look-ahead is a whitespace character. */
static void __skip_spaces(struct __scanf_input *in)
{
	while(isspace(__scanf_peek(in)))
		__scanf_in(in);
}

/* Store an integer through the next pointer argument, whose pointee has the
   given size in bytes. For sizes other than 1, 2, 4 and 8, nothing is stored
   and no argument is consumed. */
static void __scanf_store_i(int64_t value, int size, va_list *args)
{
	if(size == 1)
		*va_arg(*args, int8_t *) = value;
	else if(size == 2)
		*va_arg(*args, int16_t *) = value;
	else if(size == 4)
		*va_arg(*args, int32_t *) = value;
	else if(size == 8)
		*va_arg(*args, int64_t *) = value;
}

/* Store a floating-point value through the next pointer argument; the
   pointee type is picked by matching size against float, double and long
   double, in that order. Other sizes store nothing. */
static void __scanf_store_d(long double value, int size, va_list *args)
{
	if(size == sizeof(float))
		*va_arg(*args, float *) = value;
	else if(size == sizeof(double))
		*va_arg(*args, double *) = value;
	else if(size == sizeof(long double))
		*va_arg(*args, long double *) = value;
}

struct scanf_format {
	/* Maximum field width */
	int field_width;
	/* Size of the assigned (integer or floating-point) type, in bytes */
	uint8_t size;
	/* Whether to skip assignment */
	bool skip;
	/* Set of bytes allowed for bracket sets in %[] */
	uint8_t bracket_set[32];
};

/* Allow/disallow the entire set */
static void bracket_set_init(uint8_t *set, bool allow)
{
	memset(set, allow ? 0xff : 0x00, 32);
}

/* Allow/disallow a range of characters. Both ends are included. A range with
   start > end is empty. */
static void bracket_set_range(
	uint8_t *set, uint8_t start, uint8_t end, bool allow)
{
	for(int u = start; u <= end; u++) {
		int byte = u >> 3;
		int bit = 1 << (u & 7);

		if(allow)
			set[byte] |= bit;
		else
			set[byte] &= ~bit;
	}
}

/* Check whether a byte is allowed by the bracket set. EOF and any other value
   outside of the unsigned char range are never allowed (and never used as an
   index into the set). */
static bool bracket_set_test(uint8_t const *set, int c)
{
	if(c < 0 || c > UCHAR_MAX)
		return false;

	return (set[c >> 3] & (1 << (c & 7))) != 0;
}

/* Parse a bracket set from a format string. *pos must index the character
   right after '['; on success it is left on the closing ']'.
   Returns true on success, false if the format ends before the set is
   closed. */
static bool bracket_set_parse(uint8_t *set, char const *format, int *pos)
{
	/* Last literal character seen, or -1 when no range can start yet */
	int last = -1;
	bool allow = true;
	bracket_set_init(set, false);

	/* '^' denotes a negated set */
	if(format[*pos] == '^') {
		allow = false;
		(*pos)++;
		bracket_set_init(set, true);
	}
	/* ']' as the first character adds ']' to the set */
	if(format[*pos] == ']') {
		bracket_set_range(set, ']', ']', allow);
		last = ']';
		(*pos)++;
	}

	for(; format[*pos] && format[*pos] != ']'; (*pos)++) {
		uint8_t cur = (uint8_t)format[*pos];
		char next = format[*pos + 1];

		/* '-' denotes a range only between two characters; as the first or
		   last character of the set it is a literal. Refusing a range that
		   would end on the NUL terminator also keeps *pos inside the
		   format string. */
		if(cur == '-' && last >= 0 && next != ']' && next != '\0') {
			(*pos)++;
			bracket_set_range(set, last, (uint8_t)next, allow);
		}
		/* Any other character */
		else {
			last = cur;
			bracket_set_range(set, cur, cur, allow);
		}
	}
	return (format[*pos] == ']');
}

/* Parse a conversion specification in the format string. On entry *pos
   indexes the '%'; on success it is left on the last character of the
   specification (the specifier, or the closing ']' of a bracket set).
   Fills opt with the field width (INT_MAX if none), the size selected by the
   length modifier, and the assignment-suppression flag.
   Returns the specifier ('[' for bracket sets), 0 on error, including when
   the format string ends in the middle of the specification. */
static int parse_fmt(char const *fmt, int *pos, struct scanf_format *opt)
{
	opt->field_width = INT_MAX;
	opt->size = sizeof(int);
	opt->skip = false;

	int width = 0;
	char size_letter = 0;

	for((*pos)++;; (*pos)++) {
		int c = (unsigned char)fmt[*pos];

		/* Must come first: strchr() would match the terminator of the
		   modifier list and the scan would continue past the format. */
		if(c == '\0')
			return 0;

		if(c == '*')
			opt->skip = true;
		else if(strchr("hlzjtL", c)) {
			if(c == 'h')
				opt->size = (size_letter == 'h') ?
					sizeof(char) : sizeof(short);
			else if(c == 'l')
				opt->size = (size_letter == 'l') ?
					sizeof(long long) : sizeof(long);
			else if(c == 'z')
				opt->size = sizeof(size_t);
			else if(c == 'j')
				opt->size = sizeof(intmax_t);
			else if(c == 't')
				opt->size = sizeof(ptrdiff_t);
			else if(c == 'L')
				opt->size = sizeof(long double);
			size_letter = c;
		}
		else if(isdigit(c)) {
			int digit = c - '0';
			/* Saturate rather than overflow on absurdly long widths */
			if(width > (INT_MAX - digit) / 10)
				width = INT_MAX;
			else
				width = width * 10 + digit;
			opt->field_width = width;
		}
		else if(c == '[') {
			(*pos)++;
			return bracket_set_parse(opt->bracket_set, fmt, pos) ? '[' : 0;
		}
		else if(strchr("diouxXpsn", c))
			return c;
		else if(strchr("aAeEfFgG", c)) {
			/* Adjust interpretation of no size / 'l' size */
			if(size_letter == 0)
				opt->size = sizeof(float);
			if(size_letter == 'l')
				opt->size = sizeof(double);
			return c;
		}
		else if(c == 'c') {
			/* %c reads exactly one character unless a width is given */
			if(opt->field_width == INT_MAX)
				opt->field_width = 1;
			return c;
		}
		else
			return 0;
	}
}

/* Core of the scanf family: read from `in` as directed by `format` and
   store converted values through the pointers taken from `args`.

   Returns the number of successful assignments, or EOF if the input ends
   before the first assignment is made. On every exit path the pending
   look-ahead character is handed back to the input with __scanf_end(). */
int __scanf(
	struct __scanf_input * __restrict__ in,
	char const * __restrict__ format,
	va_list *args)
{
	/* Number of successful assignments */
	int validrets = 0;
	/* Whether to report EOF instead of the assignment count */
	bool input_failure = false;

	__scanf_start(in);

	for(int pos = 0; format[pos]; pos++) {
		/* Any whitespace in the format matches any amount of whitespace
		   (including none) in the input */
		if(isspace((unsigned char)format[pos])) {
			__skip_spaces(in);
			continue;
		}
		if(format[pos] != '%' || format[pos + 1] == '%') {
			/* Expect this specific character */
			if(__scanf_peek(in) != (unsigned char)format[pos])
				goto done;
			__scanf_in(in);
			/* "%%" takes two format characters for one input '%' */
			pos += (format[pos] == '%');
			continue;
		}

		/* Perform a conversion */
		struct scanf_format opt;
		int spec = parse_fmt(format, &pos, &opt);
		if(spec == 0)
			goto done;

		switch(spec) {
		case '[': {
			char *dest = opt.skip ? NULL : va_arg(*args, char *);
			int count = 0;

			while(count < opt.field_width) {
				int ch = __scanf_peek(in);
				if(!bracket_set_test(opt.bracket_set, ch))
					break;
				__scanf_in(in);
				if(dest)
					*dest++ = ch;
				count++;
			}

			if(count == 0) {
				input_failure =
					(__scanf_peek(in) == EOF && validrets == 0);
				goto done;
			}
			/* dest is NULL when the assignment is suppressed */
			if(dest)
				*dest = '\0';
			validrets += !opt.skip;
			break;
		}

		case 'n':
			if(!opt.skip)
				__scanf_store_i(in->bytes_read, opt.size, args);
			break;

		case 'd':
		case 'i':
		case 'o':
		case 'u':
		case 'x':
		case 'X': {
			int base =
				(spec == 'd' || spec == 'u') ? 10 :
				(spec == 'o') ? 8 :
				(spec == 'x' || spec == 'X') ? 16 : 0;
			/* NOTE(review): 'u' is not flagged as unsigned here; confirm
			   against __strto_int() whether that is intended. */
			bool use_unsigned =
				(spec == 'o' || spec == 'x' || spec == 'X');

			long long int value;
			int err = __strto_int(in, base, NULL, &value, use_unsigned,
				opt.field_width);

			if(err) {
				input_failure = (err == EOF && validrets == 0);
				goto done;
			}
			if(!opt.skip)
				__scanf_store_i(value, opt.size, args);
			validrets++;
			break;
		}

		case 'a':
		case 'A':
		case 'e':
		case 'E':
		case 'f':
		case 'F':
		case 'g':
		case 'G': {
			long double value;
			int err = __strto_fp(in, NULL, NULL, &value, opt.field_width);

			if(err) {
				input_failure = (err == EOF && validrets == 0);
				goto done;
			}
			if(!opt.skip)
				__scanf_store_d(value, opt.size, args);
			validrets++;
			break;
		}

		case 'p': {
			void *p = opt.skip ? NULL : va_arg(*args, void *);
			_Static_assert(sizeof(p) == sizeof(long),
				"%p stores the pointer through a long-sized slot");
			int err = __strto_int(in, 0, p, NULL, true, opt.field_width);

			if(err) {
				input_failure = (err == EOF && validrets == 0);
				goto done;
			}
			validrets += !opt.skip;
			break;
		}

		case 'c': {
			char *dest = opt.skip ? NULL : va_arg(*args, char *);

			/* No whitespace skipping and no NUL terminator for %c */
			for(int u = 0; u < opt.field_width; u++) {
				int ch = __scanf_in(in);
				if(ch == EOF) {
					input_failure = (validrets == 0);
					goto done;
				}
				if(dest)
					*dest++ = ch;
			}
			validrets += !opt.skip;
			break;
		}

		case 's': {
			char *dest = opt.skip ? NULL : va_arg(*args, char *);
			int count = 0;

			__skip_spaces(in);
			while(count < opt.field_width) {
				int ch = __scanf_peek(in);
				if(ch == EOF || isspace(ch))
					break;
				__scanf_in(in);
				if(dest)
					*dest++ = ch;
				count++;
			}

			if(count == 0) {
				input_failure =
					(__scanf_peek(in) == EOF && validrets == 0);
				goto done;
			}
			/* Terminate and count the string whether the word ended on
			   whitespace, on EOF, or on the field width */
			if(dest) {
				*dest = '\0';
				validrets++;
			}
			break;
		}

		default:
			/* parse_fmt() returned a specifier with no conversion */
			goto done;
		}
	}

done:
	__scanf_end(in);
	return input_failure ? EOF : validrets;
}