Compare commits

...

5 Commits

Author SHA1 Message Date
Lephenixnoir 527c2e48fc
stdio: more syntaxic refactoring of scanf 2024-01-14 21:27:48 +01:00
Lephenixnoir 9f6e0c8039
stdio: factor out format parsing in scanf 2024-01-14 21:20:40 +01:00
Lephenixnoir 1caaa8ff63
stdio: use compact storage for %[] set in scanf
256 bytes of globals is a *lot* on the G-III.
2024-01-14 20:36:09 +01:00
Lephenixnoir 5b85d53826
(formatting: case indent) 2024-01-14 20:23:55 +01:00
Lephenixnoir 55ae7df318
stdio: simplify output size management in scanf 2024-01-14 20:23:21 +01:00
2 changed files with 280 additions and 351 deletions

View File

@ -2,6 +2,7 @@
#include "../../stdlib/stdlib_p.h"
#include <stdio.h>
#include <ctype.h>
#include <string.h>
#include <stdbool.h>
void __scanf_start(struct __scanf_input *in)
@ -37,21 +38,6 @@ void __scanf_end(struct __scanf_input *in)
in->str--;
}
enum
{
MODSKIP,
MODCHAR,
MODSHORT,
MODNORMAL,
MODLONG,
MODLONGLONG,
MODLONGDOUBLE,
MODINTMAXT,
MODSIZET,
MODPTRDIFFT,
};
void __purge_space( struct __scanf_input * __restrict__ in )
{
while (isspace(__scanf_peek(in))) __scanf_in(in);
@ -59,34 +45,24 @@ void __purge_space( struct __scanf_input * __restrict__ in )
void __scanf_store_i(int64_t value, int size, va_list *args)
{
if(size == MODSKIP) return;
if(size == MODCHAR)
if(size == 1)
*va_arg(*args, int8_t *) = value;
else if(size == MODSHORT)
else if(size == 2)
*va_arg(*args, int16_t *) = value;
else if(size == MODLONGLONG)
else if(size == 4)
*va_arg(*args, int32_t *) = value;
else if(size == 8)
*va_arg(*args, int64_t *) = value;
else if(size == MODINTMAXT)
*va_arg(*args, intmax_t *) = value;
else if(size == MODSIZET)
*va_arg(*args, size_t *) = value;
else if(size == MODPTRDIFFT)
*va_arg(*args, ptrdiff_t *) = value;
else
*va_arg(*args, int *) = value;
}
void __scanf_store_d(long double value, int size, va_list *args)
{
if (size==MODSKIP) return;
if(size == MODLONG)
*va_arg(*args, double *) = value;
else if(size == MODLONGDOUBLE)
*va_arg(*args, long double *) = value;
else
if(size == sizeof(float))
*va_arg(*args, float *) = value;
else if(size == sizeof(double))
*va_arg(*args, double *) = value;
else if(size == sizeof(long double))
*va_arg(*args, long double *) = value;
}
/* STATUS OF __scanf DEVELOPMENT */
@ -130,367 +106,323 @@ void __scanf_store_d(long double value, int size, va_list *args)
// %ms and %m[set] (which allocate memory while parsing a string or a set of characters) are not implemented
struct scanf_format {
/* Maximum field width */
int field_width;
/* Size of the assigned (integer or floating-point) type, in bytes */
uint8_t size;
/* Whether to skip assignment */
bool skip;
/* list of allowed char given by a set %[], this is updated at every set */
bool __asciiallowed[256] = { true };
/* Set of bytes allowed for bracket sets in %[] */
uint8_t bracket_set[32];
};
/* unallow all the char for the current set */
void __unallow_all_set( void )
/* Allow/disallow the entire set */
static void bracket_set_init(uint8_t *set, bool allow)
{
for(int u =0; u<=255; u++)
__asciiallowed[u]=false;
memset(set, allow ? 0xff : 0x00, 32);
}
/* allow all the char for the current set */
void __allow_all_set( void )
/* Allow/disallow a range of characters. Both ends are included. */
static void bracket_set_range(
uint8_t *set, uint8_t start, uint8_t end, bool allow)
{
for(int u =0; u<=255; u++)
__asciiallowed[u]=true;
for(int u = start; u <= end; u++) {
int byte = u >> 3;
int bit = 1 << (u & 7);
if(allow)
set[byte] |= bit;
else
set[byte] &= ~bit;
}
}
/* allo a range of char for the current set */
/* note1 : c1 and c2 do not to be sorted */
/* note2 : not sur if C standard requires to be ordered or not */
void __define_set_range( char c1, char c2, bool value )
/* Check whether a byte is allowed by the bracket set. */
static bool bracket_set_test(uint8_t *set, int c)
{
char beg = (c1 < c2 ? c1 : c2 );
char end = (c1 >= c2 ? c1 : c2 );
for (int u=beg; u<=end; u++)
__asciiallowed[u] = value;
return (c != EOF) && (set[c >> 3] & (1 << (c & 7)));
}
/* return true if the char is in the allowed set or false otherwise */
bool __is_allowed(int c)
/* Parse a bracket set from a format string. Returns true on success. */
static bool bracket_set_parse(uint8_t *set, char const *format, int *pos)
{
return (c != EOF) && __asciiallowed[c];
}
int last = 0;
bool allow = true;
bracket_set_init(set, false);
/* return 0 if Ok or -1 if syntax err in the set format */
int __scanset(char const * __restrict__ format, int *pos )
{
int __sor = 0;
int __eor = 0;
bool __neg = false;
__unallow_all_set();
(*pos)++;
// next will be a "negation" set
if (format[*pos] == '^' ) {
__neg = true;
/* '^' denotes a negated set */
if(format[*pos] == '^') {
allow = false;
(*pos)++;
__allow_all_set();
// the char ']' is part of the set
if (format[*pos] == ']' ) {
__asciiallowed[ ']' ] = !__neg;
(*pos)++;
}
}
// the char ']' is included in the allowed set
else if (format[*pos] == ']' ) {
__neg = false;
// the char ']' is part of the set
if (format[*pos] == ']' ) {
__asciiallowed[ ']' ] = !__neg;
bracket_set_init(set, true);
}
/* ']' as the first character adds ']' to the set */
if(format[*pos] == ']' ) {
bracket_set_range(set, ']', ']', allow);
(*pos)++;
}
for(; format[*pos] && format[*pos] != ']'; (*pos)++) {
/* '-' as the last character, thus explicitly in the set */
if(format[*pos] == '-' && format[*pos + 1] == ']')
bracket_set_range(set, '-', '-', allow);
/* '-' as denoting a range */
else if(format[*pos] == '-') {
(*pos)++;
bracket_set_range(set, last, format[*pos], allow);
}
/* Any other character */
else {
last = format[*pos];
bracket_set_range(set, last, last, allow);
}
}
return (format[*pos] == ']');
}
static int parse_fmt(char const *fmt, int *pos, struct scanf_format *opt)
{
opt->field_width = INT_MAX;
opt->size = sizeof(int);
opt->skip = false;
int width = 0;
char size_letter = 0;
while(1) {
// we find a '-' so need to check if we are considering a range or the char '-' only
if (format[*pos]=='-') {
// the char '-' is included in the allowed set
if (format[*pos+1]==']') {
__asciiallowed[ '-' ] = !__neg; // if set in very final position before']', this is the char '-' only
(*pos)++;
// we have now finished the reading of the set cause the following char is ']'
return 0;
}
// the char '-' indicates a range of char to be included into the set
else {
(*pos)++;
__eor = format[*pos];
__define_set_range( __sor, __eor, !__neg );
}
}
// we find the char ']' so it means we reach the end of this set
else if (format[*pos]==']') return 0;
// if we reach the '\0' we have a syntax problem
else if (format[*pos]=='\0') return -1;
// we are considering one particular char and prepare for a potential range if we find the char '-' later on
else {
__sor = format[*pos];
__asciiallowed[ __sor ] = !__neg;
(*pos)++;
switch(fmt[*pos]) {
case '*':
opt->skip = true;
break;
case 'h':
opt->size = (size_letter=='h') ? sizeof(char) : sizeof(short);
size_letter = 'h';
break;
case 'l':
opt->size = (size_letter=='l') ? sizeof(long long) : sizeof(long);
size_letter = 'l';
break;
case 'L':
opt->size = sizeof(long double);
size_letter = 'L';
break;
case 'j':
opt->size = sizeof(intmax_t);
break;
case 'z':
opt->size = sizeof(size_t);
break;
case 't':
opt->size = sizeof(ptrdiff_t);
break;
case '0' ... '9':
width = width * 10 + (fmt[*pos] - '0');
opt->field_width = width;
break;
case '[':
(*pos)++;
return bracket_set_parse(opt->bracket_set, fmt, pos) ? '[' : 0;
case 'd':
case 'i':
case 'o':
case 'u':
case 'x':
case 'X':
case 'p':
case 's':
case 'n':
return fmt[*pos];
case 'a':
case 'A':
case 'e':
case 'E':
case 'f':
case 'F':
case 'g':
case 'G':
/* Adjust interpretation of no size / 'l' size */
if(size_letter == 0)
opt->size = sizeof(float);
if(size_letter == 'l')
opt->size = sizeof(double);
return fmt[*pos];
case 'c':
if(opt->field_width == INT_MAX)
opt->field_width = 1;
return 'c';
default:
return 0;
}
}
return 0;
}
int __scanf(
struct __scanf_input * __restrict__ in,
char const * __restrict__ format,
va_list *args)
{
bool skip = false;
int MOD = MODNORMAL;
in->readsofar = 0; // we haven't started to read char from the input stream
in->readmaxlength = -1; // no specific maximum length to read is defined yet
int validrets = 0; // to be incremented each time we successfully read and store an input as per the format
int err = 0; // err control on __strto_xx( ) functions
int user_length = 0; // length defined by user with a %xx modifier
int pos = 0; // current pos in the format string
/* Number of successful assignments */
int validrets = 0;
__scanf_start( in );
// TODO: No __scanf_end() in any of the "return validrets"!!
while( format[pos] != 0 ) {
in->readmaxlength = -1;
user_length = 0;
MOD = MODNORMAL;
skip = false;
__allow_all_set();
if( format[pos] == ' ' ) {
for(int pos = 0; format[pos]; pos++) {
if(format[pos] == ' ') {
__purge_space(in);
continue;
}
else if(format[pos] != '%' || format[pos + 1] == '%') {
/* Expect this specific character */
if(__scanf_peek(in) != format[pos])
return validrets;
__scanf_in(in);
pos += (format[pos] == '%');
continue;
}
// we will have to manage a given format
else if( format[pos] == '%' ) {
in->readmaxlength = INT_MAX;
// main loop
loopagain:
pos++;
switch( format[pos] ) {
// we need to decrypt the corresponding scanf set of character
case '[': {
err = __scanset( format, &pos );
if (err!=0) return validrets;
int currentlength = 0;
// we need to assign the read char to the corresponding pointer
char *c = skip ? NULL : va_arg(*args, char *);
for(unsigned int u=0; u<in->readmaxlength; u++) {
int temp = __scanf_peek(in);
if(__is_allowed(temp)) {
__scanf_in(in);
if(c) *c++ = temp;
currentlength++;
}
else if(temp==EOF && !currentlength && !validrets)
return EOF;
else break;
}
if(!currentlength)
return validrets;
*c = '\0';
validrets += !skip;
break;
}
// return the number of char read so far (cannot be skipped %*n is not valid)
case 'n': {
*(int*) va_arg( *args, int* ) = in->readsofar;
break;
/* Perform a conversion */
struct scanf_format opt;
int spec = parse_fmt(format, &pos, &opt);
if(spec == 0)
return validrets;
switch(spec) {
// read the characters allowed by the bracket set parsed from the format
case '[': {
int currentlength = 0;
// we need to assign the read char to the corresponding pointer
char *c = opt.skip ? NULL : va_arg(*args, char *);
for(int u=0; u<opt.field_width; u++) {
int temp = __scanf_peek(in);
if(bracket_set_test(opt.bracket_set, temp)) {
__scanf_in(in);
if(c) *c++ = temp;
currentlength++;
}
else if(temp==EOF && !currentlength && !validrets)
return EOF;
else break;
}
if(!currentlength)
return validrets;
*c = '\0';
validrets += !opt.skip;
break;
}
// we are expecting the char '%' to be in the input stream, if not err and return
case '%': {
if (__scanf_peek(in) != '%') return validrets;
else __scanf_in( in );
break;
}
// store the number of characters read so far (cannot be skipped: %*n is not valid)
case 'n':
*va_arg(*args, int *) = in->bytes_read;
break;
// the next read, even if valid, will not be stored
case '*': {
skip = true;
goto loopagain;
break;
}
case 'd':
case 'i':
case 'o':
case 'u':
case 'x':
case 'X': {
int f = format[pos];
int base = (f == 'd' || f == 'u') ? 10 :
(f == 'o') ? 8:
(f == 'x' || f == 'X') ? 16 : 0;
bool use_unsigned = (f == 'o' || f == 'x' || f == 'X');
case 'h': {
if (MOD==MODNORMAL || MOD==MODSHORT) {
MOD--;
goto loopagain;
}
else return validrets; // we cannot have %hhh format modifier --> err
break;
}
long long int temp;
int err = __strto_int(in, base, NULL, &temp, use_unsigned,
opt.field_width);
if(err == EOF && validrets == 0) return EOF;
if(err) return validrets;
if(!opt.skip)
__scanf_store_i(temp, opt.size, args);
validrets++;
break;
}
case 'l': {
if (MOD==MODNORMAL || MOD==MODLONG) {
MOD++;
goto loopagain;
}
else return validrets; // we cannot have %ll format modifier --> err
break;
}
case 'a':
case 'A':
case 'e':
case 'E':
case 'f':
case 'F':
case 'g':
case 'G': {
// read a floating-point value from the current input stream
// and store it through the corresponding pointer argument
long double temp;
int err = __strto_fp(in, NULL, NULL, &temp, opt.field_width);
if(err == EOF && validrets == 0) return EOF;
if(err) return validrets;
if(!opt.skip)
__scanf_store_d(temp, opt.size, args);
validrets++;
break;
}
case 'L': {
if (MOD==MODNORMAL) {
MOD=MODLONGDOUBLE;
goto loopagain;
}
else return validrets; // we cannot have %LL format modifier --> err
break;
}
case 'p': {
long int temp;
int err = 0;
if(!opt.skip) {
void *p = va_arg(*args, void *);
err = __strto_int(in, 0, p, NULL, true, opt.field_width);
validrets += (err == 0);
}
else err = __strto_int(in, 0, &temp, NULL, true, opt.field_width);
if(err) return validrets;
break;
}
case 'j': {
if (MOD==MODNORMAL) {
MOD=MODINTMAXT;
goto loopagain;
}
else return validrets; // we cannot have %LL format modifier --> err
break;
}
case 'c': {
char *c = opt.skip ? NULL : va_arg(*args, char *);
case 'z': {
if (MOD==MODNORMAL) {
MOD=MODSIZET;
goto loopagain;
}
else return validrets; // we cannot have %LL format modifier --> err
break;
}
for(int u = 0; u < opt.field_width; u++) {
int temp = __scanf_in(in);
if(temp==EOF) return EOF;
else if(c) *c++ = temp;
}
validrets += !opt.skip;
break;
}
case 't': {
if (MOD==MODNORMAL) {
MOD=MODPTRDIFFT;
goto loopagain;
}
else return validrets; // we cannot have %LL format modifier --> err
break;
}
case 's': {
char temp;
int curstrlength = 0;
__purge_space(in);
case '0' ... '9': {
user_length = user_length * 10 + (int) ( format[pos] - '0' );
in->readmaxlength = user_length;
goto loopagain;
break;
}
case 'd':
case 'i':
case 'o':
case 'u':
case 'x':
case 'X': {
int f = format[pos];
int base = (f == 'd' || f == 'u') ? 10 :
(f == 'o') ? 8:
(f == 'x' || f == 'X') ? 16 : 0;
bool use_unsigned = (f == 'o' || f == 'x' || f == 'X');
long long int temp;
err = __strto_int(in, base, NULL, &temp, use_unsigned,
in->readmaxlength);
if (err == EOF && validrets == 0) return EOF;
if (err != 0) return validrets;
if (skip) __scanf_store_i( temp, MODSKIP, args );
else __scanf_store_i( temp, MOD, args );
validrets++;
break;
}
case 'a':
case 'A':
case 'e':
case 'E':
case 'f':
case 'F':
case 'g':
case 'G': {
// read a double from the current input stream
// and store in the corresponding arg as a char by reference
long double temp;
err = __strto_fp( in, NULL, NULL, &temp,
in->readmaxlength);
if (err == EOF && validrets == 0) return EOF;
if (err != 0) return validrets;
if (skip) __scanf_store_d( temp, MODSKIP, args );
else __scanf_store_d( temp, MOD, args );
validrets++;
break;
}
case 'p': {
long int temp;
if (!skip) {
void *p = (void *) va_arg( *args, void** ); // get the adress of the target pointer (void**)
err = __strto_int( in, 0, p, NULL, true,
in->readmaxlength);
}
else err = __strto_int( in, 0, &temp, NULL, true,
in->readmaxlength);
if (err == 0) validrets++;
else return validrets;
skip = false;
break;
}
case 'c': {
if(in->readmaxlength == INT_MAX)
in->readmaxlength = 1;
char *c = skip ? NULL : va_arg(*args, char *);
for(unsigned int u = 0; u < in->readmaxlength; u++) {
int temp = __scanf_in(in);
if(temp==EOF) return EOF;
else if(c) *c++ = temp;
}
validrets += !skip;
break;
}
case 's': {
char temp;
int curstrlength = 0;
__purge_space(in);
char *c = skip ? NULL : va_arg(*args, char *);
for(unsigned int u = 0; u < in->readmaxlength; u++) {
temp = __scanf_peek(in);
if(temp==EOF && curstrlength==0) return validrets;
if(isspace(temp) || ((temp==EOF && curstrlength!=0))) {
if(c) {
*c = 0;
validrets++;
}
break;
}
else {
int temp = __scanf_in( in );
if(c)
*c++ = temp;
curstrlength++;
}
char *c = opt.skip ? NULL : va_arg(*args, char *);
for(int u = 0; u < opt.field_width; u++) {
temp = __scanf_peek(in);
if(temp==EOF && curstrlength==0) return validrets;
if(isspace(temp) || ((temp==EOF && curstrlength!=0))) {
if(c) {
*c = 0;
validrets++;
}
break;
}
else {
int temp = __scanf_in(in);
if(c)
*c++ = temp;
curstrlength++;
}
}
break;
}
// we are looking for a specific character in the input stream
else {
// if the next char of the stream is corresponding, we validate the read and go to the following char
if(format[pos] == __scanf_peek( in )) {
__scanf_in( in );
pos++;
}
else return validrets; // else we return the number of valid read
}
pos++;
}
__scanf_end( in );

View File

@ -18,15 +18,12 @@ struct __scanf_input {
char const * __restrict__ str;
FILE *fp;
// max char to read from the input stream as per user length modifier
unsigned int readmaxlength;
// total number of char read so far in the current call of a XYscanf() function (to return a %n when required)
int readsofar;
/* Single-character lookahead buffer */
int buffer;
};
/* Total number of bytes read in a call to *scanf() */
int bytes_read;
};
/* Generic formatted scanning. */
extern int __scanf(
@ -46,7 +43,7 @@ static inline int __scanf_in(struct __scanf_input *__in)
{
int c = __in->buffer;
__in->buffer = __scanf_fetch(__in);
__in->readsofar++;
__in->bytes_read++;
return c;
}