fxlibc/src/stdio/scanf/scan.c

626 lines
17 KiB
C

#include "../stdio_p.h"
#include "../../stdlib/stdlib_p.h"
#include <stdio.h>
#include <ctype.h>
#include <stdbool.h>
/*
 * Prime the one-character lookahead of a scan.
 *
 * Loads the first input character into in->buffer: from in->fp through
 * fgetc() when a stream is attached, otherwise from the string in->str.
 * At the end of the string (NUL byte) in->buffer is set to EOF and in->str
 * is left untouched; otherwise in->str is advanced past the character.
 */
void __scanf_start(struct __scanf_input *in)
{
    if(in->fp) {
        in->buffer = fgetc(in->fp);
        return;
    }

    /* Read the byte as unsigned char, exactly like fgetc() does: a plain
       (possibly signed) char would turn bytes >= 0x80 into negative values
       and make 0xFF indistinguishable from EOF. */
    unsigned char c = (unsigned char)*in->str;

    if(c == '\0') {
        in->buffer = EOF;
        return;
    }

    in->buffer = c;
    in->str++;
}
/*
 * Fetch the next input character.
 *
 * Reads from in->fp through fgetc() when a stream is attached, otherwise
 * from the string in->str. Returns the character as a non-negative value,
 * or EOF once the NUL terminator of the string is reached (in->str is not
 * advanced in that case).
 */
int __scanf_fetch(struct __scanf_input *in)
{
    if(in->fp)
        return fgetc(in->fp);

    /* Convert through unsigned char so that string input yields the same
       value range as fgetc(); otherwise byte 0xFF would be returned as -1
       (EOF) on targets where char is signed. */
    unsigned char c = (unsigned char)*in->str;

    if(c == '\0')
        return EOF;

    in->str++;
    return c;
}
/*
 * Give the pending lookahead character back to the input.
 *
 * When in->buffer holds a character (anything but EOF), it is pushed back
 * with ungetc() for stream input, or in->str is stepped back by one for
 * string input. Nothing happens when in->buffer is EOF.
 */
void __scanf_end(struct __scanf_input *in)
{
    if(in->buffer != EOF) {
        if(!in->fp) {
            in->str--;
        }
        else {
            ungetc(in->buffer, in->fp);
        }
    }
}
/*
 * Size selectors for the value-storing helpers.
 *
 * The relative order of MODCHAR .. MODLONGLONG matters: __scanf starts from
 * MODNORMAL, steps DOWN once per 'h' modifier (h -> MODSHORT, hh -> MODCHAR)
 * and steps UP once per 'l' modifier (l -> MODLONG, ll -> MODLONGLONG).
 */
enum
{
    MODSKIP       = 0,  /* value is parsed but not stored */
    MODCHAR       = 1,  /* hh */
    MODSHORT      = 2,  /* h  */
    MODNORMAL     = 3,  /* no length modifier */
    MODLONG       = 4,  /* l  */
    MODLONGLONG   = 5,  /* ll */
    MODLONGDOUBLE = 6,  /* L  */
    MODINTMAXT    = 7,  /* j  */
    MODSIZET      = 8,  /* z  */
    MODPTRDIFFT   = 9,  /* t  */
};
/*
 * Discard input characters for as long as the lookahead character is
 * whitespace according to isspace(). Stops at the first character that is
 * not whitespace, leaving it as the lookahead.
 */
void __purge_space( struct __scanf_input * __restrict__ in )
{
    for (;;) {
        if (!isspace(__scanf_peek(in)))
            return;
        __scanf_in(in);
    }
}
void __scanf_store_i(int64_t value, int size, va_list *args)
{
if(size == MODSKIP) return;
if(size == MODCHAR)
*va_arg(*args, int8_t *) = value;
else if(size == MODSHORT)
*va_arg(*args, int16_t *) = value;
else if(size == MODLONGLONG)
*va_arg(*args, int64_t *) = value;
else if(size == MODINTMAXT)
*va_arg(*args, intmax_t *) = value;
else if(size == MODSIZET)
*va_arg(*args, size_t *) = value;
else if(size == MODPTRDIFFT)
*va_arg(*args, ptrdiff_t *) = value;
else
*va_arg(*args, int *) = value;
}
/*
 * Store a parsed floating-point number through the next pointer argument.
 *
 * size selects the pointee type: MODLONG -> double, MODLONGDOUBLE ->
 * long double, anything else -> float. MODSKIP stores nothing and consumes
 * no argument.
 */
void __scanf_store_d(long double value, int size, va_list *args)
{
    switch(size) {
    case MODSKIP:
        return;
    case MODLONG:
        *va_arg(*args, double *) = value;
        return;
    case MODLONGDOUBLE:
        *va_arg(*args, long double *) = value;
        return;
    default:
        *va_arg(*args, float *) = value;
        return;
    }
}
/* STATUS OF __scanf DEVELOPMENT */
// XX = not done yet (but will be done)
// OK = OK, done and tested
// -- = not applicable
// NO = not supported (and will not be done); this only concerns %lc and %ls, as wide characters are not supported by gint
/*************************************************************************************************************/
/* Specifier * Explanation * num * hh * h *none* l * ll * j * z * t * L */
/*************************************************************************************************************/
/* % * Parse literal '%' * -- * -- * -- * OK * -- * -- * -- * -- * -- * -- */
/*************************************************************************************************************/
/* c * match a char or several char * OK * -- * -- * OK * NO * -- * -- * -- * -- * -- */
/*************************************************************************************************************/
/* s * match a string * OK * -- * -- * OK * NO * -- * -- * -- * -- * -- */
/*************************************************************************************************************/
/* [set] * match a set of char * OK * -- * -- * OK * -- * -- * -- * -- * -- * -- */
/*************************************************************************************************************/
/* d * match a decimal integer * OK * OK * OK * OK * OK * OK * OK * OK * OK * -- */
/*************************************************************************************************************/
/* i * match an integer * OK * OK * OK * OK * OK * OK * OK * OK * OK * -- */
/*************************************************************************************************************/
/* u * match an unsigned decimal integer * OK * OK * OK * OK * OK * OK * OK * OK * OK * -- */
/*************************************************************************************************************/
/* o * match a unsigned octal integer * OK * OK * OK * OK * OK * OK * OK * OK * OK * -- */
/*************************************************************************************************************/
/* x,X * match a unsigned hexadecimal integer * OK * OK * OK * OK * OK * OK * OK * OK * OK * -- */
/*************************************************************************************************************/
/* n * return the nb of chars read so far * -- * -- * -- * OK * -- * -- * -- * -- * -- * -- */
/*************************************************************************************************************/
/* a,A * match a floating point number * OK * -- * -- * OK * OK * -- * -- * -- * -- * OK */
/* e,E * match a floating point number * OK * -- * -- * OK * OK * -- * -- * -- * -- * OK */
/* f,F * match a floating point number * OK * -- * -- * OK * OK * -- * -- * -- * -- * OK */
/* g,G * match a floating point number * OK * -- * -- * OK * OK * -- * -- * -- * -- * OK */
/*************************************************************************************************************/
/* p * match a pointer * -- * -- * -- * OK * -- * -- * -- * -- * -- * -- */
/*************************************************************************************************************/
// %ms and %m[set] (which allocate the destination buffer while parsing a string or a set of characters) are not implemented
/* Membership table of the scanset (%[...]) currently in use, indexed by
   character code; it is rewritten every time a set is parsed. */
/* NOTE: the initializer `{ true }` only sets element 0 to true; the 255
   remaining elements are zero-initialized, i.e. false. */
bool __asciiallowed[256] = { true };
/* Mark every one of the 256 character codes as NOT allowed in the current set */
void __unallow_all_set( void )
{
    int code = 256;

    while (code-- > 0) {
        __asciiallowed[code] = false;
    }
}
/* Mark every one of the 256 character codes as allowed in the current set */
void __allow_all_set( void )
{
    int code = 256;

    while (code-- > 0) {
        __asciiallowed[code] = true;
    }
}
/*
 * Set the allowed flag of a whole range of characters in the current set.
 *
 * c1, c2: bounds of the range, both included; they may be given in any order.
 * value:  flag written to every character code of the range.
 *
 * The bounds are compared and used as unsigned char codes: indexing the
 * table with a plain char would produce a negative index (out-of-bounds
 * write) for bytes >= 0x80 on targets where char is signed.
 */
void __define_set_range( char c1, char c2, bool value )
{
    unsigned char a = (unsigned char)c1;
    unsigned char b = (unsigned char)c2;
    unsigned char lo = (a < b ? a : b);
    unsigned char hi = (a < b ? b : a);

    /* int counter: an unsigned char counter could never exceed hi == 255 */
    for (int code = lo; code <= hi; code++)
        __asciiallowed[code] = value;
}
/* Tell whether character code c is flagged as allowed in the current set */
bool __is_allowed( const unsigned char c )
{
    const bool *slot = __asciiallowed + c;

    return *slot;
}
/*
 * Parse the scanset of a %[...] conversion and load it into __asciiallowed.
 *
 * format: the whole format string.
 * pos:    in: index of the '[' opening the set;
 *         out: index of the ']' closing the set on success, or of the
 *         terminating NUL on failure.
 *
 * Returns 0 on success, -1 when the format ends before the closing ']'.
 *
 * Rules implemented:
 *  - a leading '^' negates the set (everything allowed, members removed);
 *  - a ']' placed first (after the optional '^') is a literal member;
 *  - a '-' placed first (no character before it to start a range) or placed
 *    last (right before the closing ']') is a literal member;
 *  - any other "a-b" denotes the inclusive range between a and b, in either
 *    order; the end of a range can itself start the next range.
 */
int __scanset(char const * __restrict__ format, int *pos )
{
    bool negated = false;
    /* first bound of a potential range; only valid when has_start is set */
    char range_start = 0;
    bool has_start = false;

    __unallow_all_set();
    (*pos)++;

    if (format[*pos] == '^') {
        negated = true;
        (*pos)++;
        __allow_all_set();
    }

    /* a ']' in first position is a member of the set, not its terminator */
    if (format[*pos] == ']') {
        __asciiallowed[ (unsigned char) ']' ] = !negated;
        range_start = ']';
        has_start = true;
        (*pos)++;
    }

    for (;;) {
        char c = format[*pos];

        /* the format ended before the set was closed: syntax error */
        if (c == '\0') return -1;

        /* end of the set */
        if (c == ']') return 0;

        /* '-' right before the closing ']' is the character '-' itself */
        if (c == '-' && format[*pos + 1] == ']') {
            __asciiallowed[ (unsigned char) '-' ] = !negated;
            (*pos)++;
            return 0;
        }

        /* '-' preceded by a member: range from that member to the next char */
        if (c == '-' && has_start) {
            (*pos)++;
            if (format[*pos] == '\0') return -1;
            __define_set_range( range_start, format[*pos], !negated );
            /* pos is left on the end bound on purpose: the next iteration
               handles it as an ordinary member, so it may start a new range */
            continue;
        }

        /* ordinary member (this includes a '-' with nothing before it);
           index through unsigned char so bytes >= 0x80 stay inside the table */
        __asciiallowed[ (unsigned char) c ] = !negated;
        range_start = c;
        has_start = true;
        (*pos)++;
    }
}
/*
 * Consume a run of input characters for a %s or %[...] conversion.
 *
 * in:    input being scanned.
 * dst:   destination buffer, or NULL to discard the characters. No
 *        terminator is written here.
 * limit: maximum number of characters to consume.
 * word:  true for %s (accept anything but whitespace), false for %[...]
 *        (accept what __is_allowed() reports as member of the current set).
 *
 * Stops at EOF, at the first rejected character (left as lookahead) or once
 * `limit` characters were consumed. Returns the number of characters consumed.
 */
static unsigned int __scanf_take_run(struct __scanf_input *in, char *dst,
    unsigned int limit, bool word)
{
    unsigned int taken = 0;

    while (taken < limit) {
        /* int, not char: EOF must stay distinguishable from every byte */
        int c = __scanf_peek( in );
        if (c == EOF) break;

        bool accepted = word ? !isspace( (unsigned char) c )
                             : __is_allowed( (unsigned char) c );
        if (!accepted) break;

        c = __scanf_in( in );
        if (dst) dst[taken] = (char) c;
        taken++;
    }

    return taken;
}

/*
 * Core of the scanf family: parse the input `in` according to `format` and
 * store the converted values through the pointers found in `args`.
 *
 * Returns the number of values assigned (conversions suppressed with '*' and
 * %n are not counted), or EOF when the input ran out before any value was
 * assigned.
 *
 * Every exit path goes through __scanf_end() so that the lookahead character
 * is given back to the input, including when the scan stops early on a
 * mismatch.
 *
 * Directives:
 *  - whitespace in the format skips any amount of input whitespace;
 *  - any other character but '%' must match the next input character;
 *  - %[*][width][hh|h|l|ll|L|j|z|t]<conv>, with <conv> one of
 *    [ n % d i o u x X a A e E f F g G p c s.
 *    An unknown or truncated conversion stops the scan.
 */
int __scanf(
    struct __scanf_input * __restrict__ in,
    char const * __restrict__ format,
    va_list *args)
{
    bool skip = false;      // true when the current conversion is suppressed with '*'
    int MOD = MODNORMAL;    // length modifier of the current conversion
    int validrets = 0;      // number of values assigned so far
    int err = 0;            // status of the parsing helpers
    int user_length = 0;    // field width given in the format
    int pos = 0;            // current position in the format string

    in->readsofar = 0;      // nothing read from the input yet
    in->readmaxlength = -1; // no maximum field width defined yet

    __scanf_start( in );

    while( format[pos] != 0 ) {
        in->readmaxlength = -1;
        user_length = 0;
        MOD = MODNORMAL;
        skip = false;
        __allow_all_set();

        // any whitespace in the format matches any amount of input whitespace
        if( isspace( (unsigned char) format[pos] ) ) {
            __purge_space(in);
        }
        // conversion specification
        else if( format[pos] == '%' ) {
            in->readmaxlength = INT_MAX; // INT_MAX means "no width given"

        // flags, width and length modifiers loop back here until the
        // conversion character itself is reached
        loopagain:
            pos++;

            switch( format[pos] ) {

            // set of characters
            case '[': {
                err = __scanset( format, &pos );
                if (err != 0) goto done;

                char *dst = skip ? NULL : va_arg( *args, char * );
                unsigned int taken = __scanf_take_run( in, dst,
                    in->readmaxlength, false );

                // nothing matched: input failure at EOF, else matching failure
                if (taken == 0) {
                    if (__scanf_peek( in ) == EOF) goto input_failure;
                    goto done;
                }
                if (dst) {
                    dst[taken] = '\0';
                    validrets++;
                }
                break;
            }

            // number of characters read so far (not counted as an assignment)
            case 'n': {
                *(int*) va_arg( *args, int* ) = in->readsofar;
                break;
            }

            // literal '%' expected in the input
            case '%': {
                if (__scanf_peek(in) != '%') goto done;
                __scanf_in( in );
                break;
            }

            // assignment suppression
            case '*': {
                skip = true;
                goto loopagain;
            }

            // h -> short, hh -> char; a third 'h' is an error
            case 'h': {
                if (MOD != MODNORMAL && MOD != MODSHORT) goto done;
                MOD--;
                goto loopagain;
            }

            // l -> long, ll -> long long; a third 'l' is an error
            case 'l': {
                if (MOD != MODNORMAL && MOD != MODLONG) goto done;
                MOD++;
                goto loopagain;
            }

            // the remaining length modifiers cannot be combined with another one
            case 'L': {
                if (MOD != MODNORMAL) goto done;
                MOD = MODLONGDOUBLE;
                goto loopagain;
            }

            case 'j': {
                if (MOD != MODNORMAL) goto done;
                MOD = MODINTMAXT;
                goto loopagain;
            }

            case 'z': {
                if (MOD != MODNORMAL) goto done;
                MOD = MODSIZET;
                goto loopagain;
            }

            case 't': {
                if (MOD != MODNORMAL) goto done;
                MOD = MODPTRDIFFT;
                goto loopagain;
            }

            // field width, accumulated digit by digit
            case '0' ... '9': {
                user_length = user_length * 10 + (int) ( format[pos] - '0' );
                in->readmaxlength = user_length;
                goto loopagain;
            }

            // integers
            case 'd':
            case 'i':
            case 'o':
            case 'u':
            case 'x':
            case 'X': {
                int f = format[pos];
                int base = (f == 'd' || f == 'u') ? 10 :
                           (f == 'o') ? 8 :
                           (f == 'x' || f == 'X') ? 16 : 0;
                bool use_unsigned = (f == 'o' || f == 'x' || f == 'X');
                long long int temp;

                err = __strto_int(in, base, NULL, &temp, use_unsigned,
                    in->readmaxlength);
                if (err == EOF) goto input_failure;
                if (err != 0) goto done;

                // a suppressed conversion is neither stored nor counted
                if (!skip) {
                    __scanf_store_i( temp, MOD, args );
                    validrets++;
                }
                break;
            }

            // floating-point numbers
            case 'a':
            case 'A':
            case 'e':
            case 'E':
            case 'f':
            case 'F':
            case 'g':
            case 'G': {
                long double temp;

                err = __strto_fp( in, NULL, NULL, &temp,
                    in->readmaxlength);
                if (err == EOF) goto input_failure;
                if (err != 0) goto done;

                if (!skip) {
                    __scanf_store_d( temp, MOD, args );
                    validrets++;
                }
                break;
            }

            // pointer: parsed as an integer straight into the target pointer
            case 'p': {
                long int temp;

                if (!skip) {
                    void *p = (void *) va_arg( *args, void** );
                    err = __strto_int( in, 0, p, NULL, true,
                        in->readmaxlength);
                }
                else err = __strto_int( in, 0, &temp, NULL, true,
                    in->readmaxlength);

                if (err != 0) goto done;
                if (!skip) validrets++;
                break;
            }

            // one character, or exactly <width> characters; no terminator is
            // added and leading whitespace is not skipped
            case 'c': {
                unsigned int wanted = (in->readmaxlength == INT_MAX)
                    ? 1 : (unsigned int) in->readmaxlength;
                char *dst = skip ? NULL : va_arg( *args, char * );

                for( unsigned int u = 0; u < wanted; u++ ) {
                    int c = __scanf_peek( in );
                    if (c == EOF) goto input_failure;
                    c = __scanf_in( in );
                    if (dst) dst[u] = (char) c;
                }
                if (dst) validrets++;
                break;
            }

            // whitespace-delimited string, always NUL-terminated
            case 's': {
                __purge_space( in );

                char *dst = skip ? NULL : va_arg( *args, char * );
                unsigned int taken = __scanf_take_run( in, dst,
                    in->readmaxlength, true );

                if (taken == 0) {
                    if (__scanf_peek( in ) == EOF) goto input_failure;
                    goto done;
                }
                if (dst) {
                    dst[taken] = '\0';
                    validrets++;
                }
                break;
            }

            // unknown conversion, or format ending in the middle of one:
            // stop here rather than stepping past the end of the format
            default:
                goto done;
            }
        }
        // ordinary character: must match the next input character
        else {
            int c = __scanf_peek( in );
            // compare as unsigned char codes so bytes >= 0x80 match whatever
            // the signedness of char
            if (c == EOF || (unsigned char) c != (unsigned char) format[pos])
                goto done;
            __scanf_in( in );
        }

        // single place where the format position moves to the next directive
        pos++;
    }
    goto done;

// the input ran out: report EOF unless something was already assigned
input_failure:
    if (validrets == 0) validrets = EOF;

done:
    __scanf_end( in );
    return validrets;
}