diff --git a/CMakeLists.txt b/CMakeLists.txt index ec8224f..b6bf513 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -154,6 +154,7 @@ set(SOURCES src/libc/stdio/setvbuf.c src/libc/stdio/snprintf.c src/libc/stdio/sprintf.c + src/libc/stdio/ungetc.c src/libc/stdio/vasprintf.c src/libc/stdio/vdprintf.c src/libc/stdio/vfprintf.c diff --git a/STATUS b/STATUS index 5f0899a..aa3cfa0 100644 --- a/STATUS +++ b/STATUS @@ -85,6 +85,7 @@ TEST: Function/symbol/macro needs to be tested 7.19 7.19.1 Introduction TEST (no wide-oriented streams *) + 7.19.1 stdin, stdout, stderr TODO 7.19.4.1 remove TEST 7.19.4.2 rename TODO @@ -93,22 +94,22 @@ TEST: Function/symbol/macro needs to be tested 7.19.5.1 fclose - 7.19.5.2 fflush - - 7.19.5.3 fopen TEST ("a" and "+" modes remain) + 7.19.5.3 fopen - (EXT) fdopen TEST - 7.19.5.4 freopen TEST + 7.19.5.4 freopen TEST (mode changes untested) 7.19.5.5 setbuf - 7.19.5.6 setvbuf - - 7.19.6.1 fprintf LDEPS(fwrite) + 7.19.6.1 fprintf TEST 7.19.6.2 fscanf TODO - 7.19.6.3 printf LDEPS(fwrite, stdout) + 7.19.6.3 printf LDEPS(stdout) 7.19.6.4 scanf TODO 7.19.6.5 snprintf - 7.19.6.6 sprintf - 7.19.6.7 sscanf TODO - 7.19.6.8 vfprintf LDEPS(fwrite) + 7.19.6.8 vfprintf TEST 7.19.6.9 vfscanf TODO - 7.19.6.10 vprintf LDEPS(fwrite, stdout) + 7.19.6.10 vprintf LDEPS(stdout) 7.19.6.11 vscanf TODO 7.19.6.12 vsnprintf - 7.19.6.13 vsprintf - @@ -128,10 +129,10 @@ TEST: Function/symbol/macro needs to be tested 7.19.7.8 putc LDPES(fputc) 7.19.7.9 putchar LDEPS(fputc) 7.19.7.10 puts LDEPS(fputs) - 7.19.7.11 ungetc TODO + 7.19.7.11 ungetc - - 7.19.8.1 fread TEST ("a" and "+" modes remain) - 7.19.8.2 fwrite TEST ("a" and "+" modes remain) + 7.19.8.1 fread - + 7.19.8.2 fwrite - 7.19.9.1 fgetpos - 7.19.9.2 fseek - @@ -200,7 +201,7 @@ TEST: Function/symbol/macro needs to be tested (EXT) strncasecmp - (EXT) strdup - (EXT) strndup - - (EXT) memrchr - + (EXT) memrchr - (Unoptimized: byte-by-byte) 7.22 => GCC diff --git a/include/stdio.h b/include/stdio.h index 
970ee04..13b4517 100644 --- a/include/stdio.h +++ b/include/stdio.h @@ -259,6 +259,9 @@ extern int feof(FILE *__fp); /* Test the error flag. */ extern int ferror(FILE *__fp); +/* Print a message followed by strerror(errno) to stderr. */ +extern void perror(char const *__s); + #ifdef __cplusplus } #endif diff --git a/include/target/gint/bits/types/FILE.h b/include/target/gint/bits/types/FILE.h index af868ce..50be340 100644 --- a/include/target/gint/bits/types/FILE.h +++ b/include/target/gint/bits/types/FILE.h @@ -41,6 +41,12 @@ but not yet written to the file descriptor. ftell() reports fdpos + bufpos. The rest of the buffer is undefined. + The ungetc() function pushes back characters into the buffer; if the FILE is + unbuffered, then it's made buffered temporarily to hold the characters and + cleared at the next fflush() or read. The buffer is put in reading mode. For + this reason, reading functions should test [fp->buf] to check whether there + is a buffer instead of [fp->bufmode != _IONBF]. + Many fields in the FILE structure are abstracted away by API calls in layers: 1. [fd], [fdpos] and [error] are updated by the primitive functions of @@ -67,6 +73,8 @@ typedef struct { size_t bufpos; size_t bufread; size_t bufsize; + /* Number of ungetc()'d characters at the start of buffer data */ + int bufungetc; /* Buffering mode; one of _IOFBF, _IOLBF, or _IONBF */ uint8_t bufmode :2; /* We own the buffer and it needs to be freed */ diff --git a/src/libc/stdio/fflush.c b/src/libc/stdio/fflush.c index 43971c9..5015a21 100644 --- a/src/libc/stdio/fflush.c +++ b/src/libc/stdio/fflush.c @@ -5,30 +5,38 @@ int fflush(FILE *fp) { - // TODO: fflush(NULL) should flush "all" files (do we track them?) + /* TODO: fflush(NULL) should flush "all" files (do we track them?)
*/ if(!fp) { errno = EINVAL; return EOF; } + if(!fp->buf) return 0; + int rc = 0; + /* In reading mode, reset the file offset */ - if(fp->bufdir == __FILE_BUF_READ && fp->bufpos < fp->bufread) { + if(__fp_hasbuf_read(fp)) { fp->fdpos = fp->fdpos - fp->bufread + fp->bufpos; lseek(fp->fd, fp->fdpos, SEEK_SET); - fp->bufpos = 0; - fp->bufread = 0; - return 0; } /* In writing mode, write pending data */ - if(fp->bufdir == __FILE_BUF_WRITE && fp->bufpos > 0) { + else if(__fp_hasbuf_write(fp)) { ssize_t written = __fp_write(fp, fp->buf, fp->bufpos); - int rc = (written == (ssize_t)fp->bufpos ? 0 : EOF); - fp->bufpos = 0; - return rc; + rc = (written == (ssize_t)fp->bufpos ? 0 : EOF); + + /* TODO: fflush(): Keep data that couldn't be written */ } - return 0; + fp->bufpos = 0; + fp->bufread = 0; + fp->bufungetc = 0; + + /* Clear buffering for unbuffered streams that used ungetc() */ + if(fp->bufmode == _IONBF) + __fp_remove_buffer(fp); + + return rc; } diff --git a/src/libc/stdio/fileutil.c b/src/libc/stdio/fileutil.c index a55de18..6ae5118 100644 --- a/src/libc/stdio/fileutil.c +++ b/src/libc/stdio/fileutil.c @@ -1,4 +1,5 @@ #include "fileutil.h" +#include #include #include #include @@ -34,20 +35,74 @@ void __fp_close(FILE *fp, bool free_fp) free(fp); } +void __fp_remove_buffer(FILE *fp) +{ + if(fp->bufowned) + free(fp->buf); + + fp->buf = NULL; + fp->bufowned = false; + fp->bufsize = 0; +} + +bool __fp_set_buffer(FILE *fp, void *buf, size_t size) +{ + bool owned = false; + + if(!buf) { + owned = true; + buf = malloc(size); + if(!buf) + return false; + } + + fp->buf = buf; + fp->bufowned = owned; + fp->bufsize = size; + return true; +} + void __fp_buffer_mode_read(FILE *fp) { - if(fp->bufmode != _IONBF && fp->bufdir == __FILE_BUF_WRITE && - fp->bufpos > 0) + if(__fp_hasbuf_write(fp)) { fflush(fp); - fp->bufdir = __FILE_BUF_READ; + } + if(fp->buf) + fp->bufdir = __FILE_BUF_READ; } void __fp_buffer_mode_write(FILE *fp) { - if(fp->bufmode != _IONBF && fp->bufdir == 
__FILE_BUF_READ && - fp->bufpos < fp->bufread) + if(__fp_hasbuf_read(fp)) fflush(fp); - fp->bufdir = __FILE_BUF_WRITE; + if(fp->buf) + fp->bufdir = __FILE_BUF_WRITE; +} + +ssize_t __fp_buffered_read(FILE *fp, void *data, size_t request_size) +{ + if(!fp->buf || __fp_hasbuf_write(fp)) + return -1; + + int read_size = min((int)request_size, fp->bufread - fp->bufpos); + if(read_size <= 0) + return 0; + + memcpy(data, fp->buf + fp->bufpos, read_size); + fp->bufpos += read_size; + fp->bufungetc = max(fp->bufungetc - read_size, 0); + + /* Rewind the buffer if we read it fully */ + if(fp->bufpos >= fp->bufread) { + fp->bufread = 0; + fp->bufpos = 0; + + /* Clear temporary ungetc() buffers of _IONBF streams */ + if(fp->bufmode == _IONBF) + __fp_remove_buffer(fp); + } + + return read_size; } ssize_t __fp_read(FILE *fp, void *data, size_t size) @@ -77,12 +132,6 @@ ssize_t __fp_write(FILE *fp, void const *data, size_t size) { size_t written = 0; - if(fp->append) { - int rc = fseek(fp, 0, SEEK_END); - if(rc < 0) - return EOF; - } - while(written < size) { ssize_t rc = write(fp->fd, data + written, size - written); diff --git a/src/libc/stdio/fileutil.h b/src/libc/stdio/fileutil.h index 8d06c7b..5165f66 100644 --- a/src/libc/stdio/fileutil.h +++ b/src/libc/stdio/fileutil.h @@ -9,18 +9,38 @@ extern "C" { #include #include +/* Check whether a buffer has written data to flush. */ +#define __fp_hasbuf_write(fp) \ + (fp->buf && fp->bufdir == __FILE_BUF_WRITE && fp->bufpos > 0) +/* Check whether a buffer has read data to yield. */ +#define __fp_hasbuf_read(fp) \ + (fp->buf && fp->bufdir == __FILE_BUF_READ && fp->bufpos < fp->bufread) + /* Open a file descriptor in a pre-allocated FILE. */ int __fp_open(FILE *fp, int fd, bool use_buffering); /* Close fp and free all of its resources. */ void __fp_close(FILE *fp, bool free_fp); +/* Remove the buffer on fp. */ +void __fp_remove_buffer(FILE *fp); + +/* Set the specified buffer on fp; if buf is NULL, one is allocated with malloc().
+ Returns true on success, false on error. */ +bool __fp_set_buffer(FILE *fp, void *buf, size_t size); + /* Set reading mode on the buffer. */ void __fp_buffer_mode_read(FILE *fp); /* Set writing mode on the buffer. */ void __fp_buffer_mode_write(FILE *fp); +/* Reads data from the buffer. Always reads as much as possible, up to size. + Returns amount of data read; if >= 0 but < size, the buffer should be + refilled. Returns -1 to indicate that unbuffered access should be used. + Allows reading from temporary ungetc() buffers and cleans them. */ +ssize_t __fp_buffered_read(FILE *fp, void *data, size_t size); + /* Reads data from a file descriptor; updates the fdpos and sets the error indicator. Returns 0 on success, EOF on error. */ ssize_t __fp_read(FILE *fp, void *data, size_t size); diff --git a/src/libc/stdio/fread.c b/src/libc/stdio/fread.c index 053136c..442b9e8 100644 --- a/src/libc/stdio/fread.c +++ b/src/libc/stdio/fread.c @@ -9,43 +9,44 @@ size_t fread(void *data, size_t membsize, size_t nmemb, FILE *fp) return 0; } - size_t size; - if(__builtin_umul_overflow(membsize, nmemb, &size)) { + size_t request_size; + if(__builtin_umul_overflow(membsize, nmemb, &request_size)) { fp->error = 1; return 0; } - if(!fp->buf) { - ssize_t rc = __fp_read(fp, data, size); - return (rc == EOF) ? 0 : rc; - } + /* If the stream is unbuffered, we might have no buffer for the reads. + If it's buffered, we always have one. It's also possible that fp is + unbuffered (_IONBF) but has a buffer temporarily because ungetc() + has been used, in which case we have to transition from buffered + reads into direct reads midway. We use __fp_buffered_read() to + handle this.
*/ + size_t read_size = 0; __fp_buffer_mode_read(fp); - size_t size_read = 0; - while(size_read < size) { - /* Take what's available in the buffer (may be 0, but only - during the first iteration) */ - size_t size_frag = fp->bufread - fp->bufpos; - if(size_frag > size - size_read) - size_frag = size - size_read; - memcpy(data + size_read, fp->buf + fp->bufpos, size_frag); - size_read += size_frag; - fp->bufpos += size_frag; + while(read_size < request_size) { + int remaining = request_size - read_size; + int chunk = __fp_buffered_read(fp, data+read_size, remaining); - if(fp->bufpos >= fp->bufread) { - fp->bufread = 0; - fp->bufpos = 0; + /* Stream is not/no longer buffered, finish unbuffered */ + if(chunk < 0) { + ssize_t rc = __fp_read(fp, data+read_size, remaining); + return read_size + (rc == EOF ? 0 : rc); } - if(size_read >= size) + + read_size += chunk; + if(read_size >= request_size) break; - /* Get more data */ - ssize_t rc = __fp_read(fp, fp->buf, fp->bufsize); - if(rc <= 0) /* EOF or error */ - break; - fp->bufread = rc; + /* Get more data from the file descriptor into the buffer */ + if(fp->buf) { + ssize_t rc = __fp_read(fp, fp->buf, fp->bufsize); + if(rc <= 0) /* EOF or error */ + break; + fp->bufread = rc; + } } - return size_read; + return read_size; } diff --git a/src/libc/stdio/fwrite.c b/src/libc/stdio/fwrite.c index fd9f33b..f146604 100644 --- a/src/libc/stdio/fwrite.c +++ b/src/libc/stdio/fwrite.c @@ -15,6 +15,11 @@ size_t fwrite(void const *data, size_t membsize, size_t nmemb, FILE *fp) return 0; } + if(fp->append && fseek(fp, 0, SEEK_END) == EOF) { + fp->error = 1; + return 0; + } + if(!fp->buf) { ssize_t rc = __fp_write(fp, data, size); return (rc == EOF) ? 
0 : rc; diff --git a/src/libc/stdio/setvbuf.c b/src/libc/stdio/setvbuf.c index 7accec8..dcdf300 100644 --- a/src/libc/stdio/setvbuf.c +++ b/src/libc/stdio/setvbuf.c @@ -1,36 +1,23 @@ #include #include #include +#include "fileutil.h" int setvbuf(FILE * restrict fp, char * restrict buf, int mode, size_t size) { - if(fp->bufmode != _IONBF) { + if(fp->bufmode != _IONBF || fp->buf) { fflush(fp); - if(fp->bufowned) - free(fp->buf); } - fp->buf = NULL; - fp->bufowned = false; + __fp_remove_buffer(fp); fp->bufmode = _IONBF; - fp->bufsize = 0; if(mode == _IONBF) return 0; - if(buf) { - fp->buf = buf; - fp->bufsize = size; - fp->bufmode = mode; - } - else { - fp->buf = malloc(size); - if(!fp->buf) - return -1; - fp->bufsize = size; - fp->bufowned = true; - fp->bufmode = mode; - } + if(!__fp_set_buffer(fp, buf, size)) + return -1; + fp->bufmode = mode; return 0; } diff --git a/src/libc/stdio/ungetc.c b/src/libc/stdio/ungetc.c new file mode 100644 index 0000000..fda5974 --- /dev/null +++ b/src/libc/stdio/ungetc.c @@ -0,0 +1,53 @@ +#include +#include +#include +#include "fileutil.h" + +int ungetc(int c, FILE *fp) +{ + if(c == EOF) + return c; + if(!fp->readable) { + errno = EINVAL; + return EOF; + } + + /* If there is no buffer, create a temporary one */ + if(!fp->buf) + __fp_set_buffer(fp, NULL, 16); + + __fp_buffer_mode_read(fp); + + /* If there is a spot available in the buffer, use it */ + if(fp->bufpos > 0) { + fp->buf[--fp->bufpos] = c; + fp->bufungetc++; + fp->eof = 0; + return c; + } + + /* Otherwise, make some room. If the buffer is full of ungetc() + characters then we can't preserve the semantics, so we fail. */ + if(fp->bufungetc >= (int)fp->bufsize) + return EOF; + + /* If the buffer is full, discard read data that isn't ungetc()'d. 
*/ + if(fp->bufread >= fp->bufsize) { + int discarded = fp->bufread - fp->bufungetc; + if(fseek(fp, -discarded, SEEK_CUR)) + return EOF; + fp->bufread -= discarded; + } + + /* Then move whichever type of read data remains to the very end of the + buffer, so we have as much space as possible for more ungetc(). */ + memmove(fp->buf + fp->bufsize - fp->bufread, fp->buf, fp->bufread); + fp->bufpos = fp->bufsize - fp->bufread; + fp->bufread = fp->bufsize; + + /* Because we've made space, conclude like the first case */ + fp->buf[--fp->bufpos] = c; + fp->bufungetc++; + fp->eof = 0; + return c; +}