stdio: improve printf extension API and type size support

2021-06-08 10:32:08 +02:00 · 2021-06-08 10:32:08 +02:00 · dcaf203f46
parent ed8134970b
commit dcaf203f46
8 changed files with 265 additions and 198 deletions
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@ -120,9 +120,11 @@ set(SOURCES
  src/libc/stdio/dprintf.c
  src/libc/stdio/fprintf.c
  src/libc/stdio/printf.c
+  src/libc/stdio/printf/format_fixed.c
  src/libc/stdio/printf/format_fp.c
  src/libc/stdio/printf/format_usual.c
  src/libc/stdio/printf/print.c
+  src/libc/stdio/printf/util.c
  src/libc/stdio/putc.c
  src/libc/stdio/puts.c
  src/libc/stdio/snprintf.c
--- a/README.md
+++ b/README.md
@ -113,10 +113,10 @@ license. Currently, this includes:

 * A stripped-down version of the [TinyMT random number generator](http://www.math.sci.hiroshima-u.ac.jp/~m-mat/MT/TINYMT/index.html)
  ([GitHub repository](https://github.com/MersenneTwister-Lab/TinyMT)) by
-  Mutsuo Saito and Makoto Matsumoto. See `src/3rdparty/tinymt32/LICENSE.txt`.
+  Mutsuo Saito and Makoto Matsumoto. See `3rdparty/tinymt32/LICENSE.txt`.
 * A stripped-down version of the [Grisu2b floating-point representation
  algorithm](https://www.cs.tufts.edu/~nr/cs257/archive/florian-loitsch/printf.pdf)
-  with α=-59 and γ=-56, by Florian Loitsch. See `src/3rdparty/grisu2b_59_56/README`
+  with α=-59 and γ=-56, by Florian Loitsch. See `3rdparty/grisu2b_59_56/README`
  for details, and [the original code here](https://drive.google.com/open?id=0BwvYOx00EwKmejFIMjRORTFLcTA).

 ---
--- a/include/fxlibc/printf.h
+++ b/include/fxlibc/printf.h
@ -1,12 +1,46 @@
 #ifndef __FXLIBC_PRINTF_H__
 #define __FXLIBC_PRINTF_H__

-/* This headers covers fxlibc-specific extensions to the *printf API. */
+/*
+** This header covers fxlibc-specific extensions to the *printf API. fxlibc
+** provides an interface to define new converters, and a number of helpers to
+** parse options, lay out strings, and generate output.
+*/

 #include <bits/types/FILE.h>
+#include <stddef.h>
 #include <stdint.h>
 #include <stdarg.h>

+/* Predefined formatters. */
+
+/*
+** Enable floating-point formatters %e, %E, %f, %F, %g, and %G. The formats are
+** disabled by default because the representation algorithm has tables of
+** powers of 10 and quite a bit of code, resulting in 10-15 kiB of additional
+** size in every binary.
+**
+** Calling this function pulls object files with floating-point representation
+** code from the fxlibc library and registers formatters for all 6
+** floating-point formats.
+*/
+extern void __printf_enable_fp(void);
+
+/*
+** Enable the fixed-point formatter %D. This format is a nonstandard extension
+** for decimal fixed-point. It works like %d, except that the precision field
+** represents a number of decimals.
+**
+** For instance, "%.3D" will print value 12345 as "12.345". This is simply
+** integer display with a decimal dot inserted, and useful for applications
+** that want some degree of decimal freedom without full-blown floating-point.
+*/
+extern void __printf_enable_fixed(void);
+
+
+
+/* Base function for all formatted printing functions. */
+
 /*
 ** Output specification; only one of the targets may be non-trivial.
 ** -> str != NULL
@ -36,21 +70,10 @@ extern int __printf(
 	char const * restrict __format,
 	va_list *__args);

-/*
-** Enable floating-point formatters %e, %E, %f, %F, %g, and %G. The formats are
-** disabled by default because the representation algorithm has tables of
-** powers of 10 and quite a bit of code, resulting in 10-15 kiB of additional
-** size in every binary.
-**
-** Calling this functions pulls object files with floating-point representation
-** code from the fxlibc library and registers formatters for all 6
-** floating-point formats.
-*/
-extern void __printf_enable_fp(void);
+

 /* Format extension API. */

-/* Standard format. */
 struct __printf_format {
 	/* Minimal length of formatted string (padding can be added). */
 	uint16_t length;
@ -138,10 +161,12 @@ typedef void __printf_formatter_t(
 */
 void __printf_register(int __spec, __printf_formatter_t *__format);

+
+
 /* Functions for formatters to output characters. */

 /* Flush the buffer. (Don't call this directly.) */
-void __printf_flush(struct __printf_output *__out);
+extern void __printf_flush(struct __printf_output *__out);

 /* Output a single character in the buffer (and possibly flush it). */
 static inline void __printf_out(struct __printf_output *__out,
@ -165,6 +190,8 @@ static inline void __printf_outstr(struct __printf_output *__out,
 	for(int i = 0; i < __n; i++) __printf_out(__out, __str[i]);
 }

+
+
 /* Helper functions for formatters. */

 /*
@ -223,4 +250,22 @@ extern void __printf_compute_geometry(
 	struct __printf_format *__opt,
 	struct __printf_geometry *__geometry);

+/* Load a signed integer of __size bytes (normally __opt->size). */
+extern int64_t __printf_load_i(int __size, va_list *__args);
+
+/* Load an unsigned integer of __size bytes (normally __opt->size). */
+extern uint64_t __printf_load_u(int __size, va_list *__args);
+
+/*
+** Generate the digits of __n in base 10, starting from the least significant
+** digit. Returns number of digits. No NUL terminator is added.
+*/
+extern int __printf_digits10(char *__str, uint64_t __n);
+
+/* Same in base 16; letters are uppercase if __uppercase is nonzero. */
+extern int __printf_digits16(char *__str, int __uppercase, uint64_t __n);
+
+/* Same in base 8. */
+extern int __printf_digits8(char *__str, uint64_t __n);
+
 #endif /* __FXLIBC_PRINTF_H__ */
--- a/include/stdio.h
+++ b/include/stdio.h
@ -22,22 +22,22 @@ extern FILE *stderr;
 ** - Character, string and pointer formats (%c, %s, %p)
 ** - Character count and strerror() shorthand formats (%n, %m)
 ** - Format options (0, #, -, (space), length, precision)
-** - Parameter length (hh, h, l, ll, z)
+** - Parameter length (hh, h, l, ll, L, z, j, t) (L: if long double is 64-bit)
 ** - Limiting the size of the output and still returning the whole length
 ** - If __printf_enable_fp() from <fxlibc/printf.h> is called: floating-point
 **   formats (%e, %E, %f, %F, %g, %G) (disabled by default to save space)
 **
-** They do not support:
+** They do not (yet?) support:
 ** - Hexadecimal floating-point (%a, %A)
-** - Some size modifiers: long double (L), intmax_t (j), ptrdiff_t (t), and the
-**   nonstandard synonyms q (ll) and Z (z)
+** - Printing long double values when long double is more than 64-bit
 ** - Dynamic length field (*)
+** - Parameter reordering (m$)
 ** - Thousands separators (') and locale-aware digits (I)
-** - Nonstandard synonyms %C (%lc) and %S (%ls)
+** - Nonstandard/old synonyms %C (%lc), %S (%ls), q (ll), and Z (z)
 **
 ** There are extensions, namely to allow for custom conversions to be added.
 ** One custom conversion can be enabled with __printf_enable_fixed() from
-** <fxlibc/printf.h>: a decimal fixed-point format %k which is like %d but
+** <fxlibc/printf.h>: a decimal fixed-point format %D which is like %d but
 ** with a decimal point. See <fxlibc/printf.h> for details.
 */

--- a/src/libc/stdio/printf/format_fixed.c
+++ b/src/libc/stdio/printf/format_fixed.c
@ -0,0 +1,48 @@
+#include <fxlibc/printf.h>
+
+/* Fixed-point decimal formatter (extension: %D)
+    (0)   Pad with zeros, rather than spaces, on the left
+    (-)   Move spaces to the right (overrides '0')
+    ( )   Force a blank sign before nonnegative numbers
+    (+)   Force a sign before every number (overrides ' ')
+   {len}  Minimal number of characters to print
+   {pre}  Number of digits after the decimal dot
+
+   At least one digit is always printed before the dot, so a value with fewer
+   digits than {pre} gets leading zeros ("%.3D" prints 12 as "0.012"). When
+   {pre} is absent or zero, no dot is printed and the value is shown as a
+   plain integer. */
+void __printf_format_D(
+	struct __printf_output * restrict out,
+	struct __printf_format * restrict opt,
+	va_list * restrict args)
+{
+	int64_t n = __printf_load_i(opt->size, args);
+
+	/* Compute the sign and the absolute value */
+	struct __printf_geometry g = {
+		.sign   = (n < 0) ? '-' : '+',
+		.prefix = 0,
+		.style  = _PRINTF_NUMERIC,
+	};
+	/* Negate in unsigned arithmetic; -n would overflow for INT64_MIN */
+	uint64_t abs_n = (n < 0) ? -(uint64_t)n : (uint64_t)n;
+
+	/* Get the digit string (least significant digit first) */
+	char digits[32];
+	int pure = __printf_digits10(digits, abs_n);
+
+	/* Number of decimals; a missing precision (negative) means none */
+	int decimals = (opt->precision > 0) ? opt->precision : 0;
+	/* Digits to print: all decimals plus at least one integer digit.
+	   Positions beyond the generated digits are printed as '0'. */
+	int total = (pure > decimals) ? pure : decimals + 1;
+
+	/* The dot only takes up space when it is actually printed */
+	g.content = total + (decimals > 0);
+	__printf_compute_geometry(opt, &g);
+
+	/* Print the result */
+	__printf_outn(out, ' ', g.left_spaces);
+	if(g.sign) __printf_out(out, g.sign);
+	__printf_outn(out, '0', g.zeros);
+
+	for(int i = total - 1; i >= 0; i--)
+	{
+		if(i == decimals - 1) __printf_out(out, '.');
+		__printf_out(out, (i < pure) ? digits[i] : '0');
+	}
+
+	__printf_outn(out, ' ', g.right_spaces);
+}
+
+/* Enable the %D conversion by registering __printf_format_D for the 'D'
+   specifier through __printf_register(). */
+void __printf_enable_fixed(void)
+{
+	__printf_register('D', __printf_format_D);
+}
--- a/src/libc/stdio/printf/format_usual.c
+++ b/src/libc/stdio/printf/format_usual.c
@ -3,78 +3,6 @@
 #include <errno.h>
 #include <fxlibc/printf.h>

-//---
-// Digit generation
-//---
-
-/* digits_10(): Generate digits in base 10
-   Fills up the provided digit string from least significant to most
-   significant digit, not adding zeros except if argument is zero. Returns the
-   number of digits. No NUL terminator is added. */
-static int digits_10(char *str, uint64_t n)
-{
-	int digits = 0;
-	while(n || !digits)
-	{
-		/* TODO: Use fast division in __printf's digits_10() */
-		str[digits++] = (n % 10) + '0';
-		n /= 10;
-	}
-	return digits;
-}
-
-/* digits_16(): Generate digits in base 16 */
-static int digits_16(char *str, int uppercase, uint64_t n)
-{
-	char *hex = uppercase ? "0123456789ABCDEF" : "0123456789abcdef";
-	int digits = 0;
-
-	while(n || !digits)
-	{
-		str[digits++] = hex[n & 0xf];
-		n >>= 4;
-	}
-	return digits;
-}
-
-/* digits_8(): Generate digits in base 8 */
-static int digits_8(char *str, uint64_t n)
-{
-	int digits = 0;
-
-	while(n || !digits)
-	{
-		str[digits++] = (n & 0x7) + '0';
-		n >>= 3;
-	}
-	return digits;
-}
-
-//---
-// Loading helpers
-//---
-
-static int64_t load_i(int size, va_list *args)
-{
-	/* All smaller types are promoted to int with sign extension, so we
-	   don't need to care about them. */
-	if(size == 3) return va_arg(*args, long);
-	if(size == 4) return va_arg(*args, long long);
-	return va_arg(*args, int);
-}
-
-static uint64_t load_u(int size, va_list *args)
-{
-	/* Again, no need to care about small types */
-	if(size == 3) return va_arg(*args, unsigned long);
-	if(size == 4) return va_arg(*args, unsigned long long);
-	return va_arg(*args, unsigned int);
-}
-
-//---
-// Formatter functions
-//---
-
 /* Character formatter (%c)
    (-)   Move spaces to the right
   {len}  Specifies numbers of (whitespace-padded) characters to print */
@ -134,7 +62,7 @@ void __printf_format_di(
 	struct __printf_format * restrict opt,
 	va_list * restrict args)
 {
-	int64_t n = load_i(opt->size, args);
+	int64_t n = __printf_load_i(opt->size, args);

 	/* Compute the sign and the absolute value */
 	struct __printf_geometry g = {
@ -148,7 +76,7 @@ void __printf_format_di(
 	char digits[32];
 	int pure, total;

-	pure = digits_10(digits, n);
+	pure = __printf_digits10(digits, n);
 	if(opt->precision == 0 && !n) pure = 0;
 	total = (pure > opt->precision ? pure : opt->precision);
 	g.content = total;
@ -177,15 +105,15 @@ void __printf_format_ouxX(
 	struct __printf_format * restrict opt,
 	va_list * restrict args)
 {
-	uint64_t n = load_u(opt->size, args);
+	uint64_t n = __printf_load_u(opt->size, args);

 	char digits[48];
 	int pure = 0, total;
 	int specl = opt->spec | 0x20;

-	if(specl == 'u') pure = digits_10(digits, n);
-	if(specl == 'o') pure = digits_8(digits, n);
-	if(specl == 'x') pure = digits_16(digits, opt->spec == 'X', n);
+	if(specl == 'u') pure = __printf_digits10(digits, n);
+	if(specl == 'o') pure = __printf_digits8(digits, n);
+	if(specl == 'x') pure = __printf_digits16(digits, opt->spec == 'X', n);

 	if(opt->precision == 0 && !n) pure = 0;
 	total = (pure > opt->precision ? pure : opt->precision);
@ -224,7 +152,7 @@ void __printf_format_p(
 	void *p = va_arg(*args, void *);

 	char digits[] = "00000000";
-	digits_16(digits, 0, (uint32_t)p);
+	__printf_digits16(digits, 0, (uint32_t)p);

 	__printf_out(out, '0');
 	__printf_out(out, 'x');
@ -258,45 +186,3 @@ void __printf_format_n(
 	if(opt->size == 3) *(long *)p      = out->count;
 	if(opt->size == 4) *(long long *)p = out->count;
 }
-
-/* Fixed-point decimal formatter (extension: %k)
-    (0)   Pad with zeros, rather than spaces, on the left
-    (-)   Move spaces to the right (overrides '0')
-    ( )   Force a blank sign before nonnegative numbers
-    (+)   Force a sign before every number (overrides ' ')
-   {len}  Minimal number of characters to print
-   {pre}  Number of digits after the decimal dot */
-void __printf_format_k(
-	struct __printf_output * restrict out,
-	struct __printf_format * restrict opt,
-	va_list * restrict args)
-{
-	int64_t n = load_i(opt->size, args);
-
-	/* Compute the sign and the absolute value */
-	struct __printf_geometry g = {
-		.sign   = (n < 0) ? '-' : '+',
-		.prefix = 0,
-		.style  = _PRINTF_NUMERIC,
-	};
-	if(n < 0) n = -n;
-
-	/* Get the digit string */
-	char digits[32];
-
-	g.content = digits_10(digits, n) + 1;
-	__printf_compute_geometry(opt, &g);
-
-	/* Print the result */
-	__printf_outn(out, ' ', g.left_spaces);
-	if(g.sign) __printf_out(out, g.sign);
-	__printf_outn(out, '0', g.zeros);
-
-	for(int i = g.content - 2; i >= 0; i--)
-	{
-		if(i == opt->precision - 1) __printf_out(out, '.');
-		__printf_out(out, digits[i]);
-	}
-
-	__printf_outn(out, ' ', g.right_spaces);
-}
--- a/src/libc/stdio/printf/print.c
+++ b/src/libc/stdio/printf/print.c
@ -1,5 +1,7 @@
 #include <stdio.h>
 #include <ctype.h>
+#include <stdint.h>
+#include <string.h>
 #include <fxlibc/printf.h>

 /* Internal buffer, used when no buffer is specified for output */
@ -123,7 +125,7 @@ void __printf_flush(struct __printf_output *out)
 static struct __printf_format parse_fmt(char const * restrict *options_ptr)
 {
 	/* No options enabled by default, set the size to int */
-	struct __printf_format opt = { .size = 2, .precision = -1 };
+	struct __printf_format opt = { .size = sizeof(int), .precision = -1 };

 	/* This function acts as a deterministic finite automaton */
 	enum {
@ -134,11 +136,16 @@ static struct __printf_format parse_fmt(char const * restrict *options_ptr)

 	char const *options = *options_ptr;

+	/* Previous size letter (used to keep track of hh and ll) */
+	int size_letter = 0;
+
 	for(int c; (c = *options); options++)
 	{
-		int c_low = c | 0x20;
-		if(c_low >= 'a' && c_low <= 'z' && c != 'h' && c != 'l' &&
-			c != 'L') break;
+		int index = -1;
+		if(isupper(c)) index = c - 'A';
+		if(islower(c)) index = c - 'a' + 26;
+		if(index >= 0 && __printf_formatters[index] != _PRINTF_USED)
+			break;

 		if(c == '.')
 		{
@ -171,10 +178,27 @@ static struct __printf_format parse_fmt(char const * restrict *options_ptr)
 		}

 		/* Data size */
-		if(c == 'h') opt.size--;
-		if(c == 'l') opt.size++;
-		if(c == 'z') opt.size = 3;
-		if(c == 'L') {}
+		if(strchr("hlzjtL", c))
+		{
+			if(c == 'h' && size_letter == 'h')
+				opt.size = sizeof(char);
+			else if(c == 'h')
+				opt.size = sizeof(short);
+			else if(c == 'l' && size_letter == 'l')
+				opt.size = sizeof(long long);
+			else if(c == 'l')
+				opt.size = sizeof(long);
+			else if(c == 'z')
+				opt.size = sizeof(size_t);
+			else if(c == 'j')
+				opt.size = sizeof(intmax_t);
+			else if(c == 't')
+				opt.size = sizeof(ptrdiff_t);
+			else if(c == 'L')
+				opt.size = sizeof(long double);
+
+			size_letter = c;
+		}

 		if(c >= '1' && c <= '9') state = length, options--;
 	}
@ -243,50 +267,3 @@ int __printf(
 	__printf_flush(out);
 	return (int)out->count;
 }
-
-void __printf_compute_geometry(
-	struct __printf_format *opt,
-	struct __printf_geometry *g)
-{
-	int integral = (g->style == _PRINTF_INTEGER);
-	int numerical = (g->style == _PRINTF_NUMERIC) || integral;
-	int padding;
-
-	/* Sign character (no discussion required for negative values) */
-	if(numerical && g->sign == '+')
-	{
-		g->sign = 0;
-		if(opt->blank_sign) g->sign = ' ';
-		if(opt->force_sign) g->sign = '+';
-	}
-
-	g->zeros = 0;
-
-	padding = opt->length - (g->sign != 0) - g->prefix
-		- (g->content > opt->precision ? g->content : opt->precision);
-	if(padding < 0) padding = 0;
-
-	/* In integral modes, precision forces zeros */
-	if(integral && opt->precision >= 0)
-	{
-		if(opt->alignment == '0') opt->alignment = 0;
-
-		int zeros = opt->precision - g->content;
-		if(zeros > 0) g->zeros = zeros;
-	}
-
-	if(opt->alignment == '0')
-	{
-		/* Zeros are only allowed in numerical modes */
-		if(numerical) g->zeros = padding;
-		else g->left_spaces = padding;
-	}
-	else if(opt->alignment == '-')
-	{
-		g->right_spaces = padding;
-	}
-	else
-	{
-		g->left_spaces = padding;
-	}
-}
--- a/src/libc/stdio/printf/util.c
+++ b/src/libc/stdio/printf/util.c
@ -0,0 +1,109 @@
+#include <fxlibc/printf.h>
+
+/* Compute the layout (sign, zeros, left/right spaces) of a formatted value.
+   Reads the options in opt and the sign, prefix, content and style fields of
+   g; writes g->sign, g->zeros, and exactly one of g->zeros, g->left_spaces
+   or g->right_spaces with the padding. Only g->zeros is reset here; the
+   space counts that are not selected keep the value set by the caller.
+   Note that opt->alignment may be modified (see below). */
+void __printf_compute_geometry(
+	struct __printf_format *opt,
+	struct __printf_geometry *g)
+{
+	int integral = (g->style == _PRINTF_INTEGER);
+	int numerical = (g->style == _PRINTF_NUMERIC) || integral;
+	int padding;
+
+	/* Sign character (no discussion required for negative values) */
+	if(numerical && g->sign == '+')
+	{
+		/* A '+' from the caller means "nonnegative"; it is only kept
+		   if requested, with force_sign taking priority */
+		g->sign = 0;
+		if(opt->blank_sign) g->sign = ' ';
+		if(opt->force_sign) g->sign = '+';
+	}
+
+	g->zeros = 0;
+
+	/* Characters missing to reach the minimal length, after accounting
+	   for the sign, the prefix, and the larger of content and precision */
+	padding = opt->length - (g->sign != 0) - g->prefix
+		- (g->content > opt->precision ? g->content : opt->precision);
+	if(padding < 0) padding = 0;
+
+	/* In integral modes, precision forces zeros */
+	if(integral && opt->precision >= 0)
+	{
+		/* An explicit precision cancels '0' padding; this writes to
+		   opt, so it must happen before the alignment test below */
+		if(opt->alignment == '0') opt->alignment = 0;
+
+		int zeros = opt->precision - g->content;
+		if(zeros > 0) g->zeros = zeros;
+	}
+
+	if(opt->alignment == '0')
+	{
+		/* Zeros are only allowed in numerical modes */
+		if(numerical) g->zeros = padding;
+		else g->left_spaces = padding;
+	}
+	else if(opt->alignment == '-')
+	{
+		g->right_spaces = padding;
+	}
+	else
+	{
+		g->left_spaces = padding;
+	}
+}
+
+/* Fetch the next signed integer argument; size is its width in bytes. */
+int64_t __printf_load_i(int size, va_list *args)
+{
+	switch(size)
+	{
+	/* 1- and 2-byte values are fetched as int and truncated back */
+	case 1:
+		return (int8_t)va_arg(*args, int);
+	case 2:
+		return (int16_t)va_arg(*args, int);
+	case 8:
+		return va_arg(*args, long long);
+	/* Every other size is read as a plain int */
+	default:
+		return va_arg(*args, int);
+	}
+}
+
+/* Fetch the next unsigned integer argument; size is its width in bytes. */
+uint64_t __printf_load_u(int size, va_list *args)
+{
+	switch(size)
+	{
+	/* 1- and 2-byte values are fetched as unsigned int and truncated */
+	case 1:
+		return (uint8_t)va_arg(*args, unsigned int);
+	case 2:
+		return (uint16_t)va_arg(*args, unsigned int);
+	case 8:
+		return va_arg(*args, unsigned long long);
+	/* Every other size is read as a plain unsigned int */
+	default:
+		return va_arg(*args, unsigned int);
+	}
+}
+
+/* Write the base-10 digits of n into str, least significant digit first.
+   At least one digit is written (zero yields "0"); the digit count is
+   returned and no NUL terminator is stored. */
+int __printf_digits10(char *str, uint64_t n)
+{
+	int count = 0;
+
+	do
+	{
+		/* TODO: Use fast division in __printf_digits10() */
+		str[count] = '0' + (n % 10);
+		count++;
+		n /= 10;
+	}
+	while(n != 0);
+
+	return count;
+}
+
+/* Write the base-16 digits of n into str, least significant digit first,
+   using uppercase letters if uppercase is nonzero. At least one digit is
+   written; the digit count is returned and no NUL terminator is stored. */
+int __printf_digits16(char *str, int uppercase, uint64_t n)
+{
+	char const *alphabet = "0123456789abcdef";
+	if(uppercase) alphabet = "0123456789ABCDEF";
+
+	int count = 0;
+
+	do
+	{
+		str[count] = alphabet[n % 16];
+		count++;
+		n /= 16;
+	}
+	while(n != 0);
+
+	return count;
+}
+
+/* Write the base-8 digits of n into str, least significant digit first.
+   At least one digit is written; the digit count is returned and no NUL
+   terminator is stored. */
+int __printf_digits8(char *str, uint64_t n)
+{
+	int count = 0;
+
+	do
+	{
+		str[count] = '0' + (n % 8);
+		count++;
+		n /= 8;
+	}
+	while(n != 0);
+
+	return count;
+}