From f5cca84ae8b504e943b3b877e9f3811488166379 Mon Sep 17 00:00:00 2001
From: Lephenixnoir <sebastien.michelland@protonmail.com>
Date: Fri, 21 May 2021 23:52:54 +0200
Subject: [PATCH] stdlib: add and test strtod, strtof and atof (DONE)

This uses a generic function strto_fp similar to strto_int that is used
for strtol and its derivatives.
---
 CMakeLists.txt             |   4 +
 STATUS                     |   5 +-
 include/stdlib.h           |   3 +
 src/libc/stdlib/atof.c     |   6 +
 src/libc/stdlib/stdlib_p.h |  13 +++
 src/libc/stdlib/strto_fp.c | 217 +++++++++++++++++++++++++++++++++++++
 src/libc/stdlib/strtod.c   |  10 ++
 src/libc/stdlib/strtof.c   |  10 ++
 src/libc/stdlib/strtold.c  | 193 +--------------------------------
 9 files changed, 270 insertions(+), 191 deletions(-)
 create mode 100644 src/libc/stdlib/atof.c
 create mode 100644 src/libc/stdlib/strto_fp.c
 create mode 100644 src/libc/stdlib/strtod.c
 create mode 100644 src/libc/stdlib/strtof.c

diff --git a/CMakeLists.txt b/CMakeLists.txt
index fd97016..b7ad657 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -109,6 +109,7 @@ set(SOURCES
   src/libc/stdio/printf.c
   # stdlib
   src/libc/stdlib/abs.c
+  src/libc/stdlib/atof.c
   src/libc/stdlib/atoi.c
   src/libc/stdlib/atol.c
   src/libc/stdlib/atoll.c
@@ -119,7 +120,10 @@ set(SOURCES
   src/libc/stdlib/llabs.c
   src/libc/stdlib/lldiv.c
   src/libc/stdlib/reallocarray.c
+  src/libc/stdlib/strto_fp.c
   src/libc/stdlib/strto_int.c
+  src/libc/stdlib/strtod.c
+  src/libc/stdlib/strtof.c
   src/libc/stdlib/strtol.c
   src/libc/stdlib/strtold.c
   src/libc/stdlib/strtoll.c
diff --git a/STATUS b/STATUS
index a59c700..30945cc 100644
--- a/STATUS
+++ b/STATUS
@@ -87,10 +87,9 @@ DONE:       Function/symbol/macro is defined, builds, links, and is tested
 !    7.19.10 Error-handling functions: TODO
 
 7.20 <stdlib.h>
-!    7.20.1.1  atof: TODO
+     7.20.1.1  atof: DONE
      7.20.1.2  atoi, atol, atoll: DONE
-!    7.20.1.3  strtod, strtof: TODO
-               strtold: DONE
+     7.20.1.3  strtod, strtof, strtold: DONE
      7.20.1.4  strtol, strtoul, strtoll, strtoull: DONE
 !    7.20.2  Pseudo-random sequence generation functions: TODO
 !    7.20.3  Memory management functions: TODO (check existing code first)
diff --git a/include/stdlib.h b/include/stdlib.h
index 63e8d91..37ad1c8 100644
--- a/include/stdlib.h
+++ b/include/stdlib.h
@@ -76,6 +76,9 @@ extern long long int atoll(char const *__ptr);
 
 /* Numeric conversion functions. */
 
+/* Parse a double from a string, as strtod(__ptr, NULL) would. */
+extern double atof(char const *__ptr);
+
 /* Parse a long int from a string. */
 extern long int strtol(
 	char const * restrict __ptr,
diff --git a/src/libc/stdlib/atof.c b/src/libc/stdlib/atof.c
new file mode 100644
index 0000000..23917fc
--- /dev/null
+++ b/src/libc/stdlib/atof.c
@@ -0,0 +1,6 @@
+#include <stdlib.h>
+
+double atof(char const *ptr)
+{
+	return strtod(ptr, NULL);	/* no end pointer is requested */
+}
diff --git a/src/libc/stdlib/stdlib_p.h b/src/libc/stdlib/stdlib_p.h
index f4a8288..8d28f12 100644
--- a/src/libc/stdlib/stdlib_p.h
+++ b/src/libc/stdlib/stdlib_p.h
@@ -28,4 +28,17 @@ int strto_int(
 	long long *__outll,
 	bool __use_unsigned);
 
+/* Parse a floating-point value from a string. This is the base function for
+   strtod, strtof, and strtold.
+
+   This function is similar to strto_int(). It returns the error code to set in
+   errno (0 if there is none), and stores the result through each of out, outf
+   and outl that is not NULL. */
+int strto_fp(
+   char const * restrict __ptr,
+   char ** restrict __endptr,
+   double *out,
+   float *outf,
+   long double *outl);
+
 #endif /*__STDLIB_P_H__*/
diff --git a/src/libc/stdlib/strto_fp.c b/src/libc/stdlib/strto_fp.c
new file mode 100644
index 0000000..b5522bb
--- /dev/null
+++ b/src/libc/stdlib/strto_fp.c
@@ -0,0 +1,217 @@
+#include <stdlib.h>
+#include <stdbool.h>
+
+#include <float.h>
+#include <fenv.h>
+#include <math.h>
+
+#include <string.h>
+#include <errno.h>
+#include <ctype.h>
+
+/*
+** In the following conversions, the significant digits are represented in an
+** integer and multiplied at the last moment by a suitable power of 10 (decimal
+** representation) or 2 (hexadecimal representation). An integer of a suitable
+** size needs to be used; that size is the size of the long double type.
+**
+** TODO: vhex-x86: Using 128-bit long double is untested!
+*/
+#if __SIZEOF_LONG_DOUBLE__ == 8
+# define SIGNIFICAND_TYPE uint64_t
+# define SIGNIFICAND_DIGITS 17 /* 17 decimal digits always fit in 64 bits */
+#elif __SIZEOF_LONG_DOUBLE__ <= 16
+# define SIGNIFICAND_TYPE unsigned __int128
+# define SIGNIFICAND_DIGITS 38 /* 38 decimal digits always fit in 128 bits */
+#else
+# error long double larger than 128 bits is not supported
+#endif
+
+/* Basically strncasecmp; chars go through unsigned char for <ctype.h>. */
+static int ncasecmp(char const *left, char const *right, size_t n)
+{
+	for(size_t i = 0; i < n; i++) {
+		unsigned char l = left[i], r = right[i];
+		if(tolower(l) != tolower(r)) return tolower(l) - tolower(r);
+	}
+	return 0;
+}
+
+/*
+** Parse digits and exponent into integers, in decimal or hexadecimal notation.
+**
+** -> In decimal notation, we keep up to SIGNIFICAND_DIGITS (17 or 38) digits,
+**    which is enough to fill the mantissa of a long double, and later multiply
+**    the digits by a power of 10. The main approximation is the power of 10.
+**
+** -> In hexadecimal notation, we keep LDBL_MANT_DIG / 4 digits, then later
+**    multiply by a power of 2. Leading zeros never count towards either limit.
+*/
+static void parse_digits(char const * restrict *ptr0, bool *valid,
+	SIGNIFICAND_TYPE *digits, long *exponent, bool hexadecimal)
+{
+	char const *ptr = *ptr0;
+	bool dot_found = false, any_digit = false;
+	int digits_found = 0;
+
+	*digits = 0;
+	*exponent = 0;
+
+	int max_digits = hexadecimal ? LDBL_MANT_DIG / 4 : SIGNIFICAND_DIGITS;
+
+	/* TODO: locale: use a locale-aware decimal separator */
+	int dot_character = '.';
+	int exp_character = (hexadecimal ? 'p' : 'e');
+
+	for(; isdigit((unsigned char)*ptr) || *ptr == dot_character
+		|| (hexadecimal && isxdigit((unsigned char)*ptr)); ptr++) {
+
+		/* Allow only one dot in the string, stop at the second one */
+		if(*ptr == dot_character && dot_found) break;
+
+		if(*ptr == dot_character) {
+			dot_found = true;
+			continue;
+		}
+
+		/* Store significant digits only until max_digits */
+		if(digits_found < max_digits) {
+			if(hexadecimal) {
+				int v = *ptr - '0';
+				if(!isdigit(*ptr)) v = tolower(*ptr)-'a'+10;
+				*digits = (*digits << 4) + v;
+			}
+			else {
+				*digits = (*digits * 10) + (*ptr - '0');
+			}
+		}
+		else (*exponent)++;
+
+		if(dot_found) (*exponent)--;
+
+		/* But also round at the first discarded one (half the base) */
+		if(digits_found == max_digits
+			&& *ptr >= (hexadecimal ? '8' : '5')) (*digits)++;
+
+		any_digit = true;
+		if(*digits != 0) digits_found++; /* leading zeros are free */
+	}
+
+	/* Require at least one digit to be present; if not, the whole string
+	   is considered invalid */
+	if(!any_digit) {
+		*valid = false;
+		return;
+	}
+
+	/* In hexadecimal, each character is worth 4 bits of exponent */
+	if(hexadecimal) (*exponent) *= 4;
+
+	/* Parse exponent; a bare strtol() would also accept leading spaces */
+	if(tolower((unsigned char)*ptr) == exp_character) {
+		char const *e_str = ptr + 1;
+		bool e_signed = (*e_str == '+' || *e_str == '-');
+		/* If an integer cannot be parsed, ignore the 'e...' part */
+		if(isdigit((unsigned char)e_str[e_signed])) {
+			char *end;
+			*exponent += strtol(e_str, &end, 10);
+			ptr = end;
+		}
+	}
+
+	*ptr0 = ptr;
+	*valid = true;
+}
+
+/* Common implementation of strtod(), strtof() and strtold(): parses ptr and
+   stores the value through each non-NULL pointer among out, outf and outl.
+   Returns the errno code to set (0 or ERANGE); *endptr is left at the start
+   of the input if no number could be parsed. */
+int strto_fp(char const * restrict ptr, char ** restrict endptr, double *out,
+	float *outf, long double *outl)
+{
+	/* Save the value of ptr in endptr, in case format is invalid */
+	if(endptr) *endptr = (char *)ptr;
+
+	/* Skip initial whitespace */
+	while(isspace((unsigned char)*ptr)) ptr++;
+
+	/* Read optional sign */
+	bool negative = (*ptr == '-');
+	if(negative || *ptr == '+') ptr++;
+
+	int errno_value = 0;
+	bool valid = true;
+
+	/* Result variable */
+	if(out)  *out = 0.0;
+	if(outf) *outf = 0.0f;
+	if(outl) *outl = 0.0l;
+
+	/* NaN possibly with an argument */
+	if(!ncasecmp(ptr, "nan", 3)) {
+		char const *arg = "";
+		ptr += 3;
+		/* Take "(n-char-sequence)" only if well-formed and closed */
+		size_t len = (ptr[0] == '(');
+		while(len && (isalnum((unsigned char)ptr[len])
+			|| ptr[len] == '_')) len++;
+		if(len && ptr[len] == ')') {
+			arg = ptr + 1;
+			ptr += len + 1;
+		}
+
+		if(out)  *out  = __builtin_nan(arg);
+		if(outf) *outf = __builtin_nanf(arg);
+		if(outl) *outl = __builtin_nanl(arg);
+	}
+	/* Infinity, spelled "inf" or "infinity" */
+	else if(!ncasecmp(ptr, "inf", 3)) {
+		if(out)  *out  = __builtin_inf();
+		if(outf) *outf = __builtin_inff();
+		if(outl) *outl = __builtin_infl();
+		ptr += ncasecmp(ptr, "infinity", 8) ? 3 : 8;
+	}
+	else {
+		SIGNIFICAND_TYPE digits = 0;
+		long e = 0;
+
+		/* "0x" only starts a hex float if a hex digit follows */
+		if(ptr[0] == '0' && tolower((unsigned char)ptr[1]) == 'x'
+			&& isxdigit((unsigned char)ptr[2 + (ptr[2] == '.')])) {
+			ptr += 2;
+			parse_digits(&ptr, &valid, &digits, &e, true);
+			if(out)  *out  = (double)digits * exp2(e);
+			if(outf) *outf = (float)digits * exp2f(e);
+			if(outl) *outl = (long double)digits * exp2l(e);
+		}
+		else {
+			parse_digits(&ptr, &valid, &digits, &e, false);
+			if(out)  *out  = (double)digits * pow(10, e);
+			if(outf) *outf = (float)digits * powf(10, e);
+			if(outl) *outl = (long double)digits * powl(10, e);
+		}
+
+		/*
+		** Detect overflow, somewhat. Implementation is not required to
+		** set errno on underflow, which makes things much easier for
+		** us as underflow gives 0 (7.20.1.3§10).
+		*/
+		if((out && *out == HUGE_VAL)
+			|| (outf && *outf == HUGE_VALF)
+			|| (outl && *outl == HUGE_VALL)) {
+			errno_value = ERANGE;
+		}
+	}
+
+	/* Apply sign; this method is allowed by 7.20.1.3§4.249 */
+	if(negative) {
+		if(out)  *out  = -(*out);
+		if(outf) *outf = -(*outf);
+		if(outl) *outl = -(*outl);
+	}
+
+	/* Save the result pointer */
+	if(endptr && valid) *endptr = (char *)ptr;
+
+	return errno_value;
+}
diff --git a/src/libc/stdlib/strtod.c b/src/libc/stdlib/strtod.c
new file mode 100644
index 0000000..80dede8
--- /dev/null
+++ b/src/libc/stdlib/strtod.c
@@ -0,0 +1,10 @@
+#include "stdlib_p.h"
+#include <errno.h>
+
+double strtod(char const * restrict ptr, char ** restrict endptr)
+{
+	double result = 0.0;
+	int errno_value = strto_fp(ptr, endptr, &result, NULL, NULL);
+	if(errno_value) errno = errno_value;	/* else errno is left as is */
+	return result;
+}
diff --git a/src/libc/stdlib/strtof.c b/src/libc/stdlib/strtof.c
new file mode 100644
index 0000000..476005d
--- /dev/null
+++ b/src/libc/stdlib/strtof.c
@@ -0,0 +1,10 @@
+#include "stdlib_p.h"
+#include <errno.h>
+
+float strtof(char const * restrict ptr, char ** restrict endptr)
+{
+	float result = 0.0f;
+	int errno_value = strto_fp(ptr, endptr, NULL, &result, NULL);
+	if(errno_value) errno = errno_value;	/* else errno is left as is */
+	return result;
+}
diff --git a/src/libc/stdlib/strtold.c b/src/libc/stdlib/strtold.c
index e2a4aa5..bcb137c 100644
--- a/src/libc/stdlib/strtold.c
+++ b/src/libc/stdlib/strtold.c
@@ -1,193 +1,10 @@
-#include <stdlib.h>
-#include <stdbool.h>
-
-#include <float.h>
-#include <fenv.h>
-#include <math.h>
-
-#include <string.h>
+#include "stdlib_p.h"
 #include <errno.h>
-#include <ctype.h>
-
-/*
-** In the following conversions, the significant digits are represented in an
-** integer and multiplied at the last moment by a suitable power of 10 (decimal
-** representation) or 2 (hexadecimal representation). An integer of a suitable
-** size needs to be used; that size is the size of the long double type.
-**
-** TODO: vhex-x86: Using 128-bit long double is untested!
-*/
-#if __SIZEOF_LONG_DOUBLE__ == 8
-# define SIGNIFICAND_TYPE uint64_t
-# define SIGNIFICAND_DIGITS 17
-#elif __SIZEOF_LONG_DOUBLE__ <= 16
-# define SIGNIFICAND_TYPE unsigned __int128
-# define SIGNIFICAND_DIGITS 38
-#else
-# error long double larger than 128 bits is not supported
-#endif
-
-/* Basically strncasecmp. */
-static int ncasecmp(char const *left, char const *right, size_t n)
-{
-	for(size_t i = 0; i < n; i++) {
-		int diff = tolower(left[i]) - tolower(right[i]);
-		if(diff) return diff;
-	}
-	return 0;
-}
-
-/*
-** Parse digits and exponent into integers, in decimal or hexadecimal notation.
-**
-** -> In decimal notation; we read up to 19 (64-bit) or 38 (128-bit) digits,
-*     which is enough to fill the mantissa of a long double, and later multiply
-**    the digits by a power of 10. The main approximation is the power of 10.
-**
-** -> In hexadecimal notation, we read as many bits as the mantissa of a long
-**    double, then later multiply by a power of 2. There are no approximations.
-*/
-static void parse_digits(char const * restrict *ptr0, bool *valid,
-	SIGNIFICAND_TYPE *digits, long *exponent, bool hexadecimal)
-{
-	char const *ptr = *ptr0;
-	bool dot_found = false;
-	int digits_found = 0;
-
-	*digits = 0;
-	*exponent = 0;
-
-	int max_digits = hexadecimal ? LDBL_MANT_DIG / 4 : SIGNIFICAND_DIGITS;
-
-	/* TODO: locale: use a locale-aware decimal separator */
-	int dot_character = '.';
-	int exp_character = (hexadecimal ? 'p' : 'e');
-
-	for(int i = 0; isdigit(*ptr) || (hexadecimal && isxdigit(*ptr))
-		|| *ptr == dot_character; i++, ptr++) {
-
-		/* Allow only one dot in the string, stop at the second one */
-		if(*ptr == dot_character && dot_found) break;
-
-		if(*ptr == dot_character) {
-			dot_found = true;
-			continue;
-		}
-
-		/* Count digits only until SIGNIFICAND_DIGITS */
-		if(digits_found < max_digits) {
-			if(hexadecimal) {
-				int v = *ptr - '0';
-				if(!isdigit(*ptr)) v = tolower(*ptr)-'a'+10;
-				*digits = (*digits << 4) + v;
-			}
-			else {
-				*digits = (*digits * 10) + (*ptr - '0');
-			}
-		}
-		else (*exponent)++;
-
-		if(dot_found) (*exponent)--;
-
-		/* But also round at the first discarded one */
-		if(digits_found == max_digits && *ptr >= '5')
-			(*digits)++;
-
-		digits_found++;
-	}
-
-	/* Require at least one digit to be present; if not, the whole string
-	   is considered invalid */
-	if(!digits_found) {
-		*valid = false;
-		return;
-	}
-
-	/* In hexadecimal, each character is worth 4 bits of exponent */
-	if(hexadecimal) (*exponent) *= 4;
-
-	/* Parse exponent */
-	if(tolower(*ptr) == exp_character) {
-		char *end;
-		long e = strtol(ptr + 1, &end, 10);
-
-		/* If an integer cannot be parsed, ignore the 'e...' part */
-		if(end != ptr + 1) {
-			ptr = end;
-			*exponent += e;
-		}
-	}
-
-	*ptr0 = ptr;
-	*valid = true;
-}
 
 long double strtold(char const * restrict ptr, char ** restrict endptr)
 {
-	/* Save the value of ptr in endptr, in case format is invalid */
-	if(endptr) *endptr = (char *)ptr;
-
-	/* Skip initial whitespace */
-	while(isspace(*ptr)) ptr++;
-
-	/* Read optional sign */
-	bool negative = false;
-	if(*ptr == '-') negative = true;
-	if(*ptr == '-' || *ptr == '+') ptr++;
-
-	/* Result variable */
-	bool valid = true;
-	long double x = 0.0;
-
-	/* NaN possibly with an argument */
-	if(!ncasecmp(ptr, "nan", 3)) {
-		if(ptr[3] == '(') {
-			x = __builtin_nanl(ptr+4);
-			while(*ptr != ')') ptr++;
-		}
-		else {
-			x = __builtin_nanl("");
-			ptr += 3;
-		}
-	}
-	/* Infinity */
-	else if(!ncasecmp(ptr, "infinity", 8)) {
-		x = __builtin_infl();
-		ptr += 8;
-	}
-	else if(!ncasecmp(ptr, "inf", 3)) {
-		x = __builtin_infl();
-		ptr += 3;
-	}
-	else {
-		SIGNIFICAND_TYPE digits = 0;
-		long exponent = 0;
-
-		if(ptr[0] == '0' && tolower(ptr[1]) == 'x') {
-			ptr += 2;
-			parse_digits(&ptr, &valid, &digits, &exponent, true);
-			x = (long double)digits * exp2(exponent);
-		}
-		else {
-			parse_digits(&ptr, &valid, &digits, &exponent, false);
-			x = (long double)digits * powl(10, exponent);
-		}
-
-		/*
-		** Detect overflow, somewhat. Implementation is not required to
-		** set errno on underflow, which makes things much easier for
-		** us as underflow gives 0 (7.20.1.3§10).
-		*/
-		if(x == HUGE_VALL) {
-			errno = ERANGE;
-		}
-	}
-
-	/* Apply sign; this method is allowed by 7.20.1.3§4.249 */
-	if(negative) x = -x;
-
-	/* Save the result pointer */
-	if(endptr && valid) *endptr = (char *)ptr;
-
-	return x;
+	long double result = 0.0L;
+	int errno_value = strto_fp(ptr, endptr, NULL, NULL, &result);
+	if(errno_value) errno = errno_value;	/* else errno is left as is */
+	return result;
 }