fxlibc/src/libc/stdlib/strto_fp.c

#include <stdlib.h>
#include <stdbool.h>
#include <stdint.h>
#include <limits.h>

#include <float.h>
#include <fenv.h>
#include <math.h>

#include <string.h>
#include <errno.h>
#include <ctype.h>

/*
** In the following conversions, the significant digits are represented in an
** integer and multiplied at the last moment by a suitable power of 10 (decimal
** representation) or 2 (hexadecimal representation). An integer of a suitable
** size needs to be used; that size is the size of the long double type.
**
** -> SIGNIFICAND_TYPE is the unsigned integer type holding the digits.
** -> SIGNIFICAND_DIGITS is the maximum number of decimal digits accumulated
**    into it. Since 10^17 < 2^64 and 10^38 < 2^128, that many decimal digits
**    (plus a final rounding increment) always fit without wrapping.
**
** TODO: vhex-x86: Using 128-bit long double is untested!
*/
#if __SIZEOF_LONG_DOUBLE__ == 8
# define SIGNIFICAND_TYPE uint64_t
# define SIGNIFICAND_DIGITS 17
#elif __SIZEOF_LONG_DOUBLE__ <= 16
# define SIGNIFICAND_TYPE unsigned __int128
# define SIGNIFICAND_DIGITS 38
#else
# error long double larger than 128 bits is not supported
#endif

/*
** Case-insensitive comparison of at most n characters (like strncasecmp).
**
** Returns 0 if the first n characters of both strings are equal ignoring
** case, or if both strings end (at the same position) before n characters
** have been compared. Otherwise returns the difference between the first
** pair of lowercased characters that differ.
*/
static int ncasecmp(char const *left, char const *right, size_t n)
{
	for(size_t i = 0; i < n; i++) {
		/* <ctype.h> functions are only defined for unsigned char values
		   (and EOF), so plain char must be converted first */
		int l = tolower((unsigned char)left[i]);
		int r = tolower((unsigned char)right[i]);

		if(l != r) return l - r;

		/* Both strings end here; comparing further would read past
		   their terminators */
		if(l == '\0') break;
	}
	return 0;
}

/*
** Parse digits and exponent into integers, in decimal or hexadecimal notation.
**
** -> In decimal notation, we read up to SIGNIFICAND_DIGITS significant digits
**    and later multiply them by a power of 10. The main approximation is the
**    power of 10.
**
** -> In hexadecimal notation, we read as many significant hex digits as fit
**    in SIGNIFICAND_TYPE (minus one digit of headroom so that the rounding
**    increment cannot wrap), then later multiply by a power of 2. Any final
**    rounding is done by the integer-to-floating-point conversion.
**
** Leading zeros are skipped and do not count as significant digits. Digits
** beyond the limit are dropped (adjusting the exponent), and the first dropped
** digit rounds the significand up if it is at least half the base.
**
** @ptr0         In: start of the digits. Out: first unparsed character (only
**               updated when *valid is set to true).
** @valid        Set to false if no digit at all was found, true otherwise.
** @digits       Receives the significand as an integer.
** @exponent     Receives the power of 10 (decimal) or 2 (hexadecimal) by which
**               *digits must be multiplied.
** @hexadecimal  Whether to parse hexadecimal digits and a 'p' exponent.
*/
static void parse_digits(char const * restrict *ptr0, bool *valid,
	SIGNIFICAND_TYPE *digits, long *exponent, bool hexadecimal)
{
	char const *ptr = *ptr0;
	bool dot_found = false;
	bool any_digit = false;
	int significant = 0;

	*digits = 0;
	*exponent = 0;

	int max_digits = hexadecimal
		? (int)(2 * sizeof(SIGNIFICAND_TYPE)) - 1
		: SIGNIFICAND_DIGITS;
	/* Smallest value of a discarded digit that rounds up */
	int half_base = hexadecimal ? 8 : 5;

	/* TODO: locale: use a locale-aware decimal separator */
	int dot_character = '.';
	int exp_character = (hexadecimal ? 'p' : 'e');

	for(;; ptr++) {
		/* <ctype.h> functions need unsigned char values */
		unsigned char c = (unsigned char)*ptr;

		/* Allow only one dot in the string, stop at the second one */
		if(c == dot_character) {
			if(dot_found) break;
			dot_found = true;
			continue;
		}

		int v;
		if(isdigit(c))
			v = c - '0';
		else if(hexadecimal && isxdigit(c))
			v = tolower(c) - 'a' + 10;
		else
			break;

		any_digit = true;

		/* Every digit after the dot is worth one less position */
		if(dot_found) (*exponent)--;

		/* Leading zeros carry no information; don't let them use up
		   the significant digits budget */
		if(significant == 0 && v == 0) continue;

		if(significant < max_digits) {
			if(hexadecimal) *digits = (*digits << 4) + v;
			else *digits = (*digits * 10) + v;
		}
		else {
			/* Digit is dropped: account for its position... */
			(*exponent)++;
			/* ... and round at the first discarded one */
			if(significant == max_digits && v >= half_base)
				(*digits)++;
		}

		/* Saturate just past the limit so the counter cannot overflow */
		if(significant <= max_digits) significant++;
	}

	/* Require at least one digit to be present; if not, the whole string
	   is considered invalid */
	if(!any_digit) {
		*valid = false;
		return;
	}

	/* In hexadecimal, each character is worth 4 bits of exponent */
	if(hexadecimal) (*exponent) *= 4;

	/* Parse exponent */
	if(tolower((unsigned char)*ptr) == exp_character) {
		char const *exp_start = ptr + 1;
		char const *first_digit = exp_start;
		if(*first_digit == '+' || *first_digit == '-') first_digit++;

		/* Only an optional sign directly followed by a digit makes an
		   exponent (strtol alone would also skip whitespace); if there
		   is none, ignore the 'e...' part */
		if(isdigit((unsigned char)*first_digit)) {
			char *end;
			long e = strtol(exp_start, &end, 10);
			ptr = end;

			/* Saturating addition: signed overflow is undefined */
			if(e > 0 && *exponent > LONG_MAX - e)
				*exponent = LONG_MAX;
			else if(e < 0 && *exponent < LONG_MIN - e)
				*exponent = LONG_MIN;
			else
				*exponent += e;
		}
	}

	*ptr0 = ptr;
	*valid = true;
}

/*
** Convert the initial portion of a string to floating point.
**
** Accepted forms, after optional whitespace and an optional sign, are "nan"
** (optionally followed by a parenthesized sequence of letters, digits and
** underscores), "inf"/"infinity", hexadecimal numbers prefixed by "0x", and
** decimal numbers. Keywords and the prefix are case-insensitive.
**
** @ptr     String to parse.
** @endptr  If non-NULL, receives a pointer to the first unparsed character,
**          or ptr itself if no conversion could be performed.
** @out     If non-NULL, receives the result as a double.
** @outf    If non-NULL, receives the result as a float.
** @outl    If non-NULL, receives the result as a long double.
**
** Returns 0, or ERANGE if one of the requested results overflowed. This
** function does not assign errno itself.
*/
int strto_fp(char const * restrict ptr, char ** restrict endptr, double *out,
	float *outf, long double *outl)
{
	/* Save the value of ptr in endptr, in case format is invalid */
	if(endptr) *endptr = (char *)ptr;

	/* Skip initial whitespace */
	while(isspace((unsigned char)*ptr)) ptr++;

	/* Read optional sign */
	bool negative = (*ptr == '-');
	if(*ptr == '-' || *ptr == '+') ptr++;

	int errno_value = 0;
	bool valid = true;

	/* Result variable */
	if(out)  *out = 0.0;
	if(outf) *outf = 0.0f;
	if(outl) *outl = 0.0l;

	/* NaN possibly with an argument */
	if(!ncasecmp(ptr, "nan", 3)) {
		char const *arg = "";
		ptr += 3;

		/* The argument only counts if it consists of letters, digits
		   and underscores and is properly closed; otherwise just "nan"
		   is consumed. This also never scans past the terminator. */
		if(ptr[0] == '(') {
			char const *close = ptr + 1;
			while(isalnum((unsigned char)*close) || *close == '_')
				close++;
			if(*close == ')') {
				arg = ptr + 1;
				ptr = close + 1;
			}
		}

		if(out)  *out  = __builtin_nan(arg);
		if(outf) *outf = __builtin_nanf(arg);
		if(outl) *outl = __builtin_nanl(arg);
	}
	/* Infinity */
	else if(!ncasecmp(ptr, "infinity", 8)) {
		if(out)  *out  = __builtin_inf();
		if(outf) *outf = __builtin_inff();
		if(outl) *outl = __builtin_infl();
		ptr += 8;
	}
	else if(!ncasecmp(ptr, "inf", 3)) {
		if(out)  *out  = __builtin_inf();
		if(outf) *outf = __builtin_inff();
		if(outl) *outl = __builtin_infl();
		ptr += 3;
	}
	else {
		SIGNIFICAND_TYPE digits = 0;
		long e = 0;
		bool hexadecimal = (ptr[0] == '0'
			&& tolower((unsigned char)ptr[1]) == 'x');

		if(hexadecimal) {
			char const *after_zero = ptr + 1;
			ptr += 2;
			parse_digits(&ptr, &valid, &digits, &e, true);

			/* "0x" without any hex digit: the longest valid
			   prefix is the decimal number "0" */
			if(!valid) {
				ptr = after_zero;
				digits = 0;
				e = 0;
				valid = true;
			}
		}
		else {
			parse_digits(&ptr, &valid, &digits, &e, false);
		}

		/*
		** A zero significand is zero regardless of the exponent; skip
		** the multiplication so that a huge exponent doesn't give
		** 0 * inf = NaN. Results were already initialized to 0.
		*/
		if(valid && digits != 0) {
			if(hexadecimal) {
				if(out)  *out  = (double)digits * exp2(e);
				if(outf) *outf = (float)digits * exp2f(e);
				if(outl) *outl = (long double)digits * exp2l(e);
			}
			else {
				if(out)  *out  = (double)digits * pow(10, e);
				if(outf) *outf = (float)digits * powf(10, e);
				if(outl) *outl = (long double)digits * powl(10, e);
			}
		}

		/*
		** Detect overflow, somewhat. Implementation is not required to
		** set errno on underflow, which makes things much easier for
		** us as underflow gives 0 (7.20.1.3§10).
		*/
		if((out && *out == HUGE_VAL)
			|| (outf && *outf == HUGE_VALF)
			|| (outl && *outl == HUGE_VALL)) {
			errno_value = ERANGE;
		}
	}

	/* Apply sign; this method is allowed by 7.20.1.3§4.249. When no
	   conversion was performed, plain zero is returned. */
	if(negative && valid) {
		if(out)  *out  = -(*out);
		if(outf) *outf = -(*outf);
		if(outl) *outl = -(*outl);
	}

	/* Save the result pointer */
	if(endptr && valid) *endptr = (char *)ptr;

	return errno_value;
}