Fix code style and comments of new math code

Synchronize code style and comments with Arm Optimized Routines, there
are no code changes in this patch.  This ensures different projects using
the same code have consistent code style so bug fix patches can be applied
more easily.
This commit is contained in:
Szabolcs Nagy 2018-07-03 12:54:36 +01:00 committed by Corinna Vinschen
parent 6a3e08a53e
commit cbe50607fb
8 changed files with 134 additions and 74 deletions

View File

@ -45,6 +45,13 @@
#define C5 __exp_data.poly[8 - EXP_POLY_ORDER]
#define C6 __exp_data.poly[9 - EXP_POLY_ORDER]
/* Handle cases that may overflow or underflow when computing the result that
is scale*(1+TMP) without intermediate rounding. The bit representation of
scale is in SBITS, however it has a computed exponent that may have
overflowed into the sign bit so that needs to be adjusted before using it as
a double. (int32_t)KI is the k used in the argument reduction and exponent
adjustment of scale, positive k here means the result may overflow and
negative k means the result may underflow. */
static inline double
specialcase (double_t tmp, uint64_t sbits, uint64_t ki)
{
@ -83,6 +90,7 @@ specialcase (double_t tmp, uint64_t sbits, uint64_t ki)
return check_uflow (y);
}
/* Top 12 bits of a double (sign and exponent bits). */
static inline uint32_t
top12 (double x)
{
@ -136,29 +144,29 @@ exp (double x)
ki = asuint64 (kd);
kd -= Shift;
#endif
r = x + kd*NegLn2hiN + kd*NegLn2loN;
r = x + kd * NegLn2hiN + kd * NegLn2loN;
/* 2^(k/N) ~= scale * (1 + tail). */
idx = 2*(ki % N);
idx = 2 * (ki % N);
top = ki << (52 - EXP_TABLE_BITS);
tail = asdouble (T[idx]);
/* This is only a valid scale when -1023*N < k < 1024*N. */
sbits = T[idx + 1] + top;
/* exp(x) = 2^(k/N) * exp(r) ~= scale + scale * (tail + exp(r) - 1). */
/* Evaluation is optimized assuming superscalar pipelined execution. */
r2 = r*r;
r2 = r * r;
/* Without fma the worst case error is 0.25/N ulp larger. */
/* Worst case error is less than 0.5+1.11/N+(abs poly error * 2^53) ulp. */
#if EXP_POLY_ORDER == 4
tmp = tail + r + r2*C2 + r*r2*(C3 + r*C4);
tmp = tail + r + r2 * C2 + r * r2 * (C3 + r * C4);
#elif EXP_POLY_ORDER == 5
tmp = tail + r + r2*(C2 + r*C3) + r2*r2*(C4 + r*C5);
tmp = tail + r + r2 * (C2 + r * C3) + r2 * r2 * (C4 + r * C5);
#elif EXP_POLY_ORDER == 6
tmp = tail + r + r2*(0.5 + r*C3) + r2*r2*(C4 + r*C5 + r2*C6);
tmp = tail + r + r2 * (0.5 + r * C3) + r2 * r2 * (C4 + r * C5 + r2 * C6);
#endif
if (unlikely (abstop == 0))
return specialcase (tmp, sbits, ki);
scale = asdouble (sbits);
/* Note: tmp == 0 or |tmp| > 2^-200 and scale > 2^-739, so there
/* Note: tmp == 0 or |tmp| > 2^-65 and scale > 2^-739, so there
is no spurious underflow here even without fma. */
return scale + scale * tmp;
}

View File

@ -43,6 +43,13 @@
#define C5 __exp_data.exp2_poly[4]
#define C6 __exp_data.exp2_poly[5]
/* Handle cases that may overflow or underflow when computing the result that
is scale*(1+TMP) without intermediate rounding. The bit representation of
scale is in SBITS, however it has a computed exponent that may have
overflowed into the sign bit so that needs to be adjusted before using it as
a double. (int32_t)KI is the k used in the argument reduction and exponent
adjustment of scale, positive k here means the result may overflow and
negative k means the result may underflow. */
static inline double
specialcase (double_t tmp, uint64_t sbits, uint64_t ki)
{
@ -81,6 +88,7 @@ specialcase (double_t tmp, uint64_t sbits, uint64_t ki)
return check_uflow (y);
}
/* Top 12 bits of a double (sign and exponent bits). */
static inline uint32_t
top12 (double x)
{
@ -125,22 +133,22 @@ exp2 (double x)
kd -= Shift; /* k/N for int k. */
r = x - kd;
/* 2^(k/N) ~= scale * (1 + tail). */
idx = 2*(ki % N);
idx = 2 * (ki % N);
top = ki << (52 - EXP_TABLE_BITS);
tail = asdouble (T[idx]);
/* This is only a valid scale when -1023*N < k < 1024*N. */
sbits = T[idx + 1] + top;
/* exp2(x) = 2^(k/N) * 2^r ~= scale + scale * (tail + 2^r - 1). */
/* Evaluation is optimized assuming superscalar pipelined execution. */
r2 = r*r;
r2 = r * r;
/* Without fma the worst case error is 0.5/N ulp larger. */
/* Worst case error is less than 0.5+0.86/N+(abs poly error * 2^53) ulp. */
#if EXP2_POLY_ORDER == 4
tmp = tail + r*C1 + r2*C2 + r*r2*(C3 + r*C4);
tmp = tail + r * C1 + r2 * C2 + r * r2 * (C3 + r * C4);
#elif EXP2_POLY_ORDER == 5
tmp = tail + r*C1 + r2*(C2 + r*C3) + r2*r2*(C4 + r*C5);
tmp = tail + r * C1 + r2 * (C2 + r * C3) + r2 * r2 * (C4 + r * C5);
#elif EXP2_POLY_ORDER == 6
tmp = tail + r*C1 + r2*(0.5 + r*C3) + r2*r2*(C4 + r*C5 + r2*C6);
tmp = tail + r * C1 + r2 * (0.5 + r * C3) + r2 * r2 * (C4 + r * C5 + r2 * C6);
#endif
if (unlikely (abstop == 0))
return specialcase (tmp, sbits, ki);

View File

@ -42,6 +42,7 @@
#define N (1 << LOG_TABLE_BITS)
#define OFF 0x3fe6000000000000
/* Top 16 bits of a double. */
static inline uint32_t
top16 (double x)
{
@ -74,44 +75,44 @@ log (double x)
if (WANT_ROUNDING && unlikely (ix == asuint64 (1.0)))
return 0;
r = x - 1.0;
r2 = r*r;
r3 = r*r2;
r2 = r * r;
r3 = r * r2;
#if LOG_POLY1_ORDER == 10
/* Worst-case error is around 0.516 ULP. */
y = r3*(B[1] + r*B[2] + r2*B[3]
+ r3*(B[4] + r*B[5] + r2*B[6] + r3*(B[7] + r*B[8])));
w = B[0]*r2; /* B[0] == -0.5. */
y = r3 * (B[1] + r * B[2] + r2 * B[3]
+ r3 * (B[4] + r * B[5] + r2 * B[6] + r3 * (B[7] + r * B[8])));
w = B[0] * r2; /* B[0] == -0.5. */
hi = r + w;
y += r - hi + w;
y += hi;
#elif LOG_POLY1_ORDER == 11
/* Worst-case error is around 0.516 ULP. */
y = r3*(B[1] + r*B[2]
+ r2*(B[3] + r*B[4] + r2*B[5]
+ r3*(B[6] + r*B[7] + r2*B[8] + r3*B[9])));
w = B[0]*r2; /* B[0] == -0.5. */
y = r3 * (B[1] + r * B[2]
+ r2 * (B[3] + r * B[4] + r2 * B[5]
+ r3 * (B[6] + r * B[7] + r2 * B[8] + r3 * B[9])));
w = B[0] * r2; /* B[0] == -0.5. */
hi = r + w;
y += r - hi + w;
y += hi;
#elif LOG_POLY1_ORDER == 12
y = r3*(B[1] + r*B[2] + r2*B[3]
+ r3*(B[4] + r*B[5] + r2*B[6]
+ r3*(B[7] + r*B[8] + r2*B[9] + r3*B[10])));
y = r3 * (B[1] + r * B[2] + r2 * B[3]
+ r3 * (B[4] + r * B[5] + r2 * B[6]
+ r3 * (B[7] + r * B[8] + r2 * B[9] + r3 * B[10])));
# if N <= 64
/* Worst-case error is around 0.532 ULP. */
w = B[0]*r2; /* B[0] == -0.5. */
w = B[0] * r2; /* B[0] == -0.5. */
hi = r + w;
y += r - hi + w;
y += hi;
# else
/* Worst-case error is around 0.507 ULP. */
w = r*0x1p27;
w = r * 0x1p27;
double_t rhi = r + w - w;
double_t rlo = r - rhi;
w = rhi*rhi*B[0]; /* B[0] == -0.5. */
w = rhi * rhi * B[0]; /* B[0] == -0.5. */
hi = r + w;
lo = r - hi + w;
lo += B[0]*rlo*(rhi + r);
lo += B[0] * rlo * (rhi + r);
y += lo;
y += hi;
# endif
@ -150,14 +151,14 @@ log (double x)
r = fma (z, invc, -1.0);
#else
/* rounding error: 0x1p-55/N + 0x1p-66. */
r = (z - T2[i].chi - T2[i].clo)*invc;
r = (z - T2[i].chi - T2[i].clo) * invc;
#endif
kd = (double_t) k;
/* hi + lo = r + log(c) + k*Ln2. */
w = kd*Ln2hi + logc;
w = kd * Ln2hi + logc;
hi = w + r;
lo = w - hi + r + kd*Ln2lo;
lo = w - hi + r + kd * Ln2lo;
/* log(x) = lo + (log1p(r) - r) + hi. */
r2 = r * r; /* rounding error: 0x1p-54/N^2. */
@ -166,9 +167,12 @@ log (double x)
Worst case error if |y| > 0x1p-4:
0.5 + 2.06/N + abs-poly-error*2^56 ULP (+ 0.001 ULP without fma). */
#if LOG_POLY_ORDER == 6
y = lo + r2*A[0] + r*r2*(A[1] + r*A[2] + r2*(A[3] + r*A[4])) + hi;
y = lo + r2 * A[0] + r * r2 * (A[1] + r * A[2] + r2 * (A[3] + r * A[4])) + hi;
#elif LOG_POLY_ORDER == 7
y = lo + r2*(A[0] + r*A[1] + r2*(A[2] + r*A[3]) + r2*r2*(A[4] + r*A[5])) + hi;
y = lo
+ r2 * (A[0] + r * A[1] + r2 * (A[2] + r * A[3])
+ r2 * r2 * (A[4] + r * A[5]))
+ hi;
#endif
return y;
}

View File

@ -42,6 +42,7 @@
#define N (1 << LOG2_TABLE_BITS)
#define OFF 0x3fe6000000000000
/* Top 16 bits of a double. */
static inline uint32_t
top16 (double x)
{
@ -72,24 +73,24 @@ double
return 0;
r = x - 1.0;
#if __HAVE_FAST_FMA
hi = r*InvLn2hi;
lo = r*InvLn2lo + fma (r, InvLn2hi, -hi);
hi = r * InvLn2hi;
lo = r * InvLn2lo + fma (r, InvLn2hi, -hi);
#else
double_t rhi, rlo;
rhi = asdouble (asuint64 (r) & -1ULL<<32);
rhi = asdouble (asuint64 (r) & -1ULL << 32);
rlo = r - rhi;
hi = rhi*InvLn2hi;
lo = rlo*InvLn2hi + r*InvLn2lo;
hi = rhi * InvLn2hi;
lo = rlo * InvLn2hi + r * InvLn2lo;
#endif
r2 = r * r; /* rounding error: 0x1p-62. */
r4 = r2 * r2;
#if LOG2_POLY1_ORDER == 11
/* Worst-case error is less than 0.54 ULP (0.55 ULP without fma). */
p = r2*(B[0] + r*B[1]);
p = r2 * (B[0] + r * B[1]);
y = hi + p;
lo += hi - y + p;
lo += r4*(B[2] + r*B[3] + r2*(B[4] + r*B[5])
+ r4*(B[6] + r*B[7] + r2*(B[8] + r*B[9])));
lo += r4 * (B[2] + r * B[3] + r2 * (B[4] + r * B[5])
+ r4 * (B[6] + r * B[7] + r2 * (B[8] + r * B[9])));
y += lo;
#endif
return y;
@ -125,16 +126,16 @@ double
#if __HAVE_FAST_FMA
/* rounding error: 0x1p-55/N. */
r = fma (z, invc, -1.0);
t1 = r*InvLn2hi;
t2 = r*InvLn2lo + fma (r, InvLn2hi, -t1);
t1 = r * InvLn2hi;
t2 = r * InvLn2lo + fma (r, InvLn2hi, -t1);
#else
double_t rhi, rlo;
/* rounding error: 0x1p-55/N + 0x1p-65. */
r = (z - T2[i].chi - T2[i].clo)*invc;
r = (z - T2[i].chi - T2[i].clo) * invc;
rhi = asdouble (asuint64 (r) & -1ULL << 32);
rlo = r - rhi;
t1 = rhi*InvLn2hi;
t2 = rlo*InvLn2hi + r*InvLn2lo;
t1 = rhi * InvLn2hi;
t2 = rlo * InvLn2hi + r * InvLn2lo;
#endif
/* hi + lo = r/ln2 + log2(c) + k. */
@ -149,8 +150,8 @@ double
#if LOG2_POLY_ORDER == 7
/* Worst-case error if |y| > 0x1p-4: 0.547 ULP (0.550 ULP without fma).
~ 0.5 + 2/N/ln2 + abs-poly-error*0x1p56 ULP (+ 0.003 ULP without fma). */
p = A[0] + r*A[1] + r2*(A[2] + r*A[3]) + r4*(A[4] + r*A[5]);
y = lo + r2*p + hi;
p = A[0] + r * A[1] + r2 * (A[2] + r * A[3]) + r4 * (A[4] + r * A[5]);
y = lo + r2 * p + hi;
#endif
return y;
}

View File

@ -233,28 +233,48 @@ eval_as_double (double x)
# define unlikely(x) (x)
#endif
/* Error handling tail calls for special cases, with sign argument. */
/* Error handling tail calls for special cases, with a sign argument.
The sign of the return value is set if the argument is non-zero. */
/* The result overflows. */
HIDDEN float __math_oflowf (uint32_t);
/* The result underflows to 0 in nearest rounding mode. */
HIDDEN float __math_uflowf (uint32_t);
/* The result underflows to 0 in some directed rounding mode only. */
HIDDEN float __math_may_uflowf (uint32_t);
/* Division by zero. */
HIDDEN float __math_divzerof (uint32_t);
/* The result overflows. */
HIDDEN double __math_oflow (uint32_t);
/* The result underflows to 0 in nearest rounding mode. */
HIDDEN double __math_uflow (uint32_t);
/* The result underflows to 0 in some directed rounding mode only. */
HIDDEN double __math_may_uflow (uint32_t);
/* Division by zero. */
HIDDEN double __math_divzero (uint32_t);
/* Error handling using input checking. */
/* Invalid input unless it is a quiet NaN. */
HIDDEN float __math_invalidf (float);
/* Invalid input unless it is a quiet NaN. */
HIDDEN double __math_invalid (double);
/* Error handling using output checking, only for errno setting. */
/* Check if the result overflowed to infinity. */
HIDDEN double __math_check_oflow (double);
/* Check if the result underflowed to 0. */
HIDDEN double __math_check_uflow (double);
/* Check if the result overflowed to infinity. */
static inline double
check_oflow (double x)
{
return WANT_ERRNO ? __math_check_oflow (x) : x;
}
/* Check if the result underflowed to 0. */
static inline double
check_uflow (double x)
{
@ -324,7 +344,8 @@ extern const struct powf_log2_data
#define EXP_USE_TOINT_NARROW 0
#define EXP2_POLY_ORDER 5
#define EXP2_POLY_WIDE 0
extern const struct exp_data {
extern const struct exp_data
{
double invln2N;
double shift;
double negln2hiN;
@ -338,7 +359,8 @@ extern const struct exp_data {
#define LOG_TABLE_BITS 7
#define LOG_POLY_ORDER 6
#define LOG_POLY1_ORDER 12
extern const struct log_data {
extern const struct log_data
{
double ln2hi;
double ln2lo;
double poly[LOG_POLY_ORDER - 1]; /* First coefficient is 1. */
@ -352,7 +374,8 @@ extern const struct log_data {
#define LOG2_TABLE_BITS 6
#define LOG2_POLY_ORDER 7
#define LOG2_POLY1_ORDER 11
extern const struct log2_data {
extern const struct log2_data
{
double invln2hi;
double invln2lo;
double poly[LOG2_POLY_ORDER - 1];
@ -365,7 +388,8 @@ extern const struct log2_data {
#define POW_LOG_TABLE_BITS 7
#define POW_LOG_POLY_ORDER 8
extern const struct pow_log_data {
extern const struct pow_log_data
{
double ln2hi;
double ln2lo;
double poly[POW_LOG_POLY_ORDER - 1]; /* First coefficient is 1. */

View File

@ -46,12 +46,16 @@ ulperr_exp: 0.509 ULP (ULP error of exp, 0.511 ULP without fma)
#define N (1 << POW_LOG_TABLE_BITS)
#define OFF 0x3fe6955500000000
/* Top 12 bits of a double (sign and exponent bits).  asuint64 gives the
   IEEE-754 bit pattern of X; shifting right by 52 discards the 52-bit
   mantissa, leaving the sign bit and the 11 exponent bits.  */
static inline uint32_t
top12 (double x)
{
return asuint64 (x) >> 52;
}
/* Compute y+tail = log(x) where the rounded result is y and tail has about
additional 15 bits precision. The bit representation of x is in ix, but
normalized in the subnormal range using sign bit too for the exponent. */
static inline double_t
log_inline (uint64_t ix, double_t *tail)
{
@ -111,7 +115,8 @@ log_inline (uint64_t ix, double_t *tail)
#endif
/* p = log1p(r) - r - A[0]*r*r. */
#if POW_LOG_POLY_ORDER == 8
p = ar3*(A[1] + r*A[2] + ar2*(A[3] + r*A[4] + ar2*(A[5] + r*A[6])));
p = (ar3
* (A[1] + r * A[2] + ar2 * (A[3] + r * A[4] + ar2 * (A[5] + r * A[6]))));
#endif
lo = lo1 + lo2 + lo3 + lo4 + p;
y = hi + lo;
@ -133,6 +138,13 @@ log_inline (uint64_t ix, double_t *tail)
#define C5 __exp_data.poly[8 - EXP_POLY_ORDER]
#define C6 __exp_data.poly[9 - EXP_POLY_ORDER]
/* Handle cases that may overflow or underflow when computing the result that
is scale*(1+TMP) without intermediate rounding. The bit representation of
scale is in SBITS, however it has a computed exponent that may have
overflowed into the sign bit so that needs to be adjusted before using it as
a double. (int32_t)KI is the k used in the argument reduction and exponent
adjustment of scale, positive k here means the result may overflow and
negative k means the result may underflow. */
static inline double
specialcase (double_t tmp, uint64_t sbits, uint64_t ki)
{
@ -176,6 +188,8 @@ specialcase (double_t tmp, uint64_t sbits, uint64_t ki)
#define SIGN_BIAS (0x800 << EXP_TABLE_BITS)
/* Computes sign*exp(x+xtail) where |xtail| < 2^-8/N and |xtail| <= |x|.
The sign_bias argument is SIGN_BIAS or 0 and sets the sign to -1 or 1. */
static inline double
exp_inline (double x, double xtail, uint32_t sign_bias)
{
@ -223,26 +237,26 @@ exp_inline (double x, double xtail, uint32_t sign_bias)
ki = asuint64 (kd);
kd -= Shift;
#endif
r = x + kd*NegLn2hiN + kd*NegLn2loN;
r = x + kd * NegLn2hiN + kd * NegLn2loN;
/* The code assumes 2^-200 < |xtail| < 2^-8/N. */
r += xtail;
/* 2^(k/N) ~= scale * (1 + tail). */
idx = 2*(ki % N);
idx = 2 * (ki % N);
top = (ki + sign_bias) << (52 - EXP_TABLE_BITS);
tail = asdouble (T[idx]);
/* This is only a valid scale when -1023*N < k < 1024*N. */
sbits = T[idx + 1] + top;
/* exp(x) = 2^(k/N) * exp(r) ~= scale + scale * (tail + exp(r) - 1). */
/* Evaluation is optimized assuming superscalar pipelined execution. */
r2 = r*r;
r2 = r * r;
/* Without fma the worst case error is 0.25/N ulp larger. */
/* Worst case error is less than 0.5+1.11/N+(abs poly error * 2^53) ulp. */
#if EXP_POLY_ORDER == 4
tmp = tail + r + r2*C2 + r*r2*(C3 + r*C4);
tmp = tail + r + r2 * C2 + r * r2 * (C3 + r * C4);
#elif EXP_POLY_ORDER == 5
tmp = tail + r + r2*(C2 + r*C3) + r2*r2*(C4 + r*C5);
tmp = tail + r + r2 * (C2 + r * C3) + r2 * r2 * (C4 + r * C5);
#elif EXP_POLY_ORDER == 6
tmp = tail + r + r2*(0.5 + r*C3) + r2*r2*(C4 + r*C5 + r2*C6);
tmp = tail + r + r2 * (0.5 + r * C3) + r2 * r2 * (C4 + r * C5 + r2 * C6);
#endif
if (unlikely (abstop == 0))
return specialcase (tmp, sbits, ki);
@ -268,6 +282,7 @@ checkint (uint64_t iy)
return 2;
}
/* Returns 1 if input is the bit representation of 0, infinity or nan. */
static inline int
zeroinfnan (uint64_t i)
{

View File

@ -50,14 +50,14 @@ sincosf (float y, float *sinp, float *cosp)
double x2 = x * x;
if (unlikely (abstop12 (y) < abstop12 (0x1p-12f)))
{
if (unlikely (abstop12 (y) < abstop12 (0x1p-126f)))
/* Force underflow for tiny y. */
force_eval_float (x2);
*sinp = y;
*cosp = 1.0f;
return;
}
{
if (unlikely (abstop12 (y) < abstop12 (0x1p-126f)))
/* Force underflow for tiny y. */
force_eval_float (x2);
*sinp = y;
*cosp = 1.0f;
return;
}
sincosf_poly (x, x2, p, 0, sinp, cosp);
}

View File

@ -49,12 +49,12 @@ sinf (float y)
s = x * x;
if (unlikely (abstop12 (y) < abstop12 (0x1p-12f)))
{
if (unlikely (abstop12 (y) < abstop12 (0x1p-126f)))
/* Force underflow for tiny y. */
force_eval_float (s);
return y;
}
{
if (unlikely (abstop12 (y) < abstop12 (0x1p-126f)))
/* Force underflow for tiny y. */
force_eval_float (s);
return y;
}
return sinf_poly (x, s, p, 0);
}