From 1d5ba3bb5a3f55e10db05219638cfcd967d65417 Mon Sep 17 00:00:00 2001
From: Szabolcs Nagy <nsz@port70.net>
Date: Sat, 18 May 2013 12:34:00 +0000
Subject: math: tan cleanups

* use unsigned arithmetics on the representation
* store arg reduction quotient in unsigned (so n%2 would work like n&1)
* use different convention to pass the arg reduction bit to __tan
  (this argument used to be 1 for even and -1 for odd reduction
  which meant obscure bithacks, the new n&1 is cleaner)
* raise inexact and underflow flags correctly for small x
  (tanl(x) may still raise spurious underflow for small but normal x)
  (this exception raising code increases codesize a bit, similar fixes
  are needed in many other places, it may worth investigating at some
  point if the inexact and underflow flags are worth raising correctly
  as this is not strictly required by the standard)
* tanf manual reduction optimization is kept for now
* tanl code path is cleaned up to follow similar logic to tan and tanf
---
 src/math/tanl.c | 37 +++++++++++--------------------------
 1 file changed, 11 insertions(+), 26 deletions(-)

(limited to 'src/math/tanl.c')

diff --git a/src/math/tanl.c b/src/math/tanl.c
index 0194eaf7..3b51e011 100644
--- a/src/math/tanl.c
+++ b/src/math/tanl.c
@@ -41,42 +41,27 @@ long double tanl(long double x)
 long double tanl(long double x)
 {
 	union IEEEl2bits z;
-	int e0, s;
 	long double y[2];
-	long double hi, lo;
+	unsigned n;
 
 	z.e = x;
-	s = z.bits.sign;
 	z.bits.sign = 0;
 
-	/* If x = +-0 or x is subnormal, then tan(x) = x. */
-	if (z.bits.exp == 0)
-		return x;
-
 	/* If x = NaN or Inf, then tan(x) = NaN. */
-	if (z.bits.exp == 32767)
+	if (z.bits.exp == 0x7fff)
 		return (x - x) / (x - x);
 
-	/* Optimize the case where x is already within range. */
+	/* |x| < (double)pi/4 */
 	if (z.e < M_PI_4) {
-		hi = __tanl(z.e, 0, 0);
-		return s ? -hi : hi;
+		/* x = +-0 or x is subnormal */
+		if (z.bits.exp == 0)
+			/* inexact and underflow if x!=0 */
+			return x + x*0x1p-120f;
+		/* can raise spurious underflow */
+		return __tanl(x, 0, 0);
 	}
 
-	e0 = __rem_pio2l(x, y);
-	hi = y[0];
-	lo = y[1];
-
-	switch (e0 & 3) {
-	case 0:
-	case 2:
-		hi = __tanl(hi, lo, 0);
-		break;
-	case 1:
-	case 3:
-		hi = __tanl(hi, lo, 1);
-		break;
-	}
-	return hi;
+	n = __rem_pio2l(x, y);
+	return __tanl(y[0], y[1], n&1);
 }
 #endif
-- 
cgit v1.2.3


From bfda37935867f9bf271d6074db0accf05c63ad10 Mon Sep 17 00:00:00 2001
From: Szabolcs Nagy <nsz@port70.net>
Date: Sat, 18 May 2013 14:40:22 +0000
Subject: math: sin cos cleanup

* use unsigned arithmetics
* use unsigned to store arg reduction quotient (so n&3 is understood)
* remove z=0.0 variables, use literal 0
* raise underflow and inexact exceptions properly when x is small
* fix spurious underflow in tanl
---
 src/math/cos.c     | 20 +++++++++++---------
 src/math/cosf.c    | 33 +++++++++++++++++++--------------
 src/math/cosl.c    | 22 +++++++++++-----------
 src/math/sin.c     | 15 ++++++++-------
 src/math/sincos.c  | 12 ++++++------
 src/math/sincosf.c | 45 ++++++++++++++++++++++++---------------------
 src/math/sincosl.c | 20 +++++++++++---------
 src/math/sinf.c    | 33 ++++++++++++++++++---------------
 src/math/sinl.c    | 29 ++++++++++++++---------------
 src/math/tanl.c    | 11 ++++++-----
 10 files changed, 128 insertions(+), 112 deletions(-)

(limited to 'src/math/tanl.c')

diff --git a/src/math/cos.c b/src/math/cos.c
index 76990e7f..ee97f68b 100644
--- a/src/math/cos.c
+++ b/src/math/cos.c
@@ -44,26 +44,28 @@
 
 double cos(double x)
 {
-	double y[2],z=0.0;
-	int32_t n, ix;
+	double y[2];
+	uint32_t ix;
+	unsigned n;
 
 	GET_HIGH_WORD(ix, x);
+	ix &= 0x7fffffff;
 
 	/* |x| ~< pi/4 */
-	ix &= 0x7fffffff;
 	if (ix <= 0x3fe921fb) {
-		if (ix < 0x3e46a09e)  /* if x < 2**-27 * sqrt(2) */
-			/* raise inexact if x != 0 */
-			if ((int)x == 0)
-				return 1.0;
-		return __cos(x, z);
+		if (ix < 0x3e46a09e) {  /* |x| < 2**-27 * sqrt(2) */
+			/* raise inexact if x!=0 */
+			FORCE_EVAL(x + 0x1p120f);
+			return 1.0;
+		}
+		return __cos(x, 0);
 	}
 
 	/* cos(Inf or NaN) is NaN */
 	if (ix >= 0x7ff00000)
 		return x-x;
 
-	/* argument reduction needed */
+	/* argument reduction */
 	n = __rem_pio2(x, y);
 	switch (n&3) {
 	case 0: return  __cos(y[0], y[1]);
diff --git a/src/math/cosf.c b/src/math/cosf.c
index 4d94130f..23f3e5bf 100644
--- a/src/math/cosf.c
+++ b/src/math/cosf.c
@@ -26,34 +26,39 @@ c4pio2 = 4*M_PI_2; /* 0x401921FB, 0x54442D18 */
 float cosf(float x)
 {
 	double y;
-	int32_t n, hx, ix;
+	uint32_t ix;
+	unsigned n, sign;
+
+	GET_FLOAT_WORD(ix, x);
+	sign = ix >> 31;
+	ix &= 0x7fffffff;
 
-	GET_FLOAT_WORD(hx, x);
-	ix = hx & 0x7fffffff;
 	if (ix <= 0x3f490fda) {  /* |x| ~<= pi/4 */
-		if (ix < 0x39800000)  /* |x| < 2**-12 */
-			if ((int)x == 0)  /* raise inexact if x != 0 */
-				return 1.0;
+		if (ix < 0x39800000) {  /* |x| < 2**-12 */
+			/* raise inexact if x != 0 */
+			FORCE_EVAL(x + 0x1p120f);
+			return 1.0f;
+		}
 		return __cosdf(x);
 	}
 	if (ix <= 0x407b53d1) {  /* |x| ~<= 5*pi/4 */
 		if (ix > 0x4016cbe3)  /* |x|  ~> 3*pi/4 */
-			return -__cosdf(hx > 0 ? x-c2pio2 : x+c2pio2);
+			return -__cosdf(sign ? x+c2pio2 : x-c2pio2);
 		else {
-			if (hx > 0)
-				return __sindf(c1pio2 - x);
-			else
+			if (sign)
 				return __sindf(x + c1pio2);
+			else
+				return __sindf(c1pio2 - x);
 		}
 	}
 	if (ix <= 0x40e231d5) {  /* |x| ~<= 9*pi/4 */
 		if (ix > 0x40afeddf)  /* |x| ~> 7*pi/4 */
-			return __cosdf(hx > 0 ? x-c4pio2 : x+c4pio2);
+			return __cosdf(sign ? x+c4pio2 : x-c4pio2);
 		else {
-			if (hx > 0)
-				return __sindf(x - c3pio2);
+			if (sign)
+				return __sindf(-x - c3pio2);
 			else
-				return __sindf(-c3pio2 - x);
+				return __sindf(x - c3pio2);
 		}
 	}
 
diff --git a/src/math/cosl.c b/src/math/cosl.c
index 25d9005a..0794d284 100644
--- a/src/math/cosl.c
+++ b/src/math/cosl.c
@@ -39,30 +39,30 @@ long double cosl(long double x) {
 long double cosl(long double x)
 {
 	union IEEEl2bits z;
-	int e0;
+	unsigned n;
 	long double y[2];
 	long double hi, lo;
 
 	z.e = x;
 	z.bits.sign = 0;
 
-	/* If x = +-0 or x is a subnormal number, then cos(x) = 1 */
-	if (z.bits.exp == 0)
-		return 1.0;
-
 	/* If x = NaN or Inf, then cos(x) = NaN. */
-	if (z.bits.exp == 32767)
+	if (z.bits.exp == 0x7fff)
 		return (x - x) / (x - x);
 
-	/* Optimize the case where x is already within range. */
-	if (z.e < M_PI_4)
+	/* |x| < (double)pi/4 */
+	if (z.e < M_PI_4) {
+		/* |x| < 0x1p-64 */
+		if (z.bits.exp < 0x3fff - 64)
+			/* raise inexact if x!=0 */
+			return 1.0 + x;
 		return __cosl(z.e, 0);
+	}
 
-	e0 = __rem_pio2l(x, y);
+	n = __rem_pio2l(x, y);
 	hi = y[0];
 	lo = y[1];
-
-	switch (e0 & 3) {
+	switch (n & 3) {
 	case 0:
 		hi = __cosl(hi, lo);
 		break;
diff --git a/src/math/sin.c b/src/math/sin.c
index 8e430f85..055e215b 100644
--- a/src/math/sin.c
+++ b/src/math/sin.c
@@ -44,21 +44,22 @@
 
 double sin(double x)
 {
-	double y[2], z=0.0;
-	int32_t n, ix;
+	double y[2];
+	uint32_t ix;
+	unsigned n;
 
 	/* High word of x. */
 	GET_HIGH_WORD(ix, x);
+	ix &= 0x7fffffff;
 
 	/* |x| ~< pi/4 */
-	ix &= 0x7fffffff;
 	if (ix <= 0x3fe921fb) {
 		if (ix < 0x3e500000) {  /* |x| < 2**-26 */
-			/* raise inexact if x != 0 */
-			if ((int)x == 0)
-				return x;
+			/* raise inexact if x != 0 and underflow if subnormal*/
+			FORCE_EVAL(ix < 0x00100000 ? x/0x1p120f : x+0x1p120f);
+			return x;
 		}
-		return __sin(x, z, 0);
+		return __sin(x, 0.0, 0);
 	}
 
 	/* sin(Inf or NaN) is NaN */
diff --git a/src/math/sincos.c b/src/math/sincos.c
index 442e285e..49f8a098 100644
--- a/src/math/sincos.c
+++ b/src/math/sincos.c
@@ -15,7 +15,8 @@
 void sincos(double x, double *sin, double *cos)
 {
 	double y[2], s, c;
-	uint32_t n, ix;
+	uint32_t ix;
+	unsigned n;
 
 	GET_HIGH_WORD(ix, x);
 	ix &= 0x7fffffff;
@@ -24,11 +25,10 @@ void sincos(double x, double *sin, double *cos)
 	if (ix <= 0x3fe921fb) {
 		/* if |x| < 2**-27 * sqrt(2) */
 		if (ix < 0x3e46a09e) {
-			/* raise inexact if x != 0 */
-			if ((int)x == 0) {
-				*sin = x;
-				*cos = 1.0;
-			}
+			/* raise inexact if x!=0 and underflow if subnormal */
+			FORCE_EVAL(ix < 0x00100000 ? x/0x1p120f : x+0x1p120f);
+			*sin = x;
+			*cos = 1.0;
 			return;
 		}
 		*sin = __sin(x, 0.0, 0);
diff --git a/src/math/sincosf.c b/src/math/sincosf.c
index 5e3b9a41..1b50f01c 100644
--- a/src/math/sincosf.c
+++ b/src/math/sincosf.c
@@ -25,21 +25,23 @@ s4pio2 = 4*M_PI_2; /* 0x401921FB, 0x54442D18 */
 
 void sincosf(float x, float *sin, float *cos)
 {
-	double y, s, c;
-	uint32_t n, hx, ix;
+	double y;
+	float_t s, c;
+	uint32_t ix;
+	unsigned n, sign;
 
-	GET_FLOAT_WORD(hx, x);
-	ix = hx & 0x7fffffff;
+	GET_FLOAT_WORD(ix, x);
+	sign = ix >> 31;
+	ix &= 0x7fffffff;
 
 	/* |x| ~<= pi/4 */
 	if (ix <= 0x3f490fda) {
 		/* |x| < 2**-12 */
 		if (ix < 0x39800000) {
-			/* raise inexact if x != 0 */
-			if((int)x == 0) {
-				*sin = x;
-				*cos = 1.0f;
-			}
+			/* raise inexact if x!=0 and underflow if subnormal */
+			FORCE_EVAL(ix < 0x00100000 ? x/0x1p120f : x+0x1p120f);
+			*sin = x;
+			*cos = 1.0f;
 			return;
 		}
 		*sin = __sindf(x);
@@ -50,34 +52,35 @@ void sincosf(float x, float *sin, float *cos)
 	/* |x| ~<= 5*pi/4 */
 	if (ix <= 0x407b53d1) {
 		if (ix <= 0x4016cbe3) {  /* |x| ~<= 3pi/4 */
-			if (hx < 0x80000000) {
-				*sin = __cosdf(x - s1pio2);
-				*cos = __sindf(s1pio2 - x);
-			} else {
+			if (sign) {
 				*sin = -__cosdf(x + s1pio2);
 				*cos = __sindf(x + s1pio2);
+			} else {
+				*sin = __cosdf(s1pio2 - x);
+				*cos = __sindf(s1pio2 - x);
 			}
 			return;
 		}
-		*sin = __sindf(hx < 0x80000000 ? s2pio2 - x : -s2pio2 - x);
-		*cos = -__cosdf(hx < 0x80000000 ? x - s2pio2 : x + s2pio2);
+		/* -sin(x+c) is not correct if x+c could be 0: -0 vs +0 */
+		*sin = -__sindf(sign ? x + s2pio2 : x - s2pio2);
+		*cos = -__cosdf(sign ? x + s2pio2 : x - s2pio2);
 		return;
 	}
 
 	/* |x| ~<= 9*pi/4 */
 	if (ix <= 0x40e231d5) {
 		if (ix <= 0x40afeddf) {  /* |x| ~<= 7*pi/4 */
-			if (hx < 0x80000000) {
+			if (sign) {
+				*sin = __cosdf(x + s3pio2);
+				*cos = -__sindf(x + s3pio2);
+			} else {
 				*sin = -__cosdf(x - s3pio2);
 				*cos = __sindf(x - s3pio2);
-			} else {
-				*sin = __cosdf(x + s3pio2);
-				*cos = __sindf(-s3pio2 - x);
 			}
 			return;
 		}
-		*sin = __sindf(hx < 0x80000000 ? x - s4pio2 : x + s4pio2);
-		*cos = __cosdf(hx < 0x80000000 ? x - s4pio2 : x + s4pio2);
+		*sin = __sindf(sign ? x + s4pio2 : x - s4pio2);
+		*cos = __cosdf(sign ? x + s4pio2 : x - s4pio2);
 		return;
 	}
 
diff --git a/src/math/sincosl.c b/src/math/sincosl.c
index d632fe6f..5db69bd6 100644
--- a/src/math/sincosl.c
+++ b/src/math/sincosl.c
@@ -10,27 +10,29 @@ void sincosl(long double x, long double *sin, long double *cos)
 void sincosl(long double x, long double *sin, long double *cos)
 {
 	union IEEEl2bits u;
-	int n;
+	unsigned n;
 	long double y[2], s, c;
 
 	u.e = x;
 	u.bits.sign = 0;
 
-	/* x = +-0 or subnormal */
-	if (!u.bits.exp) {
-		*sin = x;
-		*cos = 1.0;
-		return;
-	}
-
 	/* x = nan or inf */
 	if (u.bits.exp == 0x7fff) {
 		*sin = *cos = x - x;
 		return;
 	}
 
-	/* |x| < pi/4 */
+	/* |x| < (double)pi/4 */
 	if (u.e < M_PI_4) {
+		/* |x| < 0x1p-64 */
+		if (u.bits.exp < 0x3fff - 64) {
+			/* raise underflow if subnormal */
+			if (u.bits.exp == 0) FORCE_EVAL(x*0x1p-120f);
+			*sin = x;
+			/* raise inexact if x!=0 */
+			*cos = 1.0 + x;
+			return;
+		}
 		*sin = __sinl(x, 0, 0);
 		*cos = __cosl(x, 0);
 		return;
diff --git a/src/math/sinf.c b/src/math/sinf.c
index dcca67af..64e39f50 100644
--- a/src/math/sinf.c
+++ b/src/math/sinf.c
@@ -26,35 +26,38 @@ s4pio2 = 4*M_PI_2; /* 0x401921FB, 0x54442D18 */
 float sinf(float x)
 {
 	double y;
-	int32_t n, hx, ix;
+	uint32_t ix;
+	int n, sign;
 
-	GET_FLOAT_WORD(hx, x);
-	ix = hx & 0x7fffffff;
+	GET_FLOAT_WORD(ix, x);
+	sign = ix >> 31;
+	ix &= 0x7fffffff;
 
 	if (ix <= 0x3f490fda) {  /* |x| ~<= pi/4 */
-		if (ix < 0x39800000)  /* |x| < 2**-12 */
-			/* raise inexact if x != 0 */
-			if((int)x == 0)
-				return x;
+		if (ix < 0x39800000) {  /* |x| < 2**-12 */
+			/* raise inexact if x!=0 and underflow if subnormal */
+			FORCE_EVAL(ix < 0x00800000 ? x/0x1p120f : x+0x1p120f);
+			return x;
+		}
 		return __sindf(x);
 	}
 	if (ix <= 0x407b53d1) {  /* |x| ~<= 5*pi/4 */
 		if (ix <= 0x4016cbe3) {  /* |x| ~<= 3pi/4 */
-			if (hx > 0)
-				return __cosdf(x - s1pio2);
-			else
+			if (sign)
 				return -__cosdf(x + s1pio2);
+			else
+				return __cosdf(x - s1pio2);
 		}
-		return __sindf(hx > 0 ? s2pio2 - x : -s2pio2 - x);
+		return __sindf(sign ? -(x + s2pio2) : -(x - s2pio2));
 	}
 	if (ix <= 0x40e231d5) {  /* |x| ~<= 9*pi/4 */
 		if (ix <= 0x40afeddf) {  /* |x| ~<= 7*pi/4 */
-			if (hx > 0)
-				return -__cosdf(x - s3pio2);
-			else
+			if (sign)
 				return __cosdf(x + s3pio2);
+			else
+				return -__cosdf(x - s3pio2);
 		}
-		return __sindf(hx > 0 ? x - s4pio2 : x + s4pio2);
+		return __sindf(sign ? x + s4pio2 : x - s4pio2);
 	}
 
 	/* sin(Inf or NaN) is NaN */
diff --git a/src/math/sinl.c b/src/math/sinl.c
index 7e0b44f4..6ca99986 100644
--- a/src/math/sinl.c
+++ b/src/math/sinl.c
@@ -37,33 +37,32 @@ long double sinl(long double x)
 long double sinl(long double x)
 {
 	union IEEEl2bits z;
-	int e0, s;
+	unsigned n;
 	long double y[2];
 	long double hi, lo;
 
 	z.e = x;
-	s = z.bits.sign;
 	z.bits.sign = 0;
 
-	/* If x = +-0 or x is a subnormal number, then sin(x) = x */
-	if (z.bits.exp == 0)
-		return x;
-
 	/* If x = NaN or Inf, then sin(x) = NaN. */
-	if (z.bits.exp == 32767)
+	if (z.bits.exp == 0x7fff)
 		return (x - x) / (x - x);
 
-	/* Optimize the case where x is already within range. */
+	/* |x| < (double)pi/4 */
 	if (z.e < M_PI_4) {
-		hi = __sinl(z.e, 0, 0);
-		return  s ? -hi : hi;
+		/* |x| < 0x1p-64 */
+		if (z.bits.exp < 0x3fff - 64) {
+			/* raise inexact if x!=0 and underflow if subnormal */
+			FORCE_EVAL(z.bits.exp == 0 ? x/0x1p120f : x+0x1p120f);
+			return x;
+		}
+		return __sinl(x, 0.0, 0);
 	}
 
-	e0 = __rem_pio2l(x, y);
+	n = __rem_pio2l(x, y);
 	hi = y[0];
 	lo = y[1];
-
-	switch (e0 & 3) {
+	switch (n & 3) {
 	case 0:
 		hi = __sinl(hi, lo, 1);
 		break;
@@ -71,10 +70,10 @@ long double sinl(long double x)
 		hi = __cosl(hi, lo);
 		break;
 	case 2:
-		hi = - __sinl(hi, lo, 1);
+		hi = -__sinl(hi, lo, 1);
 		break;
 	case 3:
-		hi = - __cosl(hi, lo);
+		hi = -__cosl(hi, lo);
 		break;
 	}
 	return hi;
diff --git a/src/math/tanl.c b/src/math/tanl.c
index 3b51e011..546c7a02 100644
--- a/src/math/tanl.c
+++ b/src/math/tanl.c
@@ -53,11 +53,12 @@ long double tanl(long double x)
 
 	/* |x| < (double)pi/4 */
 	if (z.e < M_PI_4) {
-		/* x = +-0 or x is subnormal */
-		if (z.bits.exp == 0)
-			/* inexact and underflow if x!=0 */
-			return x + x*0x1p-120f;
-		/* can raise spurious underflow */
+		/* |x| < 0x1p-64 */
+		if (z.bits.exp < 0x3fff - 64) {
+			/* raise inexact if x!=0 and underflow if subnormal */
+			FORCE_EVAL(z.bits.exp == 0 ? x/0x1p120f : x+0x1p120f);
+			return x;
+		}
 		return __tanl(x, 0, 0);
 	}
 
-- 
cgit v1.2.3