From patchwork Thu Nov  2 18:48:57 2017
Content-Type: text/plain; charset="utf-8"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
X-Patchwork-Submitter: Patrick McGehearty <patrick.mcgehearty@oracle.com>
X-Patchwork-Id: 24041
Received: (qmail 36005 invoked by alias); 2 Nov 2017 18:49:13 -0000
Mailing-List: contact libc-alpha-help@sourceware.org; run by ezmlm
Precedence: bulk
List-Id: <libc-alpha.sourceware.org>
List-Unsubscribe: <mailto:libc-alpha-unsubscribe-##L=##H@sourceware.org>
List-Subscribe: <mailto:libc-alpha-subscribe@sourceware.org>
List-Archive: <http://sourceware.org/ml/libc-alpha/>
List-Post: <mailto:libc-alpha@sourceware.org>
List-Help: <mailto:libc-alpha-help@sourceware.org>,
	<http://sourceware.org/ml/#faqs>
Sender: libc-alpha-owner@sourceware.org
Delivered-To: mailing list libc-alpha@sourceware.org
Received: (qmail 35971 invoked by uid 89); 2 Nov 2017 18:49:13 -0000
Authentication-Results: sourceware.org; auth=none
X-Virus-Found: No
X-Spam-SWARE-Status: No, score=-26.9 required=5.0 tests=BAYES_00, GIT_PATCH_0,
	GIT_PATCH_1, GIT_PATCH_2, GIT_PATCH_3, RP_MATCHES_RCVD,
	SPF_PASS,
	UNPARSEABLE_RELAY autolearn=ham version=3.3.2 spammy=1288, 498,
	1.7.1, yyy
X-HELO: userp1040.oracle.com
From: Patrick McGehearty <patrick.mcgehearty@oracle.com>
To: libc-alpha@sourceware.org
Subject: [PATCH 2/2] Improves __ieee754_exp() performance by greater than 5x
	on sparc/x86.
Date: Thu,  2 Nov 2017 14:48:57 -0400
Message-Id: <1509648537-63658-2-git-send-email-patrick.mcgehearty@oracle.com>
In-Reply-To: <1509648537-63658-1-git-send-email-patrick.mcgehearty@oracle.com>
References: <1509648537-63658-1-git-send-email-patrick.mcgehearty@oracle.com>

Version 4 of proposed patch.

New comments revised to use GNU standard comment formating.
Limited comment added in eexp.tbl for TBL[]. The original src
used for porting to Linux did not have a comment about TBL[].
The new comment is limited to the current worker's level of
understanding.

The (-xx.x > threshold2) case is changed to return force_underflow.
For FE_TONEAREST, tiny*tiny will always be zero but for
FE_UPWARD, it will be the smallest representable value.

That change caused no change in the math test results for Sparc or x86.
---
 sysdeps/ieee754/dbl-64/e_exp.c  |   38 +++++++++++++++-----------------------
 sysdeps/ieee754/dbl-64/eexp.tbl |   14 +++++++++-----
 2 files changed, 24 insertions(+), 28 deletions(-)

diff --git a/sysdeps/ieee754/dbl-64/e_exp.c b/sysdeps/ieee754/dbl-64/e_exp.c
index f118f33..c1e6585 100644
--- a/sysdeps/ieee754/dbl-64/e_exp.c
+++ b/sysdeps/ieee754/dbl-64/e_exp.c
@@ -34,8 +34,7 @@
 /***************************************************************************/
 
 /*  IBM exp(x) replaced by following exp(x) in 2017. IBM exp1(x,xx) remains. */
-/*
-   exp(x)
+/* exp(x)
    Hybrid algorithm of Peter Tang's Table driven method (for large
    arguments) and an accurate table (for small arguments).
    Written by K.C. Ng, November 1988.
@@ -70,8 +69,7 @@
    Misc. info.
 	For IEEE double
 		if x >  7.09782712893383973096e+02 then exp(x) overflow
-		if x < -7.45133219101941108420e+02 then exp(x) underflow
- */
+		if x < -7.45133219101941108420e+02 then exp(x) underflow. */
 
 #include <math.h>
 #include <math-svid-compat.h>
@@ -130,10 +128,8 @@ __ieee754_exp (double x_arg)
 	      retval = one + xx.x * (one + half * xx.x);
 	      return (retval);
 	    }
-	  /* 
-	     Use FE_TONEAREST rounding mode for computing yy.y 
-	     Avoid set/reset of rounding mode if already in FE_TONEAREST mode
-	  */
+	  /* Use FE_TONEAREST rounding mode for computing yy.y.
+	     Avoid set/reset of rounding mode if in FE_TONEAREST mode. */
 	  fe_val = get_rounding_mode ();
 	  if (fe_val == FE_TONEAREST) {
 	    t = xx.x * xx.x;
@@ -151,16 +147,14 @@ __ieee754_exp (double x_arg)
 	  return (retval);
 	}
 
-      /* find the multiple of 2^-6 nearest x */
+      /* Find the multiple of 2^-6 nearest x. */
       k = hx >> 20;
       j = (0x00100000 | (hx & 0x000fffff)) >> (0x40c - k);
       j = (j - 1) & ~1;
       if (ix < 0)
 	j += 134;
-      /* 
-	 Use FE_TONEAREST rounding mode for computing yy.y 
-	 Avoid set/reset of rounding mode if already in FE_TONEAREST mode
-      */
+      /* Use FE_TONEAREST rounding mode for computing yy.y.
+	 Avoid set/reset of rounding mode if in FE_TONEAREST mode. */
       fe_val = get_rounding_mode ();
       if (fe_val == FE_TONEAREST) {
 	z = xx.x - TBL2[j];
@@ -181,31 +175,29 @@ __ieee754_exp (double x_arg)
     }
 
   if (hx >= 0x40862e42)
-    {				/* x is large, infinite, or nan */
+    {				/* x is large, infinite, or nan. */
       if (hx >= 0x7ff00000)
 	{
 	  if (ix == 0xfff00000 && xx.i_part[LOW_HALF] == 0)
-	    return (zero);	/* exp(-inf) = 0 */
-	  return (xx.x * xx.x);	/* exp(nan/inf) is nan or inf */
+	    return (zero);	/* exp(-inf) = 0. */
+	  return (xx.x * xx.x);	/* exp(nan/inf) is nan or inf. */
 	}
       if (xx.x > threshold1)
-	{			/* set overflow error condition */
+	{			/* set overflow error condition. */
 	  retval = hhuge * hhuge;
 	  return retval;
 	} 
       if (-xx.x > threshold2)
-	{			/* set underflow error condition */
+	{			/* set underflow error condition. */
 	  double force_underflow = tiny * tiny;
 	  math_force_eval (force_underflow);
-	  retval = zero;
+	  retval = force_underflow;
 	  return retval;
 	}
     }
 
-  /* 
-     Use FE_TONEAREST rounding mode for computing yy.y 
-     Avoid set/reset of rounding mode if already in FE_TONEAREST mode
-  */
+  /* Use FE_TONEAREST rounding mode for computing yy.y.
+     Avoid set/reset of rounding mode if already in FE_TONEAREST mode. */
   fe_val = get_rounding_mode ();
   if (fe_val == FE_TONEAREST) {
     t = invln2_32 * xx.x;
diff --git a/sysdeps/ieee754/dbl-64/eexp.tbl b/sysdeps/ieee754/dbl-64/eexp.tbl
index ec48489..1fed9f4 100644
--- a/sysdeps/ieee754/dbl-64/eexp.tbl
+++ b/sysdeps/ieee754/dbl-64/eexp.tbl
@@ -16,6 +16,11 @@
    License along with the GNU C Library; if not, see
    <http://www.gnu.org/licenses/>.  */
 
+
+/* TBL[2*j] and TBL[2*j+1] are double precision numbers used to
+   approximate exp(x) using the formula given in the comments
+   for e_exp.c. */
+
 static const double TBL[64] = {
     0x1.0000000000000p+0,  0x0.0000000000000p+0,
     0x1.059b0d3158574p+0,  0x1.d73e2a475b465p-55,
@@ -49,8 +54,8 @@ static const double TBL[64] = {
     0x1.dfc97337b9b5fp+0, -0x1.1a5cd4f184b5cp-54,
     0x1.ea4afa2a490dap+0, -0x1.e9c23179c2893p-54,
     0x1.f50765b6e4540p+0,  0x1.9d3e12dd8a18bp-54};
-/*
-   For i = 0, ..., 66,
+
+/* For i = 0, ..., 66,
      TBL2[2*i] is a double precision number near (i+1)*2^-6, and
      TBL2[2*i+1] = exp(TBL2[2*i]) to within a relative error less
      than 2^-60.
@@ -58,8 +63,7 @@ static const double TBL[64] = {
    For i = 67, ..., 133,
      TBL2[2*i] is a double precision number near -(i+1)*2^-6, and
      TBL2[2*i+1] = exp(TBL2[2*i]) to within a relative error less
-     than 2^-60.
-*/
+     than 2^-60. */
 
 static const double TBL2[268] = {
     0x1.ffffffffffc82p-7,   0x1.04080ab55de32p+0,
@@ -209,7 +213,7 @@ static const double
   t5 = 0x1.6c1728d739765p-10, /* 1.3888949086377719040e-3 */
 /* maximum value for x to not overflow */
   threshold1 = 0x1.62e42fefa39efp+9, /* 7.09782712893383973096e+02 */
-/* maximum value for -x to not underflow */
+/* maximum value for -x to not underflow to zero in FE_NEAREST mode */
   threshold2 = 0x1.74910d52d3051p+9, /* 7.45133219101941108420e+02 */
 /* scaling factor used when result near zero*/
   twom54 = 0x1.0000000000000p-54; /* 5.55111512312578270212e-17 */