Patchwork [2/2] Improves __ieee754_exp() performance by greater than 5x on sparc/x86.

login
register
mail settings
Submitter Patrick McGehearty
Date Nov. 2, 2017, 6:48 p.m.
Message ID <1509648537-63658-2-git-send-email-patrick.mcgehearty@oracle.com>
Download mbox | patch
Permalink /patch/24041/
State New
Headers show

Comments

Patrick McGehearty - Nov. 2, 2017, 6:48 p.m.
Version 4 of proposed patch.

New comments revised to use GNU standard comment formating.
Limited comment added in eexp.tbl for TBL[]. The original src
used for porting to Linux did not have a comment about TBL[].
The new comment is limited to the current worker's level of
understanding.

The (-xx.x > threshold2) case is changed to return force_underflow.
For FE_TONEAREST, tiny*tiny will always be zero but for
FE_UPWARD, it will be the smallest representable value.

That change caused no change in the math test results for Sparc or x86.
---
 sysdeps/ieee754/dbl-64/e_exp.c  |   38 +++++++++++++++-----------------------
 sysdeps/ieee754/dbl-64/eexp.tbl |   14 +++++++++-----
 2 files changed, 24 insertions(+), 28 deletions(-)
Joseph Myers - Nov. 2, 2017, 9:08 p.m.
On Thu, 2 Nov 2017, Patrick McGehearty wrote:

> Version 4 of proposed patch.

I'd expect a single patch with the formatting fixed rather than a repost 
of a previous version followed by a patch to fix up the formatting, since 
the version with non-GNU formatting should never be committed.

>  	  if (fe_val == FE_TONEAREST) {
>  	    t = xx.x * xx.x;

Another formatting issue: that's not the correct brace position in GNU 
style, which always uses

if (condition)
  {
    block contents;
  }

so the braced block is below and indented from the if, then the contents 
of the block are again indented.

> -/* maximum value for -x to not underflow */
> +/* maximum value for -x to not underflow to zero in FE_NEAREST mode */

It's FE_TONEAREST, I think this means for exp(-x) not to underflow rather 
than for -x not to underflow, and the formatting as elsewhere should be 
sentence style,

/* Maximum value for exp (-x) not to underflow to zero in FE_TONEAREST
   mode.  */

or similar.  Likewise for other comments (start with a capital letter, end 
with "." and two spaces).

Patch

diff --git a/sysdeps/ieee754/dbl-64/e_exp.c b/sysdeps/ieee754/dbl-64/e_exp.c
index f118f33..c1e6585 100644
--- a/sysdeps/ieee754/dbl-64/e_exp.c
+++ b/sysdeps/ieee754/dbl-64/e_exp.c
@@ -34,8 +34,7 @@ 
 /***************************************************************************/
 
 /*  IBM exp(x) replaced by following exp(x) in 2017. IBM exp1(x,xx) remains. */
-/*
-   exp(x)
+/* exp(x)
    Hybrid algorithm of Peter Tang's Table driven method (for large
    arguments) and an accurate table (for small arguments).
    Written by K.C. Ng, November 1988.
@@ -70,8 +69,7 @@ 
    Misc. info.
 	For IEEE double
 		if x >  7.09782712893383973096e+02 then exp(x) overflow
-		if x < -7.45133219101941108420e+02 then exp(x) underflow
- */
+		if x < -7.45133219101941108420e+02 then exp(x) underflow. */
 
 #include <math.h>
 #include <math-svid-compat.h>
@@ -130,10 +128,8 @@  __ieee754_exp (double x_arg)
 	      retval = one + xx.x * (one + half * xx.x);
 	      return (retval);
 	    }
-	  /* 
-	     Use FE_TONEAREST rounding mode for computing yy.y 
-	     Avoid set/reset of rounding mode if already in FE_TONEAREST mode
-	  */
+	  /* Use FE_TONEAREST rounding mode for computing yy.y.
+	     Avoid set/reset of rounding mode if in FE_TONEAREST mode. */
 	  fe_val = get_rounding_mode ();
 	  if (fe_val == FE_TONEAREST) {
 	    t = xx.x * xx.x;
@@ -151,16 +147,14 @@  __ieee754_exp (double x_arg)
 	  return (retval);
 	}
 
-      /* find the multiple of 2^-6 nearest x */
+      /* Find the multiple of 2^-6 nearest x. */
       k = hx >> 20;
       j = (0x00100000 | (hx & 0x000fffff)) >> (0x40c - k);
       j = (j - 1) & ~1;
       if (ix < 0)
 	j += 134;
-      /* 
-	 Use FE_TONEAREST rounding mode for computing yy.y 
-	 Avoid set/reset of rounding mode if already in FE_TONEAREST mode
-      */
+      /* Use FE_TONEAREST rounding mode for computing yy.y.
+	 Avoid set/reset of rounding mode if in FE_TONEAREST mode. */
       fe_val = get_rounding_mode ();
       if (fe_val == FE_TONEAREST) {
 	z = xx.x - TBL2[j];
@@ -181,31 +175,29 @@  __ieee754_exp (double x_arg)
     }
 
   if (hx >= 0x40862e42)
-    {				/* x is large, infinite, or nan */
+    {				/* x is large, infinite, or nan. */
       if (hx >= 0x7ff00000)
 	{
 	  if (ix == 0xfff00000 && xx.i_part[LOW_HALF] == 0)
-	    return (zero);	/* exp(-inf) = 0 */
-	  return (xx.x * xx.x);	/* exp(nan/inf) is nan or inf */
+	    return (zero);	/* exp(-inf) = 0. */
+	  return (xx.x * xx.x);	/* exp(nan/inf) is nan or inf. */
 	}
       if (xx.x > threshold1)
-	{			/* set overflow error condition */
+	{			/* set overflow error condition. */
 	  retval = hhuge * hhuge;
 	  return retval;
 	} 
       if (-xx.x > threshold2)
-	{			/* set underflow error condition */
+	{			/* set underflow error condition. */
 	  double force_underflow = tiny * tiny;
 	  math_force_eval (force_underflow);
-	  retval = zero;
+	  retval = force_underflow;
 	  return retval;
 	}
     }
 
-  /* 
-     Use FE_TONEAREST rounding mode for computing yy.y 
-     Avoid set/reset of rounding mode if already in FE_TONEAREST mode
-  */
+  /* Use FE_TONEAREST rounding mode for computing yy.y.
+     Avoid set/reset of rounding mode if already in FE_TONEAREST mode. */
   fe_val = get_rounding_mode ();
   if (fe_val == FE_TONEAREST) {
     t = invln2_32 * xx.x;
diff --git a/sysdeps/ieee754/dbl-64/eexp.tbl b/sysdeps/ieee754/dbl-64/eexp.tbl
index ec48489..1fed9f4 100644
--- a/sysdeps/ieee754/dbl-64/eexp.tbl
+++ b/sysdeps/ieee754/dbl-64/eexp.tbl
@@ -16,6 +16,11 @@ 
    License along with the GNU C Library; if not, see
    <http://www.gnu.org/licenses/>.  */
 
+
+/* TBL[2*j] and TBL[2*j+1] are double precision numbers used to
+   approximate exp(x) using the formula given in the comments
+   for e_exp.c. */
+
 static const double TBL[64] = {
     0x1.0000000000000p+0,  0x0.0000000000000p+0,
     0x1.059b0d3158574p+0,  0x1.d73e2a475b465p-55,
@@ -49,8 +54,8 @@  static const double TBL[64] = {
     0x1.dfc97337b9b5fp+0, -0x1.1a5cd4f184b5cp-54,
     0x1.ea4afa2a490dap+0, -0x1.e9c23179c2893p-54,
     0x1.f50765b6e4540p+0,  0x1.9d3e12dd8a18bp-54};
-/*
-   For i = 0, ..., 66,
+
+/* For i = 0, ..., 66,
      TBL2[2*i] is a double precision number near (i+1)*2^-6, and
      TBL2[2*i+1] = exp(TBL2[2*i]) to within a relative error less
      than 2^-60.
@@ -58,8 +63,7 @@  static const double TBL[64] = {
    For i = 67, ..., 133,
      TBL2[2*i] is a double precision number near -(i+1)*2^-6, and
      TBL2[2*i+1] = exp(TBL2[2*i]) to within a relative error less
-     than 2^-60.
-*/
+     than 2^-60. */
 
 static const double TBL2[268] = {
     0x1.ffffffffffc82p-7,   0x1.04080ab55de32p+0,
@@ -209,7 +213,7 @@  static const double
   t5 = 0x1.6c1728d739765p-10, /* 1.3888949086377719040e-3 */
 /* maximum value for x to not overflow */
   threshold1 = 0x1.62e42fefa39efp+9, /* 7.09782712893383973096e+02 */
-/* maximum value for -x to not underflow */
+/* maximum value for -x to not underflow to zero in FE_NEAREST mode */
   threshold2 = 0x1.74910d52d3051p+9, /* 7.45133219101941108420e+02 */
 /* scaling factor used when result near zero*/
   twom54 = 0x1.0000000000000p-54; /* 5.55111512312578270212e-17 */