From patchwork Thu Nov 2 18:48:57 2017 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Patrick McGehearty X-Patchwork-Id: 24041 Received: (qmail 36005 invoked by alias); 2 Nov 2017 18:49:13 -0000 Mailing-List: contact libc-alpha-help@sourceware.org; run by ezmlm Precedence: bulk List-Id: List-Unsubscribe: List-Subscribe: List-Archive: List-Post: List-Help: , Sender: libc-alpha-owner@sourceware.org Delivered-To: mailing list libc-alpha@sourceware.org Received: (qmail 35971 invoked by uid 89); 2 Nov 2017 18:49:13 -0000 Authentication-Results: sourceware.org; auth=none X-Virus-Found: No X-Spam-SWARE-Status: No, score=-26.9 required=5.0 tests=BAYES_00, GIT_PATCH_0, GIT_PATCH_1, GIT_PATCH_2, GIT_PATCH_3, RP_MATCHES_RCVD, SPF_PASS, UNPARSEABLE_RELAY autolearn=ham version=3.3.2 spammy=1288, 498, 1.7.1, yyy X-HELO: userp1040.oracle.com From: Patrick McGehearty To: libc-alpha@sourceware.org Subject: [PATCH 2/2] Improves __ieee754_exp() performance by greater than 5x on sparc/x86. Date: Thu, 2 Nov 2017 14:48:57 -0400 Message-Id: <1509648537-63658-2-git-send-email-patrick.mcgehearty@oracle.com> In-Reply-To: <1509648537-63658-1-git-send-email-patrick.mcgehearty@oracle.com> References: <1509648537-63658-1-git-send-email-patrick.mcgehearty@oracle.com> Version 4 of proposed patch. New comments revised to use GNU standard comment formating. Limited comment added in eexp.tbl for TBL[]. The original src used for porting to Linux did not have a comment about TBL[]. The new comment is limited to the current worker's level of understanding. The (-xx.x > threshold2) case is changed to return force_underflow. For FE_TONEAREST, tiny*tiny will always be zero but for FE_UPWARD, it will be the smallest representable value. That change caused no change in the math test results for Sparc or x86. --- sysdeps/ieee754/dbl-64/e_exp.c | 38 +++++++++++++++----------------------- sysdeps/ieee754/dbl-64/eexp.tbl | 14 +++++++++----- 2 files changed, 24 insertions(+), 28 deletions(-) diff --git a/sysdeps/ieee754/dbl-64/e_exp.c b/sysdeps/ieee754/dbl-64/e_exp.c index f118f33..c1e6585 100644 --- a/sysdeps/ieee754/dbl-64/e_exp.c +++ b/sysdeps/ieee754/dbl-64/e_exp.c @@ -34,8 +34,7 @@ /***************************************************************************/ /* IBM exp(x) replaced by following exp(x) in 2017. IBM exp1(x,xx) remains. */ -/* - exp(x) +/* exp(x) Hybrid algorithm of Peter Tang's Table driven method (for large arguments) and an accurate table (for small arguments). Written by K.C. Ng, November 1988. @@ -70,8 +69,7 @@ Misc. info. For IEEE double if x > 7.09782712893383973096e+02 then exp(x) overflow - if x < -7.45133219101941108420e+02 then exp(x) underflow - */ + if x < -7.45133219101941108420e+02 then exp(x) underflow. */ #include #include @@ -130,10 +128,8 @@ __ieee754_exp (double x_arg) retval = one + xx.x * (one + half * xx.x); return (retval); } - /* - Use FE_TONEAREST rounding mode for computing yy.y - Avoid set/reset of rounding mode if already in FE_TONEAREST mode - */ + /* Use FE_TONEAREST rounding mode for computing yy.y. + Avoid set/reset of rounding mode if in FE_TONEAREST mode. */ fe_val = get_rounding_mode (); if (fe_val == FE_TONEAREST) { t = xx.x * xx.x; @@ -151,16 +147,14 @@ __ieee754_exp (double x_arg) return (retval); } - /* find the multiple of 2^-6 nearest x */ + /* Find the multiple of 2^-6 nearest x. */ k = hx >> 20; j = (0x00100000 | (hx & 0x000fffff)) >> (0x40c - k); j = (j - 1) & ~1; if (ix < 0) j += 134; - /* - Use FE_TONEAREST rounding mode for computing yy.y - Avoid set/reset of rounding mode if already in FE_TONEAREST mode - */ + /* Use FE_TONEAREST rounding mode for computing yy.y. + Avoid set/reset of rounding mode if in FE_TONEAREST mode. */ fe_val = get_rounding_mode (); if (fe_val == FE_TONEAREST) { z = xx.x - TBL2[j]; @@ -181,31 +175,29 @@ __ieee754_exp (double x_arg) } if (hx >= 0x40862e42) - { /* x is large, infinite, or nan */ + { /* x is large, infinite, or nan. */ if (hx >= 0x7ff00000) { if (ix == 0xfff00000 && xx.i_part[LOW_HALF] == 0) - return (zero); /* exp(-inf) = 0 */ - return (xx.x * xx.x); /* exp(nan/inf) is nan or inf */ + return (zero); /* exp(-inf) = 0. */ + return (xx.x * xx.x); /* exp(nan/inf) is nan or inf. */ } if (xx.x > threshold1) - { /* set overflow error condition */ + { /* set overflow error condition. */ retval = hhuge * hhuge; return retval; } if (-xx.x > threshold2) - { /* set underflow error condition */ + { /* set underflow error condition. */ double force_underflow = tiny * tiny; math_force_eval (force_underflow); - retval = zero; + retval = force_underflow; return retval; } } - /* - Use FE_TONEAREST rounding mode for computing yy.y - Avoid set/reset of rounding mode if already in FE_TONEAREST mode - */ + /* Use FE_TONEAREST rounding mode for computing yy.y. + Avoid set/reset of rounding mode if already in FE_TONEAREST mode. */ fe_val = get_rounding_mode (); if (fe_val == FE_TONEAREST) { t = invln2_32 * xx.x; diff --git a/sysdeps/ieee754/dbl-64/eexp.tbl b/sysdeps/ieee754/dbl-64/eexp.tbl index ec48489..1fed9f4 100644 --- a/sysdeps/ieee754/dbl-64/eexp.tbl +++ b/sysdeps/ieee754/dbl-64/eexp.tbl @@ -16,6 +16,11 @@ License along with the GNU C Library; if not, see . */ + +/* TBL[2*j] and TBL[2*j+1] are double precision numbers used to + approximate exp(x) using the formula given in the comments + for e_exp.c. */ + static const double TBL[64] = { 0x1.0000000000000p+0, 0x0.0000000000000p+0, 0x1.059b0d3158574p+0, 0x1.d73e2a475b465p-55, @@ -49,8 +54,8 @@ static const double TBL[64] = { 0x1.dfc97337b9b5fp+0, -0x1.1a5cd4f184b5cp-54, 0x1.ea4afa2a490dap+0, -0x1.e9c23179c2893p-54, 0x1.f50765b6e4540p+0, 0x1.9d3e12dd8a18bp-54}; -/* - For i = 0, ..., 66, + +/* For i = 0, ..., 66, TBL2[2*i] is a double precision number near (i+1)*2^-6, and TBL2[2*i+1] = exp(TBL2[2*i]) to within a relative error less than 2^-60. @@ -58,8 +63,7 @@ static const double TBL[64] = { For i = 67, ..., 133, TBL2[2*i] is a double precision number near -(i+1)*2^-6, and TBL2[2*i+1] = exp(TBL2[2*i]) to within a relative error less - than 2^-60. -*/ + than 2^-60. */ static const double TBL2[268] = { 0x1.ffffffffffc82p-7, 0x1.04080ab55de32p+0, @@ -209,7 +213,7 @@ static const double t5 = 0x1.6c1728d739765p-10, /* 1.3888949086377719040e-3 */ /* maximum value for x to not overflow */ threshold1 = 0x1.62e42fefa39efp+9, /* 7.09782712893383973096e+02 */ -/* maximum value for -x to not underflow */ +/* maximum value for -x to not underflow to zero in FE_NEAREST mode */ threshold2 = 0x1.74910d52d3051p+9, /* 7.45133219101941108420e+02 */ /* scaling factor used when result near zero*/ twom54 = 0x1.0000000000000p-54; /* 5.55111512312578270212e-17 */