From patchwork Tue Oct 31 07:58:56 2017 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Rajalakshmi S X-Patchwork-Id: 23991 Received: (qmail 106645 invoked by alias); 31 Oct 2017 07:59:09 -0000 Mailing-List: contact libc-alpha-help@sourceware.org; run by ezmlm Precedence: bulk List-Id: List-Unsubscribe: List-Subscribe: List-Archive: List-Post: List-Help: , Sender: libc-alpha-owner@sourceware.org Delivered-To: mailing list libc-alpha@sourceware.org Received: (qmail 106627 invoked by uid 89); 31 Oct 2017 07:59:08 -0000 Authentication-Results: sourceware.org; auth=none X-Virus-Found: No X-Spam-SWARE-Status: No, score=-24.2 required=5.0 tests=AWL, BAYES_00, GIT_PATCH_0, GIT_PATCH_1, GIT_PATCH_2, GIT_PATCH_3, KAM_ASCII_DIVIDERS, KAM_LAZY_DOMAIN_SECURITY, RCVD_IN_DNSWL_LOW autolearn=ham version=3.3.2 spammy=2835, ian, Developed, lance X-HELO: mx0a-001b2d01.pphosted.com From: Rajalakshmi Srinivasaraghavan To: libc-alpha@sourceware.org Cc: Rajalakshmi Srinivasaraghavan Subject: [PATCHv2] New generic sinf Date: Tue, 31 Oct 2017 13:28:56 +0530 X-TM-AS-GCONF: 00 x-cbid: 17103107-0016-0000-0000-000004FBBC8E X-IBM-AV-DETECTION: SAVI=unused REMOTE=unused XFE=unused x-cbparentid: 17103107-0017-0000-0000-0000283742EE Message-Id: <1509436736-23436-1-git-send-email-raji@linux.vnet.ibm.com> X-Proofpoint-Virus-Version: vendor=fsecure engine=2.50.10432:, , definitions=2017-10-31_03:, , signatures=0 X-Proofpoint-Spam-Details: rule=outbound_notspam policy=outbound score=0 priorityscore=1501 malwarescore=0 suspectscore=3 phishscore=0 bulkscore=0 spamscore=0 clxscore=1011 lowpriorityscore=0 impostorscore=0 adultscore=0 classifier=spam adjust=0 reason=mlx scancount=1 engine=8.0.1-1707230000 definitions=main-1710310112 Changes since version 1: - Removed sccs id. - Replaced spaces with tabs. --- The same logic used in s_sinf.S version of x86 and powerpc is moved as generic s_sinf.c, so there is no performance improvement in x86_64 and powerpc64. For s390, this is the improvement noted. With patch: benchtests/bench-sinf "": { "duration": 9.91026e+09, "iterations": 4.6512e+08, "max": 130.26, "min": 7.027, "mean": 21.3069 } Without patch: "": { "duration": 1.00656e+10, "iterations": 1.65699e+08, "max": 1740.57, "min": 4.729, "mean": 60.7461 } Also addressed comments from Joseph on generic sincosf version Ref:https://sourceware.org/ml/libc-alpha/2017-10/msg00367.html --- This implementation is based on optimized sinf assembly versions of x86_64 and powerpc. Tested on s390, x86_64 and powerpc64le. 2017-10-31 Rajalakshmi Srinivasaraghavan * sysdeps/ieee754/flt-32/s_sinf.c: New implementation. --- sysdeps/ieee754/flt-32/s_sinf.c | 245 +++++++++++++++++++++++++++++++++------- 1 file changed, 206 insertions(+), 39 deletions(-) diff --git a/sysdeps/ieee754/flt-32/s_sinf.c b/sysdeps/ieee754/flt-32/s_sinf.c index 3ec98f811d..f7ce377f4c 100644 --- a/sysdeps/ieee754/flt-32/s_sinf.c +++ b/sysdeps/ieee754/flt-32/s_sinf.c @@ -1,21 +1,20 @@ -/* s_sinf.c -- float version of s_sin.c. - * Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com. - */ +/* Compute sine of argument. + Copyright (C) 2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. -/* - * ==================================================== - * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. - * - * Developed at SunPro, a Sun Microsystems, Inc. business. - * Permission to use, copy, modify, and distribute this - * software is freely granted, provided that this notice - * is preserved. - * ==================================================== - */ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. -#if defined(LIBM_SCCS) && !defined(lint) -static char rcsid[] = "$NetBSD: s_sinf.c,v 1.4 1995/05/10 20:48:16 jtc Exp $"; -#endif + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ #include #include @@ -28,35 +27,203 @@ static char rcsid[] = "$NetBSD: s_sinf.c,v 1.4 1995/05/10 20:48:16 jtc Exp $"; # define SINF_FUNC SINF #endif -float SINF_FUNC(float x) -{ - float y[2],z=0.0; - int32_t n, ix; +/* Chebyshev constants for sin, range -PI/4 - PI/4. */ +static const double C0 = -0x1.ffffffffe98aep-2; +static const double C1 = 0x1.55555545c50c7p-5; +static const double C2 = -0x1.6c16b348b6874p-10; +static const double C3 = 0x1.a00eb9ac43ccp-16; +static const double C4 = -0x1.23c97dd8844d7p-22; +static const double S0 = -0x1.5555555551cd9p-3; +static const double S1 = 0x1.1111110c2688bp-7; +static const double S2 = -0x1.a019f8b4bd1f9p-13; +static const double S3 = 0x1.71d7264e6b5b4p-19; +static const double S4 = -0x1.a947e1674b58ap-26; +static const double SS0 = -0x1.555555543d49dp-3; +static const double SS1 = 0x1.110f475cec8c5p-7; +static const double SMALL = 0x1p-50; +static const double inv_PI_4 = 0x1.45f306dc9c883p+0; +static const double PI_2_hi = -0x1.921fb544p+0; +static const double PI_2_lo = -0x1.0b4611a626332p-34; - GET_FLOAT_WORD(ix,x); +#define FLOAT_EXPONENT_SHIFT 23 +#define FLOAT_EXPONENT_BIAS 127 - /* |x| ~< pi/4 */ - ix &= 0x7fffffff; - if(ix <= 0x3f490fd8) return __kernel_sinf(x,z,0); +static const double pio2_table[] = { + 0 * M_PI_2, + 1 * M_PI_2, + 2 * M_PI_2, + 3 * M_PI_2, + 4 * M_PI_2, + 5 * M_PI_2 +}; - /* sin(Inf or NaN) is NaN */ - else if (ix>=0x7f800000) { - if (ix == 0x7f800000) - __set_errno (EDOM); - return x-x; - } +static const double invpio4_table[] = { + 0x0p+0, + 0x1.45f306cp+0, + 0x1.c9c882ap-28, + 0x1.4fe13a8p-58, + 0x1.f47d4dp-85, + 0x1.bb81b6cp-112, + 0x1.4acc9ep-142, + 0x1.0e4107cp-169, + 0x1.ca2c756p-196, + 0x1.bd778acp-224 +}; + +static const int ones[] = { +1, -1 }; + +static inline float +reduced (const double theta, const unsigned long n, + const unsigned long signbit) +{ + double sx, theta2; + /* We are operating on |x|, so we need to add back the original + * signbit for sinf. */ + int sign_sin; + sign_sin = ones[((n >> 2) & 1) ^ signbit]; + theta2 = theta * theta; + /* Chebyshev polynomial of the form for sin: + * x+x^3*(S0+x^2*(S1+x^2*(S2+x^2*(S3+x^2*S4)))). + * 1.0+x^2*(C0+x^2*(C1+x^2*(C2+x^2*(C3+x^2*C4)))). */ + if ((n & 2) == 0) + { + sx = S3 + theta2 * S4; /* S3+x^2*S4. */ + sx = S2 + theta2 * sx; /* S2+x^2*(S3+x^2*S4). */ + sx = S1 + theta2 * sx; /* S1+x^2*(S2+x^2*(S3+x^2*S4)). */ + sx = S0 + theta2 * sx; /* S0+x^2*(S1+x^2*(S2+x^2*(S3+x^2*S4))). */ + /* x+x^3*(S0+x^2*(S1+x^2*(S2+x^2*(S3+x^2*S4)))). */ + sx = theta + theta * theta2 * sx; + } + else + { + sx = C3 + theta2 * C4; /* C3+x^2*C4. */ + sx = C2 + theta2 * sx; /* C2+x^2*(C3+x^2*C4). */ + sx = C1 + theta2 * sx; /* C1+x^2*(C2+x^2*(C3+x^2*C4)). */ + sx = C0 + theta2 * sx; /* C0+x^2*(C1+x^2*(C2+x^2*(C3+x^2*C4))). */ + /* 1.0 + x^2*(C0+x^2*(C1+x^2*(C2+x^2*(C3+x^2*C4)))). */ + sx = 1.0 + theta2 * sx; + } + + /* Add in the signbit and assign the result. */ + return sign_sin * sx; +} - /* argument reduction needed */ - else { - n = __ieee754_rem_pio2f(x,y); - switch(n&3) { - case 0: return __kernel_sinf(y[0],y[1],1); - case 1: return __kernel_cosf(y[0],y[1]); - case 2: return -__kernel_sinf(y[0],y[1],1); - default: - return -__kernel_cosf(y[0],y[1]); +float +SINF_FUNC(float x) +{ + double cx; + double theta = x; + double abstheta = fabs (theta); + /* if |x|< Pi/4. */ + if (abstheta < M_PI_4) + { + if (abstheta >= +0.03125) /* |x| >= 2^-5. */ + { + double theta2 = theta * theta; + /* Chebyshev polynomial of the form for sin. */ + cx = S3 + theta2 * S4; + cx = S2 + theta2 * cx; + cx = S1 + theta2 * cx; + cx = S0 + theta2 * cx; + cx = theta + theta * theta2 * cx; + return cx; + } + else if (abstheta >= 0x1p-27) /* |x| >= 2^-27. */ + { + /* A simpler Chebyshev approximation is close enough for this range: + * for sin: x+x^3*(SS0+x^2*SS1). */ + double theta2 = theta * theta; + cx = SS0 + theta2 * SS1; + cx = theta + theta * theta2 * cx; + return cx; + } + else + { + /* Handle some special cases. */ + if (theta) + return theta - (theta * SMALL); + else + return theta; + } + } + else /* |x| >= Pi/4. */ + { + unsigned long signbit = (x < 0); + if (abstheta < 9 * M_PI_4) /* |x| < 9*Pi/4. */ + { + unsigned long n = (abstheta * inv_PI_4) + 1; + theta = abstheta - pio2_table[n / 2]; + return reduced (theta, n, signbit); + } + else if (abstheta < INFINITY) + { + if (abstheta < 8388608.0) /* |x| < 2^23. */ + { + unsigned long n = floor (abstheta * inv_PI_4) + 1.0; + double x = floor (n / 2.0); + theta = x * PI_2_lo + (x * PI_2_hi + abstheta); + /* Argument reduction needed. */ + return reduced (theta, n, signbit); + } + else /* |x| >= 2^23. */ + { + x = fabs (x); + int32_t exponent; + GET_FLOAT_WORD (exponent, x); + exponent = + (exponent >> FLOAT_EXPONENT_SHIFT) - FLOAT_EXPONENT_BIAS; + exponent += 3; + exponent = (exponent * (0x100000000 / 28 + 1)) >> 32; + double a = invpio4_table[exponent] * x; + double b = invpio4_table[exponent + 1] * x; + double c = invpio4_table[exponent + 2] * x; + double d = invpio4_table[exponent + 3] * x; + unsigned long l = a; + l &= ~0x7; + a -= l; + double e = a + b; + l = e; + e = a - l; + if (l & 1) + { + e -= 1.0; + e += b; + e += c; + e += d; + e *= M_PI_4; + return reduced (e, l + 1, signbit); + } + else + { + e += b; + e += c; + e += d; + if (e <= 1.0) + { + e *= M_PI_4; + return reduced (e, l + 1, signbit); + } + else + { + l++; + e -= 2.0; + e *= M_PI_4; + return reduced (e, l + 1, signbit); + } + } } } + else + { + int32_t ix; + /* High word of x. */ + GET_FLOAT_WORD (ix, abstheta); + /* sin(Inf or NaN) is NaN. */ + if (ix == 0x7f800000) + __set_errno (EDOM); + return x - x; + } + } } #ifndef SINF