From patchwork Fri Mar 9 15:48:47 2018 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Wilco Dijkstra X-Patchwork-Id: 26252 Received: (qmail 16198 invoked by alias); 9 Mar 2018 15:48:54 -0000 Mailing-List: contact libc-alpha-help@sourceware.org; run by ezmlm Precedence: bulk List-Id: List-Unsubscribe: List-Subscribe: List-Archive: List-Post: List-Help: , Sender: libc-alpha-owner@sourceware.org Delivered-To: mailing list libc-alpha@sourceware.org Received: (qmail 16186 invoked by uid 89); 9 Mar 2018 15:48:53 -0000 Authentication-Results: sourceware.org; auth=none X-Virus-Found: No X-Spam-SWARE-Status: No, score=-24.5 required=5.0 tests=AWL, BAYES_00, GIT_PATCH_0, GIT_PATCH_1, GIT_PATCH_2, GIT_PATCH_3, KAM_ASCII_DIVIDERS, RCVD_IN_DNSWL_NONE, SPF_HELO_PASS, SPF_PASS autolearn=ham version=3.3.2 spammy= X-HELO: EUR02-VE1-obe.outbound.protection.outlook.com From: Wilco Dijkstra To: "libc-alpha@sourceware.org" CC: nd Subject: [PATCH 3/6] Remove slow paths from sin/cos Date: Fri, 9 Mar 2018 15:48:47 +0000 Message-ID: authentication-results: spf=none (sender IP is ) smtp.mailfrom=Wilco.Dijkstra@arm.com; x-ms-publictraffictype: Email x-microsoft-exchange-diagnostics: 1; DB6PR0801MB1878; 7:9EfTK3wsE08XnnATeYhyLLn6Ejgy00fBP1sUvZTjuG2WpOq/qsaeq0kBDGYBihhlhbDfZhCmmIFemDjKCYbTHmDaOuyFNbpMgegyFe5+hYfSYwad9ZnwNKkTjZmmQcOm3b2m9szWmS9qDehqwiXkHhm18YTuUIJYmoFprSJFKxIulUwwP1fkxBT6wx4AzGUyGrWzNK5ek5dc/lKQfvcWp3lbBsJRQtRauy77nVoA0bQGwxMIGaiXQw/xfy4i7R7b x-ms-exchange-antispam-srfa-diagnostics: SSOS; x-ms-office365-filtering-ht: Tenant x-ms-office365-filtering-correlation-id: 740aa337-df39-42c5-c6dc-08d585d53f57 x-microsoft-antispam: UriScan:; BCL:0; PCL:0; RULEID:(7020095)(4652020)(48565401081)(5600026)(4604075)(3008032)(2017052603328)(7153060)(7193020); SRVR:DB6PR0801MB1878; x-ms-traffictypediagnostic: DB6PR0801MB1878: nodisclaimer: True x-microsoft-antispam-prvs: x-exchange-antispam-report-test: UriScan:(180628864354917); x-exchange-antispam-report-cfa-test: BCL:0; PCL:0; RULEID:(8211001083)(6040522)(2401047)(8121501046)(5005006)(3002001)(10201501046)(93006095)(93001095)(3231220)(944501244)(52105095)(6055026)(6041310)(20161123558120)(201703131423095)(201702281528075)(20161123555045)(201703061421075)(201703061406153)(20161123564045)(20161123562045)(20161123560045)(6072148)(201708071742011); SRVR:DB6PR0801MB1878; BCL:0; PCL:0; RULEID:; SRVR:DB6PR0801MB1878; x-forefront-prvs: 0606BBEB39 x-forefront-antispam-report: SFV:NSPM; SFS:(10009020)(366004)(39860400002)(39380400002)(396003)(376002)(346002)(54534003)(377424004)(189003)(199004)(102836004)(74316002)(6436002)(3846002)(66066001)(72206003)(8676002)(8936002)(6116002)(81166006)(81156014)(3280700002)(2900100001)(68736007)(25786009)(14454004)(6506007)(6916009)(99286004)(2351001)(7696005)(86362001)(478600001)(7736002)(305945005)(2906002)(5640700003)(53936002)(106356001)(3660700001)(5250100002)(59450400001)(105586002)(97736004)(33656002)(2501003)(55016002)(5660300001)(4326008)(316002)(9686003)(26005); DIR:OUT; SFP:1101; SCL:1; SRVR:DB6PR0801MB1878; H:DB6PR0801MB2053.eurprd08.prod.outlook.com; FPR:; SPF:None; PTR:InfoNoRecords; MX:1; A:1; LANG:en; received-spf: None (protection.outlook.com: arm.com does not designate permitted sender hosts) x-microsoft-antispam-message-info: 1M9eJjldefsoNp1e7IexG4b2pxUqoabpwBKoU/FqdF20yn70CUhWuzozWiSsNza0V8Q/R7jvmBUw1ICnzsAmdj/7rhB1eFmllaaEdjSHvaZeo1x60c2zs3v+LlcRQ2VcWeJm8oP2iSJMI7jTJ0S8iXvePJGD1ieDGb4+Ot/2+Kq9H8rYolb82bXLU/JQqGDiIGscOiWynKqjHjXz6GGUBfF6Y2/Hv+noHeWEBN13hwPMrDfkIuCWug32D2KwQzBgYi+3aWSBoOgyRlvsdNlJ/M0XhsW3D9JVcHRSoNpGg6as5K0BSUqaODEgyKrMvTJVLOThWknHz+ZzaCb0zuw00Q== spamdiagnosticoutput: 1:99 spamdiagnosticmetadata: NSPM MIME-Version: 1.0 X-OriginatorOrg: arm.com X-MS-Exchange-CrossTenant-Network-Message-Id: 740aa337-df39-42c5-c6dc-08d585d53f57 X-MS-Exchange-CrossTenant-originalarrivaltime: 09 Mar 2018 15:48:47.9726 (UTC) X-MS-Exchange-CrossTenant-fromentityheader: Hosted X-MS-Exchange-CrossTenant-id: f34e5979-57d9-4aaa-ad4d-b122a662184d X-MS-Exchange-Transport-CrossTenantHeadersStamped: DB6PR0801MB1878 This patch improves the accuracy of the range reduction. When the input is large (2^27) and very close to a multiple of PI/2, using 110 bits of PI is not enough. Improve range reduction accuracy to 136 bits. As a result the special checks for results close to zero can be removed. The ULP of the polynomials is at worst 0.55ULP, so there is no reason for the slow functions, and they can be removed. ChangeLog: 2018-03-09 Wilco Dijkstra * sysdeps/ieee754/dbl-64/s_sin.c (reduce_sincos_1): Rename to sincos_1, improve accuracy to 136 bits. (do_sincos_1): Rename to do_sincos, remove fallbacks to slow functions. (__sin): Use improved reduction and simplified do_sincos calculation. (__cos): Likewise. * sysdeps/ieee754/dbl-64/s_sincos.c (__sincos): Likewise. diff --git a/sysdeps/ieee754/dbl-64/s_sin.c b/sysdeps/ieee754/dbl-64/s_sin.c index 1f98e29278183d1fccd7c2b3fd467d6b16c245ed..b48b8627a7a801dfafecc920062aaaac51969a8a 100644 --- a/sysdeps/ieee754/dbl-64/s_sin.c +++ b/sysdeps/ieee754/dbl-64/s_sin.c @@ -295,9 +295,13 @@ reduce_and_compute (double x, bool shift_quadrant) return retval; } +/* Reduce range of x to within PI/2 with abs (x) < 105414350. The high part + is written to *a, the low part to *da. Range reduction is accurate to 136 + bits so that when x is large and *a very close to zero, all 53 bits of *a + are correct. */ static inline int4 __always_inline -reduce_sincos_1 (double x, double *a, double *da) +reduce_sincos (double x, double *a, double *da) { mynumber v; @@ -306,62 +310,45 @@ reduce_sincos_1 (double x, double *a, double *da) v.x = t; double y = (x - xn * mp1) - xn * mp2; int4 n = v.i[LOW_HALF] & 3; - double db = xn * mp3; - double b = y - db; - db = (y - b) - db; + + double b, db, t1, t2; + t1 = xn * pp3; + t2 = y - t1; + db = (y - t2) - t1; + + t1 = xn * pp4; + b = t2 - t1; + db += (t2 - b) - t1; *a = b; *da = db; - return n; } -/* Compute sin (A + DA). cos can be computed by passing SHIFT_QUADRANT as - true, which results in shifting the quadrant N clockwise. */ +/* Compute sin or cos (A + DA) for the given quadrant N. */ static double __always_inline -do_sincos_1 (double a, double da, double x, int4 n, bool shift_quadrant) +do_sincos (double a, double da, int4 n) { - double xx, retval, res, cor; - double eps = fabs (x) * 1.2e-30; + double retval, cor; - int k1 = (n + shift_quadrant) & 3; - switch (k1) - { /* quarter of unit circle */ - case 2: - a = -a; - da = -da; - /* Fall through. */ - case 0: - xx = a * a; + if (n & 1) + /* Max ULP is 0.513. */ + retval = do_cos (a, da, &cor); + else + { + double xx = a * a; + /* Max ULP is 0.501 if xx < 0.01588, otherwise ULP is 0.518. */ if (xx < 0.01588) - { - /* Taylor series. */ - res = TAYLOR_SIN (xx, a, da, cor); - cor = 1.02 * cor + __copysign (eps, cor); - retval = (res == res + cor) ? res : sloww (a, da, x, shift_quadrant); - } + retval = TAYLOR_SIN (xx, a, da, cor); else - { - res = do_sin (a, da, &cor); - cor = 1.035 * cor + __copysign (eps, cor); - retval = ((res == res + cor) ? __copysign (res, a) - : sloww1 (a, da, x, shift_quadrant)); - } - break; - - case 1: - case 3: - res = do_cos (a, da, &cor); - cor = 1.025 * cor + __copysign (eps, cor); - retval = ((res == res + cor) ? ((n & 2) ? -res : res) - : sloww2 (a, da, x, n)); - break; + retval = __copysign (do_sin (a, da, &cor), a); } - return retval; + return (n & 2) ? -retval : retval; } + /*******************************************************************/ /* An ultimate sin routine. Given an IEEE double machine number x */ /* it computes the correctly rounded (to nearest) value of sin(x) */ @@ -374,9 +361,9 @@ SECTION #endif __sin (double x) { - double xx, t, cor; + double xx, t, a, da, cor; mynumber u; - int4 k, m; + int4 k, m, n; double retval = 0; #ifndef IN_SINCOS @@ -419,9 +406,8 @@ __sin (double x) /*-------------------------- 2.426265<|x|< 105414350 ----------------------*/ else if (k < 0x419921FB) { - double a, da; - int4 n = reduce_sincos_1 (x, &a, &da); - retval = do_sincos_1 (a, da, x, n, false); + n = reduce_sincos (x, &a, &da); + retval = do_sincos (a, da, n); } /* else if (k < 0x419921FB ) */ /* --------------------105414350 <|x| <2^1024------------------------------*/ @@ -435,6 +421,11 @@ __sin (double x) __set_errno (EDOM); retval = x / x; } +#else + /* Disable warning... */ + n = 0, n = n; + a = 0, a = a; + da = 0, da = da; #endif return retval; @@ -456,7 +447,7 @@ __cos (double x) { double y, xx, cor, a, da; mynumber u; - int4 k, m; + int4 k, m, n; double retval = 0; @@ -496,9 +487,8 @@ __cos (double x) #ifndef IN_SINCOS else if (k < 0x419921FB) { /* 2.426265<|x|< 105414350 */ - double a, da; - int4 n = reduce_sincos_1 (x, &a, &da); - retval = do_sincos_1 (a, da, x, n, true); + n = reduce_sincos (x, &a, &da); + retval = do_sincos (a, da, n + 1); } /* else if (k < 0x419921FB ) */ /* 105414350 <|x| <2^1024 */ @@ -511,6 +501,9 @@ __cos (double x) __set_errno (EDOM); retval = x / x; /* |x| > 2^1024 */ } +#else + /* Disable warning... */ + n = 0, n = n; #endif return retval; diff --git a/sysdeps/ieee754/dbl-64/s_sincos.c b/sysdeps/ieee754/dbl-64/s_sincos.c index a9af8ce526bfe78c06cfafa65de0815ec69585c5..4f032d2e42593ccde22169b374728386dd8fca8e 100644 --- a/sysdeps/ieee754/dbl-64/s_sincos.c +++ b/sysdeps/ieee754/dbl-64/s_sincos.c @@ -79,10 +79,10 @@ __sincos (double x, double *sinx, double *cosx) if (k < 0x419921FB) { double a, da; - int4 n = reduce_sincos_1 (x, &a, &da); + int4 n = reduce_sincos (x, &a, &da); - *sinx = do_sincos_1 (a, da, x, n, false); - *cosx = do_sincos_1 (a, da, x, n, true); + *sinx = do_sincos (a, da, n); + *cosx = do_sincos (a, da, n + 1); return; }