From patchwork Wed Feb 20 08:23:18 2019 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: "C.r. Guo" X-Patchwork-Id: 31531 Received: (qmail 24886 invoked by alias); 20 Feb 2019 08:23:26 -0000 Mailing-List: contact libc-alpha-help@sourceware.org; run by ezmlm Precedence: bulk List-Id: List-Unsubscribe: List-Subscribe: List-Archive: List-Post: List-Help: , Sender: libc-alpha-owner@sourceware.org Delivered-To: mailing list libc-alpha@sourceware.org Received: (qmail 24863 invoked by uid 89); 20 Feb 2019 08:23:26 -0000 Authentication-Results: sourceware.org; auth=none X-Spam-SWARE-Status: No, score=-24.4 required=5.0 tests=BAYES_00, GIT_PATCH_0, GIT_PATCH_1, GIT_PATCH_2, GIT_PATCH_3, KAM_NUMSUBJECT, RCVD_IN_DNSWL_NONE, SPF_HELO_PASS, SPF_PASS, UNSUBSCRIBE_BODY autolearn=ham version=3.3.2 spammy=Suite, square, 1.5, muller X-HELO: EUR03-AM5-obe.outbound.protection.outlook.com DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=nxp.com; s=selector1; h=From:Date:Subject:Message-ID:Content-Type:MIME-Version:X-MS-Exchange-SenderADCheck; bh=DQQf6CnWYKlp+DqcwbrhkzeDnLznnPozUg8C774W5Lw=; b=hPCjiJEqXbQD9x1PIxSdeKTkfYornASiuBqjjstioR3iq8AWorvmgUFzy5ZphkvjkN1og0Lb/9Or/xsWU7ztlbDpXQvn4f5RqxnQ7GkSXjRQ0razomTOj1mo5nOtWXXN+AlDS2se0qiUkWGeEi1/C7ADgaErw9iCEofg13WN47s= From: "C.r. Guo" To: "libc-alpha@sourceware.org" CC: "C.r. Guo" Subject: [PATCH] glibc: support e5500 and e6500 Date: Wed, 20 Feb 2019 08:23:18 +0000 Message-ID: <1550650978-2800-1-git-send-email-chunrong.guo@nxp.com> authentication-results: spf=none (sender IP is ) smtp.mailfrom=chunrong.guo@nxp.com; received-spf: None (protection.outlook.com: nxp.com does not designate permitted sender hosts) x-ms-exchange-senderadcheck: 1 MIME-Version: 1.0 X-MS-Exchange-CrossTenant-mailboxtype: HOSTED From: Chunrong Guo Signed-off-by: Chunrong Guo --- sysdeps/powerpc/powerpc64/be/e5500/Implies | 2 + sysdeps/powerpc/powerpc64/be/e5500/fpu/e_sqrt.c | 137 +++++++++++++++++++++ sysdeps/powerpc/powerpc64/be/e5500/fpu/e_sqrtf.c | 103 ++++++++++++++++ .../powerpc/powerpc64/be/e5500/multiarch/Implies | 1 + sysdeps/powerpc/powerpc64/be/e6500/Implies | 2 + sysdeps/powerpc/powerpc64/be/e6500/fpu/e_sqrt.c | 137 +++++++++++++++++++++ sysdeps/powerpc/powerpc64/be/e6500/fpu/e_sqrtf.c | 103 ++++++++++++++++ .../powerpc/powerpc64/be/e6500/multiarch/Implies | 1 + 8 files changed, 486 insertions(+) create mode 100644 sysdeps/powerpc/powerpc64/be/e5500/Implies create mode 100644 sysdeps/powerpc/powerpc64/be/e5500/fpu/e_sqrt.c create mode 100644 sysdeps/powerpc/powerpc64/be/e5500/fpu/e_sqrtf.c create mode 100644 sysdeps/powerpc/powerpc64/be/e5500/multiarch/Implies create mode 100644 sysdeps/powerpc/powerpc64/be/e6500/Implies create mode 100644 sysdeps/powerpc/powerpc64/be/e6500/fpu/e_sqrt.c create mode 100644 sysdeps/powerpc/powerpc64/be/e6500/fpu/e_sqrtf.c create mode 100644 sysdeps/powerpc/powerpc64/be/e6500/multiarch/Implies diff --git a/sysdeps/powerpc/powerpc64/be/e5500/Implies b/sysdeps/powerpc/powerpc64/be/e5500/Implies new file mode 100644 index 0000000..a795586 --- /dev/null +++ b/sysdeps/powerpc/powerpc64/be/e5500/Implies @@ -0,0 +1,2 @@ +powerpc/powerpc64/e5500 + diff --git a/sysdeps/powerpc/powerpc64/be/e5500/fpu/e_sqrt.c b/sysdeps/powerpc/powerpc64/be/e5500/fpu/e_sqrt.c new file mode 100644 index 0000000..13a8197 --- /dev/null +++ b/sysdeps/powerpc/powerpc64/be/e5500/fpu/e_sqrt.c @@ -0,0 +1,137 @@ +/* Double-precision floating point square root. + Copyright (C) 2010 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. */ + +#include +#include +#include +#include + +#include +#include + +static const ieee_float_shape_type a_nan = {.word = 0x7fc00000 }; +static const ieee_float_shape_type a_inf = {.word = 0x7f800000 }; +static const float two108 = 3.245185536584267269e+32; +static const float twom54 = 5.551115123125782702e-17; +static const float half = 0.5; + +/* The method is based on the descriptions in: + + _The Handbook of Floating-Pointer Arithmetic_ by Muller et al., chapter 5; + _IA-64 and Elementary Functions: Speed and Precision_ by Markstein, chapter 9 + + We find the actual square root and half of its reciprocal + simultaneously. */ + +double +__slow_ieee754_sqrt (double b) +{ + if (__builtin_expect (b > 0, 1)) + { + double y, g, h, d, r; + ieee_double_shape_type u; + + if (__builtin_expect (b != a_inf.value, 1)) + { + fenv_t fe; + + fe = fegetenv_register (); + + u.value = b; + + relax_fenv_state (); + + __asm__ ("frsqrte %[estimate], %[x]\n" + : [estimate] "=f" (y) : [x] "f" (b)); + + /* Following Muller et al, page 168, equation 5.20. + + h goes to 1/(2*sqrt(b)) + g goes to sqrt(b). + + We need three iterations to get within 1ulp. */ + + /* Indicate that these can be performed prior to the branch. GCC + insists on sinking them below the branch, however; it seems like + they'd be better before the branch so that we can cover any latency + from storing the argument and loading its high word. Oh well. */ + + g = b * y; + h = 0.5 * y; + + /* Handle small numbers by scaling. */ + if (__builtin_expect ((u.parts.msw & 0x7ff00000) <= 0x02000000, 0)) + return __slow_ieee754_sqrt (b * two108) * twom54; + +#define FMADD(a_, c_, b_) \ + ({ double __r; \ + __asm__ ("fmadd %[r], %[a], %[c], %[b]\n" \ + : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \ + __r;}) +#define FNMSUB(a_, c_, b_) \ + ({ double __r; \ + __asm__ ("fnmsub %[r], %[a], %[c], %[b]\n" \ + : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \ + __r;}) + + r = FNMSUB (g, h, half); + g = FMADD (g, r, g); + h = FMADD (h, r, h); + + r = FNMSUB (g, h, half); + g = FMADD (g, r, g); + h = FMADD (h, r, h); + + r = FNMSUB (g, h, half); + g = FMADD (g, r, g); + h = FMADD (h, r, h); + + /* g is now +/- 1ulp, or exactly equal to, the square root of b. */ + + /* Final refinement. */ + d = FNMSUB (g, g, b); + + fesetenv_register (fe); + return FMADD (d, h, g); + } + } + else if (b < 0) + { + /* For some reason, some PowerPC32 processors don't implement + FE_INVALID_SQRT. */ +#ifdef FE_INVALID_SQRT + feraiseexcept (FE_INVALID_SQRT); + + fenv_union_t u = { .fenv = fegetenv_register () }; + if ((u.l & FE_INVALID) == 0) +#endif + feraiseexcept (FE_INVALID); + b = a_nan.value; + } + return f_wash (b); +} + +#undef __ieee754_sqrt +double +__ieee754_sqrt (double x) +{ + return __slow_ieee754_sqrt (x); +} + +strong_alias (__ieee754_sqrt, __sqrt_finite) diff --git a/sysdeps/powerpc/powerpc64/be/e5500/fpu/e_sqrtf.c b/sysdeps/powerpc/powerpc64/be/e5500/fpu/e_sqrtf.c new file mode 100644 index 0000000..fae2d81 --- /dev/null +++ b/sysdeps/powerpc/powerpc64/be/e5500/fpu/e_sqrtf.c @@ -0,0 +1,103 @@ +/* Single-precision floating point square root. + Copyright (C) 2010 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. */ + +#include +#include +#include +#include + +#include +#include + +static const ieee_float_shape_type a_nan = {.word = 0x7fc00000 }; +static const ieee_float_shape_type a_inf = {.word = 0x7f800000 }; +static const float threehalf = 1.5; + +/* The method is based on the descriptions in: + + _The Handbook of Floating-Pointer Arithmetic_ by Muller et al., chapter 5; + _IA-64 and Elementary Functions: Speed and Precision_ by Markstein, chapter 9 + + We find the reciprocal square root and use that to compute the actual + square root. */ + +float +__slow_ieee754_sqrtf (float b) +{ + if (__builtin_expect (b > 0, 1)) + { +#define FMSUB(a_, c_, b_) \ + ({ double __r; \ + __asm__ ("fmsub %[r], %[a], %[c], %[b]\n" \ + : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \ + __r;}) +#define FNMSUB(a_, c_, b_) \ + ({ double __r; \ + __asm__ ("fnmsub %[r], %[a], %[c], %[b]\n" \ + : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \ + __r;}) + + if (__builtin_expect (b != a_inf.value, 1)) + { + double y, x; + fenv_t fe; + + fe = fegetenv_register (); + + relax_fenv_state (); + + /* Compute y = 1.5 * b - b. Uses fewer constants than y = 0.5 * b. */ + y = FMSUB (threehalf, b, b); + + /* Initial estimate. */ + __asm__ ("frsqrte %[x], %[b]\n" : [x] "=f" (x) : [b] "f" (b)); + + /* Iterate. x_{n+1} = x_n * (1.5 - y * (x_n * x_n)). */ + x = x * FNMSUB (y, x * x, threehalf); + x = x * FNMSUB (y, x * x, threehalf); + x = x * FNMSUB (y, x * x, threehalf); + + /* All done. */ + fesetenv_register (fe); + return x * b; + } + } + else if (b < 0) + { + /* For some reason, some PowerPC32 processors don't implement + FE_INVALID_SQRT. */ +#ifdef FE_INVALID_SQRT + feraiseexcept (FE_INVALID_SQRT); + + fenv_union_t u = { .fenv = fegetenv_register () }; + if ((u.l & FE_INVALID) == 0) +#endif + feraiseexcept (FE_INVALID); + b = a_nan.value; + } + return f_washf (b); +} +#undef __ieee754_sqrtf +float +__ieee754_sqrtf (float x) +{ + return __slow_ieee754_sqrtf (x); +} + +strong_alias (__ieee754_sqrtf, __sqrtf_finite) diff --git a/sysdeps/powerpc/powerpc64/be/e5500/multiarch/Implies b/sysdeps/powerpc/powerpc64/be/e5500/multiarch/Implies new file mode 100644 index 0000000..30edcf7 --- /dev/null +++ b/sysdeps/powerpc/powerpc64/be/e5500/multiarch/Implies @@ -0,0 +1 @@ +powerpc/powerpc64/multiarch diff --git a/sysdeps/powerpc/powerpc64/be/e6500/Implies b/sysdeps/powerpc/powerpc64/be/e6500/Implies new file mode 100644 index 0000000..9b8fc07 --- /dev/null +++ b/sysdeps/powerpc/powerpc64/be/e6500/Implies @@ -0,0 +1,2 @@ +powerpc/powerpc64/e6500 + diff --git a/sysdeps/powerpc/powerpc64/be/e6500/fpu/e_sqrt.c b/sysdeps/powerpc/powerpc64/be/e6500/fpu/e_sqrt.c new file mode 100644 index 0000000..13a8197 --- /dev/null +++ b/sysdeps/powerpc/powerpc64/be/e6500/fpu/e_sqrt.c @@ -0,0 +1,137 @@ +/* Double-precision floating point square root. + Copyright (C) 2010 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. */ + +#include +#include +#include +#include + +#include +#include + +static const ieee_float_shape_type a_nan = {.word = 0x7fc00000 }; +static const ieee_float_shape_type a_inf = {.word = 0x7f800000 }; +static const float two108 = 3.245185536584267269e+32; +static const float twom54 = 5.551115123125782702e-17; +static const float half = 0.5; + +/* The method is based on the descriptions in: + + _The Handbook of Floating-Pointer Arithmetic_ by Muller et al., chapter 5; + _IA-64 and Elementary Functions: Speed and Precision_ by Markstein, chapter 9 + + We find the actual square root and half of its reciprocal + simultaneously. */ + +double +__slow_ieee754_sqrt (double b) +{ + if (__builtin_expect (b > 0, 1)) + { + double y, g, h, d, r; + ieee_double_shape_type u; + + if (__builtin_expect (b != a_inf.value, 1)) + { + fenv_t fe; + + fe = fegetenv_register (); + + u.value = b; + + relax_fenv_state (); + + __asm__ ("frsqrte %[estimate], %[x]\n" + : [estimate] "=f" (y) : [x] "f" (b)); + + /* Following Muller et al, page 168, equation 5.20. + + h goes to 1/(2*sqrt(b)) + g goes to sqrt(b). + + We need three iterations to get within 1ulp. */ + + /* Indicate that these can be performed prior to the branch. GCC + insists on sinking them below the branch, however; it seems like + they'd be better before the branch so that we can cover any latency + from storing the argument and loading its high word. Oh well. */ + + g = b * y; + h = 0.5 * y; + + /* Handle small numbers by scaling. */ + if (__builtin_expect ((u.parts.msw & 0x7ff00000) <= 0x02000000, 0)) + return __slow_ieee754_sqrt (b * two108) * twom54; + +#define FMADD(a_, c_, b_) \ + ({ double __r; \ + __asm__ ("fmadd %[r], %[a], %[c], %[b]\n" \ + : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \ + __r;}) +#define FNMSUB(a_, c_, b_) \ + ({ double __r; \ + __asm__ ("fnmsub %[r], %[a], %[c], %[b]\n" \ + : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \ + __r;}) + + r = FNMSUB (g, h, half); + g = FMADD (g, r, g); + h = FMADD (h, r, h); + + r = FNMSUB (g, h, half); + g = FMADD (g, r, g); + h = FMADD (h, r, h); + + r = FNMSUB (g, h, half); + g = FMADD (g, r, g); + h = FMADD (h, r, h); + + /* g is now +/- 1ulp, or exactly equal to, the square root of b. */ + + /* Final refinement. */ + d = FNMSUB (g, g, b); + + fesetenv_register (fe); + return FMADD (d, h, g); + } + } + else if (b < 0) + { + /* For some reason, some PowerPC32 processors don't implement + FE_INVALID_SQRT. */ +#ifdef FE_INVALID_SQRT + feraiseexcept (FE_INVALID_SQRT); + + fenv_union_t u = { .fenv = fegetenv_register () }; + if ((u.l & FE_INVALID) == 0) +#endif + feraiseexcept (FE_INVALID); + b = a_nan.value; + } + return f_wash (b); +} + +#undef __ieee754_sqrt +double +__ieee754_sqrt (double x) +{ + return __slow_ieee754_sqrt (x); +} + +strong_alias (__ieee754_sqrt, __sqrt_finite) diff --git a/sysdeps/powerpc/powerpc64/be/e6500/fpu/e_sqrtf.c b/sysdeps/powerpc/powerpc64/be/e6500/fpu/e_sqrtf.c new file mode 100644 index 0000000..fae2d81 --- /dev/null +++ b/sysdeps/powerpc/powerpc64/be/e6500/fpu/e_sqrtf.c @@ -0,0 +1,103 @@ +/* Single-precision floating point square root. + Copyright (C) 2010 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. */ + +#include +#include +#include +#include + +#include +#include + +static const ieee_float_shape_type a_nan = {.word = 0x7fc00000 }; +static const ieee_float_shape_type a_inf = {.word = 0x7f800000 }; +static const float threehalf = 1.5; + +/* The method is based on the descriptions in: + + _The Handbook of Floating-Pointer Arithmetic_ by Muller et al., chapter 5; + _IA-64 and Elementary Functions: Speed and Precision_ by Markstein, chapter 9 + + We find the reciprocal square root and use that to compute the actual + square root. */ + +float +__slow_ieee754_sqrtf (float b) +{ + if (__builtin_expect (b > 0, 1)) + { +#define FMSUB(a_, c_, b_) \ + ({ double __r; \ + __asm__ ("fmsub %[r], %[a], %[c], %[b]\n" \ + : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \ + __r;}) +#define FNMSUB(a_, c_, b_) \ + ({ double __r; \ + __asm__ ("fnmsub %[r], %[a], %[c], %[b]\n" \ + : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \ + __r;}) + + if (__builtin_expect (b != a_inf.value, 1)) + { + double y, x; + fenv_t fe; + + fe = fegetenv_register (); + + relax_fenv_state (); + + /* Compute y = 1.5 * b - b. Uses fewer constants than y = 0.5 * b. */ + y = FMSUB (threehalf, b, b); + + /* Initial estimate. */ + __asm__ ("frsqrte %[x], %[b]\n" : [x] "=f" (x) : [b] "f" (b)); + + /* Iterate. x_{n+1} = x_n * (1.5 - y * (x_n * x_n)). */ + x = x * FNMSUB (y, x * x, threehalf); + x = x * FNMSUB (y, x * x, threehalf); + x = x * FNMSUB (y, x * x, threehalf); + + /* All done. */ + fesetenv_register (fe); + return x * b; + } + } + else if (b < 0) + { + /* For some reason, some PowerPC32 processors don't implement + FE_INVALID_SQRT. */ +#ifdef FE_INVALID_SQRT + feraiseexcept (FE_INVALID_SQRT); + + fenv_union_t u = { .fenv = fegetenv_register () }; + if ((u.l & FE_INVALID) == 0) +#endif + feraiseexcept (FE_INVALID); + b = a_nan.value; + } + return f_washf (b); +} +#undef __ieee754_sqrtf +float +__ieee754_sqrtf (float x) +{ + return __slow_ieee754_sqrtf (x); +} + +strong_alias (__ieee754_sqrtf, __sqrtf_finite) diff --git a/sysdeps/powerpc/powerpc64/be/e6500/multiarch/Implies b/sysdeps/powerpc/powerpc64/be/e6500/multiarch/Implies new file mode 100644 index 0000000..30edcf7 --- /dev/null +++ b/sysdeps/powerpc/powerpc64/be/e6500/multiarch/Implies @@ -0,0 +1 @@ +powerpc/powerpc64/multiarch