glibc: support e5500 and e6500
Commit Message
From: Chunrong Guo <chunrong.guo@nxp.com>
Signed-off-by: Chunrong Guo <chunrong.guo@nxp.com>
---
sysdeps/powerpc/powerpc64/be/e5500/Implies | 2 +
sysdeps/powerpc/powerpc64/be/e5500/fpu/e_sqrt.c | 137 +++++++++++++++++++++
sysdeps/powerpc/powerpc64/be/e5500/fpu/e_sqrtf.c | 103 ++++++++++++++++
.../powerpc/powerpc64/be/e5500/multiarch/Implies | 1 +
sysdeps/powerpc/powerpc64/be/e6500/Implies | 2 +
sysdeps/powerpc/powerpc64/be/e6500/fpu/e_sqrt.c | 137 +++++++++++++++++++++
sysdeps/powerpc/powerpc64/be/e6500/fpu/e_sqrtf.c | 103 ++++++++++++++++
.../powerpc/powerpc64/be/e6500/multiarch/Implies | 1 +
8 files changed, 486 insertions(+)
create mode 100644 sysdeps/powerpc/powerpc64/be/e5500/Implies
create mode 100644 sysdeps/powerpc/powerpc64/be/e5500/fpu/e_sqrt.c
create mode 100644 sysdeps/powerpc/powerpc64/be/e5500/fpu/e_sqrtf.c
create mode 100644 sysdeps/powerpc/powerpc64/be/e5500/multiarch/Implies
create mode 100644 sysdeps/powerpc/powerpc64/be/e6500/Implies
create mode 100644 sysdeps/powerpc/powerpc64/be/e6500/fpu/e_sqrt.c
create mode 100644 sysdeps/powerpc/powerpc64/be/e6500/fpu/e_sqrtf.c
create mode 100644 sysdeps/powerpc/powerpc64/be/e6500/multiarch/Implies
Comments
On 20/02/2019 05:23, C.r. Guo wrote:
> From: Chunrong Guo <chunrong.guo@nxp.com>
>
> Signed-off-by: Chunrong Guo <chunrong.guo@nxp.com>
First the title is misleading, as Joseph has stated on BZ#24128 e5500/e6500
is fully functional requiring no such sysdeps file (since BZ#16576). This
patch is in fact an optimization patch and for such you need to provide
not only if it is does not add any regression, but also performance numbers
to validate the new implementation (how much faster is from generic
__slow_ieee754_sqrt and in which ranges).
Did you actually run it on a e6500 and check the output? Because running on
a ISA 2.06 compliant platform which supports frsqrte, fmadd, fnmsub (POWER7)
I am seeing for both e5500 and e6500 __slow_ieee754_sqrt:
FAIL: math/test-double-finite-sqrt
FAIL: math/test-double-sqrt
FAIL: math/test-float32x-finite-sqrt
FAIL: math/test-float32x-sqrt
FAIL: math/test-float64-finite-sqrt
FAIL: math/test-float64-sqrt
FAIL: math/test-idouble-sqrt
FAIL: math/test-ifloat32x-sqrt
FAIL: math/test-ifloat64-sqrt
(the float version fails as well)
The issues are basically this sqrt is not exactly rounded for non default
rounding modes:
$ cat math/test-double-sqrt.out
testing double (without inline functions)
Failure: Test: sqrt_downward (0x1.6e66a858p-1016)
Result:
is: 1.4276460526639628e-153 0x1.324402a00b45fp-508
should be: 1.4276460526639625e-153 0x1.324402a00b45ep-508
difference: 2.6497349136889905e-169 0x1.0000000000000p-560
ulp : 1.0000
max.ulp : 0.0000
Failure: Test: sqrt_downward (0x1.8661cbf8p-1016)
Result:
is: 1.4736254818836879e-153 0x1.3c212046bfdffp-508
should be: 1.4736254818836876e-153 0x1.3c212046bfdfep-508
difference: 2.6497349136889905e-169 0x1.0000000000000p-560
ulp : 1.0000
max.ulp : 0.0000
Failure: Test: sqrt_downward (0x1.bbb221b4p-1016)
Result:
is: 1.5710314965589996e-153 0x1.510681b939931p-508
should be: 1.5710314965589993e-153 0x1.510681b939930p-508
difference: 2.6497349136889905e-169 0x1.0000000000000p-560
ulp : 1.0000
max.ulp : 0.0000
Failure: Test: sqrt_downward (0x1.dbb258c8p-1016)
Result:
is: 1.6266992797941982e-153 0x1.5cf7b0f78d3afp-508
should be: 1.6266992797941979e-153 0x1.5cf7b0f78d3aep-508
difference: 2.6497349136889905e-169 0x1.0000000000000p-560
ulp : 1.0000
max.ulp : 0.0000
Failure: Test: sqrt_downward (0x2.ae207d48p-1016)
Result:
is: 1.9536395872248397e-153 0x1.a31ab946d340bp-508
should be: 1.9536395872248394e-153 0x1.a31ab946d340ap-508
difference: 2.6497349136889905e-169 0x1.0000000000000p-560
ulp : 1.0000
max.ulp : 0.0000
Failure: Test: sqrt_downward (0x2p+0)
Result:
is: 1.4142135623730951e+00 0x1.6a09e667f3bcdp+0
should be: 1.4142135623730949e+00 0x1.6a09e667f3bccp+0
difference: 2.2204460492503131e-16 0x1.0000000000000p-52
ulp : 1.0000
max.ulp : 0.0000
[...]
Test suite completed:
468 test cases plus 464 tests for exception flags and
464 tests for errno executed.
126 errors occurred.
So before to actually review this patch we need to make sure its
correctness.
[1] https://sourceware.org/bugzilla/show_bug.cgi?id=24128
> ---
> sysdeps/powerpc/powerpc64/be/e5500/Implies | 2 +
> sysdeps/powerpc/powerpc64/be/e5500/fpu/e_sqrt.c | 137 +++++++++++++++++++++
> sysdeps/powerpc/powerpc64/be/e5500/fpu/e_sqrtf.c | 103 ++++++++++++++++
> .../powerpc/powerpc64/be/e5500/multiarch/Implies | 1 +
> sysdeps/powerpc/powerpc64/be/e6500/Implies | 2 +
> sysdeps/powerpc/powerpc64/be/e6500/fpu/e_sqrt.c | 137 +++++++++++++++++++++
> sysdeps/powerpc/powerpc64/be/e6500/fpu/e_sqrtf.c | 103 ++++++++++++++++
> .../powerpc/powerpc64/be/e6500/multiarch/Implies | 1 +
> 8 files changed, 486 insertions(+)
> create mode 100644 sysdeps/powerpc/powerpc64/be/e5500/Implies
> create mode 100644 sysdeps/powerpc/powerpc64/be/e5500/fpu/e_sqrt.c
> create mode 100644 sysdeps/powerpc/powerpc64/be/e5500/fpu/e_sqrtf.c
> create mode 100644 sysdeps/powerpc/powerpc64/be/e5500/multiarch/Implies
> create mode 100644 sysdeps/powerpc/powerpc64/be/e6500/Implies
> create mode 100644 sysdeps/powerpc/powerpc64/be/e6500/fpu/e_sqrt.c
> create mode 100644 sysdeps/powerpc/powerpc64/be/e6500/fpu/e_sqrtf.c
> create mode 100644 sysdeps/powerpc/powerpc64/be/e6500/multiarch/Implies
>
> diff --git a/sysdeps/powerpc/powerpc64/be/e5500/Implies b/sysdeps/powerpc/powerpc64/be/e5500/Implies
> new file mode 100644
> index 0000000..a795586
> --- /dev/null
> +++ b/sysdeps/powerpc/powerpc64/be/e5500/Implies
> @@ -0,0 +1,2 @@
> +powerpc/powerpc64/e5500
> +
> diff --git a/sysdeps/powerpc/powerpc64/be/e5500/fpu/e_sqrt.c b/sysdeps/powerpc/powerpc64/be/e5500/fpu/e_sqrt.c
> new file mode 100644
> index 0000000..13a8197
> --- /dev/null
> +++ b/sysdeps/powerpc/powerpc64/be/e5500/fpu/e_sqrt.c
> @@ -0,0 +1,137 @@
> +/* Double-precision floating point square root.
> + Copyright (C) 2010 Free Software Foundation, Inc.
> + This file is part of the GNU C Library.
> +
> + The GNU C Library is free software; you can redistribute it and/or
> + modify it under the terms of the GNU Lesser General Public
> + License as published by the Free Software Foundation; either
> + version 2.1 of the License, or (at your option) any later version.
> +
> + The GNU C Library is distributed in the hope that it will be useful,
> + but WITHOUT ANY WARRANTY; without even the implied warranty of
> + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> + Lesser General Public License for more details.
> +
> + You should have received a copy of the GNU Lesser General Public
> + License along with the GNU C Library; if not, write to the Free
> + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
> + 02111-1307 USA. */
> +
> +#include <math.h>
> +#include <math_private.h>
> +#include <fenv_libc.h>
> +#include <inttypes.h>
> +
> +#include <sysdep.h>
> +#include <ldsodefs.h>
> +
> +static const ieee_float_shape_type a_nan = {.word = 0x7fc00000 };
> +static const ieee_float_shape_type a_inf = {.word = 0x7f800000 };
> +static const float two108 = 3.245185536584267269e+32;
> +static const float twom54 = 5.551115123125782702e-17;
> +static const float half = 0.5;
> +
> +/* The method is based on the descriptions in:
> +
> + _The Handbook of Floating-Pointer Arithmetic_ by Muller et al., chapter 5;
> + _IA-64 and Elementary Functions: Speed and Precision_ by Markstein, chapter 9
> +
> + We find the actual square root and half of its reciprocal
> + simultaneously. */
> +
> +double
> +__slow_ieee754_sqrt (double b)
> +{
> + if (__builtin_expect (b > 0, 1))
> + {
> + double y, g, h, d, r;
> + ieee_double_shape_type u;
> +
> + if (__builtin_expect (b != a_inf.value, 1))
> + {
> + fenv_t fe;
> +
> + fe = fegetenv_register ();
> +
> + u.value = b;
> +
> + relax_fenv_state ();
> +
> + __asm__ ("frsqrte %[estimate], %[x]\n"
> + : [estimate] "=f" (y) : [x] "f" (b));
> +
> + /* Following Muller et al, page 168, equation 5.20.
> +
> + h goes to 1/(2*sqrt(b))
> + g goes to sqrt(b).
> +
> + We need three iterations to get within 1ulp. */
> +
> + /* Indicate that these can be performed prior to the branch. GCC
> + insists on sinking them below the branch, however; it seems like
> + they'd be better before the branch so that we can cover any latency
> + from storing the argument and loading its high word. Oh well. */
> +
> + g = b * y;
> + h = 0.5 * y;
> +
> + /* Handle small numbers by scaling. */
> + if (__builtin_expect ((u.parts.msw & 0x7ff00000) <= 0x02000000, 0))
> + return __slow_ieee754_sqrt (b * two108) * twom54;
> +
> +#define FMADD(a_, c_, b_) \
> + ({ double __r; \
> + __asm__ ("fmadd %[r], %[a], %[c], %[b]\n" \
> + : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \
> + __r;})
> +#define FNMSUB(a_, c_, b_) \
> + ({ double __r; \
> + __asm__ ("fnmsub %[r], %[a], %[c], %[b]\n" \
> + : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \
> + __r;})
> +
> + r = FNMSUB (g, h, half);
> + g = FMADD (g, r, g);
> + h = FMADD (h, r, h);
> +
> + r = FNMSUB (g, h, half);
> + g = FMADD (g, r, g);
> + h = FMADD (h, r, h);
> +
> + r = FNMSUB (g, h, half);
> + g = FMADD (g, r, g);
> + h = FMADD (h, r, h);
> +
> + /* g is now +/- 1ulp, or exactly equal to, the square root of b. */
> +
> + /* Final refinement. */
> + d = FNMSUB (g, g, b);
> +
> + fesetenv_register (fe);
> + return FMADD (d, h, g);
> + }
> + }
> + else if (b < 0)
> + {
> + /* For some reason, some PowerPC32 processors don't implement
> + FE_INVALID_SQRT. */
> +#ifdef FE_INVALID_SQRT
> + feraiseexcept (FE_INVALID_SQRT);
> +
> + fenv_union_t u = { .fenv = fegetenv_register () };
> + if ((u.l & FE_INVALID) == 0)
> +#endif
> + feraiseexcept (FE_INVALID);
> + b = a_nan.value;
> + }
> + return f_wash (b);
> +}
> +
> +#undef __ieee754_sqrt
> +double
> +__ieee754_sqrt (double x)
> +{
> + return __slow_ieee754_sqrt (x);
> +}
> +
> +strong_alias (__ieee754_sqrt, __sqrt_finite)
> diff --git a/sysdeps/powerpc/powerpc64/be/e5500/fpu/e_sqrtf.c b/sysdeps/powerpc/powerpc64/be/e5500/fpu/e_sqrtf.c
> new file mode 100644
> index 0000000..fae2d81
> --- /dev/null
> +++ b/sysdeps/powerpc/powerpc64/be/e5500/fpu/e_sqrtf.c
> @@ -0,0 +1,103 @@
> +/* Single-precision floating point square root.
> + Copyright (C) 2010 Free Software Foundation, Inc.
> + This file is part of the GNU C Library.
> +
> + The GNU C Library is free software; you can redistribute it and/or
> + modify it under the terms of the GNU Lesser General Public
> + License as published by the Free Software Foundation; either
> + version 2.1 of the License, or (at your option) any later version.
> +
> + The GNU C Library is distributed in the hope that it will be useful,
> + but WITHOUT ANY WARRANTY; without even the implied warranty of
> + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> + Lesser General Public License for more details.
> +
> + You should have received a copy of the GNU Lesser General Public
> + License along with the GNU C Library; if not, write to the Free
> + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
> + 02111-1307 USA. */
> +
> +#include <math.h>
> +#include <math_private.h>
> +#include <fenv_libc.h>
> +#include <inttypes.h>
> +
> +#include <sysdep.h>
> +#include <ldsodefs.h>
> +
> +static const ieee_float_shape_type a_nan = {.word = 0x7fc00000 };
> +static const ieee_float_shape_type a_inf = {.word = 0x7f800000 };
> +static const float threehalf = 1.5;
> +
> +/* The method is based on the descriptions in:
> +
> + _The Handbook of Floating-Pointer Arithmetic_ by Muller et al., chapter 5;
> + _IA-64 and Elementary Functions: Speed and Precision_ by Markstein, chapter 9
> +
> + We find the reciprocal square root and use that to compute the actual
> + square root. */
> +
> +float
> +__slow_ieee754_sqrtf (float b)
> +{
> + if (__builtin_expect (b > 0, 1))
> + {
> +#define FMSUB(a_, c_, b_) \
> + ({ double __r; \
> + __asm__ ("fmsub %[r], %[a], %[c], %[b]\n" \
> + : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \
> + __r;})
> +#define FNMSUB(a_, c_, b_) \
> + ({ double __r; \
> + __asm__ ("fnmsub %[r], %[a], %[c], %[b]\n" \
> + : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \
> + __r;})
> +
> + if (__builtin_expect (b != a_inf.value, 1))
> + {
> + double y, x;
> + fenv_t fe;
> +
> + fe = fegetenv_register ();
> +
> + relax_fenv_state ();
> +
> + /* Compute y = 1.5 * b - b. Uses fewer constants than y = 0.5 * b. */
> + y = FMSUB (threehalf, b, b);
> +
> + /* Initial estimate. */
> + __asm__ ("frsqrte %[x], %[b]\n" : [x] "=f" (x) : [b] "f" (b));
> +
> + /* Iterate. x_{n+1} = x_n * (1.5 - y * (x_n * x_n)). */
> + x = x * FNMSUB (y, x * x, threehalf);
> + x = x * FNMSUB (y, x * x, threehalf);
> + x = x * FNMSUB (y, x * x, threehalf);
> +
> + /* All done. */
> + fesetenv_register (fe);
> + return x * b;
> + }
> + }
> + else if (b < 0)
> + {
> + /* For some reason, some PowerPC32 processors don't implement
> + FE_INVALID_SQRT. */
> +#ifdef FE_INVALID_SQRT
> + feraiseexcept (FE_INVALID_SQRT);
> +
> + fenv_union_t u = { .fenv = fegetenv_register () };
> + if ((u.l & FE_INVALID) == 0)
> +#endif
> + feraiseexcept (FE_INVALID);
> + b = a_nan.value;
> + }
> + return f_washf (b);
> +}
> +#undef __ieee754_sqrtf
> +float
> +__ieee754_sqrtf (float x)
> +{
> + return __slow_ieee754_sqrtf (x);
> +}
> +
> +strong_alias (__ieee754_sqrtf, __sqrtf_finite)
> diff --git a/sysdeps/powerpc/powerpc64/be/e5500/multiarch/Implies b/sysdeps/powerpc/powerpc64/be/e5500/multiarch/Implies
> new file mode 100644
> index 0000000..30edcf7
> --- /dev/null
> +++ b/sysdeps/powerpc/powerpc64/be/e5500/multiarch/Implies
> @@ -0,0 +1 @@
> +powerpc/powerpc64/multiarch
> diff --git a/sysdeps/powerpc/powerpc64/be/e6500/Implies b/sysdeps/powerpc/powerpc64/be/e6500/Implies
> new file mode 100644
> index 0000000..9b8fc07
> --- /dev/null
> +++ b/sysdeps/powerpc/powerpc64/be/e6500/Implies
> @@ -0,0 +1,2 @@
> +powerpc/powerpc64/e6500
> +
> diff --git a/sysdeps/powerpc/powerpc64/be/e6500/fpu/e_sqrt.c b/sysdeps/powerpc/powerpc64/be/e6500/fpu/e_sqrt.c
> new file mode 100644
> index 0000000..13a8197
> --- /dev/null
> +++ b/sysdeps/powerpc/powerpc64/be/e6500/fpu/e_sqrt.c
> @@ -0,0 +1,137 @@
> +/* Double-precision floating point square root.
> + Copyright (C) 2010 Free Software Foundation, Inc.
> + This file is part of the GNU C Library.
> +
> + The GNU C Library is free software; you can redistribute it and/or
> + modify it under the terms of the GNU Lesser General Public
> + License as published by the Free Software Foundation; either
> + version 2.1 of the License, or (at your option) any later version.
> +
> + The GNU C Library is distributed in the hope that it will be useful,
> + but WITHOUT ANY WARRANTY; without even the implied warranty of
> + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> + Lesser General Public License for more details.
> +
> + You should have received a copy of the GNU Lesser General Public
> + License along with the GNU C Library; if not, write to the Free
> + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
> + 02111-1307 USA. */
> +
> +#include <math.h>
> +#include <math_private.h>
> +#include <fenv_libc.h>
> +#include <inttypes.h>
> +
> +#include <sysdep.h>
> +#include <ldsodefs.h>
> +
> +static const ieee_float_shape_type a_nan = {.word = 0x7fc00000 };
> +static const ieee_float_shape_type a_inf = {.word = 0x7f800000 };
> +static const float two108 = 3.245185536584267269e+32;
> +static const float twom54 = 5.551115123125782702e-17;
> +static const float half = 0.5;
> +
> +/* The method is based on the descriptions in:
> +
> + _The Handbook of Floating-Pointer Arithmetic_ by Muller et al., chapter 5;
> + _IA-64 and Elementary Functions: Speed and Precision_ by Markstein, chapter 9
> +
> + We find the actual square root and half of its reciprocal
> + simultaneously. */
> +
> +double
> +__slow_ieee754_sqrt (double b)
> +{
> + if (__builtin_expect (b > 0, 1))
> + {
> + double y, g, h, d, r;
> + ieee_double_shape_type u;
> +
> + if (__builtin_expect (b != a_inf.value, 1))
> + {
> + fenv_t fe;
> +
> + fe = fegetenv_register ();
> +
> + u.value = b;
> +
> + relax_fenv_state ();
> +
> + __asm__ ("frsqrte %[estimate], %[x]\n"
> + : [estimate] "=f" (y) : [x] "f" (b));
> +
> + /* Following Muller et al, page 168, equation 5.20.
> +
> + h goes to 1/(2*sqrt(b))
> + g goes to sqrt(b).
> +
> + We need three iterations to get within 1ulp. */
> +
> + /* Indicate that these can be performed prior to the branch. GCC
> + insists on sinking them below the branch, however; it seems like
> + they'd be better before the branch so that we can cover any latency
> + from storing the argument and loading its high word. Oh well. */
> +
> + g = b * y;
> + h = 0.5 * y;
> +
> + /* Handle small numbers by scaling. */
> + if (__builtin_expect ((u.parts.msw & 0x7ff00000) <= 0x02000000, 0))
> + return __slow_ieee754_sqrt (b * two108) * twom54;
> +
> +#define FMADD(a_, c_, b_) \
> + ({ double __r; \
> + __asm__ ("fmadd %[r], %[a], %[c], %[b]\n" \
> + : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \
> + __r;})
> +#define FNMSUB(a_, c_, b_) \
> + ({ double __r; \
> + __asm__ ("fnmsub %[r], %[a], %[c], %[b]\n" \
> + : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \
> + __r;})
> +
> + r = FNMSUB (g, h, half);
> + g = FMADD (g, r, g);
> + h = FMADD (h, r, h);
> +
> + r = FNMSUB (g, h, half);
> + g = FMADD (g, r, g);
> + h = FMADD (h, r, h);
> +
> + r = FNMSUB (g, h, half);
> + g = FMADD (g, r, g);
> + h = FMADD (h, r, h);
> +
> + /* g is now +/- 1ulp, or exactly equal to, the square root of b. */
> +
> + /* Final refinement. */
> + d = FNMSUB (g, g, b);
> +
> + fesetenv_register (fe);
> + return FMADD (d, h, g);
> + }
> + }
> + else if (b < 0)
> + {
> + /* For some reason, some PowerPC32 processors don't implement
> + FE_INVALID_SQRT. */
> +#ifdef FE_INVALID_SQRT
> + feraiseexcept (FE_INVALID_SQRT);
> +
> + fenv_union_t u = { .fenv = fegetenv_register () };
> + if ((u.l & FE_INVALID) == 0)
> +#endif
> + feraiseexcept (FE_INVALID);
> + b = a_nan.value;
> + }
> + return f_wash (b);
> +}
> +
> +#undef __ieee754_sqrt
> +double
> +__ieee754_sqrt (double x)
> +{
> + return __slow_ieee754_sqrt (x);
> +}
> +
> +strong_alias (__ieee754_sqrt, __sqrt_finite)
> diff --git a/sysdeps/powerpc/powerpc64/be/e6500/fpu/e_sqrtf.c b/sysdeps/powerpc/powerpc64/be/e6500/fpu/e_sqrtf.c
> new file mode 100644
> index 0000000..fae2d81
> --- /dev/null
> +++ b/sysdeps/powerpc/powerpc64/be/e6500/fpu/e_sqrtf.c
> @@ -0,0 +1,103 @@
> +/* Single-precision floating point square root.
> + Copyright (C) 2010 Free Software Foundation, Inc.
> + This file is part of the GNU C Library.
> +
> + The GNU C Library is free software; you can redistribute it and/or
> + modify it under the terms of the GNU Lesser General Public
> + License as published by the Free Software Foundation; either
> + version 2.1 of the License, or (at your option) any later version.
> +
> + The GNU C Library is distributed in the hope that it will be useful,
> + but WITHOUT ANY WARRANTY; without even the implied warranty of
> + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> + Lesser General Public License for more details.
> +
> + You should have received a copy of the GNU Lesser General Public
> + License along with the GNU C Library; if not, write to the Free
> + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
> + 02111-1307 USA. */
> +
> +#include <math.h>
> +#include <math_private.h>
> +#include <fenv_libc.h>
> +#include <inttypes.h>
> +
> +#include <sysdep.h>
> +#include <ldsodefs.h>
> +
> +static const ieee_float_shape_type a_nan = {.word = 0x7fc00000 };
> +static const ieee_float_shape_type a_inf = {.word = 0x7f800000 };
> +static const float threehalf = 1.5;
> +
> +/* The method is based on the descriptions in:
> +
> + _The Handbook of Floating-Pointer Arithmetic_ by Muller et al., chapter 5;
> + _IA-64 and Elementary Functions: Speed and Precision_ by Markstein, chapter 9
> +
> + We find the reciprocal square root and use that to compute the actual
> + square root. */
> +
> +float
> +__slow_ieee754_sqrtf (float b)
> +{
> + if (__builtin_expect (b > 0, 1))
> + {
> +#define FMSUB(a_, c_, b_) \
> + ({ double __r; \
> + __asm__ ("fmsub %[r], %[a], %[c], %[b]\n" \
> + : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \
> + __r;})
> +#define FNMSUB(a_, c_, b_) \
> + ({ double __r; \
> + __asm__ ("fnmsub %[r], %[a], %[c], %[b]\n" \
> + : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \
> + __r;})
> +
> + if (__builtin_expect (b != a_inf.value, 1))
> + {
> + double y, x;
> + fenv_t fe;
> +
> + fe = fegetenv_register ();
> +
> + relax_fenv_state ();
> +
> + /* Compute y = 1.5 * b - b. Uses fewer constants than y = 0.5 * b. */
> + y = FMSUB (threehalf, b, b);
> +
> + /* Initial estimate. */
> + __asm__ ("frsqrte %[x], %[b]\n" : [x] "=f" (x) : [b] "f" (b));
> +
> + /* Iterate. x_{n+1} = x_n * (1.5 - y * (x_n * x_n)). */
> + x = x * FNMSUB (y, x * x, threehalf);
> + x = x * FNMSUB (y, x * x, threehalf);
> + x = x * FNMSUB (y, x * x, threehalf);
> +
> + /* All done. */
> + fesetenv_register (fe);
> + return x * b;
> + }
> + }
> + else if (b < 0)
> + {
> + /* For some reason, some PowerPC32 processors don't implement
> + FE_INVALID_SQRT. */
> +#ifdef FE_INVALID_SQRT
> + feraiseexcept (FE_INVALID_SQRT);
> +
> + fenv_union_t u = { .fenv = fegetenv_register () };
> + if ((u.l & FE_INVALID) == 0)
> +#endif
> + feraiseexcept (FE_INVALID);
> + b = a_nan.value;
> + }
> + return f_washf (b);
> +}
> +#undef __ieee754_sqrtf
> +float
> +__ieee754_sqrtf (float x)
> +{
> + return __slow_ieee754_sqrtf (x);
> +}
> +
> +strong_alias (__ieee754_sqrtf, __sqrtf_finite)
> diff --git a/sysdeps/powerpc/powerpc64/be/e6500/multiarch/Implies b/sysdeps/powerpc/powerpc64/be/e6500/multiarch/Implies
> new file mode 100644
> index 0000000..30edcf7
> --- /dev/null
> +++ b/sysdeps/powerpc/powerpc64/be/e6500/multiarch/Implies
> @@ -0,0 +1 @@
> +powerpc/powerpc64/multiarch
>
On Wed, 20 Feb 2019, C.r. Guo wrote:
> From: Chunrong Guo <chunrong.guo@nxp.com>
>
> Signed-off-by: Chunrong Guo <chunrong.guo@nxp.com>
I don't see a copyright assignment on file from NXP (though there was an
older one from Freescale).
> diff --git a/sysdeps/powerpc/powerpc64/be/e5500/Implies b/sysdeps/powerpc/powerpc64/be/e5500/Implies
> new file mode 100644
> index 0000000..a795586
> --- /dev/null
> +++ b/sysdeps/powerpc/powerpc64/be/e5500/Implies
> @@ -0,0 +1,2 @@
> +powerpc/powerpc64/e5500
> +
No blank lines at the end of files, the commit hooks don't allow them to
be pushed (or whitespace at end of line, or tabs-after-spaces
indentation).
> +static const ieee_float_shape_type a_nan = {.word = 0x7fc00000 };
> +static const ieee_float_shape_type a_inf = {.word = 0x7f800000 };
Use INFINITY and NAN constants if needed, rather than hardcoding
representations like this.
> +static const float two108 = 3.245185536584267269e+32;
> +static const float twom54 = 5.551115123125782702e-17;
Use hex floats (and there's no real need to have such variables, just
write 0x1p108 or 0x1p-54 or corresponding float constants as needed
directly in the code using such values).
> +#define FMADD(a_, c_, b_) \
> + ({ double __r; \
> + __asm__ ("fmadd %[r], %[a], %[c], %[b]\n" \
> + : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \
> + __r;})
> +#define FNMSUB(a_, c_, b_) \
> + ({ double __r; \
> + __asm__ ("fnmsub %[r], %[a], %[c], %[b]\n" \
> + : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \
> + __r;})
Use __builtin_fma / __builtin_fmaf, not asm.
new file mode 100644
@@ -0,0 +1,2 @@
+powerpc/powerpc64/e5500
+
new file mode 100644
@@ -0,0 +1,137 @@
+/* Double-precision floating point square root.
+ Copyright (C) 2010 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#include <math.h>
+#include <math_private.h>
+#include <fenv_libc.h>
+#include <inttypes.h>
+
+#include <sysdep.h>
+#include <ldsodefs.h>
+
+static const ieee_float_shape_type a_nan = {.word = 0x7fc00000 };
+static const ieee_float_shape_type a_inf = {.word = 0x7f800000 };
+static const float two108 = 3.245185536584267269e+32;
+static const float twom54 = 5.551115123125782702e-17;
+static const float half = 0.5;
+
+/* The method is based on the descriptions in:
+
+ _The Handbook of Floating-Pointer Arithmetic_ by Muller et al., chapter 5;
+ _IA-64 and Elementary Functions: Speed and Precision_ by Markstein, chapter 9
+
+ We find the actual square root and half of its reciprocal
+ simultaneously. */
+
+double
+__slow_ieee754_sqrt (double b)
+{
+ if (__builtin_expect (b > 0, 1))
+ {
+ double y, g, h, d, r;
+ ieee_double_shape_type u;
+
+ if (__builtin_expect (b != a_inf.value, 1))
+ {
+ fenv_t fe;
+
+ fe = fegetenv_register ();
+
+ u.value = b;
+
+ relax_fenv_state ();
+
+ __asm__ ("frsqrte %[estimate], %[x]\n"
+ : [estimate] "=f" (y) : [x] "f" (b));
+
+ /* Following Muller et al, page 168, equation 5.20.
+
+ h goes to 1/(2*sqrt(b))
+ g goes to sqrt(b).
+
+ We need three iterations to get within 1ulp. */
+
+ /* Indicate that these can be performed prior to the branch. GCC
+ insists on sinking them below the branch, however; it seems like
+ they'd be better before the branch so that we can cover any latency
+ from storing the argument and loading its high word. Oh well. */
+
+ g = b * y;
+ h = 0.5 * y;
+
+ /* Handle small numbers by scaling. */
+ if (__builtin_expect ((u.parts.msw & 0x7ff00000) <= 0x02000000, 0))
+ return __slow_ieee754_sqrt (b * two108) * twom54;
+
+#define FMADD(a_, c_, b_) \
+ ({ double __r; \
+ __asm__ ("fmadd %[r], %[a], %[c], %[b]\n" \
+ : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \
+ __r;})
+#define FNMSUB(a_, c_, b_) \
+ ({ double __r; \
+ __asm__ ("fnmsub %[r], %[a], %[c], %[b]\n" \
+ : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \
+ __r;})
+
+ r = FNMSUB (g, h, half);
+ g = FMADD (g, r, g);
+ h = FMADD (h, r, h);
+
+ r = FNMSUB (g, h, half);
+ g = FMADD (g, r, g);
+ h = FMADD (h, r, h);
+
+ r = FNMSUB (g, h, half);
+ g = FMADD (g, r, g);
+ h = FMADD (h, r, h);
+
+ /* g is now +/- 1ulp, or exactly equal to, the square root of b. */
+
+ /* Final refinement. */
+ d = FNMSUB (g, g, b);
+
+ fesetenv_register (fe);
+ return FMADD (d, h, g);
+ }
+ }
+ else if (b < 0)
+ {
+ /* For some reason, some PowerPC32 processors don't implement
+ FE_INVALID_SQRT. */
+#ifdef FE_INVALID_SQRT
+ feraiseexcept (FE_INVALID_SQRT);
+
+ fenv_union_t u = { .fenv = fegetenv_register () };
+ if ((u.l & FE_INVALID) == 0)
+#endif
+ feraiseexcept (FE_INVALID);
+ b = a_nan.value;
+ }
+ return f_wash (b);
+}
+
+#undef __ieee754_sqrt
+double
+__ieee754_sqrt (double x)
+{
+ return __slow_ieee754_sqrt (x);
+}
+
+strong_alias (__ieee754_sqrt, __sqrt_finite)
new file mode 100644
@@ -0,0 +1,103 @@
+/* Single-precision floating point square root.
+ Copyright (C) 2010 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#include <math.h>
+#include <math_private.h>
+#include <fenv_libc.h>
+#include <inttypes.h>
+
+#include <sysdep.h>
+#include <ldsodefs.h>
+
+static const ieee_float_shape_type a_nan = {.word = 0x7fc00000 };
+static const ieee_float_shape_type a_inf = {.word = 0x7f800000 };
+static const float threehalf = 1.5;
+
+/* The method is based on the descriptions in:
+
+ _The Handbook of Floating-Pointer Arithmetic_ by Muller et al., chapter 5;
+ _IA-64 and Elementary Functions: Speed and Precision_ by Markstein, chapter 9
+
+ We find the reciprocal square root and use that to compute the actual
+ square root. */
+
+float
+__slow_ieee754_sqrtf (float b)
+{
+ if (__builtin_expect (b > 0, 1))
+ {
+#define FMSUB(a_, c_, b_) \
+ ({ double __r; \
+ __asm__ ("fmsub %[r], %[a], %[c], %[b]\n" \
+ : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \
+ __r;})
+#define FNMSUB(a_, c_, b_) \
+ ({ double __r; \
+ __asm__ ("fnmsub %[r], %[a], %[c], %[b]\n" \
+ : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \
+ __r;})
+
+ if (__builtin_expect (b != a_inf.value, 1))
+ {
+ double y, x;
+ fenv_t fe;
+
+ fe = fegetenv_register ();
+
+ relax_fenv_state ();
+
+ /* Compute y = 1.5 * b - b. Uses fewer constants than y = 0.5 * b. */
+ y = FMSUB (threehalf, b, b);
+
+ /* Initial estimate. */
+ __asm__ ("frsqrte %[x], %[b]\n" : [x] "=f" (x) : [b] "f" (b));
+
+ /* Iterate. x_{n+1} = x_n * (1.5 - y * (x_n * x_n)). */
+ x = x * FNMSUB (y, x * x, threehalf);
+ x = x * FNMSUB (y, x * x, threehalf);
+ x = x * FNMSUB (y, x * x, threehalf);
+
+ /* All done. */
+ fesetenv_register (fe);
+ return x * b;
+ }
+ }
+ else if (b < 0)
+ {
+ /* For some reason, some PowerPC32 processors don't implement
+ FE_INVALID_SQRT. */
+#ifdef FE_INVALID_SQRT
+ feraiseexcept (FE_INVALID_SQRT);
+
+ fenv_union_t u = { .fenv = fegetenv_register () };
+ if ((u.l & FE_INVALID) == 0)
+#endif
+ feraiseexcept (FE_INVALID);
+ b = a_nan.value;
+ }
+ return f_washf (b);
+}
+#undef __ieee754_sqrtf
+float
+__ieee754_sqrtf (float x)
+{
+ return __slow_ieee754_sqrtf (x);
+}
+
+strong_alias (__ieee754_sqrtf, __sqrtf_finite)
new file mode 100644
@@ -0,0 +1 @@
+powerpc/powerpc64/multiarch
new file mode 100644
@@ -0,0 +1,2 @@
+powerpc/powerpc64/e6500
+
new file mode 100644
@@ -0,0 +1,137 @@
+/* Double-precision floating point square root.
+ Copyright (C) 2010 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#include <math.h>
+#include <math_private.h>
+#include <fenv_libc.h>
+#include <inttypes.h>
+
+#include <sysdep.h>
+#include <ldsodefs.h>
+
+static const ieee_float_shape_type a_nan = {.word = 0x7fc00000 };
+static const ieee_float_shape_type a_inf = {.word = 0x7f800000 };
+static const float two108 = 3.245185536584267269e+32;
+static const float twom54 = 5.551115123125782702e-17;
+static const float half = 0.5;
+
+/* The method is based on the descriptions in:
+
+ _The Handbook of Floating-Pointer Arithmetic_ by Muller et al., chapter 5;
+ _IA-64 and Elementary Functions: Speed and Precision_ by Markstein, chapter 9
+
+ We find the actual square root and half of its reciprocal
+ simultaneously. */
+
+double
+__slow_ieee754_sqrt (double b)
+{
+ if (__builtin_expect (b > 0, 1))
+ {
+ double y, g, h, d, r;
+ ieee_double_shape_type u;
+
+ if (__builtin_expect (b != a_inf.value, 1))
+ {
+ fenv_t fe;
+
+ fe = fegetenv_register ();
+
+ u.value = b;
+
+ relax_fenv_state ();
+
+ __asm__ ("frsqrte %[estimate], %[x]\n"
+ : [estimate] "=f" (y) : [x] "f" (b));
+
+ /* Following Muller et al, page 168, equation 5.20.
+
+ h goes to 1/(2*sqrt(b))
+ g goes to sqrt(b).
+
+ We need three iterations to get within 1ulp. */
+
+ /* Indicate that these can be performed prior to the branch. GCC
+ insists on sinking them below the branch, however; it seems like
+ they'd be better before the branch so that we can cover any latency
+ from storing the argument and loading its high word. Oh well. */
+
+ g = b * y;
+ h = 0.5 * y;
+
+ /* Handle small numbers by scaling. */
+ if (__builtin_expect ((u.parts.msw & 0x7ff00000) <= 0x02000000, 0))
+ return __slow_ieee754_sqrt (b * two108) * twom54;
+
+#define FMADD(a_, c_, b_) \
+ ({ double __r; \
+ __asm__ ("fmadd %[r], %[a], %[c], %[b]\n" \
+ : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \
+ __r;})
+#define FNMSUB(a_, c_, b_) \
+ ({ double __r; \
+ __asm__ ("fnmsub %[r], %[a], %[c], %[b]\n" \
+ : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \
+ __r;})
+
+ r = FNMSUB (g, h, half);
+ g = FMADD (g, r, g);
+ h = FMADD (h, r, h);
+
+ r = FNMSUB (g, h, half);
+ g = FMADD (g, r, g);
+ h = FMADD (h, r, h);
+
+ r = FNMSUB (g, h, half);
+ g = FMADD (g, r, g);
+ h = FMADD (h, r, h);
+
+ /* g is now +/- 1ulp, or exactly equal to, the square root of b. */
+
+ /* Final refinement. */
+ d = FNMSUB (g, g, b);
+
+ fesetenv_register (fe);
+ return FMADD (d, h, g);
+ }
+ }
+ else if (b < 0)
+ {
+ /* For some reason, some PowerPC32 processors don't implement
+ FE_INVALID_SQRT. */
+#ifdef FE_INVALID_SQRT
+ feraiseexcept (FE_INVALID_SQRT);
+
+ fenv_union_t u = { .fenv = fegetenv_register () };
+ if ((u.l & FE_INVALID) == 0)
+#endif
+ feraiseexcept (FE_INVALID);
+ b = a_nan.value;
+ }
+ return f_wash (b);
+}
+
+#undef __ieee754_sqrt
+double
+__ieee754_sqrt (double x)
+{
+ return __slow_ieee754_sqrt (x);
+}
+
+strong_alias (__ieee754_sqrt, __sqrt_finite)
new file mode 100644
@@ -0,0 +1,103 @@
+/* Single-precision floating point square root.
+ Copyright (C) 2010 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#include <math.h>
+#include <math_private.h>
+#include <fenv_libc.h>
+#include <inttypes.h>
+
+#include <sysdep.h>
+#include <ldsodefs.h>
+
+static const ieee_float_shape_type a_nan = {.word = 0x7fc00000 };
+static const ieee_float_shape_type a_inf = {.word = 0x7f800000 };
+static const float threehalf = 1.5;
+
+/* The method is based on the descriptions in:
+
+ _The Handbook of Floating-Pointer Arithmetic_ by Muller et al., chapter 5;
+ _IA-64 and Elementary Functions: Speed and Precision_ by Markstein, chapter 9
+
+ We find the reciprocal square root and use that to compute the actual
+ square root. */
+
+float
+__slow_ieee754_sqrtf (float b)
+{
+ if (__builtin_expect (b > 0, 1))
+ {
+#define FMSUB(a_, c_, b_) \
+ ({ double __r; \
+ __asm__ ("fmsub %[r], %[a], %[c], %[b]\n" \
+ : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \
+ __r;})
+#define FNMSUB(a_, c_, b_) \
+ ({ double __r; \
+ __asm__ ("fnmsub %[r], %[a], %[c], %[b]\n" \
+ : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \
+ __r;})
+
+ if (__builtin_expect (b != a_inf.value, 1))
+ {
+ double y, x;
+ fenv_t fe;
+
+ fe = fegetenv_register ();
+
+ relax_fenv_state ();
+
+ /* Compute y = 1.5 * b - b. Uses fewer constants than y = 0.5 * b. */
+ y = FMSUB (threehalf, b, b);
+
+ /* Initial estimate. */
+ __asm__ ("frsqrte %[x], %[b]\n" : [x] "=f" (x) : [b] "f" (b));
+
+ /* Iterate. x_{n+1} = x_n * (1.5 - y * (x_n * x_n)). */
+ x = x * FNMSUB (y, x * x, threehalf);
+ x = x * FNMSUB (y, x * x, threehalf);
+ x = x * FNMSUB (y, x * x, threehalf);
+
+ /* All done. */
+ fesetenv_register (fe);
+ return x * b;
+ }
+ }
+ else if (b < 0)
+ {
+ /* For some reason, some PowerPC32 processors don't implement
+ FE_INVALID_SQRT. */
+#ifdef FE_INVALID_SQRT
+ feraiseexcept (FE_INVALID_SQRT);
+
+ fenv_union_t u = { .fenv = fegetenv_register () };
+ if ((u.l & FE_INVALID) == 0)
+#endif
+ feraiseexcept (FE_INVALID);
+ b = a_nan.value;
+ }
+ return f_washf (b);
+}
+#undef __ieee754_sqrtf
+float
+__ieee754_sqrtf (float x)
+{
+ return __slow_ieee754_sqrtf (x);
+}
+
+strong_alias (__ieee754_sqrtf, __sqrtf_finite)
new file mode 100644
@@ -0,0 +1 @@
+powerpc/powerpc64/multiarch