powerpc: Fix fsqrt build in libm
Commit Message
Some powerpc64 processors (the e5500 core, for instance) do not provide the
fsqrt instruction; however, the current check in math_private.h uses
__WORDSIZE and _ARCH_PWR4 (ISA 2.02). This patch changes it to use
the compiler flag _ARCH_PPCSQ (which is the same condition GCC uses to
decide whether to generate the fsqrt instruction).
It fixes BZ#16576.
Tested on powerpc64/powerpc32 with --with-cpu=power4 and with --with-cpu=powerpc64.
The powerpc64 testcase showed some ULP and exception issues when using
the software implementation, and I will track them in separate bug reports.
Carlos, I would like to push to 2.21.
--
[BZ #16576]
* sysdeps/powerpc/fpu/math_private.h [__CPU_HAS_FSQRT]: Remove define
and use _ARCH_PPCSQ instead.
(__ieee754_sqrt): Likewise.
(__ieee754_sqrtf): Likewise.
* sysdeps/powerpc/fpu/e_sqrt.c (__slow_ieee754_sqrt): Build only if
_ARCH_PPCSQ is not defined.
(__ieee754_sqrt): Use _ARCH_PPCSQ to select whether to use hardware
fsqrt instruction.
* sysdeps/powerpc/fpu/e_sqrtf.c (__slow_ieee754_sqrtf): Build only if
_ARCH_PPCSQ is not defined.
(__ieee754_sqrtf): Use _ARCH_PPCSQ to select whether to use hardware
fsqrts instruction.
* sysdeps/powerpc/powerpc64/fpu/e_sqrt.c: Remove file.
* sysdeps/powerpc/powerpc64/fpu/e_sqrtf.c: Likewise.
--
Comments
On 01/23/2015 02:41 PM, Adhemerval Zanella wrote:
> Some powerpc64 processors (e5500 core for instance) does not provide the
> fsqrt instruction, however current check to use in math_private.h is
> __WORDSIZE and _ARCH_PWR4 (ISA 2.02). This is patch change it to use
> the compiler flag _ARCH_PPCSQ (which is the same condition GCC uses to
> decide whether to generate fsqrt instruction).
>
> It fixes BZ#16576.
>
> Tested on powerpc64/powerpc32 with -with-cpu=power4 and with -with-cpu=powerpc64.
> The powerpc64 testcase showed some ULPs and exceptions issues when using
> the software implementation and I will track them in other bug reports.
>
> Carlos, I would like to push to 2.21.
OK for 2.21.
> --
>
> [BZ #16576]
> * sysdeps/powerpc/fpu/math_private.h [__CPU_HAS_FSQRT]: Remove define
> and use _ARCH_PPCSQ instead.
> (__ieee754_sqrt): Likewise.
> (__ieee754_sqrtf): Likewise.
> * sysdeps/powerpc/fpu/e_sqrt.c (__slow_ieee754_sqrt): Build only if
> _ARCH_PPCSQ is defined.
> (__ieee754_sqrt): Use _ARCH_PPCSQ to select wheter to use hardware
> fsqrt instruction.
> * sysdeps/powerpc/fpu/e_sqrtf.c (__ieee754_sqrtf): Build only if
> _ARCH_PPCSQ is defined.
> (__ieee754_sqrtf): Use _ARCH_PPCSQ to select wheter to use hardware
> fsqrts instruction.
> * sysdeps/powerpc/powerpc64/fpu/e_sqrt.c: Remove file.
>
> --
>
> diff --git a/NEWS b/NEWS
> index 903d925..ed15176 100644
> --- a/NEWS
> +++ b/NEWS
> @@ -10,15 +10,15 @@ Version 2.21
> * The following bugs are resolved with this release:
>
> 6652, 10672, 12674, 12847, 12926, 13862, 14132, 14138, 14171, 14498,
> - 15215, 15884, 16009, 16418, 16191, 16469, 16617, 16619, 16657, 16740,
> - 16857, 17192, 17266, 17273, 17344, 17363, 17370, 17371, 17411, 17460,
> - 17475, 17485, 17501, 17506, 17508, 17522, 17555, 17570, 17571, 17572,
> - 17573, 17574, 17582, 17583, 17584, 17585, 17589, 17594, 17601, 17608,
> - 17616, 17625, 17630, 17633, 17634, 17635, 17647, 17653, 17657, 17658,
> - 17664, 17665, 17668, 17682, 17702, 17717, 17719, 17722, 17723, 17724,
> - 17725, 17732, 17733, 17744, 17745, 17746, 17747, 17748, 17775, 17777,
> - 17780, 17781, 17782, 17791, 17793, 17796, 17797, 17803, 17806, 17834,
> - 17844, 17848.
> + 15215, 15884, 16009, 16418, 16191, 16469, 16576, 16617, 16619, 16657,
> + 16740, 16857, 17192, 17266, 17273, 17344, 17363, 17370, 17371, 17411,
> + 17460, 17475, 17485, 17501, 17506, 17508, 17522, 17555, 17570, 17571,
> + 17572, 17573, 17574, 17582, 17583, 17584, 17585, 17589, 17594, 17601,
> + 17608, 17616, 17625, 17630, 17633, 17634, 17635, 17647, 17653, 17657,
> + 17658, 17664, 17665, 17668, 17682, 17702, 17717, 17719, 17722, 17723,
> + 17724, 17725, 17732, 17733, 17744, 17745, 17746, 17747, 17748, 17775,
> + 17777, 17780, 17781, 17782, 17791, 17793, 17796, 17797, 17803, 17806,
> + 17834, 17844, 17848.
>
> * A new semaphore algorithm has been implemented in generic C code for all
> machines. Previous custom assembly implementations of semaphore were
> diff --git a/sysdeps/powerpc/fpu/e_sqrt.c b/sysdeps/powerpc/fpu/e_sqrt.c
> index ba77ae5..4a854a1 100644
> --- a/sysdeps/powerpc/fpu/e_sqrt.c
> +++ b/sysdeps/powerpc/fpu/e_sqrt.c
> @@ -24,6 +24,7 @@
> #include <sysdep.h>
> #include <ldsodefs.h>
>
> +#ifndef _ARCH_PPCSQ
> static const double almost_half = 0.5000000000000001; /* 0.5 + 2^-53 */
> static const ieee_float_shape_type a_nan = {.word = 0x7fc00000 };
> static const ieee_float_shape_type a_inf = {.word = 0x7f800000 };
> @@ -152,6 +153,7 @@ __slow_ieee754_sqrt (double x)
> }
> return f_wash (x);
> }
> +#endif /* _ARCH_PPCSQ */
>
> #undef __ieee754_sqrt
> double
> @@ -159,16 +161,11 @@ __ieee754_sqrt (double x)
> {
> double z;
>
> - /* If the CPU is 64-bit we can use the optional FP instructions. */
> - if (__CPU_HAS_FSQRT)
> - {
> - /* Volatile is required to prevent the compiler from moving the
> - fsqrt instruction above the branch. */
> - __asm __volatile (" fsqrt %0,%1\n"
> - :"=f" (z):"f" (x));
> - }
> - else
> - z = __slow_ieee754_sqrt (x);
> +#ifdef _ARCH_PPCSQ
> + asm ("fsqrt %0,%1\n" :"=f" (z):"f" (x));
> +#else
> + z = __slow_ieee754_sqrt (x);
> +#endif
>
> return z;
> }
> diff --git a/sysdeps/powerpc/fpu/e_sqrtf.c b/sysdeps/powerpc/fpu/e_sqrtf.c
> index 5cf47f7..034b6f5 100644
> --- a/sysdeps/powerpc/fpu/e_sqrtf.c
> +++ b/sysdeps/powerpc/fpu/e_sqrtf.c
> @@ -24,6 +24,7 @@
> #include <sysdep.h>
> #include <ldsodefs.h>
>
> +#ifndef _ARCH_PPCSQ
> static const float almost_half = 0.50000006; /* 0.5 + 2^-24 */
> static const ieee_float_shape_type a_nan = {.word = 0x7fc00000 };
> static const ieee_float_shape_type a_inf = {.word = 0x7f800000 };
> @@ -128,6 +129,7 @@ __slow_ieee754_sqrtf (float x)
> }
> return f_washf (x);
> }
> +#endif /* _ARCH_PPCSQ */
>
> #undef __ieee754_sqrtf
> float
> @@ -135,16 +137,11 @@ __ieee754_sqrtf (float x)
> {
> double z;
>
> - /* If the CPU is 64-bit we can use the optional FP instructions. */
> - if (__CPU_HAS_FSQRT)
> - {
> - /* Volatile is required to prevent the compiler from moving the
> - fsqrt instruction above the branch. */
> - __asm __volatile (" fsqrts %0,%1\n"
> - :"=f" (z):"f" (x));
> - }
> - else
> - z = __slow_ieee754_sqrtf (x);
> +#ifdef _ARCH_PPCSQ
> + asm ("fsqrts %0,%1\n" :"=f" (z):"f" (x));
> +#else
> + z = __slow_ieee754_sqrtf (x);
> +#endif
>
> return z;
> }
> diff --git a/sysdeps/powerpc/fpu/math_private.h b/sysdeps/powerpc/fpu/math_private.h
> index 6631535..37e7456 100644
> --- a/sysdeps/powerpc/fpu/math_private.h
> +++ b/sysdeps/powerpc/fpu/math_private.h
> @@ -25,26 +25,17 @@
> #include <fenv_private.h>
> #include_next <math_private.h>
>
> -# if __WORDSIZE == 64 || defined _ARCH_PWR4
> -# define __CPU_HAS_FSQRT 1
> -# else
> -# define __CPU_HAS_FSQRT ((GLRO(dl_hwcap) & PPC_FEATURE_64) != 0)
> -# endif
> -
> extern double __slow_ieee754_sqrt (double);
> extern __always_inline double
> __ieee754_sqrt (double __x)
> {
> double __z;
>
> - if (__CPU_HAS_FSQRT)
> - {
> - /* Volatile is required to prevent the compiler from moving the
> - fsqrt instruction above the branch. */
> - __asm __volatile ("fsqrt %0,%1" : "=f" (__z) : "f" (__x));
> - }
> - else
> - __z = __slow_ieee754_sqrt(__x);
> +#ifdef _ARCH_PPCSQ
> + asm ("fsqrt %0,%1" : "=f" (__z) : "f" (__x));
> +#else
> + __z = __slow_ieee754_sqrt(__x);
> +#endif
>
> return __z;
> }
> @@ -55,14 +46,11 @@ __ieee754_sqrtf (float __x)
> {
> float __z;
>
> - if (__CPU_HAS_FSQRT)
> - {
> - /* Volatile is required to prevent the compiler from moving the
> - fsqrts instruction above the branch. */
> - __asm __volatile ("fsqrts %0,%1" : "=f" (__z) : "f" (__x));
> - }
> - else
> - __z = __slow_ieee754_sqrtf(__x);
> +#ifdef _ARCH_PPCSQ
> + asm ("fsqrts %0,%1" : "=f" (__z) : "f" (__x));
> +#else
> + __z = __slow_ieee754_sqrtf(__x);
> +#endif
>
> return __z;
> }
> diff --git a/sysdeps/powerpc/powerpc64/fpu/e_sqrt.c b/sysdeps/powerpc/powerpc64/fpu/e_sqrt.c
> deleted file mode 100644
> index 796388e..0000000
> --- a/sysdeps/powerpc/powerpc64/fpu/e_sqrt.c
> +++ /dev/null
> @@ -1,30 +0,0 @@
> -/* Double-precision floating point square root.
> - Copyright (C) 1997-2015 Free Software Foundation, Inc.
> - This file is part of the GNU C Library.
> -
> - The GNU C Library is free software; you can redistribute it and/or
> - modify it under the terms of the GNU Lesser General Public
> - License as published by the Free Software Foundation; either
> - version 2.1 of the License, or (at your option) any later version.
> -
> - The GNU C Library is distributed in the hope that it will be useful,
> - but WITHOUT ANY WARRANTY; without even the implied warranty of
> - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> - Lesser General Public License for more details.
> -
> - You should have received a copy of the GNU Lesser General Public
> - License along with the GNU C Library; if not, see
> - <http://www.gnu.org/licenses/>. */
> -
> -#include <math.h>
> -#include <math_private.h>
> -
> -#undef __ieee754_sqrt
> -double
> -__ieee754_sqrt (double x)
> -{
> - double z;
> - __asm __volatile ("fsqrt %0,%1" : "=f" (z) : "f" (x));
> - return z;
> -}
> -strong_alias (__ieee754_sqrt, __sqrt_finite)
> diff --git a/sysdeps/powerpc/powerpc64/fpu/e_sqrtf.c b/sysdeps/powerpc/powerpc64/fpu/e_sqrtf.c
> deleted file mode 100644
> index 5502525..0000000
> --- a/sysdeps/powerpc/powerpc64/fpu/e_sqrtf.c
> +++ /dev/null
> @@ -1,30 +0,0 @@
> -/* Single-precision floating point square root.
> - Copyright (C) 1997-2015 Free Software Foundation, Inc.
> - This file is part of the GNU C Library.
> -
> - The GNU C Library is free software; you can redistribute it and/or
> - modify it under the terms of the GNU Lesser General Public
> - License as published by the Free Software Foundation; either
> - version 2.1 of the License, or (at your option) any later version.
> -
> - The GNU C Library is distributed in the hope that it will be useful,
> - but WITHOUT ANY WARRANTY; without even the implied warranty of
> - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> - Lesser General Public License for more details.
> -
> - You should have received a copy of the GNU Lesser General Public
> - License along with the GNU C Library; if not, see
> - <http://www.gnu.org/licenses/>. */
> -
> -#include <math.h>
> -#include <math_private.h>
> -
> -#undef __ieee754_sqrtf
> -float
> -__ieee754_sqrtf (float x)
> -{
> - double z;
> - __asm ("fsqrts %0,%1" : "=f" (z) : "f" (x));
> - return z;
> -}
> -strong_alias (__ieee754_sqrtf, __sqrtf_finite)
>
@@ -10,15 +10,15 @@ Version 2.21
* The following bugs are resolved with this release:
6652, 10672, 12674, 12847, 12926, 13862, 14132, 14138, 14171, 14498,
- 15215, 15884, 16009, 16418, 16191, 16469, 16617, 16619, 16657, 16740,
- 16857, 17192, 17266, 17273, 17344, 17363, 17370, 17371, 17411, 17460,
- 17475, 17485, 17501, 17506, 17508, 17522, 17555, 17570, 17571, 17572,
- 17573, 17574, 17582, 17583, 17584, 17585, 17589, 17594, 17601, 17608,
- 17616, 17625, 17630, 17633, 17634, 17635, 17647, 17653, 17657, 17658,
- 17664, 17665, 17668, 17682, 17702, 17717, 17719, 17722, 17723, 17724,
- 17725, 17732, 17733, 17744, 17745, 17746, 17747, 17748, 17775, 17777,
- 17780, 17781, 17782, 17791, 17793, 17796, 17797, 17803, 17806, 17834,
- 17844, 17848.
+ 15215, 15884, 16009, 16418, 16191, 16469, 16576, 16617, 16619, 16657,
+ 16740, 16857, 17192, 17266, 17273, 17344, 17363, 17370, 17371, 17411,
+ 17460, 17475, 17485, 17501, 17506, 17508, 17522, 17555, 17570, 17571,
+ 17572, 17573, 17574, 17582, 17583, 17584, 17585, 17589, 17594, 17601,
+ 17608, 17616, 17625, 17630, 17633, 17634, 17635, 17647, 17653, 17657,
+ 17658, 17664, 17665, 17668, 17682, 17702, 17717, 17719, 17722, 17723,
+ 17724, 17725, 17732, 17733, 17744, 17745, 17746, 17747, 17748, 17775,
+ 17777, 17780, 17781, 17782, 17791, 17793, 17796, 17797, 17803, 17806,
+ 17834, 17844, 17848.
* A new semaphore algorithm has been implemented in generic C code for all
machines. Previous custom assembly implementations of semaphore were
@@ -24,6 +24,7 @@
#include <sysdep.h>
#include <ldsodefs.h>
+#ifndef _ARCH_PPCSQ
static const double almost_half = 0.5000000000000001; /* 0.5 + 2^-53 */
static const ieee_float_shape_type a_nan = {.word = 0x7fc00000 };
static const ieee_float_shape_type a_inf = {.word = 0x7f800000 };
@@ -152,6 +153,7 @@ __slow_ieee754_sqrt (double x)
}
return f_wash (x);
}
+#endif /* _ARCH_PPCSQ */
#undef __ieee754_sqrt
double
@@ -159,16 +161,11 @@ __ieee754_sqrt (double x)
{
double z;
- /* If the CPU is 64-bit we can use the optional FP instructions. */
- if (__CPU_HAS_FSQRT)
- {
- /* Volatile is required to prevent the compiler from moving the
- fsqrt instruction above the branch. */
- __asm __volatile (" fsqrt %0,%1\n"
- :"=f" (z):"f" (x));
- }
- else
- z = __slow_ieee754_sqrt (x);
+#ifdef _ARCH_PPCSQ
+ asm ("fsqrt %0,%1\n" :"=f" (z):"f" (x));
+#else
+ z = __slow_ieee754_sqrt (x);
+#endif
return z;
}
@@ -24,6 +24,7 @@
#include <sysdep.h>
#include <ldsodefs.h>
+#ifndef _ARCH_PPCSQ
static const float almost_half = 0.50000006; /* 0.5 + 2^-24 */
static const ieee_float_shape_type a_nan = {.word = 0x7fc00000 };
static const ieee_float_shape_type a_inf = {.word = 0x7f800000 };
@@ -128,6 +129,7 @@ __slow_ieee754_sqrtf (float x)
}
return f_washf (x);
}
+#endif /* _ARCH_PPCSQ */
#undef __ieee754_sqrtf
float
@@ -135,16 +137,11 @@ __ieee754_sqrtf (float x)
{
double z;
- /* If the CPU is 64-bit we can use the optional FP instructions. */
- if (__CPU_HAS_FSQRT)
- {
- /* Volatile is required to prevent the compiler from moving the
- fsqrt instruction above the branch. */
- __asm __volatile (" fsqrts %0,%1\n"
- :"=f" (z):"f" (x));
- }
- else
- z = __slow_ieee754_sqrtf (x);
+#ifdef _ARCH_PPCSQ
+ asm ("fsqrts %0,%1\n" :"=f" (z):"f" (x));
+#else
+ z = __slow_ieee754_sqrtf (x);
+#endif
return z;
}
@@ -25,26 +25,17 @@
#include <fenv_private.h>
#include_next <math_private.h>
-# if __WORDSIZE == 64 || defined _ARCH_PWR4
-# define __CPU_HAS_FSQRT 1
-# else
-# define __CPU_HAS_FSQRT ((GLRO(dl_hwcap) & PPC_FEATURE_64) != 0)
-# endif
-
extern double __slow_ieee754_sqrt (double);
extern __always_inline double
__ieee754_sqrt (double __x)
{
double __z;
- if (__CPU_HAS_FSQRT)
- {
- /* Volatile is required to prevent the compiler from moving the
- fsqrt instruction above the branch. */
- __asm __volatile ("fsqrt %0,%1" : "=f" (__z) : "f" (__x));
- }
- else
- __z = __slow_ieee754_sqrt(__x);
+#ifdef _ARCH_PPCSQ
+ asm ("fsqrt %0,%1" : "=f" (__z) : "f" (__x));
+#else
+ __z = __slow_ieee754_sqrt(__x);
+#endif
return __z;
}
@@ -55,14 +46,11 @@ __ieee754_sqrtf (float __x)
{
float __z;
- if (__CPU_HAS_FSQRT)
- {
- /* Volatile is required to prevent the compiler from moving the
- fsqrts instruction above the branch. */
- __asm __volatile ("fsqrts %0,%1" : "=f" (__z) : "f" (__x));
- }
- else
- __z = __slow_ieee754_sqrtf(__x);
+#ifdef _ARCH_PPCSQ
+ asm ("fsqrts %0,%1" : "=f" (__z) : "f" (__x));
+#else
+ __z = __slow_ieee754_sqrtf(__x);
+#endif
return __z;
}
deleted file mode 100644
@@ -1,30 +0,0 @@
-/* Double-precision floating point square root.
- Copyright (C) 1997-2015 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#include <math.h>
-#include <math_private.h>
-
-#undef __ieee754_sqrt
-double
-__ieee754_sqrt (double x)
-{
- double z;
- __asm __volatile ("fsqrt %0,%1" : "=f" (z) : "f" (x));
- return z;
-}
-strong_alias (__ieee754_sqrt, __sqrt_finite)
deleted file mode 100644
@@ -1,30 +0,0 @@
-/* Single-precision floating point square root.
- Copyright (C) 1997-2015 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#include <math.h>
-#include <math_private.h>
-
-#undef __ieee754_sqrtf
-float
-__ieee754_sqrtf (float x)
-{
- double z;
- __asm ("fsqrts %0,%1" : "=f" (z) : "f" (x));
- return z;
-}
-strong_alias (__ieee754_sqrtf, __sqrtf_finite)