[v2,1/3] powerpc: Add optimized ilogb* for POWER9

Message ID 20210301175140.29109-1-rzinsly@linux.ibm.com
State Superseded
Headers
Series [v2,1/3] powerpc: Add optimized ilogb* for POWER9 |

Commit Message

Raphael M Zinsly March 1, 2021, 5:51 p.m. UTC
  Changes since v1:
	- Move the builtins definitions to powerpc's math_private.h.
	- Check if the correct GCC version is used.

--8<---

The instructions xsxexpdp and xsxexpqp introduced on POWER9 extract
the exponent from a double-precision and quad-precision floating-point
respectively, thus they can be used to improve ilogb, ilogbf and ilogbf128.
---
 sysdeps/powerpc/fpu/math_private.h            | 20 +++++++++++-
 .../powerpc64/le/fpu/w_ilogb_template.c       | 31 +++++++++++++++++++
 2 files changed, 50 insertions(+), 1 deletion(-)
 create mode 100644 sysdeps/powerpc/powerpc64/le/fpu/w_ilogb_template.c
  

Comments

Paul A. Clarke March 2, 2021, 1:27 a.m. UTC | #1
On Mon, Mar 01, 2021 at 02:51:38PM -0300, Raphael Moreira Zinsly via Libc-alpha wrote:
> Changes since v1:
> 	- Move the builtins definitions to powerpc's math_private.h.
> 	- Check if the correct GCC version is used.
> 
> --8<---
> 
> The instructions xsxexpdp and xsxexpqp introduced on POWER9 extract
> the exponent from a double-precision and quad-precision floating-point
> respectively, thus they can be used to improve ilogb, ilogbf and ilogbf128.
> ---
>  sysdeps/powerpc/fpu/math_private.h            | 20 +++++++++++-
>  .../powerpc64/le/fpu/w_ilogb_template.c       | 31 +++++++++++++++++++
>  2 files changed, 50 insertions(+), 1 deletion(-)
>  create mode 100644 sysdeps/powerpc/powerpc64/le/fpu/w_ilogb_template.c
> 
> diff --git a/sysdeps/powerpc/fpu/math_private.h b/sysdeps/powerpc/fpu/math_private.h
> index 91b1361749..accc28d091 100644
> --- a/sysdeps/powerpc/fpu/math_private.h
> +++ b/sysdeps/powerpc/fpu/math_private.h
> @@ -25,7 +25,23 @@
> 
>  #include_next <math_private.h>
> 
> -#if defined _ARCH_PWR9 && __HAVE_DISTINCT_FLOAT128
> +#ifdef _ARCH_PWR9
> +
> +#define __builtin_test_dc_ilogbf __builtin_test_dc_ilogb
> +#define __builtin_ilogbf __builtin_ilogb
> +
> +#define __builtin_test_dc_ilogbl __builtin_test_dc_ilogbf128
> +#define __builtin_ilogbl __builtin_ilogbf128
> +
> +#define __builtin_test_dc_ilogb(x, y) \
> +        __builtin_vsx_scalar_test_data_class_dp(x, y)
> +#define __builtin_ilogb(x) __builtin_vsx_scalar_extract_exp(x) - 0x3ff
> +
> +#define __builtin_test_dc_ilogbf128(x, y) \
> +        __builtin_vsx_scalar_test_data_class_qp(x, y)
> +#define __builtin_ilogbf128(x) __builtin_vsx_scalar_extract_expq(x) - 0x3fff
> +
> +#if __HAVE_DISTINCT_FLOAT128
>  extern __always_inline _Float128
>  __ieee754_sqrtf128 (_Float128 __x)
>  {
> @@ -35,4 +51,6 @@ __ieee754_sqrtf128 (_Float128 __x)
>  }
>  #endif
> 
> +#endif /* _ARCH_PWR9 */
> +
>  #endif /* _PPC_MATH_PRIVATE_H_ */
> diff --git a/sysdeps/powerpc/powerpc64/le/fpu/w_ilogb_template.c b/sysdeps/powerpc/powerpc64/le/fpu/w_ilogb_template.c
> new file mode 100644
> index 0000000000..17ac7809e1
> --- /dev/null
> +++ b/sysdeps/powerpc/powerpc64/le/fpu/w_ilogb_template.c
> @@ -0,0 +1,31 @@
> +/* The builtins used are only available with GCC 8.0 or newer.  */
> +#if defined _ARCH_PWR9 && __GNUC_PREREQ (8, 0)

I wonder if it would be better to use __glibc_has_builtin () for the
builtins on which you depend, rather than testing for a specific GCC level.

(Same for patch 2/3.)

PC

> +#include <math.h>
> +#include <errno.h>
> +#include <limits.h>
> +#include <math_private.h>
> +#include <fenv.h>
> +
> +int
> +M_DECL_FUNC (__ilogb) (FLOAT x)
> +{
> +  int r;
> +  /* Check for exceptional cases.  */
> +  if (! M_SUF(__builtin_test_dc_ilogb) (x, 0x7f))
> +    r = M_SUF (__builtin_ilogb) (x);
> +  else
> +    /* Fallback to the generic ilogb if x is NaN, Inf or subnormal.  */
> +    r = M_SUF (__ieee754_ilogb) (x);
> +  if (__builtin_expect (r == FP_ILOGB0, 0)
> +      || __builtin_expect (r == FP_ILOGBNAN, 0)
> +      || __builtin_expect (r == INT_MAX, 0))
> +    {
> +      __set_errno (EDOM);
> +      __feraiseexcept (FE_INVALID);
> +    }
> +  return r;
> +}
> +declare_mgen_alias (__ilogb, ilogb)
> +#else
> +#include <math/w_ilogb_template.c>
> +#endif
> -- 
> 2.29.2
>
  
Paul E Murphy March 2, 2021, 5:46 p.m. UTC | #2
On 3/1/21 11:51 AM, Raphael Moreira Zinsly wrote:
> Changes since v1:
> 	- Move the builtins definitions to powerpc's math_private.h.
> 	- Check if the correct GCC version is used.
> 
> --8<---
> 
> The instructions xsxexpdp and xsxexpqp introduced on POWER9 extract
> the exponent from a double-precision and quad-precision floating-point
> respectively, thus they can be used to improve ilogb, ilogbf and ilogbf128.
> ---
>   sysdeps/powerpc/fpu/math_private.h            | 20 +++++++++++-
>   .../powerpc64/le/fpu/w_ilogb_template.c       | 31 +++++++++++++++++++
>   2 files changed, 50 insertions(+), 1 deletion(-)
>   create mode 100644 sysdeps/powerpc/powerpc64/le/fpu/w_ilogb_template.c
> 
> diff --git a/sysdeps/powerpc/fpu/math_private.h b/sysdeps/powerpc/fpu/math_private.h
> index 91b1361749..accc28d091 100644
> --- a/sysdeps/powerpc/fpu/math_private.h
> +++ b/sysdeps/powerpc/fpu/math_private.h
> @@ -25,7 +25,23 @@
>   
>   #include_next <math_private.h>
>   
> -#if defined _ARCH_PWR9 && __HAVE_DISTINCT_FLOAT128
> +#ifdef _ARCH_PWR9
> +
> +#define __builtin_test_dc_ilogbf __builtin_test_dc_ilogb
> +#define __builtin_ilogbf __builtin_ilogb
> +
> +#define __builtin_test_dc_ilogbl __builtin_test_dc_ilogbf128
> +#define __builtin_ilogbl __builtin_ilogbf128

I suspect this converting an ibm128 value to float128.  Can you verify 
whether this is the case, and if so, exclude ibm long doubles from this 
optimization? (Note, libm should be building long double == ibm128)

Otherwise, LGTM.

> +
> +#define __builtin_test_dc_ilogb(x, y) \
> +        __builtin_vsx_scalar_test_data_class_dp(x, y)
> +#define __builtin_ilogb(x) __builtin_vsx_scalar_extract_exp(x) - 0x3ff
> +
> +#define __builtin_test_dc_ilogbf128(x, y) \
> +        __builtin_vsx_scalar_test_data_class_qp(x, y)
> +#define __builtin_ilogbf128(x) __builtin_vsx_scalar_extract_expq(x) - 0x3fff
> +
> +#if __HAVE_DISTINCT_FLOAT128
>   extern __always_inline _Float128
>   __ieee754_sqrtf128 (_Float128 __x)
>   {
> @@ -35,4 +51,6 @@ __ieee754_sqrtf128 (_Float128 __x)
>   }
>   #endif
>   
> +#endif /* _ARCH_PWR9 */
> +
>   #endif /* _PPC_MATH_PRIVATE_H_ */
> diff --git a/sysdeps/powerpc/powerpc64/le/fpu/w_ilogb_template.c b/sysdeps/powerpc/powerpc64/le/fpu/w_ilogb_template.c
> new file mode 100644
> index 0000000000..17ac7809e1
> --- /dev/null
> +++ b/sysdeps/powerpc/powerpc64/le/fpu/w_ilogb_template.c
> @@ -0,0 +1,31 @@
> +/* The builtins used are only available with GCC 8.0 or newer.  */
> +#if defined _ARCH_PWR9 && __GNUC_PREREQ (8, 0)
> +#include <math.h>
> +#include <errno.h>
> +#include <limits.h>
> +#include <math_private.h>
> +#include <fenv.h>
> +
> +int
> +M_DECL_FUNC (__ilogb) (FLOAT x)
> +{
> +  int r;
> +  /* Check for exceptional cases.  */
> +  if (! M_SUF(__builtin_test_dc_ilogb) (x, 0x7f))
> +    r = M_SUF (__builtin_ilogb) (x);
> +  else
> +    /* Fallback to the generic ilogb if x is NaN, Inf or subnormal.  */
> +    r = M_SUF (__ieee754_ilogb) (x);
> +  if (__builtin_expect (r == FP_ILOGB0, 0)
> +      || __builtin_expect (r == FP_ILOGBNAN, 0)
> +      || __builtin_expect (r == INT_MAX, 0))
> +    {
> +      __set_errno (EDOM);
> +      __feraiseexcept (FE_INVALID);
> +    }
> +  return r;
> +}
> +declare_mgen_alias (__ilogb, ilogb)
> +#else
> +#include <math/w_ilogb_template.c>
> +#endif
>
  
Raphael M Zinsly March 3, 2021, 4:23 p.m. UTC | #3
On 01/03/2021 22:27, Paul A. Clarke wrote:
> On Mon, Mar 01, 2021 at 02:51:38PM -0300, Raphael Moreira Zinsly via Libc-alpha wrote:
>> Changes since v1:
>> 	- Move the builtins definitions to powerpc's math_private.h.
>> 	- Check if the correct GCC version is used.
>>
>> --8<---
>>
>> The instructions xsxexpdp and xsxexpqp introduced on POWER9 extract
>> the exponent from a double-precision and quad-precision floating-point
>> respectively, thus they can be used to improve ilogb, ilogbf and ilogbf128.
>> ---
>>   sysdeps/powerpc/fpu/math_private.h            | 20 +++++++++++-
>>   .../powerpc64/le/fpu/w_ilogb_template.c       | 31 +++++++++++++++++++
>>   2 files changed, 50 insertions(+), 1 deletion(-)
>>   create mode 100644 sysdeps/powerpc/powerpc64/le/fpu/w_ilogb_template.c
>>
>> diff --git a/sysdeps/powerpc/fpu/math_private.h b/sysdeps/powerpc/fpu/math_private.h
>> index 91b1361749..accc28d091 100644
>> --- a/sysdeps/powerpc/fpu/math_private.h
>> +++ b/sysdeps/powerpc/fpu/math_private.h
>> @@ -25,7 +25,23 @@
>>
>>   #include_next <math_private.h>
>>
>> -#if defined _ARCH_PWR9 && __HAVE_DISTINCT_FLOAT128
>> +#ifdef _ARCH_PWR9
>> +
>> +#define __builtin_test_dc_ilogbf __builtin_test_dc_ilogb
>> +#define __builtin_ilogbf __builtin_ilogb
>> +
>> +#define __builtin_test_dc_ilogbl __builtin_test_dc_ilogbf128
>> +#define __builtin_ilogbl __builtin_ilogbf128
>> +
>> +#define __builtin_test_dc_ilogb(x, y) \
>> +        __builtin_vsx_scalar_test_data_class_dp(x, y)
>> +#define __builtin_ilogb(x) __builtin_vsx_scalar_extract_exp(x) - 0x3ff
>> +
>> +#define __builtin_test_dc_ilogbf128(x, y) \
>> +        __builtin_vsx_scalar_test_data_class_qp(x, y)
>> +#define __builtin_ilogbf128(x) __builtin_vsx_scalar_extract_expq(x) - 0x3fff
>> +
>> +#if __HAVE_DISTINCT_FLOAT128
>>   extern __always_inline _Float128
>>   __ieee754_sqrtf128 (_Float128 __x)
>>   {
>> @@ -35,4 +51,6 @@ __ieee754_sqrtf128 (_Float128 __x)
>>   }
>>   #endif
>>
>> +#endif /* _ARCH_PWR9 */
>> +
>>   #endif /* _PPC_MATH_PRIVATE_H_ */
>> diff --git a/sysdeps/powerpc/powerpc64/le/fpu/w_ilogb_template.c b/sysdeps/powerpc/powerpc64/le/fpu/w_ilogb_template.c
>> new file mode 100644
>> index 0000000000..17ac7809e1
>> --- /dev/null
>> +++ b/sysdeps/powerpc/powerpc64/le/fpu/w_ilogb_template.c
>> @@ -0,0 +1,31 @@
>> +/* The builtins used are only available with GCC 8.0 or newer.  */
>> +#if defined _ARCH_PWR9 && __GNUC_PREREQ (8, 0)
> 
> I wonder if it would be better to use __glibc_has_builtin () for the
> builtins on which you depend, rather than testing for a specific GCC level.
>

I didn't find a __glibc_has_builtin definition, do you mean the 
preprocessor's __has_builtin()? I believe it's not available on GCC 8.0.

> (Same for patch 2/3.)
> 
> PC
  
Paul A. Clarke March 3, 2021, 5:20 p.m. UTC | #4
On Wed, Mar 03, 2021 at 01:23:43PM -0300, Raphael M Zinsly via Libc-alpha wrote:
> On 01/03/2021 22:27, Paul A. Clarke wrote:
> > On Mon, Mar 01, 2021 at 02:51:38PM -0300, Raphael Moreira Zinsly via Libc-alpha wrote:
> > > Changes since v1:
> > > 	- Move the builtins definitions to powerpc's math_private.h.
> > > 	- Check if the correct GCC version is used.
> > > 
> > > --8<---
> > > 
> > > The instructions xsxexpdp and xsxexpqp introduced on POWER9 extract
> > > the exponent from a double-precision and quad-precision floating-point
> > > respectively, thus they can be used to improve ilogb, ilogbf and ilogbf128.
> > > ---
> > >   sysdeps/powerpc/fpu/math_private.h            | 20 +++++++++++-
> > >   .../powerpc64/le/fpu/w_ilogb_template.c       | 31 +++++++++++++++++++
> > >   2 files changed, 50 insertions(+), 1 deletion(-)
> > >   create mode 100644 sysdeps/powerpc/powerpc64/le/fpu/w_ilogb_template.c
> > > 
> > > diff --git a/sysdeps/powerpc/fpu/math_private.h b/sysdeps/powerpc/fpu/math_private.h
> > > index 91b1361749..accc28d091 100644
> > > --- a/sysdeps/powerpc/fpu/math_private.h
> > > +++ b/sysdeps/powerpc/fpu/math_private.h
> > > @@ -25,7 +25,23 @@
> > > 
> > >   #include_next <math_private.h>
> > > 
> > > -#if defined _ARCH_PWR9 && __HAVE_DISTINCT_FLOAT128
> > > +#ifdef _ARCH_PWR9
> > > +
> > > +#define __builtin_test_dc_ilogbf __builtin_test_dc_ilogb
> > > +#define __builtin_ilogbf __builtin_ilogb
> > > +
> > > +#define __builtin_test_dc_ilogbl __builtin_test_dc_ilogbf128
> > > +#define __builtin_ilogbl __builtin_ilogbf128
> > > +
> > > +#define __builtin_test_dc_ilogb(x, y) \
> > > +        __builtin_vsx_scalar_test_data_class_dp(x, y)
> > > +#define __builtin_ilogb(x) __builtin_vsx_scalar_extract_exp(x) - 0x3ff
> > > +
> > > +#define __builtin_test_dc_ilogbf128(x, y) \
> > > +        __builtin_vsx_scalar_test_data_class_qp(x, y)
> > > +#define __builtin_ilogbf128(x) __builtin_vsx_scalar_extract_expq(x) - 0x3fff
> > > +
> > > +#if __HAVE_DISTINCT_FLOAT128
> > >   extern __always_inline _Float128
> > >   __ieee754_sqrtf128 (_Float128 __x)
> > >   {
> > > @@ -35,4 +51,6 @@ __ieee754_sqrtf128 (_Float128 __x)
> > >   }
> > >   #endif
> > > 
> > > +#endif /* _ARCH_PWR9 */
> > > +
> > >   #endif /* _PPC_MATH_PRIVATE_H_ */
> > > diff --git a/sysdeps/powerpc/powerpc64/le/fpu/w_ilogb_template.c b/sysdeps/powerpc/powerpc64/le/fpu/w_ilogb_template.c
> > > new file mode 100644
> > > index 0000000000..17ac7809e1
> > > --- /dev/null
> > > +++ b/sysdeps/powerpc/powerpc64/le/fpu/w_ilogb_template.c
> > > @@ -0,0 +1,31 @@
> > > +/* The builtins used are only available with GCC 8.0 or newer.  */
> > > +#if defined _ARCH_PWR9 && __GNUC_PREREQ (8, 0)
> > 
> > I wonder if it would be better to use __glibc_has_builtin () for the
> > builtins on which you depend, rather than testing for a specific GCC level.
> > 
> 
> I didn't find a __glibc_has_builtin definition, do you mean the
> preprocessor's __has_builtin()? I believe it's not available on GCC 8.0.

misc/sys/cdefs.h:
--
/* Compilers that lack __has_attribute may object to
       #if defined __has_attribute && __has_attribute (...)
   even though they do not need to evaluate the right-hand side of the &&.
   Similarly for __has_builtin, etc.  */
[...]
#ifdef __has_builtin
# define __glibc_has_builtin(name) __has_builtin (name)
#else
# define __glibc_has_builtin(name) 0
#endif
--

... so, it covers pre-GCC8 by always saying "nope".

PC
  

Patch

diff --git a/sysdeps/powerpc/fpu/math_private.h b/sysdeps/powerpc/fpu/math_private.h
index 91b1361749..accc28d091 100644
--- a/sysdeps/powerpc/fpu/math_private.h
+++ b/sysdeps/powerpc/fpu/math_private.h
@@ -25,7 +25,23 @@ 
 
 #include_next <math_private.h>
 
-#if defined _ARCH_PWR9 && __HAVE_DISTINCT_FLOAT128
+#ifdef _ARCH_PWR9
+
+#define __builtin_test_dc_ilogbf __builtin_test_dc_ilogb
+#define __builtin_ilogbf __builtin_ilogb
+
+#define __builtin_test_dc_ilogbl __builtin_test_dc_ilogbf128
+#define __builtin_ilogbl __builtin_ilogbf128
+
+#define __builtin_test_dc_ilogb(x, y) \
+        __builtin_vsx_scalar_test_data_class_dp(x, y)
+#define __builtin_ilogb(x) __builtin_vsx_scalar_extract_exp(x) - 0x3ff
+
+#define __builtin_test_dc_ilogbf128(x, y) \
+        __builtin_vsx_scalar_test_data_class_qp(x, y)
+#define __builtin_ilogbf128(x) __builtin_vsx_scalar_extract_expq(x) - 0x3fff
+
+#if __HAVE_DISTINCT_FLOAT128
 extern __always_inline _Float128
 __ieee754_sqrtf128 (_Float128 __x)
 {
@@ -35,4 +51,6 @@  __ieee754_sqrtf128 (_Float128 __x)
 }
 #endif
 
+#endif /* _ARCH_PWR9 */
+
 #endif /* _PPC_MATH_PRIVATE_H_ */
diff --git a/sysdeps/powerpc/powerpc64/le/fpu/w_ilogb_template.c b/sysdeps/powerpc/powerpc64/le/fpu/w_ilogb_template.c
new file mode 100644
index 0000000000..17ac7809e1
--- /dev/null
+++ b/sysdeps/powerpc/powerpc64/le/fpu/w_ilogb_template.c
@@ -0,0 +1,31 @@ 
+/* The builtins used are only available with GCC 8.0 or newer.  */
+#if defined _ARCH_PWR9 && __GNUC_PREREQ (8, 0)
+#include <math.h>
+#include <errno.h>
+#include <limits.h>
+#include <math_private.h>
+#include <fenv.h>
+
+int
+M_DECL_FUNC (__ilogb) (FLOAT x)
+{
+  int r;
+  /* Check for exceptional cases.  */
+  if (! M_SUF(__builtin_test_dc_ilogb) (x, 0x7f))
+    r = M_SUF (__builtin_ilogb) (x);
+  else
+    /* Fallback to the generic ilogb if x is NaN, Inf or subnormal.  */
+    r = M_SUF (__ieee754_ilogb) (x);
+  if (__builtin_expect (r == FP_ILOGB0, 0)
+      || __builtin_expect (r == FP_ILOGBNAN, 0)
+      || __builtin_expect (r == INT_MAX, 0))
+    {
+      __set_errno (EDOM);
+      __feraiseexcept (FE_INVALID);
+    }
+  return r;
+}
+declare_mgen_alias (__ilogb, ilogb)
+#else
+#include <math/w_ilogb_template.c>
+#endif