diff mbox series

[v2,1/3] powerpc: Add optimized ilogb* for POWER9

Message ID	20210301175140.29109-1-rzinsly@linux.ibm.com
State	Superseded
Headers	DMARC-Filter: OpenDMARC Filter v1.3.2 sourceware.org 4D800386185F To: libc-alpha@sourceware.org Subject: [PATCH v2 1/3] powerpc: Add optimized ilogb* for POWER9 Date: Mon, 1 Mar 2021 14:51:38 -0300 Message-Id: <20210301175140.29109-1-rzinsly@linux.ibm.com> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit Precedence: list From: Raphael Moreira Zinsly via Libc-alpha <libc-alpha@sourceware.org> Reply-To: Raphael Moreira Zinsly <rzinsly@linux.ibm.com> Cc: murphyp@linux.ibm.com, tuliom@linux.ibm.com, Raphael Moreira Zinsly <rzinsly@linux.ibm.com> Errors-To: libc-alpha-bounces@sourceware.org Sender: "Libc-alpha" <libc-alpha-bounces@sourceware.org>
Series	[v2,1/3] powerpc: Add optimized ilogb* for POWER9 \| [v2,1/3] powerpc: Add optimized ilogb* for POWER9 [v2,2/3] powerpc: Add optimized llogb* for POWER9 [v2,3/3] benchtests: Add ilogb* tests

Commit Message

Raphael M Zinsly March 1, 2021, 5:51 p.m. UTC

  Changes since v1:
	- Move the builtins definitions to powerpc's math_private.h.
	- Check if the correct GCC version is used.

--8<---

The instructions xsxexpdp and xsxexpqp introduced on POWER9 extract
the exponent from a double-precision and quad-precision floating-point
respectively, thus they can be used to improve ilogb, ilogbf and ilogbf128.
---
 sysdeps/powerpc/fpu/math_private.h            | 20 +++++++++++-
 .../powerpc64/le/fpu/w_ilogb_template.c       | 31 +++++++++++++++++++
 2 files changed, 50 insertions(+), 1 deletion(-)
 create mode 100644 sysdeps/powerpc/powerpc64/le/fpu/w_ilogb_template.c

Comments

Paul A. Clarke March 2, 2021, 1:27 a.m. UTC | #1

On Mon, Mar 01, 2021 at 02:51:38PM -0300, Raphael Moreira Zinsly via Libc-alpha wrote:
> Changes since v1:
> 	- Move the builtins definitions to powerpc's math_private.h.
> 	- Check if the correct GCC version is used.
> 
> --8<---
> 
> The instructions xsxexpdp and xsxexpqp introduced on POWER9 extract
> the exponent from a double-precision and quad-precision floating-point
> respectively, thus they can be used to improve ilogb, ilogbf and ilogbf128.
> ---
>  sysdeps/powerpc/fpu/math_private.h            | 20 +++++++++++-
>  .../powerpc64/le/fpu/w_ilogb_template.c       | 31 +++++++++++++++++++
>  2 files changed, 50 insertions(+), 1 deletion(-)
>  create mode 100644 sysdeps/powerpc/powerpc64/le/fpu/w_ilogb_template.c
> 
> diff --git a/sysdeps/powerpc/fpu/math_private.h b/sysdeps/powerpc/fpu/math_private.h
> index 91b1361749..accc28d091 100644
> --- a/sysdeps/powerpc/fpu/math_private.h
> +++ b/sysdeps/powerpc/fpu/math_private.h
> @@ -25,7 +25,23 @@
> 
>  #include_next <math_private.h>
> 
> -#if defined _ARCH_PWR9 && __HAVE_DISTINCT_FLOAT128
> +#ifdef _ARCH_PWR9
> +
> +#define __builtin_test_dc_ilogbf __builtin_test_dc_ilogb
> +#define __builtin_ilogbf __builtin_ilogb
> +
> +#define __builtin_test_dc_ilogbl __builtin_test_dc_ilogbf128
> +#define __builtin_ilogbl __builtin_ilogbf128
> +
> +#define __builtin_test_dc_ilogb(x, y) \
> +        __builtin_vsx_scalar_test_data_class_dp(x, y)
> +#define __builtin_ilogb(x) __builtin_vsx_scalar_extract_exp(x) - 0x3ff
> +
> +#define __builtin_test_dc_ilogbf128(x, y) \
> +        __builtin_vsx_scalar_test_data_class_qp(x, y)
> +#define __builtin_ilogbf128(x) __builtin_vsx_scalar_extract_expq(x) - 0x3fff
> +
> +#if __HAVE_DISTINCT_FLOAT128
>  extern __always_inline _Float128
>  __ieee754_sqrtf128 (_Float128 __x)
>  {
> @@ -35,4 +51,6 @@ __ieee754_sqrtf128 (_Float128 __x)
>  }
>  #endif
> 
> +#endif /* _ARCH_PWR9 */
> +
>  #endif /* _PPC_MATH_PRIVATE_H_ */
> diff --git a/sysdeps/powerpc/powerpc64/le/fpu/w_ilogb_template.c b/sysdeps/powerpc/powerpc64/le/fpu/w_ilogb_template.c
> new file mode 100644
> index 0000000000..17ac7809e1
> --- /dev/null
> +++ b/sysdeps/powerpc/powerpc64/le/fpu/w_ilogb_template.c
> @@ -0,0 +1,31 @@
> +/* The builtins used are only available with GCC 8.0 or newer.  */
> +#if defined _ARCH_PWR9 && __GNUC_PREREQ (8, 0)

I wonder if it would be better to use __glibc_has_builtin () for the
builtins on which you depend, rather than testing for a specific GCC level.

(Same for patch 2/3.)

PC

> +#include <math.h>
> +#include <errno.h>
> +#include <limits.h>
> +#include <math_private.h>
> +#include <fenv.h>
> +
> +int
> +M_DECL_FUNC (__ilogb) (FLOAT x)
> +{
> +  int r;
> +  /* Check for exceptional cases.  */
> +  if (! M_SUF(__builtin_test_dc_ilogb) (x, 0x7f))
> +    r = M_SUF (__builtin_ilogb) (x);
> +  else
> +    /* Fallback to the generic ilogb if x is NaN, Inf or subnormal.  */
> +    r = M_SUF (__ieee754_ilogb) (x);
> +  if (__builtin_expect (r == FP_ILOGB0, 0)
> +      || __builtin_expect (r == FP_ILOGBNAN, 0)
> +      || __builtin_expect (r == INT_MAX, 0))
> +    {
> +      __set_errno (EDOM);
> +      __feraiseexcept (FE_INVALID);
> +    }
> +  return r;
> +}
> +declare_mgen_alias (__ilogb, ilogb)
> +#else
> +#include <math/w_ilogb_template.c>
> +#endif
> -- 
> 2.29.2
>

Paul E Murphy March 2, 2021, 5:46 p.m. UTC | #2

On 3/1/21 11:51 AM, Raphael Moreira Zinsly wrote:
> Changes since v1:
> 	- Move the builtins definitions to powerpc's math_private.h.
> 	- Check if the correct GCC version is used.
> 
> --8<---
> 
> The instructions xsxexpdp and xsxexpqp introduced on POWER9 extract
> the exponent from a double-precision and quad-precision floating-point
> respectively, thus they can be used to improve ilogb, ilogbf and ilogbf128.
> ---
>   sysdeps/powerpc/fpu/math_private.h            | 20 +++++++++++-
>   .../powerpc64/le/fpu/w_ilogb_template.c       | 31 +++++++++++++++++++
>   2 files changed, 50 insertions(+), 1 deletion(-)
>   create mode 100644 sysdeps/powerpc/powerpc64/le/fpu/w_ilogb_template.c
> 
> diff --git a/sysdeps/powerpc/fpu/math_private.h b/sysdeps/powerpc/fpu/math_private.h
> index 91b1361749..accc28d091 100644
> --- a/sysdeps/powerpc/fpu/math_private.h
> +++ b/sysdeps/powerpc/fpu/math_private.h
> @@ -25,7 +25,23 @@
>   
>   #include_next <math_private.h>
>   
> -#if defined _ARCH_PWR9 && __HAVE_DISTINCT_FLOAT128
> +#ifdef _ARCH_PWR9
> +
> +#define __builtin_test_dc_ilogbf __builtin_test_dc_ilogb
> +#define __builtin_ilogbf __builtin_ilogb
> +
> +#define __builtin_test_dc_ilogbl __builtin_test_dc_ilogbf128
> +#define __builtin_ilogbl __builtin_ilogbf128

I suspect this converting an ibm128 value to float128.  Can you verify 
whether this is the case, and if so, exclude ibm long doubles from this 
optimization? (Note, libm should be building long double == ibm128)

Otherwise, LGTM.

> +
> +#define __builtin_test_dc_ilogb(x, y) \
> +        __builtin_vsx_scalar_test_data_class_dp(x, y)
> +#define __builtin_ilogb(x) __builtin_vsx_scalar_extract_exp(x) - 0x3ff
> +
> +#define __builtin_test_dc_ilogbf128(x, y) \
> +        __builtin_vsx_scalar_test_data_class_qp(x, y)
> +#define __builtin_ilogbf128(x) __builtin_vsx_scalar_extract_expq(x) - 0x3fff
> +
> +#if __HAVE_DISTINCT_FLOAT128
>   extern __always_inline _Float128
>   __ieee754_sqrtf128 (_Float128 __x)
>   {
> @@ -35,4 +51,6 @@ __ieee754_sqrtf128 (_Float128 __x)
>   }
>   #endif
>   
> +#endif /* _ARCH_PWR9 */
> +
>   #endif /* _PPC_MATH_PRIVATE_H_ */
> diff --git a/sysdeps/powerpc/powerpc64/le/fpu/w_ilogb_template.c b/sysdeps/powerpc/powerpc64/le/fpu/w_ilogb_template.c
> new file mode 100644
> index 0000000000..17ac7809e1
> --- /dev/null
> +++ b/sysdeps/powerpc/powerpc64/le/fpu/w_ilogb_template.c
> @@ -0,0 +1,31 @@
> +/* The builtins used are only available with GCC 8.0 or newer.  */
> +#if defined _ARCH_PWR9 && __GNUC_PREREQ (8, 0)
> +#include <math.h>
> +#include <errno.h>
> +#include <limits.h>
> +#include <math_private.h>
> +#include <fenv.h>
> +
> +int
> +M_DECL_FUNC (__ilogb) (FLOAT x)
> +{
> +  int r;
> +  /* Check for exceptional cases.  */
> +  if (! M_SUF(__builtin_test_dc_ilogb) (x, 0x7f))
> +    r = M_SUF (__builtin_ilogb) (x);
> +  else
> +    /* Fallback to the generic ilogb if x is NaN, Inf or subnormal.  */
> +    r = M_SUF (__ieee754_ilogb) (x);
> +  if (__builtin_expect (r == FP_ILOGB0, 0)
> +      || __builtin_expect (r == FP_ILOGBNAN, 0)
> +      || __builtin_expect (r == INT_MAX, 0))
> +    {
> +      __set_errno (EDOM);
> +      __feraiseexcept (FE_INVALID);
> +    }
> +  return r;
> +}
> +declare_mgen_alias (__ilogb, ilogb)
> +#else
> +#include <math/w_ilogb_template.c>
> +#endif
>

Raphael M Zinsly March 3, 2021, 4:23 p.m. UTC | #3

On 01/03/2021 22:27, Paul A. Clarke wrote:
> On Mon, Mar 01, 2021 at 02:51:38PM -0300, Raphael Moreira Zinsly via Libc-alpha wrote:
>> Changes since v1:
>> 	- Move the builtins definitions to powerpc's math_private.h.
>> 	- Check if the correct GCC version is used.
>>
>> --8<---
>>
>> The instructions xsxexpdp and xsxexpqp introduced on POWER9 extract
>> the exponent from a double-precision and quad-precision floating-point
>> respectively, thus they can be used to improve ilogb, ilogbf and ilogbf128.
>> ---
>>   sysdeps/powerpc/fpu/math_private.h            | 20 +++++++++++-
>>   .../powerpc64/le/fpu/w_ilogb_template.c       | 31 +++++++++++++++++++
>>   2 files changed, 50 insertions(+), 1 deletion(-)
>>   create mode 100644 sysdeps/powerpc/powerpc64/le/fpu/w_ilogb_template.c
>>
>> diff --git a/sysdeps/powerpc/fpu/math_private.h b/sysdeps/powerpc/fpu/math_private.h
>> index 91b1361749..accc28d091 100644
>> --- a/sysdeps/powerpc/fpu/math_private.h
>> +++ b/sysdeps/powerpc/fpu/math_private.h
>> @@ -25,7 +25,23 @@
>>
>>   #include_next <math_private.h>
>>
>> -#if defined _ARCH_PWR9 && __HAVE_DISTINCT_FLOAT128
>> +#ifdef _ARCH_PWR9
>> +
>> +#define __builtin_test_dc_ilogbf __builtin_test_dc_ilogb
>> +#define __builtin_ilogbf __builtin_ilogb
>> +
>> +#define __builtin_test_dc_ilogbl __builtin_test_dc_ilogbf128
>> +#define __builtin_ilogbl __builtin_ilogbf128
>> +
>> +#define __builtin_test_dc_ilogb(x, y) \
>> +        __builtin_vsx_scalar_test_data_class_dp(x, y)
>> +#define __builtin_ilogb(x) __builtin_vsx_scalar_extract_exp(x) - 0x3ff
>> +
>> +#define __builtin_test_dc_ilogbf128(x, y) \
>> +        __builtin_vsx_scalar_test_data_class_qp(x, y)
>> +#define __builtin_ilogbf128(x) __builtin_vsx_scalar_extract_expq(x) - 0x3fff
>> +
>> +#if __HAVE_DISTINCT_FLOAT128
>>   extern __always_inline _Float128
>>   __ieee754_sqrtf128 (_Float128 __x)
>>   {
>> @@ -35,4 +51,6 @@ __ieee754_sqrtf128 (_Float128 __x)
>>   }
>>   #endif
>>
>> +#endif /* _ARCH_PWR9 */
>> +
>>   #endif /* _PPC_MATH_PRIVATE_H_ */
>> diff --git a/sysdeps/powerpc/powerpc64/le/fpu/w_ilogb_template.c b/sysdeps/powerpc/powerpc64/le/fpu/w_ilogb_template.c
>> new file mode 100644
>> index 0000000000..17ac7809e1
>> --- /dev/null
>> +++ b/sysdeps/powerpc/powerpc64/le/fpu/w_ilogb_template.c
>> @@ -0,0 +1,31 @@
>> +/* The builtins used are only available with GCC 8.0 or newer.  */
>> +#if defined _ARCH_PWR9 && __GNUC_PREREQ (8, 0)
> 
> I wonder if it would be better to use __glibc_has_builtin () for the
> builtins on which you depend, rather than testing for a specific GCC level.
>

I didn't find a __glibc_has_builtin definition, do you mean the 
preprocessor's __has_builtin()? I believe it's not available on GCC 8.0.

> (Same for patch 2/3.)
> 
> PC

Paul A. Clarke March 3, 2021, 5:20 p.m. UTC | #4

On Wed, Mar 03, 2021 at 01:23:43PM -0300, Raphael M Zinsly via Libc-alpha wrote:
> On 01/03/2021 22:27, Paul A. Clarke wrote:
> > On Mon, Mar 01, 2021 at 02:51:38PM -0300, Raphael Moreira Zinsly via Libc-alpha wrote:
> > > Changes since v1:
> > > 	- Move the builtins definitions to powerpc's math_private.h.
> > > 	- Check if the correct GCC version is used.
> > > 
> > > --8<---
> > > 
> > > The instructions xsxexpdp and xsxexpqp introduced on POWER9 extract
> > > the exponent from a double-precision and quad-precision floating-point
> > > respectively, thus they can be used to improve ilogb, ilogbf and ilogbf128.
> > > ---
> > >   sysdeps/powerpc/fpu/math_private.h            | 20 +++++++++++-
> > >   .../powerpc64/le/fpu/w_ilogb_template.c       | 31 +++++++++++++++++++
> > >   2 files changed, 50 insertions(+), 1 deletion(-)
> > >   create mode 100644 sysdeps/powerpc/powerpc64/le/fpu/w_ilogb_template.c
> > > 
> > > diff --git a/sysdeps/powerpc/fpu/math_private.h b/sysdeps/powerpc/fpu/math_private.h
> > > index 91b1361749..accc28d091 100644
> > > --- a/sysdeps/powerpc/fpu/math_private.h
> > > +++ b/sysdeps/powerpc/fpu/math_private.h
> > > @@ -25,7 +25,23 @@
> > > 
> > >   #include_next <math_private.h>
> > > 
> > > -#if defined _ARCH_PWR9 && __HAVE_DISTINCT_FLOAT128
> > > +#ifdef _ARCH_PWR9
> > > +
> > > +#define __builtin_test_dc_ilogbf __builtin_test_dc_ilogb
> > > +#define __builtin_ilogbf __builtin_ilogb
> > > +
> > > +#define __builtin_test_dc_ilogbl __builtin_test_dc_ilogbf128
> > > +#define __builtin_ilogbl __builtin_ilogbf128
> > > +
> > > +#define __builtin_test_dc_ilogb(x, y) \
> > > +        __builtin_vsx_scalar_test_data_class_dp(x, y)
> > > +#define __builtin_ilogb(x) __builtin_vsx_scalar_extract_exp(x) - 0x3ff
> > > +
> > > +#define __builtin_test_dc_ilogbf128(x, y) \
> > > +        __builtin_vsx_scalar_test_data_class_qp(x, y)
> > > +#define __builtin_ilogbf128(x) __builtin_vsx_scalar_extract_expq(x) - 0x3fff
> > > +
> > > +#if __HAVE_DISTINCT_FLOAT128
> > >   extern __always_inline _Float128
> > >   __ieee754_sqrtf128 (_Float128 __x)
> > >   {
> > > @@ -35,4 +51,6 @@ __ieee754_sqrtf128 (_Float128 __x)
> > >   }
> > >   #endif
> > > 
> > > +#endif /* _ARCH_PWR9 */
> > > +
> > >   #endif /* _PPC_MATH_PRIVATE_H_ */
> > > diff --git a/sysdeps/powerpc/powerpc64/le/fpu/w_ilogb_template.c b/sysdeps/powerpc/powerpc64/le/fpu/w_ilogb_template.c
> > > new file mode 100644
> > > index 0000000000..17ac7809e1
> > > --- /dev/null
> > > +++ b/sysdeps/powerpc/powerpc64/le/fpu/w_ilogb_template.c
> > > @@ -0,0 +1,31 @@
> > > +/* The builtins used are only available with GCC 8.0 or newer.  */
> > > +#if defined _ARCH_PWR9 && __GNUC_PREREQ (8, 0)
> > 
> > I wonder if it would be better to use __glibc_has_builtin () for the
> > builtins on which you depend, rather than testing for a specific GCC level.
> > 
> 
> I didn't find a __glibc_has_builtin definition, do you mean the
> preprocessor's __has_builtin()? I believe it's not available on GCC 8.0.

misc/sys/cdefs.h:
--
/* Compilers that lack __has_attribute may object to
       #if defined __has_attribute && __has_attribute (...)
   even though they do not need to evaluate the right-hand side of the &&.
   Similarly for __has_builtin, etc.  */
[...]
#ifdef __has_builtin
# define __glibc_has_builtin(name) __has_builtin (name)
#else
# define __glibc_has_builtin(name) 0
#endif
--

... so, it covers pre-GCC8 by always saying "nope".

PC

diff mbox series

Patch

diff --git a/sysdeps/powerpc/fpu/math_private.h b/sysdeps/powerpc/fpu/math_private.h
index 91b1361749..accc28d091 100644
--- a/sysdeps/powerpc/fpu/math_private.h
+++ b/sysdeps/powerpc/fpu/math_private.h
@@ -25,7 +25,23 @@ 
 
 #include_next <math_private.h>
 
-#if defined _ARCH_PWR9 && __HAVE_DISTINCT_FLOAT128
+#ifdef _ARCH_PWR9
+
+#define __builtin_test_dc_ilogbf __builtin_test_dc_ilogb
+#define __builtin_ilogbf __builtin_ilogb
+
+#define __builtin_test_dc_ilogbl __builtin_test_dc_ilogbf128
+#define __builtin_ilogbl __builtin_ilogbf128
+
+#define __builtin_test_dc_ilogb(x, y) \
+        __builtin_vsx_scalar_test_data_class_dp(x, y)
+#define __builtin_ilogb(x) __builtin_vsx_scalar_extract_exp(x) - 0x3ff
+
+#define __builtin_test_dc_ilogbf128(x, y) \
+        __builtin_vsx_scalar_test_data_class_qp(x, y)
+#define __builtin_ilogbf128(x) __builtin_vsx_scalar_extract_expq(x) - 0x3fff
+
+#if __HAVE_DISTINCT_FLOAT128
 extern __always_inline _Float128
 __ieee754_sqrtf128 (_Float128 __x)
 {
@@ -35,4 +51,6 @@  __ieee754_sqrtf128 (_Float128 __x)
 }
 #endif
 
+#endif /* _ARCH_PWR9 */
+
 #endif /* _PPC_MATH_PRIVATE_H_ */
diff --git a/sysdeps/powerpc/powerpc64/le/fpu/w_ilogb_template.c b/sysdeps/powerpc/powerpc64/le/fpu/w_ilogb_template.c
new file mode 100644
index 0000000000..17ac7809e1
--- /dev/null
+++ b/sysdeps/powerpc/powerpc64/le/fpu/w_ilogb_template.c
@@ -0,0 +1,31 @@ 
+/* The builtins used are only available with GCC 8.0 or newer.  */
+#if defined _ARCH_PWR9 && __GNUC_PREREQ (8, 0)
+#include <math.h>
+#include <errno.h>
+#include <limits.h>
+#include <math_private.h>
+#include <fenv.h>
+
+int
+M_DECL_FUNC (__ilogb) (FLOAT x)
+{
+  int r;
+  /* Check for exceptional cases.  */
+  if (! M_SUF(__builtin_test_dc_ilogb) (x, 0x7f))
+    r = M_SUF (__builtin_ilogb) (x);
+  else
+    /* Fallback to the generic ilogb if x is NaN, Inf or subnormal.  */
+    r = M_SUF (__ieee754_ilogb) (x);
+  if (__builtin_expect (r == FP_ILOGB0, 0)
+      || __builtin_expect (r == FP_ILOGBNAN, 0)
+      || __builtin_expect (r == INT_MAX, 0))
+    {
+      __set_errno (EDOM);
+      __feraiseexcept (FE_INVALID);
+    }
+  return r;
+}
+declare_mgen_alias (__ilogb, ilogb)
+#else
+#include <math/w_ilogb_template.c>
+#endif