diff mbox series

[v3,1/3] powerpc: Add optimized ilogb* for POWER9

Message ID	20210304150057.14418-1-rzinsly@linux.ibm.com
State	Committed
Headers	DMARC-Filter: OpenDMARC Filter v1.3.2 sourceware.org 00BD0384647C To: libc-alpha@sourceware.org Subject: [PATCH v3 1/3] powerpc: Add optimized ilogb* for POWER9 Date: Thu, 4 Mar 2021 12:00:55 -0300 Message-Id: <20210304150057.14418-1-rzinsly@linux.ibm.com> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit Precedence: list From: Raphael Moreira Zinsly via Libc-alpha <libc-alpha@sourceware.org> Reply-To: Raphael Moreira Zinsly <rzinsly@linux.ibm.com> Cc: murphyp@linux.ibm.com, tuliom@linux.ibm.com, pc@us.ibm.com, Raphael Moreira Zinsly <rzinsly@linux.ibm.com> Errors-To: libc-alpha-bounces@sourceware.org Sender: "Libc-alpha" <libc-alpha-bounces@sourceware.org>
Series	[v3,1/3] powerpc: Add optimized ilogb* for POWER9 | [v3,1/3] powerpc: Add optimized ilogb* for POWER9; [v3,2/3] powerpc: Add optimized llogb* for POWER9; [v3,3/3] benchtests: Add ilogb* tests

Commit Message

Raphael M Zinsly March 4, 2021, 3 p.m. UTC

  Changes since v2:
	- Moved the GCC version test to math_private.h and started using
	  __has_builtin().
	- Removed the optimization from long double as it was converting
	  ibm128 to float128.

---8<---

The xsxexpdp and xsxexpqp instructions, introduced on POWER9, extract the
exponent from a double-precision and a quad-precision floating-point value,
respectively; thus they can be used to improve ilogb, ilogbf and ilogbf128.
---
 sysdeps/powerpc/fpu/math_private.h            | 26 +++++++++++++++-
 .../powerpc64/le/fpu/w_ilogb_template.c       | 30 +++++++++++++++++++
 sysdeps/powerpc/powerpc64/le/fpu/w_ilogbl.c   |  3 ++
 3 files changed, 58 insertions(+), 1 deletion(-)
 create mode 100644 sysdeps/powerpc/powerpc64/le/fpu/w_ilogb_template.c
 create mode 100644 sysdeps/powerpc/powerpc64/le/fpu/w_ilogbl.c

Comments

Paul E Murphy March 4, 2021, 3:30 p.m. UTC | #1

On 3/4/21 9:00 AM, Raphael Moreira Zinsly wrote:
> Changes since v2:
> 	- Moved the GCC version test to math_private.h and start using
> 	  __has_builtin().
> 	- Removed the optimization from long double as it was converting
> 	  ibm128 to float128.
> 
> ---8<---
> 
> The instructions xsxexpdp and xsxexpqp introduced on POWER9 extract
> the exponent from a double-precision and quad-precision floating-point
> respectively, thus they can be used to improve ilogb, ilogbf and ilogbf128.
> ---
>   sysdeps/powerpc/fpu/math_private.h            | 26 +++++++++++++++-
>   .../powerpc64/le/fpu/w_ilogb_template.c       | 30 +++++++++++++++++++
>   sysdeps/powerpc/powerpc64/le/fpu/w_ilogbl.c   |  3 ++
>   3 files changed, 58 insertions(+), 1 deletion(-)
>   create mode 100644 sysdeps/powerpc/powerpc64/le/fpu/w_ilogb_template.c
>   create mode 100644 sysdeps/powerpc/powerpc64/le/fpu/w_ilogbl.c
> 
> diff --git a/sysdeps/powerpc/fpu/math_private.h b/sysdeps/powerpc/fpu/math_private.h
> index 91b1361749..21628f3bda 100644
> --- a/sysdeps/powerpc/fpu/math_private.h
> +++ b/sysdeps/powerpc/fpu/math_private.h
> @@ -25,7 +25,28 @@
>   
>   #include_next <math_private.h>
>   
> -#if defined _ARCH_PWR9 && __HAVE_DISTINCT_FLOAT128
> +#ifdef _ARCH_PWR9
> +
> +#if __GNUC_PREREQ (8, 0)
> +# define _GL_HAS_BUILTIN_ILOGB 1
> +#elif defined __has_builtin
> +# define _GL_HAS_BUILTIN_ILOGB __has_builtin (__builtin_vsx_scalar_extract_exp)
> +#else
> +# define _GL_HAS_BUILTIN_ILOGB 0
> +#endif
> +
> +#define __builtin_test_dc_ilogbf __builtin_test_dc_ilogb
> +#define __builtin_ilogbf __builtin_ilogb
> +
> +#define __builtin_test_dc_ilogb(x, y) \
> +        __builtin_vsx_scalar_test_data_class_dp(x, y)
> +#define __builtin_ilogb(x) __builtin_vsx_scalar_extract_exp(x) - 0x3ff
> +
> +#define __builtin_test_dc_ilogbf128(x, y) \
> +        __builtin_vsx_scalar_test_data_class_qp(x, y)
> +#define __builtin_ilogbf128(x) __builtin_vsx_scalar_extract_expq(x) - 0x3fff
> +
> +#if __HAVE_DISTINCT_FLOAT128
>   extern __always_inline _Float128
>   __ieee754_sqrtf128 (_Float128 __x)
>   {
> @@ -34,5 +55,8 @@ __ieee754_sqrtf128 (_Float128 __x)
>     return __z;
>   }
>   #endif
> +#else /* !_ARCH_PWR9 */
> +#define _GL_HAS_BUILTIN_ILOGB 0
> +#endif
>   
>   #endif /* _PPC_MATH_PRIVATE_H_ */
> diff --git a/sysdeps/powerpc/powerpc64/le/fpu/w_ilogb_template.c b/sysdeps/powerpc/powerpc64/le/fpu/w_ilogb_template.c
> new file mode 100644
> index 0000000000..b5c1c0aa9d
> --- /dev/null
> +++ b/sysdeps/powerpc/powerpc64/le/fpu/w_ilogb_template.c
> @@ -0,0 +1,30 @@
> +#include <math.h>
> +#include <errno.h>
> +#include <limits.h>
> +#include <math_private.h>
> +#include <fenv.h>
> +
> +#if _GL_HAS_BUILTIN_ILOGB
> +int
> +M_DECL_FUNC (__ilogb) (FLOAT x)
> +{
> +  int r;
> +  /* Check for exceptional cases.  */
> +  if (! M_SUF(__builtin_test_dc_ilogb) (x, 0x7f))
> +    r = M_SUF (__builtin_ilogb) (x);
> +  else
> +    /* Fallback to the generic ilogb if x is NaN, Inf, zero or subnormal.  */
> +    r = M_SUF (__ieee754_ilogb) (x);
> +  if (__builtin_expect (r == FP_ILOGB0, 0)
> +      || __builtin_expect (r == FP_ILOGBNAN, 0)
> +      || __builtin_expect (r == INT_MAX, 0))
> +    {
> +      __set_errno (EDOM);
> +      __feraiseexcept (FE_INVALID);
> +    }
> +  return r;
> +}
> +declare_mgen_alias (__ilogb, ilogb)
> +#else
> +#include <math/w_ilogb_template.c>
> +#endif
> diff --git a/sysdeps/powerpc/powerpc64/le/fpu/w_ilogbl.c b/sysdeps/powerpc/powerpc64/le/fpu/w_ilogbl.c
> new file mode 100644
> index 0000000000..215a00141d
> --- /dev/null
> +++ b/sysdeps/powerpc/powerpc64/le/fpu/w_ilogbl.c
> @@ -0,0 +1,3 @@
> +/* Skip the optimization for long double as it uses ibm128. */

I would recommend rewording this as "... as ibm128 does not provide an 
optimized builtin".

Though, I suspect you could use the double version of these built-ins by 
extracting the most significant (high-order) double of the ibm128, and 
testing/extracting it in a similar manner.

Anyhow, this patch is OK with a minor rewording of this comment.

> +#include <math-type-macros-ldouble.h>
> +#include <math/w_ilogb_template.c>
>

Raphael M Zinsly March 16, 2021, 4:20 p.m. UTC | #2

On 04/03/2021 12:30, Paul E Murphy wrote:
> 
>> diff --git a/sysdeps/powerpc/powerpc64/le/fpu/w_ilogbl.c 
>> b/sysdeps/powerpc/powerpc64/le/fpu/w_ilogbl.c
>> new file mode 100644
>> index 0000000000..215a00141d
>> --- /dev/null
>> +++ b/sysdeps/powerpc/powerpc64/le/fpu/w_ilogbl.c
>> @@ -0,0 +1,3 @@
>> +/* Skip the optimization for long double as it uses ibm128. */
> 
> I would recommend rewording this as "... as ibm128 does not provide an 
> optimized builtin".
> 
> Though, I suspect you could use the double version of these built-ins by 
> extracting the significant double of the ibm128, and testing/extracting 
> it in a similar manner.
> 
> Anyhow, this patch is OK with a minor rewording of this comment.
> 

I fixed that and pushed as 56c81132ccc6f468fa4fc29c536db060e18e9d87, thanks!

Best Regards,

diff mbox series

Patch

diff --git a/sysdeps/powerpc/fpu/math_private.h b/sysdeps/powerpc/fpu/math_private.h
index 91b1361749..21628f3bda 100644
--- a/sysdeps/powerpc/fpu/math_private.h
+++ b/sysdeps/powerpc/fpu/math_private.h
@@ -25,7 +25,28 @@ 
 
 #include_next <math_private.h>
 
-#if defined _ARCH_PWR9 && __HAVE_DISTINCT_FLOAT128
+#ifdef _ARCH_PWR9
+
+#if __GNUC_PREREQ (8, 0)
+# define _GL_HAS_BUILTIN_ILOGB 1
+#elif defined __has_builtin
+# define _GL_HAS_BUILTIN_ILOGB __has_builtin (__builtin_vsx_scalar_extract_exp)
+#else
+# define _GL_HAS_BUILTIN_ILOGB 0
+#endif
+
+#define __builtin_test_dc_ilogbf __builtin_test_dc_ilogb
+#define __builtin_ilogbf __builtin_ilogb
+
+#define __builtin_test_dc_ilogb(x, y) \
+        __builtin_vsx_scalar_test_data_class_dp(x, y)
+#define __builtin_ilogb(x) __builtin_vsx_scalar_extract_exp(x) - 0x3ff
+
+#define __builtin_test_dc_ilogbf128(x, y) \
+        __builtin_vsx_scalar_test_data_class_qp(x, y)
+#define __builtin_ilogbf128(x) __builtin_vsx_scalar_extract_expq(x) - 0x3fff
+
+#if __HAVE_DISTINCT_FLOAT128
 extern __always_inline _Float128
 __ieee754_sqrtf128 (_Float128 __x)
 {
@@ -34,5 +55,8 @@  __ieee754_sqrtf128 (_Float128 __x)
   return __z;
 }
 #endif
+#else /* !_ARCH_PWR9 */
+#define _GL_HAS_BUILTIN_ILOGB 0
+#endif
 
 #endif /* _PPC_MATH_PRIVATE_H_ */
diff --git a/sysdeps/powerpc/powerpc64/le/fpu/w_ilogb_template.c b/sysdeps/powerpc/powerpc64/le/fpu/w_ilogb_template.c
new file mode 100644
index 0000000000..b5c1c0aa9d
--- /dev/null
+++ b/sysdeps/powerpc/powerpc64/le/fpu/w_ilogb_template.c
@@ -0,0 +1,30 @@ 
+#include <math.h>
+#include <errno.h>
+#include <limits.h>
+#include <math_private.h>
+#include <fenv.h>
+
+#if _GL_HAS_BUILTIN_ILOGB
+int
+M_DECL_FUNC (__ilogb) (FLOAT x)
+{
+  int r;
+  /* Check for exceptional cases.  */
+  if (! M_SUF(__builtin_test_dc_ilogb) (x, 0x7f))
+    r = M_SUF (__builtin_ilogb) (x);
+  else
+    /* Fallback to the generic ilogb if x is NaN, Inf, zero or subnormal.  */
+    r = M_SUF (__ieee754_ilogb) (x);
+  if (__builtin_expect (r == FP_ILOGB0, 0)
+      || __builtin_expect (r == FP_ILOGBNAN, 0)
+      || __builtin_expect (r == INT_MAX, 0))
+    {
+      __set_errno (EDOM);
+      __feraiseexcept (FE_INVALID);
+    }
+  return r;
+}
+declare_mgen_alias (__ilogb, ilogb)
+#else
+#include <math/w_ilogb_template.c>
+#endif
diff --git a/sysdeps/powerpc/powerpc64/le/fpu/w_ilogbl.c b/sysdeps/powerpc/powerpc64/le/fpu/w_ilogbl.c
new file mode 100644
index 0000000000..215a00141d
--- /dev/null
+++ b/sysdeps/powerpc/powerpc64/le/fpu/w_ilogbl.c
@@ -0,0 +1,3 @@ 
+/* Skip the optimization for long double as it uses ibm128. */
+#include <math-type-macros-ldouble.h>
+#include <math/w_ilogb_template.c>