diff mbox series

[1/3] powerpc: Add optimized ilogb* for POWER9

Message ID 20210226160814.24963-1-rzinsly@linux.ibm.com
State Superseded
Delegated to: Tulio Magno Quites Machado Filho
Headers show
Series [1/3] powerpc: Add optimized ilogb* for POWER9 | expand

Commit Message

Raphael M Zinsly Feb. 26, 2021, 4:08 p.m. UTC
The instructions xsxexpdp and xsxexpqp, introduced on POWER9, extract
the exponent from a double-precision and a quad-precision floating-point
value, respectively; thus they can be used to improve ilogb, ilogbf and ilogbf128.
---
 .../powerpc64/le/fpu/multiarch/math_private.h | 14 +++++++++
 .../powerpc64/le/fpu/w_ilogb_template.c       | 29 +++++++++++++++++++
 2 files changed, 43 insertions(+)
 create mode 100644 sysdeps/powerpc/powerpc64/le/fpu/w_ilogb_template.c

Comments

Raphael M Zinsly Feb. 26, 2021, 4:14 p.m. UTC | #1
Benchtest results without and with this patch on a POWER9 system:

without:
"ilogbf128": {
    "subnormal": {
     "duration": 5.10445e+08,
     "iterations": 2.5826e+07,
     "max": 39.454,
     "min": 3.098,
     "mean": 19.7648
    },
    "normal": {
     "duration": 5.02611e+08,
     "iterations": 1.54482e+08,
     "max": 6.037,
     "min": 3.241,
     "mean": 3.25352
    }
   },
"ilogb": {
    "subnormal": {
     "duration": 5.09116e+08,
     "iterations": 4.7656e+07,
     "max": 33.015,
     "min": 3.042,
     "mean": 10.6831
    },
    "normal": {
     "duration": 5.01558e+08,
     "iterations": 1.72702e+08,
     "max": 5.6,
     "min": 2.893,
     "mean": 2.90418
    }
   },
   "ilogbf": {
    "subnormal": {
     "duration": 5.0847e+08,
     "iterations": 5.8494e+07,
     "max": 15.928,
     "min": 3.032,
     "mean": 8.69269
    },
    "normal": {
     "duration": 5.00965e+08,
     "iterations": 1.80536e+08,
     "max": 10.71,
     "min": 2.756,
     "mean": 2.77488
    }
   }


with:
"ilogbf128": {
    "subnormal": {
     "duration": 5.10468e+08,
     "iterations": 2.534e+07,
     "max": 38.864,
     "min": 3.379,
     "mean": 20.1448
    },
    "normal": {
     "duration": 4.98668e+08,
     "iterations": 2.1883e+08,
     "max": 16.539,
     "min": 2.215,
     "mean": 2.27879
    }
   },
"ilogb": {
    "subnormal": {
     "duration": 5.0921e+08,
     "iterations": 4.6646e+07,
     "max": 21.122,
     "min": 3.477,
     "mean": 10.9165
    },
    "normal": {
     "duration": 4.98203e+08,
     "iterations": 2.2731e+08,
     "max": 4.944,
     "min": 2.12,
     "mean": 2.19173
    }
   },
   "ilogbf": {
    "subnormal": {
     "duration": 4.9848e+08,
     "iterations": 2.2245e+08,
     "max": 11.685,
     "min": 2.16,
     "mean": 2.24086
    },
    "normal": {
     "duration": 4.98546e+08,
     "iterations": 2.22032e+08,
     "max": 5.855,
     "min": 2.166,
     "mean": 2.24538
    }
   }
Paul E Murphy Feb. 26, 2021, 4:16 p.m. UTC | #2
On 2/26/21 10:08 AM, Raphael Moreira Zinsly wrote:
> The instructions xsxexpdp and xsxexpqp introduced on POWER9 extract
> the exponent from a double-precision and quad-precision floating-point
> respectively, thus they can be used to improve ilogb, ilogbf and ilogbf128.
> ---
>   .../powerpc64/le/fpu/multiarch/math_private.h | 14 +++++++++
>   .../powerpc64/le/fpu/w_ilogb_template.c       | 29 +++++++++++++++++++
>   2 files changed, 43 insertions(+)
>   create mode 100644 sysdeps/powerpc/powerpc64/le/fpu/w_ilogb_template.c
> 
> diff --git a/sysdeps/powerpc/powerpc64/le/fpu/multiarch/math_private.h b/sysdeps/powerpc/powerpc64/le/fpu/multiarch/math_private.h
> index 0970709cff..955497e98e 100644
> --- a/sysdeps/powerpc/powerpc64/le/fpu/multiarch/math_private.h
> +++ b/sysdeps/powerpc/powerpc64/le/fpu/multiarch/math_private.h
> @@ -1,6 +1,20 @@
>   #ifndef MATH_PRIVATE_PPC64LE_MA
>   #define MATH_PRIVATE_PPC64LE_MA 1
>   
> +#define __builtin_test_dc_ilogbf __builtin_test_dc_ilogb
> +#define __builtin_ilogbf __builtin_ilogb
> +
> +#define __builtin_test_dc_ilogbl __builtin_test_dc_ilogbf128
> +#define __builtin_ilogbl __builtin_ilogbf128
> +
> +#define __builtin_test_dc_ilogb(x, y) \
> +        __builtin_vsx_scalar_test_data_class_dp(x, y)
> +#define __builtin_ilogb(x) __builtin_vsx_scalar_extract_exp(x) - 0x3ff
> +
> +#define __builtin_test_dc_ilogbf128(x, y) \
> +        __builtin_vsx_scalar_test_data_class_qp(x, y)
> +#define __builtin_ilogbf128(x) __builtin_vsx_scalar_extract_expq(x) - 0x3fff
> +
>   #include_next <math_private.h>

Should these be placed in the powerpc math_private.h?  I suspect this 
will not build if multiarch is disabled.

Also, have you tested with the minimum version of GCC required for 
glibc? I am not sure when these builtins were added.
Raphael M Zinsly Feb. 26, 2021, 6:30 p.m. UTC | #3
On 26/02/2021 13:16, Paul E Murphy wrote:
> 
> 
> On 2/26/21 10:08 AM, Raphael Moreira Zinsly wrote:
>> The instructions xsxexpdp and xsxexpqp introduced on POWER9 extract
>> the exponent from a double-precision and quad-precision floating-point
>> respectively, thus they can be used to improve ilogb, ilogbf and 
>> ilogbf128.
>> ---
>>   .../powerpc64/le/fpu/multiarch/math_private.h | 14 +++++++++
>>   .../powerpc64/le/fpu/w_ilogb_template.c       | 29 +++++++++++++++++++
>>   2 files changed, 43 insertions(+)
>>   create mode 100644 sysdeps/powerpc/powerpc64/le/fpu/w_ilogb_template.c
>>
>> diff --git a/sysdeps/powerpc/powerpc64/le/fpu/multiarch/math_private.h 
>> b/sysdeps/powerpc/powerpc64/le/fpu/multiarch/math_private.h
>> index 0970709cff..955497e98e 100644
>> --- a/sysdeps/powerpc/powerpc64/le/fpu/multiarch/math_private.h
>> +++ b/sysdeps/powerpc/powerpc64/le/fpu/multiarch/math_private.h
>> @@ -1,6 +1,20 @@
>>   #ifndef MATH_PRIVATE_PPC64LE_MA
>>   #define MATH_PRIVATE_PPC64LE_MA 1
>> +#define __builtin_test_dc_ilogbf __builtin_test_dc_ilogb
>> +#define __builtin_ilogbf __builtin_ilogb
>> +
>> +#define __builtin_test_dc_ilogbl __builtin_test_dc_ilogbf128
>> +#define __builtin_ilogbl __builtin_ilogbf128
>> +
>> +#define __builtin_test_dc_ilogb(x, y) \
>> +        __builtin_vsx_scalar_test_data_class_dp(x, y)
>> +#define __builtin_ilogb(x) __builtin_vsx_scalar_extract_exp(x) - 0x3ff
>> +
>> +#define __builtin_test_dc_ilogbf128(x, y) \
>> +        __builtin_vsx_scalar_test_data_class_qp(x, y)
>> +#define __builtin_ilogbf128(x) __builtin_vsx_scalar_extract_expq(x) - 
>> 0x3fff
>> +
>>   #include_next <math_private.h>
> 
> Should these be placed in the powerpc math_private.h?  I suspect this 
> will not build if multiarch is disabled.

Thanks for catching that, I'll fix that in the next version.

> 
> Also, have you tested with the minimum version of GCC required for 
> glibc? I am not sure when these builtins were added.

You are right: this doesn't work with GCC older than 8.4. I'll add a guard
for that in my V2.

Thanks,
diff mbox series

Patch

diff --git a/sysdeps/powerpc/powerpc64/le/fpu/multiarch/math_private.h b/sysdeps/powerpc/powerpc64/le/fpu/multiarch/math_private.h
index 0970709cff..955497e98e 100644
--- a/sysdeps/powerpc/powerpc64/le/fpu/multiarch/math_private.h
+++ b/sysdeps/powerpc/powerpc64/le/fpu/multiarch/math_private.h
@@ -1,6 +1,20 @@ 
 #ifndef MATH_PRIVATE_PPC64LE_MA
 #define MATH_PRIVATE_PPC64LE_MA 1
 
+#define __builtin_test_dc_ilogbf __builtin_test_dc_ilogb
+#define __builtin_ilogbf __builtin_ilogb
+
+#define __builtin_test_dc_ilogbl __builtin_test_dc_ilogbf128
+#define __builtin_ilogbl __builtin_ilogbf128
+
+#define __builtin_test_dc_ilogb(x, y) \
+        __builtin_vsx_scalar_test_data_class_dp(x, y)
+#define __builtin_ilogb(x) (__builtin_vsx_scalar_extract_exp(x) - 0x3ff)
+
+#define __builtin_test_dc_ilogbf128(x, y) \
+        __builtin_vsx_scalar_test_data_class_qp(x, y)
+#define __builtin_ilogbf128(x) (__builtin_vsx_scalar_extract_expq(x) - 0x3fff)
+
 #include_next <math_private.h>
 
 #if defined (_F128_ENABLE_IFUNC)
diff --git a/sysdeps/powerpc/powerpc64/le/fpu/w_ilogb_template.c b/sysdeps/powerpc/powerpc64/le/fpu/w_ilogb_template.c
new file mode 100644
index 0000000000..3d76a3d0fb
--- /dev/null
+++ b/sysdeps/powerpc/powerpc64/le/fpu/w_ilogb_template.c
@@ -0,0 +1,29 @@ 
+#ifdef _ARCH_PWR9
+#include <math.h>
+#include <errno.h>
+#include <limits.h>
+#include <math_private.h>
+#include <fenv.h>
+
+int
+M_DECL_FUNC (__ilogb) (FLOAT x)
+{
+  int r;
+  /* Mask 0x7f selects every special class: NaN, Inf, zero, subnormal.  */
+  if (! M_SUF(__builtin_test_dc_ilogb) (x, 0x7f))
+    r = M_SUF (__builtin_ilogb) (x);
+  else
+    /* Fall back to the generic ilogb if x is NaN, Inf, zero or subnormal.  */
+    r = M_SUF (__ieee754_ilogb) (x);
+  if (__builtin_expect (r == FP_ILOGB0, 0)
+      || __builtin_expect (r == FP_ILOGBNAN, 0)
+      || __builtin_expect (r == INT_MAX, 0))
+    {
+      __set_errno (EDOM);
+      __feraiseexcept (FE_INVALID);
+    }
+  return r;
+}
+declare_mgen_alias (__ilogb, ilogb)
+
+#endif