[v2,5/6,powerpc] __fesetround_inline optimizations
Commit Message
From: "Paul A. Clarke" <pc@us.ibm.com>
On POWER9, use a more efficient means to update the 2-bit rounding mode,
namely the 'mffscrn' instruction (instead of two 'mtfsb0/1' instructions
or one 'mtfsfi' instruction that modifies 4 bits).
Suggested-by: Paul E. Murphy <murphyp@linux.ibm.com>
2019-09-19 Paul A. Clarke <pc@us.ibm.com>
* sysdeps/powerpc/fpu/fenv_libc.h (__fesetround_inline): Use
'mffscrn' instruction on POWER9.
(__fesetround_inline_nocheck): Likewise.
---
v2: No change.
sysdeps/powerpc/fpu/fenv_libc.h | 18 +++++++++++++++---
1 file changed, 15 insertions(+), 3 deletions(-)
Comments
On 9/19/19 1:46 PM, Paul A. Clarke wrote:
> From: "Paul A. Clarke" <pc@us.ibm.com>
>
> On POWER9, use more efficient means to update the 2-bit rounding mode
> via the 'mffscrn' instruction (instead of two 'mtfsb0/1' instructions
> or one 'mtfsfi' instruction that modifies 4 bits).
>
> Suggested-by: Paul E. Murphy <murphyp@linux.ibm.com>
>
> 2019-09-19 Paul A. Clarke <pc@us.ibm.com>
>
> * sysdeps/powerpc/fpu/fenv_libc.h (__fesetround_inline): Use
> 'mffscrn' instruction on POWER9.
> (__fesetround_inline_nocheck): Likewise.
> ---
> v2: No change.
>
> sysdeps/powerpc/fpu/fenv_libc.h | 18 +++++++++++++++---
> 1 file changed, 15 insertions(+), 3 deletions(-)
>
> diff --git a/sysdeps/powerpc/fpu/fenv_libc.h b/sysdeps/powerpc/fpu/fenv_libc.h
> index 53de1c8..3b91340 100644
> --- a/sysdeps/powerpc/fpu/fenv_libc.h
> +++ b/sysdeps/powerpc/fpu/fenv_libc.h
> @@ -156,7 +156,12 @@ typedef union
> static inline int
> __fesetround_inline (int round)
> {
> - if ((unsigned int) round < 2)
> +#ifdef _ARCH_PWR9
> + __fe_mffscrn (round);
> +#else
> + if (__glibc_likely (GLRO(dl_hwcap2) & PPC_FEATURE2_ARCH_3_00))
> + __fe_mffscrn (round);
> + else if ((unsigned int) round < 2)
> {
> asm volatile ("mtfsb0 30");
> if ((unsigned int) round == 0)
> @@ -172,7 +177,7 @@ __fesetround_inline (int round)
> else
> asm volatile ("mtfsb1 31");
> }
> -
> +#endif
> return 0;
> }
>
> @@ -181,7 +186,14 @@ __fesetround_inline (int round)
> static inline void
> __fesetround_inline_nocheck (const int round)
> {
> - asm volatile ("mtfsfi 7,%0" : : "i" (round));
> +#ifdef _ARCH_PWR9
> + __fe_mffscrn (round);
> +#else
> + if (__glibc_likely (GLRO(dl_hwcap2) & PPC_FEATURE2_ARCH_3_00))
> + __fe_mffscrn (round);
> + else
> + asm volatile ("mtfsfi 7,%0" : : "i" (round));
> +#endif
> }
>
> #define FPSCR_MASK(bit) (1 << (31 - (bit)))
>
OK.
Reviewed-By: Paul E Murphy <murphyp@linux.ibm.com>
@@ -156,7 +156,12 @@ typedef union
static inline int
__fesetround_inline (int round)
{
- if ((unsigned int) round < 2)
+#ifdef _ARCH_PWR9
+ __fe_mffscrn (round);
+#else
+ if (__glibc_likely (GLRO(dl_hwcap2) & PPC_FEATURE2_ARCH_3_00))
+ __fe_mffscrn (round);
+ else if ((unsigned int) round < 2)
{
asm volatile ("mtfsb0 30");
if ((unsigned int) round == 0)
@@ -172,7 +177,7 @@ __fesetround_inline (int round)
else
asm volatile ("mtfsb1 31");
}
-
+#endif
return 0;
}
@@ -181,7 +186,14 @@ __fesetround_inline (int round)
static inline void
__fesetround_inline_nocheck (const int round)
{
- asm volatile ("mtfsfi 7,%0" : : "i" (round));
+#ifdef _ARCH_PWR9
+ __fe_mffscrn (round);
+#else
+ if (__glibc_likely (GLRO(dl_hwcap2) & PPC_FEATURE2_ARCH_3_00))
+ __fe_mffscrn (round);
+ else
+ asm volatile ("mtfsfi 7,%0" : : "i" (round));
+#endif
}
#define FPSCR_MASK(bit) (1 << (31 - (bit)))