Patchwork [v2,5/6,powerpc] __fesetround_inline optimizations

login
register
mail settings
Submitter Paul Clarke
Date Sept. 19, 2019, 6:46 p.m.
Message ID <1568918810-20393-6-git-send-email-pc@us.ibm.com>
Download mbox | patch
Permalink /patch/34600/
State New
Headers show

Comments

Paul Clarke - Sept. 19, 2019, 6:46 p.m.
From: "Paul A. Clarke" <pc@us.ibm.com>

On POWER9, update the 2-bit rounding mode more efficiently by using the
single 'mffscrn' instruction (instead of two 'mtfsb0/1' instructions
or one 'mtfsfi' instruction that modifies 4 bits).

Suggested-by: Paul E. Murphy <murphyp@linux.ibm.com>

2019-09-19  Paul A. Clarke  <pc@us.ibm.com>

	* sysdeps/powerpc/fpu/fenv_libc.h (__fesetround_inline): Use
	'mffscrn' instruction on POWER9.
	(__fesetround_inline_nocheck): Likewise.
---
v2: No change.

 sysdeps/powerpc/fpu/fenv_libc.h | 18 +++++++++++++++---
 1 file changed, 15 insertions(+), 3 deletions(-)
Paul E Murphy - Sept. 23, 2019, 4:08 p.m.
On 9/19/19 1:46 PM, Paul A. Clarke wrote:
> From: "Paul A. Clarke" <pc@us.ibm.com>
> 
> On POWER9, use more efficient means to update the 2-bit rounding mode
> via the 'mffscrn' instruction (instead of two 'mtfsb0/1' instructions
> or one 'mtfsfi' instruction that modifies 4 bits).
> 
> Suggested-by: Paul E. Murphy  <murphyp@linux.ibm.com>
> 
> 2019-09-19  Paul A. Clarke  <pc@us.ibm.com>
> 
> 	* sysdeps/powerpc/fpu/fenv_libc.h (__fesetround_inline): Use
> 	'mffscrn' instruction on POWER9.
> 	(__fesetround_inline_nocheck): Likewise.
> ---
> v2: No change.
> 
>   sysdeps/powerpc/fpu/fenv_libc.h | 18 +++++++++++++++---
>   1 file changed, 15 insertions(+), 3 deletions(-)
> 
> diff --git a/sysdeps/powerpc/fpu/fenv_libc.h b/sysdeps/powerpc/fpu/fenv_libc.h
> index 53de1c8..3b91340 100644
> --- a/sysdeps/powerpc/fpu/fenv_libc.h
> +++ b/sysdeps/powerpc/fpu/fenv_libc.h
> @@ -156,7 +156,12 @@ typedef union
>   static inline int
>   __fesetround_inline (int round)
>   {
> -  if ((unsigned int) round < 2)
> +#ifdef _ARCH_PWR9
> +  __fe_mffscrn (round);
> +#else
> +  if (__glibc_likely (GLRO(dl_hwcap2) & PPC_FEATURE2_ARCH_3_00))
> +    __fe_mffscrn (round);
> +  else if ((unsigned int) round < 2)
>       {
>          asm volatile ("mtfsb0 30");
>          if ((unsigned int) round == 0)
> @@ -172,7 +177,7 @@ __fesetround_inline (int round)
>          else
>            asm volatile ("mtfsb1 31");
>       }
> -
> +#endif
>     return 0;
>   }
>   
> @@ -181,7 +186,14 @@ __fesetround_inline (int round)
>   static inline void
>   __fesetround_inline_nocheck (const int round)
>   {
> -  asm volatile ("mtfsfi 7,%0" : : "i" (round));
> +#ifdef _ARCH_PWR9
> +  __fe_mffscrn (round);
> +#else
> +  if (__glibc_likely (GLRO(dl_hwcap2) & PPC_FEATURE2_ARCH_3_00))
> +    __fe_mffscrn (round);
> +  else
> +    asm volatile ("mtfsfi 7,%0" : : "i" (round));
> +#endif
>   }
>   
>   #define FPSCR_MASK(bit) (1 << (31 - (bit)))
> 

OK.

Reviewed-By: Paul E Murphy <murphyp@linux.ibm.com>

Patch

diff --git a/sysdeps/powerpc/fpu/fenv_libc.h b/sysdeps/powerpc/fpu/fenv_libc.h
index 53de1c8..3b91340 100644
--- a/sysdeps/powerpc/fpu/fenv_libc.h
+++ b/sysdeps/powerpc/fpu/fenv_libc.h
@@ -156,7 +156,12 @@  typedef union
 static inline int
 __fesetround_inline (int round)
 {
-  if ((unsigned int) round < 2)
+#ifdef _ARCH_PWR9
+  __fe_mffscrn (round);
+#else
+  if (__glibc_likely (GLRO(dl_hwcap2) & PPC_FEATURE2_ARCH_3_00))
+    __fe_mffscrn (round);
+  else if ((unsigned int) round < 2)
     {
        asm volatile ("mtfsb0 30");
        if ((unsigned int) round == 0)
@@ -172,7 +177,7 @@  __fesetround_inline (int round)
        else
          asm volatile ("mtfsb1 31");
     }
-
+#endif
   return 0;
 }
 
@@ -181,7 +186,14 @@  __fesetround_inline (int round)
 static inline void
 __fesetround_inline_nocheck (const int round)
 {
-  asm volatile ("mtfsfi 7,%0" : : "i" (round));
+#ifdef _ARCH_PWR9
+  __fe_mffscrn (round);
+#else
+  if (__glibc_likely (GLRO(dl_hwcap2) & PPC_FEATURE2_ARCH_3_00))
+    __fe_mffscrn (round);
+  else
+    asm volatile ("mtfsfi 7,%0" : : "i" (round));
+#endif
 }
 
 #define FPSCR_MASK(bit) (1 << (31 - (bit)))