PowerPC: Suppress unnecessary FPSCR write
Commit Message
This patch optimizes the FPSCR update on exception and rounding change
functions by just updating its value if new value if different from
current one. It also optimizes fedisableexcept and feenableexcept by
removing an unecessary FPSCR read.
Tested on powerpc32, powerpc64, and powerpc64le.
---
2014-04-28 Adhemerval Zanella <azanella@linux.vnet.ibm.com>
* sysdeps/powerpc/fpu/fclrexcpt.c (__feclearexcept): Do not update
FPSCR if value do not change.
* sysdeps/powerpc/fpu/fedisblxcpt.c (fedisableexcept): Likewise.
* sysdeps/powerpc/fpu/feenablxcpt.c (feenableexcept): Likewise.
* sysdeps/powerpc/fpu/feholdexcpt.c (feholdexcept): Likewise.
* sysdeps/powerpc/fpu/fesetenv.c (__fesetenv): Likewise.
* sysdeps/powerpc/fpu/fsetexcptflg.c (__fesetexceptflag): Likewise.
* sysdeps/powerpc/fpu/fenv_libc.h (fenv_reg_to_exceptions): New helper
function.
Comments
On Mon, Apr 28, 2014 at 06:39:53PM -0300, Adhemerval Zanella wrote:
> This patch optimizes the FPSCR update on exception and rounding change
> functions by just updating its value if new value if different from
> current one. It also optimizes fedisableexcept and feenableexcept by
> removing an unecessary FPSCR read.
>
> Tested on powerpc32, powerpc64, and powerpc64le.
>
> ---
>
> 2014-04-28 Adhemerval Zanella <azanella@linux.vnet.ibm.com>
>
> * sysdeps/powerpc/fpu/fclrexcpt.c (__feclearexcept): Do not update
> FPSCR if value do not change.
> * sysdeps/powerpc/fpu/fedisblxcpt.c (fedisableexcept): Likewise.
> * sysdeps/powerpc/fpu/feenablxcpt.c (feenableexcept): Likewise.
> * sysdeps/powerpc/fpu/feholdexcpt.c (feholdexcept): Likewise.
> * sysdeps/powerpc/fpu/fesetenv.c (__fesetenv): Likewise.
> * sysdeps/powerpc/fpu/fsetexcptflg.c (__fesetexceptflag): Likewise.
> * sysdeps/powerpc/fpu/fenv_libc.h (fenv_reg_to_exceptions): New helper
> function.
>
> diff --git a/sysdeps/powerpc/fpu/fclrexcpt.c b/sysdeps/powerpc/fpu/fclrexcpt.c
> index cda2810..4607f62 100644
> --- a/sysdeps/powerpc/fpu/fclrexcpt.c
> +++ b/sysdeps/powerpc/fpu/fclrexcpt.c
> @@ -22,17 +22,18 @@
> int
> __feclearexcept (int excepts)
> {
> - fenv_union_t u;
> + fenv_union_t u, n;
>
> /* Get the current state. */
> u.fenv = fegetenv_register ();
>
> /* Clear the relevant bits. */
> - u.l = u.l & ~((-(excepts >> (31 - FPSCR_VX) & 1) & FE_ALL_INVALID)
> + n.l = u.l & ~((-(excepts >> (31 - FPSCR_VX) & 1) & FE_ALL_INVALID)
> | (excepts & FPSCR_STICKY_BITS));
>
> /* Put the new state in effect. */
> - fesetenv_register (u.fenv);
> + if (u.l != n.l)
> + fesetenv_register (n.fenv);
>
> /* Success. */
> return 0;
> diff --git a/sysdeps/powerpc/fpu/fedisblxcpt.c b/sysdeps/powerpc/fpu/fedisblxcpt.c
> index 5883e09..94c01ab 100644
> --- a/sysdeps/powerpc/fpu/fedisblxcpt.c
> +++ b/sysdeps/powerpc/fpu/fedisblxcpt.c
> @@ -22,15 +22,17 @@
> int
> fedisableexcept (int excepts)
> {
> - fenv_union_t fe;
> - int result, new;
> + fenv_union_t fe, curr;
> + int result = 0, new;
result doesn't need to be initialized.
>
> - result = __fegetexcept ();
> + /* Get current exception mask to return. */
> + fe.fenv = curr.fenv = fegetenv_register ();
> + result = fenv_reg_to_exceptions (fe.l);
>
> if ((excepts & FE_ALL_INVALID) == FE_ALL_INVALID)
> excepts = (excepts | FE_INVALID) & ~ FE_ALL_INVALID;
>
> - fe.fenv = fegetenv_register ();
> + /* Sets the new exception mask. */
> if (excepts & FE_INEXACT)
> fe.l &= ~(1 << (31 - FPSCR_XE));
> if (excepts & FE_DIVBYZERO)
> @@ -41,7 +43,9 @@ fedisableexcept (int excepts)
> fe.l &= ~(1 << (31 - FPSCR_OE));
> if (excepts & FE_INVALID)
> fe.l &= ~(1 << (31 - FPSCR_VE));
> - fesetenv_register (fe.fenv);
> +
> + if (fe.l != curr.l)
> + fesetenv_register (fe.fenv);
>
> new = __fegetexcept ();
> if (new == 0 && result != 0)
> diff --git a/sysdeps/powerpc/fpu/feenablxcpt.c b/sysdeps/powerpc/fpu/feenablxcpt.c
> index 8ad0f97..01a68cf 100644
> --- a/sysdeps/powerpc/fpu/feenablxcpt.c
> +++ b/sysdeps/powerpc/fpu/feenablxcpt.c
> @@ -22,15 +22,17 @@
> int
> feenableexcept (int excepts)
> {
> - fenv_union_t fe;
> - int result, new;
> + fenv_union_t fe, curr;
> + int result = 0, new;
>
Likewise.
> - result = __fegetexcept ();
> + /* Get current exception mask to return. */
> + fe.fenv = curr.fenv = fegetenv_register ();
> + result = fenv_reg_to_exceptions (fe.l);
>
> if ((excepts & FE_ALL_INVALID) == FE_ALL_INVALID)
> excepts = (excepts | FE_INVALID) & ~ FE_ALL_INVALID;
>
> - fe.fenv = fegetenv_register ();
> + /* Sets the new exception mask. */
> if (excepts & FE_INEXACT)
> fe.l |= (1 << (31 - FPSCR_XE));
> if (excepts & FE_DIVBYZERO)
> @@ -41,7 +43,9 @@ feenableexcept (int excepts)
> fe.l |= (1 << (31 - FPSCR_OE));
> if (excepts & FE_INVALID)
> fe.l |= (1 << (31 - FPSCR_VE));
> - fesetenv_register (fe.fenv);
> +
> + if (fe.l != curr.l)
> + fesetenv_register (fe.fenv);
>
> new = __fegetexcept ();
> if (new != 0 && result == 0)
> diff --git a/sysdeps/powerpc/fpu/feholdexcpt.c b/sysdeps/powerpc/fpu/feholdexcpt.c
> index 1375a2f..764dd38 100644
> --- a/sysdeps/powerpc/fpu/feholdexcpt.c
> +++ b/sysdeps/powerpc/fpu/feholdexcpt.c
> @@ -32,6 +32,9 @@ feholdexcept (fenv_t *envp)
> flag. */
> new.l = old.l & 0xffffffff00000007LL;
>
> + if (new.l == old.l)
> + return 0;
> +
> /* If the old env had any enabled exceptions, then mask SIGFPE in the
> MSR FE0/FE1 bits. This may allow the FPU to run faster because it
> always takes the default action and can not generate SIGFPE. */
> diff --git a/sysdeps/powerpc/fpu/fenv_libc.h b/sysdeps/powerpc/fpu/fenv_libc.h
> index 28e4d1f..c1df5ce 100644
> --- a/sysdeps/powerpc/fpu/fenv_libc.h
> +++ b/sysdeps/powerpc/fpu/fenv_libc.h
> @@ -146,6 +146,23 @@ enum {
> /* the remaining two least-significant bits keep the rounding mode */
> };
>
> +static inline int
> +fenv_reg_to_exceptions (unsigned long long l)
> +{
> + int result = 0;
> + if (l & (1 << (31 - FPSCR_XE)))
> + result |= FE_INEXACT;
> + if (l & (1 << (31 - FPSCR_ZE)))
> + result |= FE_DIVBYZERO;
> + if (l & (1 << (31 - FPSCR_UE)))
> + result |= FE_UNDERFLOW;
> + if (l & (1 << (31 - FPSCR_OE)))
> + result |= FE_OVERFLOW;
> + if (l & (1 << (31 - FPSCR_VE)))
> + result |= FE_INVALID;
> + return result;
> +}
> +
> #ifdef _ARCH_PWR6
> /* Not supported in ISA 2.05. Provided for source compat only. */
> # define FPSCR_NI 29
> diff --git a/sysdeps/powerpc/fpu/fesetenv.c b/sysdeps/powerpc/fpu/fesetenv.c
> index fa99ddb..138bde0 100644
> --- a/sysdeps/powerpc/fpu/fesetenv.c
> +++ b/sysdeps/powerpc/fpu/fesetenv.c
> @@ -29,6 +29,8 @@ __fesetenv (const fenv_t *envp)
> /* get the currently set exceptions. */
> new.fenv = *envp;
> old.fenv = fegetenv_register ();
> + if (old.l == new.l)
> + return 0;
>
> /* If the old env has no enabled exceptions and the new env has any enabled
> exceptions, then unmask SIGFPE in the MSR FE0/FE1 bits. This will put the
> diff --git a/sysdeps/powerpc/fpu/fsetexcptflg.c b/sysdeps/powerpc/fpu/fsetexcptflg.c
> index 95193c1..cee6682 100644
> --- a/sysdeps/powerpc/fpu/fsetexcptflg.c
> +++ b/sysdeps/powerpc/fpu/fsetexcptflg.c
> @@ -21,7 +21,7 @@
> int
> __fesetexceptflag (const fexcept_t *flagp, int excepts)
> {
> - fenv_union_t u;
> + fenv_union_t u, n;
> fexcept_t flag;
>
> /* Get the current state. */
> @@ -31,7 +31,7 @@ __fesetexceptflag (const fexcept_t *flagp, int excepts)
> flag = *flagp & excepts;
>
> /* Replace the exception status */
> - u.l = ((u.l & ~(FPSCR_STICKY_BITS & excepts))
> + n.l = ((u.l & ~(FPSCR_STICKY_BITS & excepts))
> | (flag & FPSCR_STICKY_BITS)
> | (flag >> ((31 - FPSCR_VX) - (31 - FPSCR_VXSOFT))
> & FE_INVALID_SOFTWARE));
> @@ -39,7 +39,8 @@ __fesetexceptflag (const fexcept_t *flagp, int excepts)
> /* Store the new status word (along with the rest of the environment).
> This may cause floating-point exceptions if the restored state
> requests it. */
> - fesetenv_register (u.fenv);
> + if (n.l != u.l)
> + fesetenv_register (u.fenv);
>
> /* Deal with FE_INVALID_SOFTWARE not being implemented on some chips. */
> if (flag & FE_INVALID)
>
Looks good to me with those changes.
Siddhesh
On 29-04-2014 01:07, Siddhesh Poyarekar wrote:
> On Mon, Apr 28, 2014 at 06:39:53PM -0300, Adhemerval Zanella wrote:
>> This patch optimizes the FPSCR update on exception and rounding change
>> functions by just updating its value if new value if different from
>> current one. It also optimizes fedisableexcept and feenableexcept by
>> removing an unecessary FPSCR read.
>>
>> Tested on powerpc32, powerpc64, and powerpc64le.
> Looks good to me with those changes.
>
> Siddhesh
Thanks for the review, I have pushed as 18f2945ae9216cfcd53a162080a73e3d719de9e6
(I screw up and ending add dc041bd4dbaf0f22b962647e0d0a1b8090d0e679 as well because
forgot to rebase...).
@@ -22,17 +22,18 @@
int
__feclearexcept (int excepts)
{
- fenv_union_t u;
+ fenv_union_t u, n;
/* Get the current state. */
u.fenv = fegetenv_register ();
/* Clear the relevant bits. */
- u.l = u.l & ~((-(excepts >> (31 - FPSCR_VX) & 1) & FE_ALL_INVALID)
+ n.l = u.l & ~((-(excepts >> (31 - FPSCR_VX) & 1) & FE_ALL_INVALID)
| (excepts & FPSCR_STICKY_BITS));
/* Put the new state in effect. */
- fesetenv_register (u.fenv);
+ if (u.l != n.l)
+ fesetenv_register (n.fenv);
/* Success. */
return 0;
@@ -22,15 +22,17 @@
int
fedisableexcept (int excepts)
{
- fenv_union_t fe;
- int result, new;
+ fenv_union_t fe, curr;
+ int result = 0, new;
- result = __fegetexcept ();
+ /* Get current exception mask to return. */
+ fe.fenv = curr.fenv = fegetenv_register ();
+ result = fenv_reg_to_exceptions (fe.l);
if ((excepts & FE_ALL_INVALID) == FE_ALL_INVALID)
excepts = (excepts | FE_INVALID) & ~ FE_ALL_INVALID;
- fe.fenv = fegetenv_register ();
+ /* Sets the new exception mask. */
if (excepts & FE_INEXACT)
fe.l &= ~(1 << (31 - FPSCR_XE));
if (excepts & FE_DIVBYZERO)
@@ -41,7 +43,9 @@ fedisableexcept (int excepts)
fe.l &= ~(1 << (31 - FPSCR_OE));
if (excepts & FE_INVALID)
fe.l &= ~(1 << (31 - FPSCR_VE));
- fesetenv_register (fe.fenv);
+
+ if (fe.l != curr.l)
+ fesetenv_register (fe.fenv);
new = __fegetexcept ();
if (new == 0 && result != 0)
@@ -22,15 +22,17 @@
int
feenableexcept (int excepts)
{
- fenv_union_t fe;
- int result, new;
+ fenv_union_t fe, curr;
+ int result = 0, new;
- result = __fegetexcept ();
+ /* Get current exception mask to return. */
+ fe.fenv = curr.fenv = fegetenv_register ();
+ result = fenv_reg_to_exceptions (fe.l);
if ((excepts & FE_ALL_INVALID) == FE_ALL_INVALID)
excepts = (excepts | FE_INVALID) & ~ FE_ALL_INVALID;
- fe.fenv = fegetenv_register ();
+ /* Sets the new exception mask. */
if (excepts & FE_INEXACT)
fe.l |= (1 << (31 - FPSCR_XE));
if (excepts & FE_DIVBYZERO)
@@ -41,7 +43,9 @@ feenableexcept (int excepts)
fe.l |= (1 << (31 - FPSCR_OE));
if (excepts & FE_INVALID)
fe.l |= (1 << (31 - FPSCR_VE));
- fesetenv_register (fe.fenv);
+
+ if (fe.l != curr.l)
+ fesetenv_register (fe.fenv);
new = __fegetexcept ();
if (new != 0 && result == 0)
@@ -32,6 +32,9 @@ feholdexcept (fenv_t *envp)
flag. */
new.l = old.l & 0xffffffff00000007LL;
+ if (new.l == old.l)
+ return 0;
+
/* If the old env had any enabled exceptions, then mask SIGFPE in the
MSR FE0/FE1 bits. This may allow the FPU to run faster because it
always takes the default action and can not generate SIGFPE. */
@@ -146,6 +146,23 @@ enum {
/* the remaining two least-significant bits keep the rounding mode */
};
+static inline int
+fenv_reg_to_exceptions (unsigned long long l)
+{
+ int result = 0;
+ if (l & (1 << (31 - FPSCR_XE)))
+ result |= FE_INEXACT;
+ if (l & (1 << (31 - FPSCR_ZE)))
+ result |= FE_DIVBYZERO;
+ if (l & (1 << (31 - FPSCR_UE)))
+ result |= FE_UNDERFLOW;
+ if (l & (1 << (31 - FPSCR_OE)))
+ result |= FE_OVERFLOW;
+ if (l & (1 << (31 - FPSCR_VE)))
+ result |= FE_INVALID;
+ return result;
+}
+
#ifdef _ARCH_PWR6
/* Not supported in ISA 2.05. Provided for source compat only. */
# define FPSCR_NI 29
@@ -29,6 +29,8 @@ __fesetenv (const fenv_t *envp)
/* get the currently set exceptions. */
new.fenv = *envp;
old.fenv = fegetenv_register ();
+ if (old.l == new.l)
+ return 0;
/* If the old env has no enabled exceptions and the new env has any enabled
exceptions, then unmask SIGFPE in the MSR FE0/FE1 bits. This will put the
@@ -21,7 +21,7 @@
int
__fesetexceptflag (const fexcept_t *flagp, int excepts)
{
- fenv_union_t u;
+ fenv_union_t u, n;
fexcept_t flag;
/* Get the current state. */
@@ -31,7 +31,7 @@ __fesetexceptflag (const fexcept_t *flagp, int excepts)
flag = *flagp & excepts;
/* Replace the exception status */
- u.l = ((u.l & ~(FPSCR_STICKY_BITS & excepts))
+ n.l = ((u.l & ~(FPSCR_STICKY_BITS & excepts))
| (flag & FPSCR_STICKY_BITS)
| (flag >> ((31 - FPSCR_VX) - (31 - FPSCR_VXSOFT))
& FE_INVALID_SOFTWARE));
@@ -39,7 +39,8 @@ __fesetexceptflag (const fexcept_t *flagp, int excepts)
/* Store the new status word (along with the rest of the environment).
This may cause floating-point exceptions if the restored state
requests it. */
- fesetenv_register (u.fenv);
+ if (n.l != u.l)
+ fesetenv_register (u.fenv);
/* Deal with FE_INVALID_SOFTWARE not being implemented on some chips. */
if (flag & FE_INVALID)