[AArch64] Suppress unnecessary FPSR and FPCR writes

Message ID 20140423235146.GA20028@e104535-lin.arm.com
State Committed
Headers

Commit Message

Ian Bolton April 23, 2014, 11:51 p.m. UTC
  Writes to fpsr and fpcr are expensive.  This patch adds a check before each
write so that a register is only written when the newly computed value differs
from the value just read from it (as opposed to rewriting what's there).

Tested with make check and make bench, with no regressions.

OK for commit?

Cheers,
Ian


2014-04-24  Ian Bolton  <ian.bolton@arm.com>

	* sysdeps/aarch64/fpu/fclrexcpt.c (feclearexcept): Don't write to
	fpsr if value didn't change.
	* sysdeps/aarch64/fpu/fedisblxcpt.c (fedisableexcept): Don't write
	to fpcr if value didn't change.
	* sysdeps/aarch64/fpu/feenablxcpt.c (feenableexcept): Likewise.
	* sysdeps/aarch64/fpu/feholdexcpt.c (feholdexcept): Don't write to
	fpsr or fpcr if value didn't change.
	* sysdeps/aarch64/fpu/fesetenv.c (fesetenv): Likewise.
	* sysdeps/aarch64/fpu/fesetround.c (fesetround): Don't write to
	fpcr if value didn't change.
	* sysdeps/aarch64/fpu/fsetexcptflg.c (fesetexceptflag): Don't write
	to fpsr if value didn't change.
  

Comments

Marcus Shawcroft April 24, 2014, 6:28 a.m. UTC | #1
On 24 April 2014 00:51, Ian Bolton <ian.bolton@arm.com> wrote:
> Writes to fpsr and fpcr are expensive.  This patch adds checks before each
> write to ensure we only write new values to them (as opposed to rewriting
> what's there).
>
> Tested with make check and make bench, with no regressions.
>
> OK for commit?
>
> Cheers,
> Ian
>
>
> 2014-04-24  Ian Bolton  <ian.bolton@arm.com>
>
>         * sysdeps/aarch64/fpu/fclrexcpt.c (feclearexcept): Don't write to
>         fpsr if value didn't change.
>         * sysdeps/aarch64/fpu/fedisblxcpt.c (fedisableexcept): Don't write
>         to fpcr if value didn't change.
>         * sysdeps/aarch64/fpu/feenablxcpt.c (feenableexcept): Likewise.
>         * sysdeps/aarch64/fpu/feholdexcpt.c (feholdexcept): Don't write to
>         fpsr or fpcr if value didn't change.
>         * sysdeps/aarch64/fpu/fesetenv.c (fesetenv): Likewise.
>         * sysdeps/aarch64/fpu/fesetround.c (fesetround): Don't write to
>         fpcr if value didn't change.
>         * sysdeps/aarch64/fpu/fsetexcptflg.c (fesetexceptflag): Don't write
>         to fpsr if value didn't change.

OK, and committed.
/Marcus
  

Patch

diff --git a/sysdeps/aarch64/fpu/fclrexcpt.c b/sysdeps/aarch64/fpu/fclrexcpt.c
index 531269f..1f05259 100644
--- a/sysdeps/aarch64/fpu/fclrexcpt.c
+++ b/sysdeps/aarch64/fpu/fclrexcpt.c
@@ -22,14 +22,15 @@ 
 int
 feclearexcept (int excepts)
 {
-  fpu_fpsr_t fpsr;
+  fpu_fpsr_t fpsr, fpsr_new;
 
   excepts &= FE_ALL_EXCEPT;
 
   _FPU_GETFPSR (fpsr);
-  fpsr = (fpsr & ~FE_ALL_EXCEPT) | (fpsr & FE_ALL_EXCEPT & ~excepts);
+  fpsr_new = (fpsr & ~FE_ALL_EXCEPT) | (fpsr & FE_ALL_EXCEPT & ~excepts);
 
-  _FPU_SETFPSR (fpsr);
+  if (fpsr != fpsr_new)
+    _FPU_SETFPSR (fpsr_new);
 
   return 0;
 }
diff --git a/sysdeps/aarch64/fpu/fedisblxcpt.c b/sysdeps/aarch64/fpu/fedisblxcpt.c
index 719d52f..67e0aea 100644
--- a/sysdeps/aarch64/fpu/fedisblxcpt.c
+++ b/sysdeps/aarch64/fpu/fedisblxcpt.c
@@ -22,7 +22,7 @@ 
 int
 fedisableexcept (int excepts)
 {
-  fpu_control_t fpcr;
+  fpu_control_t fpcr, fpcr_new;
   int original_excepts;
 
   _FPU_GETCW (fpcr);
@@ -31,9 +31,10 @@  fedisableexcept (int excepts)
 
   excepts &= FE_ALL_EXCEPT;
 
-  fpcr &= ~(excepts << FE_EXCEPT_SHIFT);
+  fpcr_new = fpcr & ~(excepts << FE_EXCEPT_SHIFT);
 
-  _FPU_SETCW (fpcr);
+  if (fpcr != fpcr_new)
+    _FPU_SETCW (fpcr_new);
 
   return original_excepts;
 }
diff --git a/sysdeps/aarch64/fpu/feenablxcpt.c b/sysdeps/aarch64/fpu/feenablxcpt.c
index 07a4bbb..58288be 100644
--- a/sysdeps/aarch64/fpu/feenablxcpt.c
+++ b/sysdeps/aarch64/fpu/feenablxcpt.c
@@ -22,7 +22,7 @@ 
 int
 feenableexcept (int excepts)
 {
-  fpu_control_t fpcr;
+  fpu_control_t fpcr, fpcr_new;
   int original_excepts;
 
   _FPU_GETCW (fpcr);
@@ -31,9 +31,10 @@  feenableexcept (int excepts)
 
   excepts &= FE_ALL_EXCEPT;
 
-  fpcr |= (excepts << FE_EXCEPT_SHIFT);
+  fpcr_new = fpcr | (excepts << FE_EXCEPT_SHIFT);
 
-  _FPU_SETCW (fpcr);
+  if (fpcr != fpcr_new)
+    _FPU_SETCW (fpcr_new);
 
   /* Trapping exceptions are optional in AArch64 the relevant enable
      bits in FPCR are RES0 hence the absence of support can be
diff --git a/sysdeps/aarch64/fpu/feholdexcpt.c b/sysdeps/aarch64/fpu/feholdexcpt.c
index 0514ac1..639b61e 100644
--- a/sysdeps/aarch64/fpu/feholdexcpt.c
+++ b/sysdeps/aarch64/fpu/feholdexcpt.c
@@ -22,8 +22,8 @@ 
 int
 feholdexcept (fenv_t *envp)
 {
-  fpu_fpsr_t fpsr;
-  fpu_control_t fpcr;
+  fpu_fpsr_t fpsr, fpsr_new;
+  fpu_control_t fpcr, fpcr_new;
 
   _FPU_GETCW (fpcr);
   envp->__fpcr = fpcr;
@@ -32,14 +32,16 @@  feholdexcept (fenv_t *envp)
   envp->__fpsr = fpsr;
 
   /* Now set all exceptions to non-stop.  */
-  fpcr &= ~(FE_ALL_EXCEPT << FE_EXCEPT_SHIFT);
+  fpcr_new = fpcr & ~(FE_ALL_EXCEPT << FE_EXCEPT_SHIFT);
 
   /* And clear all exception flags.  */
-  fpsr &= ~FE_ALL_EXCEPT;
+  fpsr_new = fpsr & ~FE_ALL_EXCEPT;
 
-  _FPU_SETFPSR (fpsr);
+  if (fpsr != fpsr_new)
+    _FPU_SETFPSR (fpsr_new);
 
-  _FPU_SETCW (fpcr);
+  if (fpcr != fpcr_new)
+    _FPU_SETCW (fpcr_new);
 
   return 0;
 }
diff --git a/sysdeps/aarch64/fpu/fesetenv.c b/sysdeps/aarch64/fpu/fesetenv.c
index a2434e3..8650666 100644
--- a/sysdeps/aarch64/fpu/fesetenv.c
+++ b/sysdeps/aarch64/fpu/fesetenv.c
@@ -22,35 +22,37 @@ 
 int
 fesetenv (const fenv_t *envp)
 {
-  fpu_control_t fpcr;
-  fpu_fpsr_t fpsr;
+  fpu_control_t fpcr, fpcr_new;
+  fpu_fpsr_t fpsr, fpsr_new;
   fpu_control_t updated_fpcr;
 
   _FPU_GETCW (fpcr);
   _FPU_GETFPSR (fpsr);
 
-  fpcr &= _FPU_RESERVED;
-  fpsr &= _FPU_FPSR_RESERVED;
+  fpcr_new = fpcr & _FPU_RESERVED;
+  fpsr_new = fpsr & _FPU_FPSR_RESERVED;
 
   if (envp == FE_DFL_ENV)
     {
-      fpcr |= _FPU_DEFAULT;
-      fpsr |= _FPU_FPSR_DEFAULT;
+      fpcr_new |= _FPU_DEFAULT;
+      fpsr_new |= _FPU_FPSR_DEFAULT;
     }
   else if (envp == FE_NOMASK_ENV)
     {
-      fpcr |= _FPU_FPCR_IEEE;
-      fpsr |= _FPU_FPSR_IEEE;
+      fpcr_new |= _FPU_FPCR_IEEE;
+      fpsr_new |= _FPU_FPSR_IEEE;
     }
   else
     {
-      fpcr |= envp->__fpcr & ~_FPU_RESERVED;
-      fpsr |= envp->__fpsr & ~_FPU_FPSR_RESERVED;
+      fpcr_new |= envp->__fpcr & ~_FPU_RESERVED;
+      fpsr_new |= envp->__fpsr & ~_FPU_FPSR_RESERVED;
     }
 
-  _FPU_SETFPSR (fpsr);
+  if (fpsr != fpsr_new)
+    _FPU_SETFPSR (fpsr_new);
 
-  _FPU_SETCW (fpcr);
+  if (fpcr != fpcr_new)
+    _FPU_SETCW (fpcr_new);
 
   /* Trapping exceptions are optional in AArch64 the relevant enable
      bits in FPCR are RES0 hence the absence of support can be
@@ -58,7 +60,7 @@  fesetenv (const fenv_t *envp)
      value.  */
 
   _FPU_GETCW (updated_fpcr);
-  if ((updated_fpcr & fpcr) != fpcr)
+  if ((updated_fpcr & fpcr_new) != fpcr_new)
     return 1;
 
   return 0;
diff --git a/sysdeps/aarch64/fpu/fesetround.c b/sysdeps/aarch64/fpu/fesetround.c
index 40a05f6..ccfb8f4 100644
--- a/sysdeps/aarch64/fpu/fesetround.c
+++ b/sysdeps/aarch64/fpu/fesetround.c
@@ -22,7 +22,7 @@ 
 int
 fesetround (int round)
 {
-  fpu_control_t fpcr;
+  fpu_control_t fpcr, fpcr_new;
 
   switch (round)
     {
@@ -31,9 +31,10 @@  fesetround (int round)
     case FE_DOWNWARD:
     case FE_TOWARDZERO:
       _FPU_GETCW (fpcr);
-      fpcr = (fpcr & ~FE_TOWARDZERO) | round;
+      fpcr_new = (fpcr & ~FE_TOWARDZERO) | round;
 
-      _FPU_SETCW (fpcr);
+      if (fpcr != fpcr_new)
+	_FPU_SETCW (fpcr_new);
       return 0;
 
     default:
diff --git a/sysdeps/aarch64/fpu/fsetexcptflg.c b/sysdeps/aarch64/fpu/fsetexcptflg.c
index 49cd1e4..3e9a700 100644
--- a/sysdeps/aarch64/fpu/fsetexcptflg.c
+++ b/sysdeps/aarch64/fpu/fsetexcptflg.c
@@ -23,17 +23,18 @@ 
 int
 fesetexceptflag (const fexcept_t *flagp, int excepts)
 {
-  fpu_fpsr_t fpsr;
+  fpu_fpsr_t fpsr, fpsr_new;
 
   /* Get the current environment.  */
   _FPU_GETFPSR (fpsr);
 
   /* Set the desired exception mask.  */
-  fpsr &= ~(excepts & FE_ALL_EXCEPT);
-  fpsr |= (*flagp & excepts & FE_ALL_EXCEPT);
+  fpsr_new = fpsr & ~(excepts & FE_ALL_EXCEPT);
+  fpsr_new |= (*flagp & excepts & FE_ALL_EXCEPT);
 
   /* Save state back to the FPU.  */
-  _FPU_SETFPSR (fpsr);
+  if (fpsr != fpsr_new)
+    _FPU_SETFPSR (fpsr_new);
 
   return 0;
 }