[12/13] AArch64: Cleanup fenv implementation

Message ID 001601cfeee7$da6bd5f0$8f4381d0$@com
State Committed
Headers

Commit Message

Wilco Dijkstra Oct. 23, 2014, 5:36 p.m. UTC
  Improve fesetenv performance by avoiding unnecessary FPSR/FPCR reads/writes.
It uses the same logic as the ARM version. The common case removes one FPSR
read and one FPCR read. For FE_DFL_ENV and FE_NOMASK_ENV, an FPCR read is
avoided when the FPCR does not change.

ChangeLog: 
2014-10-23  Wilco Dijkstra  <wdijkstr@arm.com>

	* sysdeps/aarch64/fpu/fesetenv.c (fesetenv):
	Optimize to reduce FPCR/FPSR accesses.

---
 sysdeps/aarch64/fpu/fesetenv.c | 40 +++++++++++++++++++++++-----------------
 1 file changed, 23 insertions(+), 17 deletions(-)
  

Comments

Marcus Shawcroft Dec. 18, 2014, 4:27 p.m. UTC | #1
On 23 October 2014 at 18:36, Wilco Dijkstra <wdijkstr@arm.com> wrote:
> Improve fesetenv performance by avoiding unnecessary FPSR/FPCR reads/writes.
> It uses the same logic as the ARM version. The common case removes 1 FPSR
> and 1 FPCR read. For FE_DFL_ENV and FE_NOMASK_ENV a FPCR read is avoided in
> case the FPCR does not change.
>
> ChangeLog:
> 2014-10-23  Wilco Dijkstra  <wdijkstr@arm.com>
>
>         * sysdeps/aarch64/fpu/fesetenv.c (fesetenv):
>         Optimize to reduce FPCR/FPSR accesses.
OK. Marcus
  

Patch

diff --git a/sysdeps/aarch64/fpu/fesetenv.c b/sysdeps/aarch64/fpu/fesetenv.c
index c19680d..f71014d 100644
--- a/sysdeps/aarch64/fpu/fesetenv.c
+++ b/sysdeps/aarch64/fpu/fesetenv.c
@@ -29,8 +29,20 @@  fesetenv (const fenv_t *envp)
   fpu_fpsr_t fpsr_new;
 
   _FPU_GETCW (fpcr);
-  _FPU_GETFPSR (fpsr);
 
+  if ((envp != FE_DFL_ENV) && (envp != FE_NOMASK_ENV))
+    {
+      /* The new FPCR/FPSR are valid, so don't merge the reserved flags.  */
+      fpcr_new = envp->__fpcr;
+
+      if (fpcr != fpcr_new)
+	_FPU_SETCW (fpcr_new);
+
+      _FPU_SETFPSR (envp->__fpsr);
+      return 0;
+    }
+
+  _FPU_GETFPSR (fpsr);
   fpcr_new = fpcr & _FPU_RESERVED;
   fpsr_new = fpsr & _FPU_FPSR_RESERVED;
 
@@ -39,31 +51,25 @@  fesetenv (const fenv_t *envp)
       fpcr_new |= _FPU_DEFAULT;
       fpsr_new |= _FPU_FPSR_DEFAULT;
     }
-  else if (envp == FE_NOMASK_ENV)
+  else
     {
       fpcr_new |= _FPU_FPCR_IEEE;
       fpsr_new |= _FPU_FPSR_IEEE;
     }
-  else
-    {
-      fpcr_new |= envp->__fpcr & ~_FPU_RESERVED;
-      fpsr_new |= envp->__fpsr & ~_FPU_FPSR_RESERVED;
-    }
 
-  if (fpsr != fpsr_new)
-    _FPU_SETFPSR (fpsr_new);
+  _FPU_SETFPSR (fpsr_new);
 
   if (fpcr != fpcr_new)
-    _FPU_SETCW (fpcr_new);
+    {
+      _FPU_SETCW (fpcr_new);
 
-  /* Trapping exceptions are optional in AArch64 the relevant enable
-     bits in FPCR are RES0 hence the absence of support can be
-     detected by reading back the FPCR and comparing with the required
-     value.  */
+      /* Trapping exceptions are optional in AArch64; the relevant enable
+	 bits in FPCR are RES0 hence the absence of support can be detected
+	 by reading back the FPCR and comparing with the required value.  */
+      _FPU_GETCW (updated_fpcr);
 
-  _FPU_GETCW (updated_fpcr);
-  if ((updated_fpcr & fpcr_new) != fpcr_new)
-    return 1;
+      return fpcr_new & ~updated_fpcr;
+    }
 
   return 0;
 }