Aarch64 - Rewrite feupdateenv

Message ID 000501cf7698$b53ffd50$1fbff7f0$@com
State Committed
Headers

Commit Message

Wilco Dijkstra May 23, 2014, 3:07 p.m. UTC
  Hi,

This patch rewrites feupdateenv to improve performance by avoiding unnecessary FPCR/FPSR reads/writes
and to fix bug 16918 (https://sourceware.org/bugzilla/show_bug.cgi?id=16918).

OK?

Wilco

ChangeLog:
2014-05-23  Wilco  <wdijkstr@arm.com>

	* sysdeps/aarch64/fpu/feupdateenv.c (feupdateenv):
	Rewrite to reduce FPCR/FPSR accesses and fix bug 16918.
---
 sysdeps/aarch64/fpu/feupdateenv.c | 59 +++++++++++++++++++++++++++++++++++----
 1 file changed, 54 insertions(+), 5 deletions(-)
  

Comments

Marcus Shawcroft June 2, 2014, 11:38 a.m. UTC | #1
On 23 May 2014 16:07, Wilco <wdijkstr@arm.com> wrote:
> Hi,
>
> This patch rewrites feupdateenv to improve performance by avoiding unnecessary FPSCR reads/writes
> and to fix bug 16918 (https://sourceware.org/bugzilla/show_bug.cgi?id=16918).
>
> OK?
>
> Wilco
>
> ChangeLog:
> 2014-05-23  Wilco  <wdijkstr@arm.com>
>
>         * sysdeps/aarch64/fpu/feupdateenv (feupdateenv):
>         Rewrite to reduce FPCR/FPSR accesses and fix bug 16918.

BZ16918 is specific to the ARM implementation; this patch fixes the
AArch64 implementation, so I created 17009 instead.  The patch looks OK.
Committed and NEWS updated w.r.t. 17009.

Cheers
/Marcus
  

Patch

diff --git a/sysdeps/aarch64/fpu/feupdateenv.c b/sysdeps/aarch64/fpu/feupdateenv.c
index 6d64a9b..ac2f6fe 100644
--- a/sysdeps/aarch64/fpu/feupdateenv.c
+++ b/sysdeps/aarch64/fpu/feupdateenv.c
@@ -22,16 +22,65 @@ 
 int
 feupdateenv (const fenv_t *envp)
 {
+  fpu_control_t fpcr;
+  fpu_control_t fpcr_new;
+  fpu_control_t updated_fpcr;
   fpu_fpsr_t fpsr;
+  fpu_fpsr_t fpsr_new;
+  int excepts;
 
-  /* Get the current exception state.  */
+  _FPU_GETCW (fpcr);
   _FPU_GETFPSR (fpsr);
+  excepts = fpsr & FE_ALL_EXCEPT;
 
-  /* Install new environment.  */
-  fesetenv (envp);
+  if ((envp != FE_DFL_ENV) && (envp != FE_NOMASK_ENV))
+    {
+      fpcr_new = envp->__fpcr;
+      fpsr_new = envp->__fpsr | excepts;
 
-  /* Raise the saved exceptions.  */
-  feraiseexcept (fpsr & FE_ALL_EXCEPT);
+      if (fpcr != fpcr_new)
+        _FPU_SETCW (fpcr_new);
+
+      if (fpsr != fpsr_new)
+        _FPU_SETFPSR (fpsr_new);
+
+      if (excepts & (fpcr_new >> FE_EXCEPT_SHIFT))
+        return feraiseexcept (excepts);
+
+      return 0;
+    }
+
+  fpcr_new = fpcr & _FPU_RESERVED;
+  fpsr_new = fpsr & (_FPU_FPSR_RESERVED | FE_ALL_EXCEPT);
+
+  if (envp == FE_DFL_ENV)
+    {
+      fpcr_new |= _FPU_DEFAULT;
+      fpsr_new |= _FPU_FPSR_DEFAULT;
+    }
+  else
+    {
+      fpcr_new |= _FPU_FPCR_IEEE;
+      fpsr_new |= _FPU_FPSR_IEEE;
+    }
+
+  _FPU_SETFPSR (fpsr_new);
+
+  if (fpcr != fpcr_new)
+    {
+      _FPU_SETCW (fpcr_new);
+
+      /* Trapping exceptions are optional in AArch64; the relevant enable
+	 bits in FPCR are RES0 hence the absence of support can be detected
+	 by reading back the FPCR and comparing with the required value.  */
+      _FPU_GETCW (updated_fpcr);
+
+      if (fpcr_new & ~updated_fpcr)
+        return 1;
+    }
+
+  if (excepts & (fpcr_new >> FE_EXCEPT_SHIFT))
+    return feraiseexcept (excepts);
 
   return 0;
 }