[12/13] AArch64: Cleanup fenv implementation
Commit Message
Improve fesetenv performance by avoiding unnecessary FPSR/FPCR reads/writes.
It uses the same logic as the ARM version. In the common case this removes one
FPSR read and one FPCR read. For FE_DFL_ENV and FE_NOMASK_ENV, an FPCR read is
avoided when the FPCR does not change.
ChangeLog:
2014-10-23 Wilco Dijkstra <wdijkstr@arm.com>
* sysdeps/aarch64/fpu/fesetenv.c (fesetenv):
Optimize to reduce FPCR/FPSR accesses.
---
sysdeps/aarch64/fpu/fesetenv.c | 40 +++++++++++++++++++++++-----------------
1 file changed, 23 insertions(+), 17 deletions(-)
Comments
On 23 October 2014 at 18:36, Wilco Dijkstra <wdijkstr@arm.com> wrote:
> Improve fesetenv performance by avoiding unnecessary FPSR/FPCR reads/writes.
> It uses the same logic as the ARM version. The common case removes 1 FPSR
> and 1 FPCR read. For FE_DFL_ENV and FE_NOMASK_ENV a FPCR read is avoided in
> case the FPCR does not change.
>
> ChangeLog:
> 2014-10-23 Wilco Dijkstra <wdijkstr@arm.com>
>
> * sysdeps/aarch64/fpu/fesetenv.c (fesetenv):
> Optimize to reduce FPCR/FPSR accesses.
OK. Marcus
@@ -29,8 +29,20 @@ fesetenv (const fenv_t *envp)
fpu_fpsr_t fpsr_new;
_FPU_GETCW (fpcr);
- _FPU_GETFPSR (fpsr);
+ if ((envp != FE_DFL_ENV) && (envp != FE_NOMASK_ENV))
+ {
+ /* The new FPCR/FPSR are valid, so don't merge the reserved flags. */
+ fpcr_new = envp->__fpcr;
+
+ if (fpcr != fpcr_new)
+ _FPU_SETCW (fpcr_new);
+
+ _FPU_SETFPSR (envp->__fpsr);
+ return 0;
+ }
+
+ _FPU_GETFPSR (fpsr);
fpcr_new = fpcr & _FPU_RESERVED;
fpsr_new = fpsr & _FPU_FPSR_RESERVED;
@@ -39,31 +51,25 @@ fesetenv (const fenv_t *envp)
fpcr_new |= _FPU_DEFAULT;
fpsr_new |= _FPU_FPSR_DEFAULT;
}
- else if (envp == FE_NOMASK_ENV)
+ else
{
fpcr_new |= _FPU_FPCR_IEEE;
fpsr_new |= _FPU_FPSR_IEEE;
}
- else
- {
- fpcr_new |= envp->__fpcr & ~_FPU_RESERVED;
- fpsr_new |= envp->__fpsr & ~_FPU_FPSR_RESERVED;
- }
- if (fpsr != fpsr_new)
- _FPU_SETFPSR (fpsr_new);
+ _FPU_SETFPSR (fpsr_new);
if (fpcr != fpcr_new)
- _FPU_SETCW (fpcr_new);
+ {
+ _FPU_SETCW (fpcr_new);
- /* Trapping exceptions are optional in AArch64 the relevant enable
- bits in FPCR are RES0 hence the absence of support can be
- detected by reading back the FPCR and comparing with the required
- value. */
+ /* Trapping exceptions are optional in AArch64; the relevant enable
+ bits in FPCR are RES0 hence the absence of support can be detected
+ by reading back the FPCR and comparing with the required value. */
+ _FPU_GETCW (updated_fpcr);
- _FPU_GETCW (updated_fpcr);
- if ((updated_fpcr & fpcr_new) != fpcr_new)
- return 1;
+ return fpcr_new & ~updated_fpcr;
+ }
return 0;
}