[AArch64] Suppress unnecessary FPSR and FPCR writes
Commit Message
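Writes to fpsr and fpcr are expensive. This patch adds a check before each
write that compares the newly computed value with the value just read from
the register, so that we only write when the value actually changes (as
opposed to rewriting what's already there).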
Tested with make check and make bench, with no regressions.
OK for commit?
Cheers,
Ian
2014-04-24 Ian Bolton <ian.bolton@arm.com>
* sysdeps/aarch64/fpu/fclrexcpt.c (feclearexcept): Don't write to
fpsr if value didn't change.
* sysdeps/aarch64/fpu/fedisblxcpt.c (fedisableexcept): Don't write
to fpcr if value didn't change.
* sysdeps/aarch64/fpu/feenablxcpt.c (feenableexcept): Likewise.
* sysdeps/aarch64/fpu/feholdexcpt.c (feholdexcept): Don't write to
fpsr or fpcr if value didn't change.
* sysdeps/aarch64/fpu/fesetenv.c (fesetenv): Likewise.
* sysdeps/aarch64/fpu/fesetround.c (fesetround): Don't write to
fpcr if value didn't change.
* sysdeps/aarch64/fpu/fsetexcptflg.c (fesetexceptflag): Don't write
to fpsr if value didn't change.
Comments
On 24 April 2014 00:51, Ian Bolton <ian.bolton@arm.com> wrote:
> Writes to fpsr and fpcr are expensive. This patch adds checks before each
> write to ensure we only write new values to them (as opposed to rewriting
> what's there).
>
> Tested with make check and make bench, with no regressions.
>
> OK for commit?
>
> Cheers,
> Ian
>
>
> 2014-04-24 Ian Bolton <ian.bolton@arm.com>
>
> * sysdeps/aarch64/fpu/fclrexcpt.c (feclearexcept): Don't write to
> fpsr if value didn't change.
> * sysdeps/aarch64/fpu/fedisblxcpt.c (fedisableexcept): Don't write
> to fpcr if value didn't change.
> * sysdeps/aarch64/fpu/feenablxcpt.c (feenableexcept): Likewise.
> * sysdeps/aarch64/fpu/feholdexcpt.c (feholdexcept): Don't write to
> fpsr or fpcr if value didn't change.
> * sysdeps/aarch64/fpu/fesetenv.c (fesetenv): Likewise.
> * sysdeps/aarch64/fpu/fesetround.c (fesetround): Don't write to
> fpcr if value didn't change.
> * sysdeps/aarch64/fpu/fsetexcptflg.c (fesetexceptflag): Don't write
> to fpsr if value didn't change.
OK, and committed.
/Marcus
@@ -22,14 +22,15 @@
int
feclearexcept (int excepts)
{
- fpu_fpsr_t fpsr;
+ fpu_fpsr_t fpsr, fpsr_new;
excepts &= FE_ALL_EXCEPT;
_FPU_GETFPSR (fpsr);
- fpsr = (fpsr & ~FE_ALL_EXCEPT) | (fpsr & FE_ALL_EXCEPT & ~excepts);
+ fpsr_new = (fpsr & ~FE_ALL_EXCEPT) | (fpsr & FE_ALL_EXCEPT & ~excepts);
- _FPU_SETFPSR (fpsr);
+ if (fpsr != fpsr_new)
+ _FPU_SETFPSR (fpsr_new);
return 0;
}
@@ -22,7 +22,7 @@
int
fedisableexcept (int excepts)
{
- fpu_control_t fpcr;
+ fpu_control_t fpcr, fpcr_new;
int original_excepts;
_FPU_GETCW (fpcr);
@@ -31,9 +31,10 @@ fedisableexcept (int excepts)
excepts &= FE_ALL_EXCEPT;
- fpcr &= ~(excepts << FE_EXCEPT_SHIFT);
+ fpcr_new = fpcr & ~(excepts << FE_EXCEPT_SHIFT);
- _FPU_SETCW (fpcr);
+ if (fpcr != fpcr_new)
+ _FPU_SETCW (fpcr_new);
return original_excepts;
}
@@ -22,7 +22,7 @@
int
feenableexcept (int excepts)
{
- fpu_control_t fpcr;
+ fpu_control_t fpcr, fpcr_new;
int original_excepts;
_FPU_GETCW (fpcr);
@@ -31,9 +31,10 @@ feenableexcept (int excepts)
excepts &= FE_ALL_EXCEPT;
- fpcr |= (excepts << FE_EXCEPT_SHIFT);
+ fpcr_new = fpcr | (excepts << FE_EXCEPT_SHIFT);
- _FPU_SETCW (fpcr);
+ if (fpcr != fpcr_new)
+ _FPU_SETCW (fpcr_new);
/* Trapping exceptions are optional in AArch64 the relevant enable
bits in FPCR are RES0 hence the absence of support can be
@@ -22,8 +22,8 @@
int
feholdexcept (fenv_t *envp)
{
- fpu_fpsr_t fpsr;
- fpu_control_t fpcr;
+ fpu_fpsr_t fpsr, fpsr_new;
+ fpu_control_t fpcr, fpcr_new;
_FPU_GETCW (fpcr);
envp->__fpcr = fpcr;
@@ -32,14 +32,16 @@ feholdexcept (fenv_t *envp)
envp->__fpsr = fpsr;
/* Now set all exceptions to non-stop. */
- fpcr &= ~(FE_ALL_EXCEPT << FE_EXCEPT_SHIFT);
+ fpcr_new = fpcr & ~(FE_ALL_EXCEPT << FE_EXCEPT_SHIFT);
/* And clear all exception flags. */
- fpsr &= ~FE_ALL_EXCEPT;
+ fpsr_new = fpsr & ~FE_ALL_EXCEPT;
- _FPU_SETFPSR (fpsr);
+ if (fpsr != fpsr_new)
+ _FPU_SETFPSR (fpsr_new);
- _FPU_SETCW (fpcr);
+ if (fpcr != fpcr_new)
+ _FPU_SETCW (fpcr_new);
return 0;
}
@@ -22,35 +22,37 @@
int
fesetenv (const fenv_t *envp)
{
- fpu_control_t fpcr;
- fpu_fpsr_t fpsr;
+ fpu_control_t fpcr, fpcr_new;
+ fpu_fpsr_t fpsr, fpsr_new;
fpu_control_t updated_fpcr;
_FPU_GETCW (fpcr);
_FPU_GETFPSR (fpsr);
- fpcr &= _FPU_RESERVED;
- fpsr &= _FPU_FPSR_RESERVED;
+ fpcr_new = fpcr & _FPU_RESERVED;
+ fpsr_new = fpsr & _FPU_FPSR_RESERVED;
if (envp == FE_DFL_ENV)
{
- fpcr |= _FPU_DEFAULT;
- fpsr |= _FPU_FPSR_DEFAULT;
+ fpcr_new |= _FPU_DEFAULT;
+ fpsr_new |= _FPU_FPSR_DEFAULT;
}
else if (envp == FE_NOMASK_ENV)
{
- fpcr |= _FPU_FPCR_IEEE;
- fpsr |= _FPU_FPSR_IEEE;
+ fpcr_new |= _FPU_FPCR_IEEE;
+ fpsr_new |= _FPU_FPSR_IEEE;
}
else
{
- fpcr |= envp->__fpcr & ~_FPU_RESERVED;
- fpsr |= envp->__fpsr & ~_FPU_FPSR_RESERVED;
+ fpcr_new |= envp->__fpcr & ~_FPU_RESERVED;
+ fpsr_new |= envp->__fpsr & ~_FPU_FPSR_RESERVED;
}
- _FPU_SETFPSR (fpsr);
+ if (fpsr != fpsr_new)
+ _FPU_SETFPSR (fpsr_new);
- _FPU_SETCW (fpcr);
+ if (fpcr != fpcr_new)
+ _FPU_SETCW (fpcr_new);
/* Trapping exceptions are optional in AArch64 the relevant enable
bits in FPCR are RES0 hence the absence of support can be
@@ -58,7 +60,7 @@ fesetenv (const fenv_t *envp)
value. */
_FPU_GETCW (updated_fpcr);
- if ((updated_fpcr & fpcr) != fpcr)
+ if ((updated_fpcr & fpcr_new) != fpcr_new)
return 1;
return 0;
@@ -22,7 +22,7 @@
int
fesetround (int round)
{
- fpu_control_t fpcr;
+ fpu_control_t fpcr, fpcr_new;
switch (round)
{
@@ -31,9 +31,10 @@ fesetround (int round)
case FE_DOWNWARD:
case FE_TOWARDZERO:
_FPU_GETCW (fpcr);
- fpcr = (fpcr & ~FE_TOWARDZERO) | round;
+ fpcr_new = (fpcr & ~FE_TOWARDZERO) | round;
- _FPU_SETCW (fpcr);
+ if (fpcr != fpcr_new)
+ _FPU_SETCW (fpcr_new);
return 0;
default:
@@ -23,17 +23,18 @@
int
fesetexceptflag (const fexcept_t *flagp, int excepts)
{
- fpu_fpsr_t fpsr;
+ fpu_fpsr_t fpsr, fpsr_new;
/* Get the current environment. */
_FPU_GETFPSR (fpsr);
/* Set the desired exception mask. */
- fpsr &= ~(excepts & FE_ALL_EXCEPT);
- fpsr |= (*flagp & excepts & FE_ALL_EXCEPT);
+ fpsr_new = fpsr & ~(excepts & FE_ALL_EXCEPT);
+ fpsr_new |= (*flagp & excepts & FE_ALL_EXCEPT);
/* Save state back to the FPU. */
- _FPU_SETFPSR (fpsr);
+ if (fpsr != fpsr_new)
+ _FPU_SETFPSR (fpsr_new);
return 0;
}