[v2,3/6,powerpc] libc_feupdateenv_test: optimize FPSCR access
Commit Message
From: "Paul A. Clarke" <pc@us.ibm.com>
ROUND_TO_ODD and a couple of other places use libc_feupdateenv_test to
restore the rounding mode and exception enables, preserve exception flags,
and test whether given exception(s) were generated.
If the exception flags haven't changed, it is sufficient, and slightly
more efficient, to restore only the rounding mode and enables rather than
write the full Floating-Point Status and Control Register (FPSCR).
Reviewed-by: Paul E. Murphy <murphyp@linux.ibm.com>
2019-09-19 Paul A. Clarke <pc@us.ibm.com>
* sysdeps/powerpc/fpu/fenv_libc.h (FPSCR_EXCEPTIONS_MASK): New.
* sysdeps/powerpc/fpu/fenv_private.h (__libc_femergeenv_ppc): Optimize
to write FPSCR control only, if exceptions have not changed.
---
v2:
- No changes, but respun after removing _FPU macros at the suggestion
of Paul Murphy, to make it a bit easier to review.
sysdeps/powerpc/fpu/fenv_libc.h | 4 ++++
sysdeps/powerpc/fpu/fenv_private.h | 16 ++++++++++++++--
2 files changed, 18 insertions(+), 2 deletions(-)
Comments
On 9/19/19 1:46 PM, Paul A. Clarke wrote:
> From: "Paul A. Clarke" <pc@us.ibm.com>
>
> ROUND_TO_ODD and a couple of other places use libc_feupdateenv_test to
> restore the rounding mode and exception enables, preserve exception flags,
> and test whether given exception(s) were generated.
>
> If the exception flags haven't changed, then it is sufficient and a bit
> more efficient to just restore the rounding mode and enables, rather than
> writing the full Floating-Point Status and Control Register (FPSCR).
>
> Reviewed-by: Paul E. Murphy <murphyp@linux.ibm.com>
>
> 2019-09-19 Paul A. Clarke <pc@us.ibm.com>
>
> * sysdeps/powerpc/fpu/fenv_libc.h (FPSCR_EXCEPTIONS_MASK): New.
> * sysdeps/powerpc/fpu/fenv_private.h (__libc_femergeenv_ppc): Optimize
> to write FPSCR control only, if exceptions have not changed.
> ---
> v2:
> - No changes, but respun after removing _FPU macros at the suggestion
> of Paul Murphy, to make it a bit easier to review.
>
> sysdeps/powerpc/fpu/fenv_libc.h | 4 ++++
> sysdeps/powerpc/fpu/fenv_private.h | 16 ++++++++++++++--
> 2 files changed, 18 insertions(+), 2 deletions(-)
>
> diff --git a/sysdeps/powerpc/fpu/fenv_libc.h b/sysdeps/powerpc/fpu/fenv_libc.h
> index 549defa..53de1c8 100644
> --- a/sysdeps/powerpc/fpu/fenv_libc.h
> +++ b/sysdeps/powerpc/fpu/fenv_libc.h
> @@ -264,6 +264,10 @@ enum {
> (FPSCR_VE_MASK|FPSCR_OE_MASK|FPSCR_UE_MASK|FPSCR_ZE_MASK|FPSCR_XE_MASK)
> #define FPSCR_BASIC_EXCEPTIONS_MASK \
> (FPSCR_VX_MASK|FPSCR_OX_MASK|FPSCR_UX_MASK|FPSCR_ZX_MASK|FPSCR_XX_MASK)
> +#define FPSCR_EXCEPTIONS_MASK (FPSCR_BASIC_EXCEPTIONS_MASK| \
> + FPSCR_VXSNAN_MASK|FPSCR_VXISI_MASK|FPSCR_VXIDI_MASK|FPSCR_VXZDZ_MASK| \
> + FPSCR_VXIMZ_MASK|FPSCR_VXVC_MASK|FPSCR_VXSOFT_MASK|FPSCR_VXSQRT_MASK| \
> + FPSCR_VXCVI_MASK)
> #define FPSCR_FPRF_MASK \
> (FPSCR_FPRF_C_MASK|FPSCR_FPRF_FL_MASK|FPSCR_FPRF_FG_MASK| \
> FPSCR_FPRF_FE_MASK|FPSCR_FPRF_FU_MASK)
> diff --git a/sysdeps/powerpc/fpu/fenv_private.h b/sysdeps/powerpc/fpu/fenv_private.h
> index 30cbf30..9496026 100644
> --- a/sysdeps/powerpc/fpu/fenv_private.h
> +++ b/sysdeps/powerpc/fpu/fenv_private.h
> @@ -52,8 +52,20 @@ __libc_femergeenv_ppc (const fenv_t *envp, unsigned long long old_mask,
> __TEST_AND_EXIT_NON_STOP (old.l, new.l);
> __TEST_AND_ENTER_NON_STOP (old.l, new.l);
>
> - /* Atomically enable and raise (if appropriate) exceptions set in `new'. */
> - fesetenv_register (new.fenv);
> + /* If requesting to keep status, replace control, and merge exceptions,
> + and exceptions haven't changed, we can just set new control instead
> + of the whole FPSCR. */
> + if ((old_mask & (FPSCR_CONTROL_MASK|FPSCR_STATUS_MASK|FPSCR_EXCEPTIONS_MASK))
> + == (FPSCR_STATUS_MASK|FPSCR_EXCEPTIONS_MASK) &&
> + (new_mask & (FPSCR_CONTROL_MASK|FPSCR_STATUS_MASK|FPSCR_EXCEPTIONS_MASK))
> + == (FPSCR_CONTROL_MASK|FPSCR_EXCEPTIONS_MASK) &&
> + (old.l & FPSCR_EXCEPTIONS_MASK) == (new.l & FPSCR_EXCEPTIONS_MASK))
> + {
> + fesetenv_mode (new.fenv);
> + }
> + else
> + /* Atomically enable and raise (if appropriate) exceptions set in `new'. */
> + fesetenv_register (new.fenv);
>
> return old.l;
> }
>
OK.
Reviewed-by: Paul E. Murphy <murphyp@linux.ibm.com>
@@ -264,6 +264,10 @@ enum {
(FPSCR_VE_MASK|FPSCR_OE_MASK|FPSCR_UE_MASK|FPSCR_ZE_MASK|FPSCR_XE_MASK)
#define FPSCR_BASIC_EXCEPTIONS_MASK \
(FPSCR_VX_MASK|FPSCR_OX_MASK|FPSCR_UX_MASK|FPSCR_ZX_MASK|FPSCR_XX_MASK)
+#define FPSCR_EXCEPTIONS_MASK (FPSCR_BASIC_EXCEPTIONS_MASK| \
+ FPSCR_VXSNAN_MASK|FPSCR_VXISI_MASK|FPSCR_VXIDI_MASK|FPSCR_VXZDZ_MASK| \
+ FPSCR_VXIMZ_MASK|FPSCR_VXVC_MASK|FPSCR_VXSOFT_MASK|FPSCR_VXSQRT_MASK| \
+ FPSCR_VXCVI_MASK)
#define FPSCR_FPRF_MASK \
(FPSCR_FPRF_C_MASK|FPSCR_FPRF_FL_MASK|FPSCR_FPRF_FG_MASK| \
FPSCR_FPRF_FE_MASK|FPSCR_FPRF_FU_MASK)
@@ -52,8 +52,20 @@ __libc_femergeenv_ppc (const fenv_t *envp, unsigned long long old_mask,
__TEST_AND_EXIT_NON_STOP (old.l, new.l);
__TEST_AND_ENTER_NON_STOP (old.l, new.l);
- /* Atomically enable and raise (if appropriate) exceptions set in `new'. */
- fesetenv_register (new.fenv);
+ /* If the masks request keeping the old status, taking the new control,
+ and merging exceptions, and the exception bits are unchanged, it is
+ enough to set the new control instead of writing the whole FPSCR. */
+ if ((old_mask & (FPSCR_CONTROL_MASK|FPSCR_STATUS_MASK|FPSCR_EXCEPTIONS_MASK))
+ == (FPSCR_STATUS_MASK|FPSCR_EXCEPTIONS_MASK) &&
+ (new_mask & (FPSCR_CONTROL_MASK|FPSCR_STATUS_MASK|FPSCR_EXCEPTIONS_MASK))
+ == (FPSCR_CONTROL_MASK|FPSCR_EXCEPTIONS_MASK) &&
+ (old.l & FPSCR_EXCEPTIONS_MASK) == (new.l & FPSCR_EXCEPTIONS_MASK))
+ {
+ fesetenv_mode (new.fenv);
+ }
+ else
+ /* Atomically enable and raise (if appropriate) exceptions set in `new'. */
+ fesetenv_register (new.fenv);
return old.l;
}