Add generic HAVE_RM_CTX implementation
Commit Message
Hi Marcus,
Since there have been no further comments, could you check this in?
Wilco
ChangeLog:
2014-05-08 Wilco <wdijkstr@arm.com>
* sysdeps/generic/math_private.h: Add default HAVE_RM_CTX
implementation. New function (libc_feholdsetround_noex_ctx).
-----Original Message-----
From: Wilco [mailto:wdijkstr@arm.com]
Sent: 15 April 2014 14:35
To: 'libc-alpha@sourceware.org'
Subject: [PATCH] Add generic HAVE_RM_CTX implementation
Hi,
This patch adds a generic implementation of HAVE_RM_CTX using standard fenv calls. As a result, math
functions using SET_RESTORE_ROUND* macros do not suffer from a large slowdown on targets which do
not implement optimized libc_fe*_ctx inline functions. Most of the libc_fe* inline functions are now
unused and could be removed in the future (there are a few math functions left which use a mixture
of standard fenv calls and libc_fe* inline functions - they could be updated to use
SET_RESTORE_ROUND or improved to avoid expensive fenv manipulations across just a few FP
instructions).
libc_feholdsetround*_noex_ctx is added to enable better optimization of SET_RESTORE_ROUND_NOEX*
implementations.
Performance measurements on ARM and x86 of sin() show significant gains over the current default,
fairly close to a highly optimized fenv_private:
                        ARM    x86
no fenv_private      :  100%   100%
generic HAVE_RM_CTX  :  250%   350%
fenv_private (CTX)   :  250%   450%
Wilco
ChangeLog:
2014-04-15 Wilco <wdijkstr@arm.com>
* sysdeps/generic/math_private.h: Add generic HAVE_RM_CTX
implementation. New function (libc_feholdsetround_noex_ctx).
---
sysdeps/generic/math_private.h | 116 ++++++++++++++++++++++++++++++++--------
1 file changed, 93 insertions(+), 23 deletions(-)
Comments
On Thu, May 08, 2014 at 04:27:29PM +0100, Wilco wrote:
> Hi Marcus,
>
> Since there have been no further comments, could you check this in?
I had suggested a couple of changes in addition to my comment.
Siddhesh
>
> Wilco
>
> ChangeLog:
> 2014-05-08 Wilco <wdijkstr@arm.com>
>
> * sysdeps/generic/math_private.h: Add default HAVE_RM_CTX
> implementation. New function (libc_feholdsetround_noex_ctx).
>
>
> -----Original Message-----
> From: Wilco [mailto:wdijkstr@arm.com]
> Sent: 15 April 2014 14:35
> To: 'libc-alpha@sourceware.org'
> Subject: [PATCH] Add generic HAVE_RM_CTX implementation
>
> Hi,
>
> This patch adds a generic implementation of HAVE_RM_CTX using standard fenv calls. As a result, math
> functions using SET_RESTORE_ROUND* macros do not suffer from a large slowdown on targets which do
> not implement optimized libc_fe*_ctx inline functions. Most of the libc_fe* inline functions are now
> unused and could be removed in the future (there are a few math functions left which use a mixture
> of standard fenv calls and libc_fe* inline functions - they could be updated to use
> SET_RESTORE_ROUND or improved to avoid expensive fenv manipulations across just a few FP
> instructions).
>
> libc_feholdsetround*_noex_ctx is added to enable better optimization of SET_RESTORE_ROUND_NOEX*
> implementations.
>
> Performance measurements on ARM and x86 of sin() show significant gains over the current default,
> fairly close to a highly optimized fenv_private:
>
>                         ARM    x86
> no fenv_private      :  100%   100%
> generic HAVE_RM_CTX  :  250%   350%
> fenv_private (CTX)   :  250%   450%
>
> Wilco
>
> ChangeLog:
> 2014-04-15 Wilco <wdijkstr@arm.com>
>
> * sysdeps/generic/math_private.h: Add generic HAVE_RM_CTX
> implementation. New function (libc_feholdsetround_noex_ctx).
>
> ---
> sysdeps/generic/math_private.h | 116 ++++++++++++++++++++++++++++++++--------
> 1 file changed, 93 insertions(+), 23 deletions(-)
>
> diff --git a/sysdeps/generic/math_private.h b/sysdeps/generic/math_private.h
> index 9b881a3..fade483 100644
> --- a/sysdeps/generic/math_private.h
> +++ b/sysdeps/generic/math_private.h
> @@ -20,6 +20,7 @@
> #include <stdint.h>
> #include <sys/types.h>
> #include <fenv.h>
> +#include <get-rounding-mode.h>
>
> /* The original fdlibm code used statements like:
> n0 = ((*(int*)&one)>>29)^1; * index of high word *
> @@ -557,6 +558,16 @@ default_libc_feupdateenv_test (fenv_t *e, int ex)
> block is different from the current state. This saves a lot of time when
> the floating point unit is much slower than the fixed point units. */
>
> +# ifndef libc_feholdsetround_noex_ctx
> +# define libc_feholdsetround_noex_ctx libc_feholdsetround_ctx
> +# endif
> +# ifndef libc_feholdsetround_noexf_ctx
> +# define libc_feholdsetround_noexf_ctx libc_feholdsetroundf_ctx
> +# endif
> +# ifndef libc_feholdsetround_noexl_ctx
> +# define libc_feholdsetround_noexl_ctx libc_feholdsetroundl_ctx
> +# endif
> +
> # ifndef libc_feresetround_noex_ctx
> # define libc_feresetround_noex_ctx libc_fesetenv_ctx
> # endif
> @@ -567,24 +578,80 @@ default_libc_feupdateenv_test (fenv_t *e, int ex)
> # define libc_feresetround_noexl_ctx libc_fesetenvl_ctx
> # endif
>
> -# ifndef libc_feholdsetround_53bit_ctx
> -# define libc_feholdsetround_53bit_ctx libc_feholdsetround_ctx
> -# endif
> +#else
>
> -# ifndef libc_feresetround_53bit_ctx
> -# define libc_feresetround_53bit_ctx libc_feresetround_ctx
> -# endif
> +/* Default implementation using standard fenv functions.
> + Avoid unnecessary rounding mode changes by first checking the
> + current rounding mode. Note the use of __glibc_unlikely is
> + important for performance. */
>
> -# define SET_RESTORE_ROUND_GENERIC(RM,ROUNDFUNC,CLEANUPFUNC) \
> - struct rm_ctx ctx __attribute__((cleanup(CLEANUPFUNC ## _ctx))); \
> - ROUNDFUNC ## _ctx (&ctx, (RM))
> -#else
> -# define SET_RESTORE_ROUND_GENERIC(RM, ROUNDFUNC, CLEANUPFUNC) \
> - fenv_t __libc_save_rm __attribute__((cleanup(CLEANUPFUNC))); \
> - ROUNDFUNC (&__libc_save_rm, (RM))
> +static __always_inline void
> +libc_feholdsetround_ctx (struct rm_ctx *ctx, int round)
> +{
> + ctx->updated_status = false;
> +
> + /* Update rounding mode only if different. */
> + if (__glibc_unlikely (round != get_rounding_mode ()))
> + {
> + ctx->updated_status = true;
> + fegetenv (&ctx->env);
> + fesetround (round);
> + }
> +}
> +
> +static __always_inline void
> +libc_feresetround_ctx (struct rm_ctx *ctx)
> +{
> + /* Restore the rounding mode if updated. */
> + if (__glibc_unlikely (ctx->updated_status))
> + feupdateenv (&ctx->env);
> +}
> +
> +static __always_inline void
> +libc_feholdsetround_noex_ctx (struct rm_ctx *ctx, int round)
> +{
> + /* Save exception flags and rounding mode. */
> + fegetenv (&ctx->env);
> +
> + /* Update rounding mode only if different. */
> + if (__glibc_unlikely (round != get_rounding_mode ()))
> + fesetround (round);
> +}
> +
> +static __always_inline void
> +libc_feresetround_noex_ctx (struct rm_ctx *ctx)
> +{
> + /* Restore exception flags and rounding mode. */
> + fesetenv (&ctx->env);
> +}
> +
> +# define libc_feholdsetroundf_ctx libc_feholdsetround_ctx
> +# define libc_feholdsetroundl_ctx libc_feholdsetround_ctx
> +# define libc_feresetroundf_ctx libc_feresetround_ctx
> +# define libc_feresetroundl_ctx libc_feresetround_ctx
> +
> +# define libc_feholdsetround_noexf_ctx libc_feholdsetround_noex_ctx
> +# define libc_feholdsetround_noexl_ctx libc_feholdsetround_noex_ctx
> +# define libc_feresetround_noexf_ctx libc_feresetround_noex_ctx
> +# define libc_feresetround_noexl_ctx libc_feresetround_noex_ctx
> +
> +#endif
> +
> +#ifndef libc_feholdsetround_53bit_ctx
> +# define libc_feholdsetround_53bit_ctx libc_feholdsetround_ctx
> #endif
> +#ifndef libc_feresetround_53bit_ctx
> +# define libc_feresetround_53bit_ctx libc_feresetround_ctx
> +#endif
> +
> +#define SET_RESTORE_ROUND_GENERIC(RM,ROUNDFUNC,CLEANUPFUNC) \
> + struct rm_ctx ctx __attribute__((cleanup (CLEANUPFUNC ## _ctx))); \
> + ROUNDFUNC ## _ctx (&ctx, (RM))
>
> -/* Save and restore the rounding mode within a lexical block. */
> +/* Set the rounding mode within a lexical block. Restore the rounding mode to
> + the value at the start of the block. The exception mode must be preserved.
> + Exceptions raised within the block must be set in the exception flags.
> + Non-stop mode may be enabled inside the block. */
>
> #define SET_RESTORE_ROUND(RM) \
> SET_RESTORE_ROUND_GENERIC (RM, libc_feholdsetround, libc_feresetround)
> @@ -593,15 +660,18 @@ default_libc_feupdateenv_test (fenv_t *e, int ex)
> #define SET_RESTORE_ROUNDL(RM) \
> SET_RESTORE_ROUND_GENERIC (RM, libc_feholdsetroundl, libc_feresetroundl)
>
> -/* Save and restore the rounding mode within a lexical block, and also
> - the set of exceptions raised within the block may be discarded. */
> -
> -#define SET_RESTORE_ROUND_NOEX(RM) \
> - SET_RESTORE_ROUND_GENERIC (RM, libc_feholdsetround, libc_feresetround_noex)
> -#define SET_RESTORE_ROUND_NOEXF(RM) \
> - SET_RESTORE_ROUND_GENERIC (RM, libc_feholdsetroundf, libc_feresetround_noexf)
> -#define SET_RESTORE_ROUND_NOEXL(RM) \
> - SET_RESTORE_ROUND_GENERIC (RM, libc_feholdsetroundl, libc_feresetround_noexl)
> +/* Set the rounding mode within a lexical block. Restore the rounding mode to
> + the value at the start of the block. The exception mode must be preserved.
> + Exceptions raised within the block must be discarded, and exception flags
> + are restored to the value at the start of the block.
> + Non-stop mode may be enabled inside the block. */
> +
> +#define SET_RESTORE_ROUND_NOEX(RM) SET_RESTORE_ROUND_GENERIC (RM, \
> + libc_feholdsetround_noex, libc_feresetround_noex)
> +#define SET_RESTORE_ROUND_NOEXF(RM) SET_RESTORE_ROUND_GENERIC (RM, \
> + libc_feholdsetround_noexf, libc_feresetround_noexf)
> +#define SET_RESTORE_ROUND_NOEXL(RM) SET_RESTORE_ROUND_GENERIC (RM, \
> + libc_feholdsetround_noexl, libc_feresetround_noexl)
>
> /* Like SET_RESTORE_ROUND, but also set rounding precision to 53 bits. */
> #define SET_RESTORE_ROUND_53BIT(RM) \
> --
> 1.7.9.5
>
@@ -20,6 +20,7 @@
#include <stdint.h>
#include <sys/types.h>
#include <fenv.h>
+#include <get-rounding-mode.h>
/* The original fdlibm code used statements like:
n0 = ((*(int*)&one)>>29)^1; * index of high word *
@@ -557,6 +558,16 @@ default_libc_feupdateenv_test (fenv_t *e, int ex)
block is different from the current state. This saves a lot of time when
the floating point unit is much slower than the fixed point units. */
+# ifndef libc_feholdsetround_noex_ctx
+# define libc_feholdsetround_noex_ctx libc_feholdsetround_ctx
+# endif
+# ifndef libc_feholdsetround_noexf_ctx
+# define libc_feholdsetround_noexf_ctx libc_feholdsetroundf_ctx
+# endif
+# ifndef libc_feholdsetround_noexl_ctx
+# define libc_feholdsetround_noexl_ctx libc_feholdsetroundl_ctx
+# endif
+
# ifndef libc_feresetround_noex_ctx
# define libc_feresetround_noex_ctx libc_fesetenv_ctx
# endif
@@ -567,24 +578,80 @@ default_libc_feupdateenv_test (fenv_t *e, int ex)
# define libc_feresetround_noexl_ctx libc_fesetenvl_ctx
# endif
-# ifndef libc_feholdsetround_53bit_ctx
-# define libc_feholdsetround_53bit_ctx libc_feholdsetround_ctx
-# endif
+#else
-# ifndef libc_feresetround_53bit_ctx
-# define libc_feresetround_53bit_ctx libc_feresetround_ctx
-# endif
+/* Default implementation using standard fenv functions.
+ Avoid unnecessary rounding mode changes by first checking the
+ current rounding mode. Note the use of __glibc_unlikely is
+ important for performance. */
-# define SET_RESTORE_ROUND_GENERIC(RM,ROUNDFUNC,CLEANUPFUNC) \
- struct rm_ctx ctx __attribute__((cleanup(CLEANUPFUNC ## _ctx))); \
- ROUNDFUNC ## _ctx (&ctx, (RM))
-#else
-# define SET_RESTORE_ROUND_GENERIC(RM, ROUNDFUNC, CLEANUPFUNC) \
- fenv_t __libc_save_rm __attribute__((cleanup(CLEANUPFUNC))); \
- ROUNDFUNC (&__libc_save_rm, (RM))
+static __always_inline void
+libc_feholdsetround_ctx (struct rm_ctx *ctx, int round)
+{
+ ctx->updated_status = false;
+
+ /* Update rounding mode only if different. */
+ if (__glibc_unlikely (round != get_rounding_mode ()))
+ {
+ ctx->updated_status = true;
+ fegetenv (&ctx->env);
+ fesetround (round);
+ }
+}
+
+static __always_inline void
+libc_feresetround_ctx (struct rm_ctx *ctx)
+{
+ /* Restore the rounding mode if updated. */
+ if (__glibc_unlikely (ctx->updated_status))
+ feupdateenv (&ctx->env);
+}
+
+static __always_inline void
+libc_feholdsetround_noex_ctx (struct rm_ctx *ctx, int round)
+{
+ /* Save exception flags and rounding mode. */
+ fegetenv (&ctx->env);
+
+ /* Update rounding mode only if different. */
+ if (__glibc_unlikely (round != get_rounding_mode ()))
+ fesetround (round);
+}
+
+static __always_inline void
+libc_feresetround_noex_ctx (struct rm_ctx *ctx)
+{
+ /* Restore exception flags and rounding mode. */
+ fesetenv (&ctx->env);
+}
+
+# define libc_feholdsetroundf_ctx libc_feholdsetround_ctx
+# define libc_feholdsetroundl_ctx libc_feholdsetround_ctx
+# define libc_feresetroundf_ctx libc_feresetround_ctx
+# define libc_feresetroundl_ctx libc_feresetround_ctx
+
+# define libc_feholdsetround_noexf_ctx libc_feholdsetround_noex_ctx
+# define libc_feholdsetround_noexl_ctx libc_feholdsetround_noex_ctx
+# define libc_feresetround_noexf_ctx libc_feresetround_noex_ctx
+# define libc_feresetround_noexl_ctx libc_feresetround_noex_ctx
+
+#endif
+
+#ifndef libc_feholdsetround_53bit_ctx
+# define libc_feholdsetround_53bit_ctx libc_feholdsetround_ctx
#endif
+#ifndef libc_feresetround_53bit_ctx
+# define libc_feresetround_53bit_ctx libc_feresetround_ctx
+#endif
+
+#define SET_RESTORE_ROUND_GENERIC(RM,ROUNDFUNC,CLEANUPFUNC) \
+ struct rm_ctx ctx __attribute__((cleanup (CLEANUPFUNC ## _ctx))); \
+ ROUNDFUNC ## _ctx (&ctx, (RM))
-/* Save and restore the rounding mode within a lexical block. */
+/* Set the rounding mode within a lexical block. Restore the rounding mode to
+ the value at the start of the block. The exception mode must be preserved.
+ Exceptions raised within the block must be set in the exception flags.
+ Non-stop mode may be enabled inside the block. */
#define SET_RESTORE_ROUND(RM) \
SET_RESTORE_ROUND_GENERIC (RM, libc_feholdsetround, libc_feresetround)
@@ -593,15 +660,18 @@ default_libc_feupdateenv_test (fenv_t *e, int ex)
#define SET_RESTORE_ROUNDL(RM) \
SET_RESTORE_ROUND_GENERIC (RM, libc_feholdsetroundl, libc_feresetroundl)
-/* Save and restore the rounding mode within a lexical block, and also
- the set of exceptions raised within the block may be discarded. */
-
-#define SET_RESTORE_ROUND_NOEX(RM) \
- SET_RESTORE_ROUND_GENERIC (RM, libc_feholdsetround, libc_feresetround_noex)
-#define SET_RESTORE_ROUND_NOEXF(RM) \
- SET_RESTORE_ROUND_GENERIC (RM, libc_feholdsetroundf, libc_feresetround_noexf)
-#define SET_RESTORE_ROUND_NOEXL(RM) \
- SET_RESTORE_ROUND_GENERIC (RM, libc_feholdsetroundl, libc_feresetround_noexl)
+/* Set the rounding mode within a lexical block. Restore the rounding mode to
+ the value at the start of the block. The exception mode must be preserved.
+ Exceptions raised within the block must be discarded, and exception flags
+ are restored to the value at the start of the block.
+ Non-stop mode may be enabled inside the block. */
+
+#define SET_RESTORE_ROUND_NOEX(RM) SET_RESTORE_ROUND_GENERIC (RM, \
+ libc_feholdsetround_noex, libc_feresetround_noex)
+#define SET_RESTORE_ROUND_NOEXF(RM) SET_RESTORE_ROUND_GENERIC (RM, \
+ libc_feholdsetround_noexf, libc_feresetround_noexf)
+#define SET_RESTORE_ROUND_NOEXL(RM) SET_RESTORE_ROUND_GENERIC (RM, \
+ libc_feholdsetround_noexl, libc_feresetround_noexl)
/* Like SET_RESTORE_ROUND, but also set rounding precision to 53 bits. */
#define SET_RESTORE_ROUND_53BIT(RM) \