[PING] Inline C99 math functions

Message ID 001c01d0bd7e$25a147c0$70e3d740$@com
State Committed
Headers

Commit Message

Wilco Dijkstra July 13, 2015, 3:11 p.m. UTC
  > Wilco Dijkstra wrote:
> Add inlining of the C99 math functions isinf/isnan/signbit/isfinite/isnormal/fpclassify using
> GCC built-ins when available. Since going through the PLT is expensive for these small
> functions, inlining results in major speedups (about 7x on Cortex-A57 for isinf). The GCC
> built-ins are not correct if signalling NaN support is required, and thus are turned off in
> that case (see GCC bug 66462). The test-snan.c tests sNaNs and so must be explicitly built
> with -fsignaling-nans.
> 
> As a result of this many target overrides and the various __isnan/__finite inlines in
> math_private.h are no longer required. If agreed we could remove all this code and only keep
> the generic definition of isinf/etc which will use the builtin.
> 
> Tested on AArch64. OK for commit?
> 
> ChangeLog:
> 2015-06-15  Wilco Dijkstra  <wdijkstr@arm.com>
> 
> 	* math/Makefile: Build test-snan.c with -fsignaling-nans.
> 	* math/math.h (fpclassify): Use __builtin_fpclassify when
> 	available.  (signbit): Use __builtin_signbit(f/l).
> 	(isfinite): Use __builtin_isfinite.  (isnormal): Use
> 	__builtin_isnormal.  (isnan): Use __builtin_isnan.
> 	(isinf): Use __builtin_isinf_sign.

As suggested __fpclassify is not inlined when optimizing for size, and a benchmark
has been created (json output for x64 attached showing the large gains due to inlining).

OK for commit?


---
 math/Makefile |  1 +
 math/math.h   | 51 ++++++++++++++++++++++++++++++---------------------
 2 files changed, 31 insertions(+), 21 deletions(-)
  

Comments

Carlos O'Donell July 13, 2015, 4:56 p.m. UTC | #1
On 07/13/2015 11:11 AM, Wilco Dijkstra wrote:
>> Wilco Dijkstra wrote:
>> Add inlining of the C99 math functions isinf/isnan/signbit/isfinite/isnormal/fpclassify using
>> GCC built-ins when available. Since going through the PLT is expensive for these small
>> functions, inlining results in major speedups (about 7x on Cortex-A57 for isinf). The GCC
>> built-ins are not correct if signalling NaN support is required, and thus are turned off in
>> that case (see GCC bug 66462). The test-snan.c tests sNaNs and so must be explicitly built
>> with -fsignaling-nans.
>>
>> As a result of this many target overrides and the various __isnan/__finite inlines in
>> math_private.h are no longer required. If agreed we could remove all this code and only keep
>> the generic definition of isinf/etc which will use the builtin.
>>
>> Tested on AArch64. OK for commit?
>>
>> ChangeLog:
>> 2015-06-15  Wilco Dijkstra  <wdijkstr@arm.com>
>>
>> 	* math/Makefile: Build test-snan.c with -fsignaling-nans.
>> 	* math/math.h (fpclassify): Use __builtin_fpclassify when
>> 	available.  (signbit): Use __builtin_signbit(f/l).
>> 	(isfinite): Use__builtin_isfinite.  (isnormal): Use
>> 	__builtin_isnormal.  (isnan): Use __builtin_isnan.
>> 	(isinf): Use __builtin_isinf_sign.
> 
> As suggested __fpclassify is not inlined when optimizing for size, and a benchmark
> has been created (json output for x64 attached showing the large gains due to inlining).
> 
> OK for commit?
 
This looks good to me for 2.23, it is not OK for 2.22.

Please wait until after 2.23 opens and until after you commit the benchmark.

Cheers,
Carlos.

> ---
>  math/Makefile |  1 +
>  math/math.h   | 51 ++++++++++++++++++++++++++++++---------------------
>  2 files changed, 31 insertions(+), 21 deletions(-)
> 
> diff --git a/math/Makefile b/math/Makefile
> index 9a3cf32..f78d75b 100644
> --- a/math/Makefile
> +++ b/math/Makefile
> @@ -155,6 +155,7 @@ CFLAGS-test-tgmath.c = -fno-builtin
>  CFLAGS-test-tgmath2.c = -fno-builtin
>  CFLAGS-test-tgmath-ret.c = -fno-builtin
>  CFLAGS-test-powl.c = -fno-builtin
> +CFLAGS-test-snan.c = -fsignaling-nans
>  CPPFLAGS-test-ifloat.c = -U__LIBC_INTERNAL_MATH_INLINES -D__FAST_MATH__ \
>  			 -DTEST_FAST_MATH -fno-builtin
>  CPPFLAGS-test-idouble.c = -U__LIBC_INTERNAL_MATH_INLINES -D__FAST_MATH__ \
> diff --git a/math/math.h b/math/math.h
> index 22f0989..1721118 100644
> --- a/math/math.h
> +++ b/math/math.h
> @@ -215,8 +215,15 @@ enum
>        FP_NORMAL
>    };
>  
> +/* GCC bug 66462 means we cannot use the math builtins with -fsignaling-nan,
> +   so disable builtins if this is enabled.  When fixed in a newer GCC,
> +   the __SUPPORT_SNAN__ check may be skipped for those versions.  */
> +
>  /* Return number of classification appropriate for X.  */
> -# ifdef __NO_LONG_DOUBLE_MATH
> +# if __GNUC_PREREQ (4,4) && !defined __SUPPORT_SNAN__ && !defined __OPTIMIZE_SIZE__
> +#  define fpclassify(x) __builtin_fpclassify (FP_NAN, FP_INFINITE,	      \
> +     FP_NORMAL, FP_SUBNORMAL, FP_ZERO, x)
> +# elif defined __NO_LONG_DOUBLE_MATH
>  #  define fpclassify(x) \
>       (sizeof (x) == sizeof (float) ? __fpclassifyf (x) : __fpclassify (x))
>  # else
> @@ -229,32 +236,26 @@ enum
>  
>  /* Return nonzero value if sign of X is negative.  */
>  # if __GNUC_PREREQ (4,0)
> -#  ifdef __NO_LONG_DOUBLE_MATH
> -#   define signbit(x) \
> -     (sizeof (x) == sizeof (float) \
> -      ? __builtin_signbitf (x) : __builtin_signbit (x))
> -#  else
> -#   define signbit(x) \
> -     (sizeof (x) == sizeof (float)                                            \
> -      ? __builtin_signbitf (x)                                                        \
> -      : sizeof (x) == sizeof (double)                                         \
> +#  define signbit(x) \
> +     (sizeof (x) == sizeof (float)					      \
> +      ? __builtin_signbitf (x)						      \
> +      : sizeof (x) == sizeof (double)					      \
>        ? __builtin_signbit (x) : __builtin_signbitl (x))
> -# endif
> -# else
> -#  ifdef __NO_LONG_DOUBLE_MATH
> -#   define signbit(x) \
> +# elif defined __NO_LONG_DOUBLE_MATH
> +#  define signbit(x) \
>       (sizeof (x) == sizeof (float) ? __signbitf (x) : __signbit (x))
> -#  else
> -#   define signbit(x) \
> +# else
> +#  define signbit(x) \
>       (sizeof (x) == sizeof (float)					      \
>        ? __signbitf (x)							      \
>        : sizeof (x) == sizeof (double)					      \
>        ? __signbit (x) : __signbitl (x))
> -#  endif
>  # endif
>  
>  /* Return nonzero value if X is not +-Inf or NaN.  */
> -# ifdef __NO_LONG_DOUBLE_MATH
> +# if __GNUC_PREREQ (4,4) && !defined __SUPPORT_SNAN__
> +#  define isfinite(x) __builtin_isfinite (x)
> +# elif defined __NO_LONG_DOUBLE_MATH
>  #  define isfinite(x) \
>       (sizeof (x) == sizeof (float) ? __finitef (x) : __finite (x))
>  # else
> @@ -266,11 +267,17 @@ enum
>  # endif
>  
>  /* Return nonzero value if X is neither zero, subnormal, Inf, nor NaN.  */
> -# define isnormal(x) (fpclassify (x) == FP_NORMAL)
> +# if __GNUC_PREREQ (4,4) && !defined __SUPPORT_SNAN__
> +#  define isnormal(x) __builtin_isnormal (x)
> +# else
> +#  define isnormal(x) (fpclassify (x) == FP_NORMAL)
> +# endif
>  
>  /* Return nonzero value if X is a NaN.  We could use `fpclassify' but
>     we already have this functions `__isnan' and it is faster.  */
> -# ifdef __NO_LONG_DOUBLE_MATH
> +# if __GNUC_PREREQ (4,4) && !defined __SUPPORT_SNAN__
> +#  define isnan(x) __builtin_isnan (x)
> +# elif defined __NO_LONG_DOUBLE_MATH
>  #  define isnan(x) \
>       (sizeof (x) == sizeof (float) ? __isnanf (x) : __isnan (x))
>  # else
> @@ -282,7 +289,9 @@ enum
>  # endif
>  
>  /* Return nonzero value if X is positive or negative infinity.  */
> -# ifdef __NO_LONG_DOUBLE_MATH
> +# if __GNUC_PREREQ (4,4) && !defined __SUPPORT_SNAN__
> +#  define isinf(x) __builtin_isinf_sign (x)
> +# elif defined __NO_LONG_DOUBLE_MATH
>  #  define isinf(x) \
>       (sizeof (x) == sizeof (float) ? __isinff (x) : __isinf (x))
>  # else
>
  
Joseph Myers July 22, 2015, 3:16 p.m. UTC | #2
On Mon, 13 Jul 2015, Wilco Dijkstra wrote:

> > Wilco Dijkstra wrote:
> > Add inlining of the C99 math functions isinf/isnan/signbit/isfinite/isnormal/fpclassify using
> > GCC built-ins when available. Since going through the PLT is expensive for these small
> > functions, inlining results in major speedups (about 7x on Cortex-A57 for isinf). The GCC
> > built-ins are not correct if signalling NaN support is required, and thus are turned off in
> > that case (see GCC bug 66462). The test-snan.c tests sNaNs and so must be explicitly built
> > with -fsignaling-nans.
> > 
> > As a result of this many target overrides and the various __isnan/__finite inlines in
> > math_private.h are no longer required. If agreed we could remove all this code and only keep
> > the generic definition of isinf/etc which will use the builtin.
> > 
> > Tested on AArch64. OK for commit?
> > 
> > ChangeLog:
> > 2015-06-15  Wilco Dijkstra  <wdijkstr@arm.com>
> > 
> > 	* math/Makefile: Build test-snan.c with -fsignaling-nans.
> > 	* math/math.h (fpclassify): Use __builtin_fpclassify when
> > 	available.  (signbit): Use __builtin_signbit(f/l).
> > 	(isfinite): Use__builtin_isfinite.  (isnormal): Use
> > 	__builtin_isnormal.  (isnan): Use __builtin_isnan.
> > 	(isinf): Use __builtin_isinf_sign.
> 
> As suggested __fpclassify is not inlined when optimizing for size, and a benchmark
> has been created (json output for x64 attached showing the large gains due to inlining).

I don't see an updated ChangeLog entry (with the [BZ #N] notation I 
requested).  Please include the ChangeLog entry with each patch 
submission.
  
Wilco Dijkstra July 22, 2015, 3:58 p.m. UTC | #3
> Joseph Myers wrote:
> On Mon, 13 Jul 2015, Wilco Dijkstra wrote:
> 
> > > Wilco Dijkstra wrote:
> > > Add inlining of the C99 math functions isinf/isnan/signbit/isfinite/isnormal/fpclassify
> using
> > > GCC built-ins when available. Since going through the PLT is expensive for these small
> > > functions, inlining results in major speedups (about 7x on Cortex-A57 for isinf). The GCC
> > > built-ins are not correct if signalling NaN support is required, and thus are turned off
> in
> > > that case (see GCC bug 66462). The test-snan.c tests sNaNs and so must be explicitly built
> > > with -fsignaling-nans.
> > >
> > > As a result of this many target overrides and the various __isnan/__finite inlines in
> > > math_private.h are no longer required. If agreed we could remove all this code and only
> keep
> > > the generic definition of isinf/etc which will use the builtin.
> > >
> > > Tested on AArch64. OK for commit?
> > >
> > > ChangeLog:
> > > 2015-06-15  Wilco Dijkstra  <wdijkstr@arm.com>
> > >
> > > 	* math/Makefile: Build test-snan.c with -fsignaling-nans.
> > > 	* math/math.h (fpclassify): Use __builtin_fpclassify when
> > > 	available.  (signbit): Use __builtin_signbit(f/l).
> > > 	(isfinite): Use__builtin_isfinite.  (isnormal): Use
> > > 	__builtin_isnormal.  (isnan): Use __builtin_isnan.
> > > 	(isinf): Use __builtin_isinf_sign.
> >
> > As suggested __fpclassify is not inlined when optimizing for size, and a benchmark
> > has been created (json output for x64 attached showing the large gains due to inlining).
> 
> I don't see an updated ChangeLog entry (with the [BZ #N] notation I
> requested).  Please include the ChangeLog entry with each patch
> submission.

It was still in the quotes. Something like this?

2015-07-xx  Wilco Dijkstra  <wdijkstr@arm.com>

	* math/Makefile: Build test-snan.c with -fsignaling-nans.
	* math/math.h (fpclassify): Use __builtin_fpclassify when
	available.  (signbit): Use __builtin_signbit(f/l).
	(isfinite): Use __builtin_isfinite.  (isnormal): Use
	__builtin_isnormal.  (isnan): Use __builtin_isnan - fixes [BZ #17441].
	(isinf): Use __builtin_isinf_sign - fixes [BZ #15367].
  
Joseph Myers July 22, 2015, 7:47 p.m. UTC | #4
On Wed, 22 Jul 2015, Wilco Dijkstra wrote:

> > I don't see an updated ChangeLog entry (with the [BZ #N] notation I
> > requested).  Please include the ChangeLog entry with each patch
> > submission.
> 
> It was still in the quotes. Something like this?
> 
> 2015-07-xx  Wilco Dijkstra  <wdijkstr@arm.com>
> 
> 	* math/Makefile: Build test-snan.c with -fsignaling-nans.
> 	* math/math.h (fpclassify): Use __builtin_fpclassify when
> 	available.  (signbit): Use __builtin_signbit(f/l).
> 	(isfinite): Use__builtin_isfinite.  (isnormal): Use
> 	__builtin_isnormal.  (isnan): Use __builtin_isnan - fixes [BZ #17441].
> 	(isinf): Use __builtin_isinf_sign - fixes [BZ #15367].

That's not the style used (and your line breaks are all wrong).  
<TAB>[BZ #N]<LF> at the start of the ChangeLog entry, after the initial 
author-date line and blank line and before the lines describing changes to 
each file.
  
Andreas Schwab Nov. 4, 2015, 3:04 p.m. UTC | #5
"Wilco Dijkstra" <wdijkstr@arm.com> writes:

>> Wilco Dijkstra wrote:
>> Add inlining of the C99 math functions isinf/isnan/signbit/isfinite/isnormal/fpclassify using
>> GCC built-ins when available. Since going through the PLT is expensive for these small
>> functions, inlining results in major speedups (about 7x on Cortex-A57 for isinf). The GCC
>> built-ins are not correct if signalling NaN support is required, and thus are turned off in
>> that case (see GCC bug 66462). The test-snan.c tests sNaNs and so must be explicitly built
>> with -fsignaling-nans.
>> 
>> As a result of this many target overrides and the various __isnan/__finite inlines in
>> math_private.h are no longer required. If agreed we could remove all this code and only keep
>> the generic definition of isinf/etc which will use the builtin.
>> 
>> Tested on AArch64. OK for commit?

FAIL: elf/check-localplt
$ cat elf/check-localplt.out 
Missing required PLT reference: libm.so: __signbitl
Missing required PLT reference: libc.so: __signbitl
Missing required PLT reference: libm.so: __signbitf
Missing required PLT reference: libm.so: __signbit
Missing required PLT reference: libc.so: __signbit

Andreas.
  
Wilco Dijkstra Nov. 6, 2015, 2:29 p.m. UTC | #6
Andreas Schwab wrote: 
> "Wilco Dijkstra" <wdijkstr@arm.com> writes:
> 
> >> Wilco Dijkstra wrote:
> >> Add inlining of the C99 math functions
> >> isinf/isnan/signbit/isfinite/isnormal/fpclassify using GCC built-ins
> >> when available. Since going through the PLT is expensive for these
> >> small functions, inlining results in major speedups (about 7x on
> >> Cortex-A57 for isinf). The GCC built-ins are not correct if
> >> signalling NaN support is required, and thus are turned off in that
> >> case (see GCC bug 66462). The test-snan.c tests sNaNs and so must be
> >> explicitly built with -fsignaling-nans.
> >>
> >> As a result of this many target overrides and the various
> >> __isnan/__finite inlines in math_private.h are no longer required. If
> >> agreed we could remove all this code and only keep the generic
> >> definition of isinf/etc which will use the builtin.
> >>
> >> Tested on AArch64. OK for commit?
> 
> FAIL: elf/check-localplt
> $ cat elf/check-localplt.out
> Missing required PLT reference: libm.so: __signbitl
> Missing required PLT reference: libc.so: __signbitl
> Missing required PLT reference: libm.so: __signbitf
> Missing required PLT reference: libm.so: __signbit
> Missing required PLT reference: libc.so: __signbit

I'm not exactly sure what this means - should we now remove the __signbit*
from all the localplt.data files now that signbit is always inlined on all
targets?

Wilco
  
Andreas Schwab Nov. 9, 2015, 9:21 a.m. UTC | #7
"Wilco Dijkstra" <Wilco.Dijkstra@arm.com> writes:

> I'm not exactly sure what this means - should we now remove the __signbit*
> from all the localplt.data files now that signbit is always inlined on all
> targets?

Yes, that is correct.  The references are not supposed to reappear.

Andreas.
  

Patch

diff --git a/math/Makefile b/math/Makefile
index 9a3cf32..f78d75b 100644
--- a/math/Makefile
+++ b/math/Makefile
@@ -155,6 +155,7 @@  CFLAGS-test-tgmath.c = -fno-builtin
 CFLAGS-test-tgmath2.c = -fno-builtin
 CFLAGS-test-tgmath-ret.c = -fno-builtin
 CFLAGS-test-powl.c = -fno-builtin
+CFLAGS-test-snan.c = -fsignaling-nans
 CPPFLAGS-test-ifloat.c = -U__LIBC_INTERNAL_MATH_INLINES -D__FAST_MATH__ \
 			 -DTEST_FAST_MATH -fno-builtin
 CPPFLAGS-test-idouble.c = -U__LIBC_INTERNAL_MATH_INLINES -D__FAST_MATH__ \
diff --git a/math/math.h b/math/math.h
index 22f0989..1721118 100644
--- a/math/math.h
+++ b/math/math.h
@@ -215,8 +215,15 @@  enum
       FP_NORMAL
   };
 
+/* GCC bug 66462 means we cannot use the math builtins with -fsignaling-nans,
+   so disable builtins if this is enabled.  When fixed in a newer GCC,
+   the __SUPPORT_SNAN__ check may be skipped for those versions.  */
+
 /* Return number of classification appropriate for X.  */
-# ifdef __NO_LONG_DOUBLE_MATH
+# if __GNUC_PREREQ (4,4) && !defined __SUPPORT_SNAN__ && !defined __OPTIMIZE_SIZE__
+#  define fpclassify(x) __builtin_fpclassify (FP_NAN, FP_INFINITE,	      \
+     FP_NORMAL, FP_SUBNORMAL, FP_ZERO, x)
+# elif defined __NO_LONG_DOUBLE_MATH
 #  define fpclassify(x) \
      (sizeof (x) == sizeof (float) ? __fpclassifyf (x) : __fpclassify (x))
 # else
@@ -229,32 +236,26 @@  enum
 
 /* Return nonzero value if sign of X is negative.  */
 # if __GNUC_PREREQ (4,0)
-#  ifdef __NO_LONG_DOUBLE_MATH
-#   define signbit(x) \
-     (sizeof (x) == sizeof (float) \
-      ? __builtin_signbitf (x) : __builtin_signbit (x))
-#  else
-#   define signbit(x) \
-     (sizeof (x) == sizeof (float)                                            \
-      ? __builtin_signbitf (x)                                                        \
-      : sizeof (x) == sizeof (double)                                         \
+#  define signbit(x) \
+     (sizeof (x) == sizeof (float)					      \
+      ? __builtin_signbitf (x)						      \
+      : sizeof (x) == sizeof (double)					      \
       ? __builtin_signbit (x) : __builtin_signbitl (x))
-# endif
-# else
-#  ifdef __NO_LONG_DOUBLE_MATH
-#   define signbit(x) \
+# elif defined __NO_LONG_DOUBLE_MATH
+#  define signbit(x) \
      (sizeof (x) == sizeof (float) ? __signbitf (x) : __signbit (x))
-#  else
-#   define signbit(x) \
+# else
+#  define signbit(x) \
      (sizeof (x) == sizeof (float)					      \
       ? __signbitf (x)							      \
       : sizeof (x) == sizeof (double)					      \
       ? __signbit (x) : __signbitl (x))
-#  endif
 # endif
 
 /* Return nonzero value if X is not +-Inf or NaN.  */
-# ifdef __NO_LONG_DOUBLE_MATH
+# if __GNUC_PREREQ (4,4) && !defined __SUPPORT_SNAN__
+#  define isfinite(x) __builtin_isfinite (x)
+# elif defined __NO_LONG_DOUBLE_MATH
 #  define isfinite(x) \
      (sizeof (x) == sizeof (float) ? __finitef (x) : __finite (x))
 # else
@@ -266,11 +267,17 @@  enum
 # endif
 
 /* Return nonzero value if X is neither zero, subnormal, Inf, nor NaN.  */
-# define isnormal(x) (fpclassify (x) == FP_NORMAL)
+# if __GNUC_PREREQ (4,4) && !defined __SUPPORT_SNAN__
+#  define isnormal(x) __builtin_isnormal (x)
+# else
+#  define isnormal(x) (fpclassify (x) == FP_NORMAL)
+# endif
 
 /* Return nonzero value if X is a NaN.  We could use `fpclassify' but
    we already have this functions `__isnan' and it is faster.  */
-# ifdef __NO_LONG_DOUBLE_MATH
+# if __GNUC_PREREQ (4,4) && !defined __SUPPORT_SNAN__
+#  define isnan(x) __builtin_isnan (x)
+# elif defined __NO_LONG_DOUBLE_MATH
 #  define isnan(x) \
      (sizeof (x) == sizeof (float) ? __isnanf (x) : __isnan (x))
 # else
@@ -282,7 +289,9 @@  enum
 # endif
 
 /* Return nonzero value if X is positive or negative infinity.  */
-# ifdef __NO_LONG_DOUBLE_MATH
+# if __GNUC_PREREQ (4,4) && !defined __SUPPORT_SNAN__
+#  define isinf(x) __builtin_isinf_sign (x)
+# elif defined __NO_LONG_DOUBLE_MATH
 #  define isinf(x) \
      (sizeof (x) == sizeof (float) ? __isinff (x) : __isinf (x))
 # else