Cleanup FAST_MATH misc functions from sysdeps/x86/fpu/bits/mathinline.h
Commit Message
Hello!
Attached patch cleans up FAST_MATH misc functions from
sysdeps/x86/fpu/bits/mathinline.h. The patch uses x87 __builtin
functions that are always defined for gcc-4.6+, uses
__builtin_copysign instead of homegrown __sgn function to avoid
partial memory stalls.
Since we are in FAST_MATH, we don't need to handle the sign of zero,
so the patch removes
- return __temp ? __temp : __x
from the definition of expm1.
Patch was tested on x86_64-linux-gnu, and the resulting mathinline.h
was used to calculate test results of all changed functions.
Please note I have no write access to glibc repository.
Uros.
2017-04-26 Uros Bizjak <ubizjak@gmail.com>
* sysdeps/x86/fpu/bits/mathinline.h [__FAST_MATH__]
(__expm1_code): Remove define and undefine.
[__FAST_MATH__] (__expm1l): Remove inline function.
[__FAST_MATH__] (__exp_code): Remove define and undefine.
[__FAST_MATH__] (exp): Remove inline function.
[__FAST_MATH__] (__expl): Remove inline function.
[__FAST_MATH__] (__libc_sqrtl): Remove define.
(fabs): Remove define.
(fabsf): Remove define.
(fabsl): Remove define.
(__fabsl): Remove define.
(__sgn1l): Remove define.
[__FAST_MATH__] (sinh): Rewrite using __builtin functions.
Use __builtin_copysign to calculate sign of the result.
[__FAST_MATH__] (cosh): Rewrite using __builtin functions.
[__FAST_MATH__] (tanh): Rewrite using __builtin functions.
Use __builtin_copysign to calculate sign of the result.
[__USE_ISOC99 && __FAST_MATH__] (expm1): Remove inline function.
[__USE_ISOC99 && __FAST_MATH__] (asinh): Rewrite using __builtin
functions. Use __builtin_copysign to calculate sign of the result.
[__USE_ISOC99 && __FAST_MATH__] (acosh): Rewrite using
__builtin functions.
[__USE_ISOC99 && __FAST_MATH__] (atanh): Rewrite using __builtin
functions. Use __builtin_copysign to calculate sign of the result.
[__USE_ISOC99 && __FAST_MATH__] (hypot): Rewrite using
__builtin functions.
Signed-off-by: Uros Bizjak <ubizjak@gmail.com>
Comments
On Thu, 26 Apr 2018, Uros Bizjak wrote:
> Hello!
>
> Attached patch cleans up FAST_MATH misc functions from
> sysdeps/x86/fpu/bits/mathinline.h. The patch uses x87 __builtin
What we actually want to do is eliminate bits/mathinline.h completely
(with libc-internal inlines moving to an internal header) (and then
eliminate the separate inline function ulps and tests). There may be a
few remaining cases that would be useful for GCC to optimize with
-ffast-math but that it doesn't currently have built-in optimizations for,
however.
The header, while it exists, does need to work with old GCC versions as
it's an installed header (optimizations might be disabled with old
versions, however).
> Patch was tested on x86_64-linux-gnu, and the resulting mathinline.h
> was used to calculate test results of all changed functions.
Note this code isn't used for 64-bit anyway (it's all inside "#if !defined
__SSE2_MATH__ && !defined __x86_64__").
On Thu, Apr 26, 2018 at 2:00 PM, Joseph Myers <joseph@codesourcery.com> wrote:
> On Thu, 26 Apr 2018, Uros Bizjak wrote:
>
>> Hello!
>>
>> Attached patch cleans up FAST_MATH misc functions from
>> sysdeps/x86/fpu/bits/mathinline.h. The patch uses x87 __builtin
>
> What we actually want to do is eliminate bits/mathinline.h completely
> (with libc-internal inlines moving to an internal header) (and then
> eliminate the separate inline function ulps and tests). There may be a
> few remaining cases that would be useful for GCC to optimize with
> -ffast-math but that it doesn't currently have built-in optimizations for,
> however.
>
> The header, while it exists, does need to work with old GCC versions as
> it's an installed header (optimizations might be disabled with old
> versions, however).
Maybe we can protect these functions with __GNUC_PREREQ (4, 4) in the
mean time? gcc-4.4 that introduced __builtin_copysign is now 10 years
old, so perhaps we can declare it "too old" for these fast-math
optimizations.
>> Patch was tested on x86_64-linux-gnu, and the resulting mathinline.h
>> was used to calculate test results of all changed functions.
>
> Note this code isn't used for 64-bit anyway (it's all inside "#if !defined
> __SSE2_MATH__ && !defined __x86_64__").
Yes - as said, I have included the header in other test files to
exercise the mentioned part of code. The build confirmed that no build
regressions were introduced.
Uros.
On Thu, 26 Apr 2018, Uros Bizjak wrote:
> Maybe we can protect these functions with __GNUC_PREREQ (4, 4) in the
> mean time? gcc-4.4 that introduced __builtin_copysign is now 10 years
> old, so perhaps we can declare it "too old" for these fast-math
> optimizations.
In my view, simply having a GCC bug filed requesting the optimization in
question should be sufficient to remove an inline from the header (even
without an actual GCC implementation of the corresponding built-in
function optimization).
@@ -170,149 +170,60 @@
# if !defined __NO_MATH_INLINES && defined __OPTIMIZE__
+
/* Miscellaneous functions */
/* __FAST_MATH__ is defined by gcc -ffast-math. */
# ifdef __FAST_MATH__
+
/* Optimized inline implementation, sometimes with reduced precision
and/or argument range. */
-# if __GNUC_PREREQ (3, 5)
-# define __expm1_code \
- register long double __temp; \
- __temp = __builtin_expm1l (__x); \
- return __temp ? __temp : __x
-# else
-# define __expm1_code \
- register long double __value; \
- register long double __exponent; \
- register long double __temp; \
- __asm __volatile__ \
- ("fldl2e # e^x - 1 = 2^(x * log2(e)) - 1\n\t" \
- "fmul %%st(1) # x * log2(e)\n\t" \
- "fst %%st(1)\n\t" \
- "frndint # int(x * log2(e))\n\t" \
- "fxch\n\t" \
- "fsub %%st(1) # fract(x * log2(e))\n\t" \
- "f2xm1 # 2^(fract(x * log2(e))) - 1\n\t" \
- "fscale # 2^(x * log2(e)) - 2^(int(x * log2(e)))\n\t" \
- : "=t" (__value), "=u" (__exponent) : "0" (__x)); \
- __asm __volatile__ \
- ("fscale # 2^int(x * log2(e))\n\t" \
- : "=t" (__temp) : "0" (1.0), "u" (__exponent)); \
- __temp -= 1.0; \
- __temp += __value; \
- return __temp ? __temp : __x
-# endif
-__inline_mathcodeNP_ (long double, __expm1l, __x, __expm1_code)
-
-# if __GNUC_PREREQ (3, 4)
-__inline_mathcodeNP_ (long double, __expl, __x, return __builtin_expl (__x))
-# else
-# define __exp_code \
- register long double __value; \
- register long double __exponent; \
- __asm __volatile__ \
- ("fldl2e # e^x = 2^(x * log2(e))\n\t" \
- "fmul %%st(1) # x * log2(e)\n\t" \
- "fst %%st(1)\n\t" \
- "frndint # int(x * log2(e))\n\t" \
- "fxch\n\t" \
- "fsub %%st(1) # fract(x * log2(e))\n\t" \
- "f2xm1 # 2^(fract(x * log2(e))) - 1\n\t" \
- : "=t" (__value), "=u" (__exponent) : "0" (__x)); \
- __value += 1.0; \
- __asm __volatile__ \
- ("fscale" \
- : "=t" (__value) : "0" (__value), "u" (__exponent)); \
- return __value
-__inline_mathcodeNP (exp, __x, __exp_code)
-__inline_mathcodeNP_ (long double, __expl, __x, __exp_code)
-# endif
-# endif /* __FAST_MATH__ */
-
-
-# ifdef __FAST_MATH__
-# if !__GNUC_PREREQ (3,3)
-__inline_mathopNP (sqrt, "fsqrt")
-__inline_mathopNP_ (long double, __sqrtl, "fsqrt")
-# define __libc_sqrtl(n) __sqrtl (n)
-# else
-# define __libc_sqrtl(n) __builtin_sqrtl (n)
-# endif
-# endif
-
-# if __GNUC_PREREQ (2, 8)
-__inline_mathcodeNP_ (double, fabs, __x, return __builtin_fabs (__x))
-# ifdef __USE_ISOC99
-__inline_mathcodeNP_ (float, fabsf, __x, return __builtin_fabsf (__x))
-__inline_mathcodeNP_ (long double, fabsl, __x, return __builtin_fabsl (__x))
-# endif
-__inline_mathcodeNP_ (long double, __fabsl, __x, return __builtin_fabsl (__x))
-# else
-__inline_mathop (fabs, "fabs")
-__inline_mathop_ (long double, __fabsl, "fabs")
-# endif
-
-__inline_mathcode_ (long double, __sgn1l, __x, \
- __extension__ union { long double __xld; unsigned int __xi[3]; } __n = \
- { __xld: __x }; \
- __n.__xi[2] = (__n.__xi[2] & 0x8000) | 0x3fff; \
- __n.__xi[1] = 0x80000000; \
- __n.__xi[0] = 0; \
- return __n.__xld)
-
-
-# ifdef __FAST_MATH__
/* The argument range of the inline version of sinhl is slightly reduced. */
-__inline_mathcodeNP (sinh, __x, \
- register long double __exm1 = __expm1l (__fabsl (__x)); \
- return 0.5 * (__exm1 / (__exm1 + 1.0) + __exm1) * __sgn1l (__x))
+__inline_mathcodeNP (sinh, __x, \
+ long double __exm1 = __builtin_expm1l (__builtin_fabsl (__x)); \
+ long double __temp = 0.5l * (__exm1 / (__exm1 + 1.0l) + __exm1); \
+ return __builtin_copysignl (__temp, __x))
__inline_mathcodeNP (cosh, __x, \
- register long double __ex = __expl (__x); \
- return 0.5 * (__ex + 1.0 / __ex))
+ long double __ex = __builtin_expl (__x); \
+ return 0.5l * (__ex + 1.0l / __ex))
__inline_mathcodeNP (tanh, __x, \
- register long double __exm1 = __expm1l (-__fabsl (__x + __x)); \
- return __exm1 / (__exm1 + 2.0) * __sgn1l (-__x))
-# endif
+ long double __exm1 = __builtin_expm1l (-__builtin_fabsl (__x + __x)); \
+ long double __temp = __exm1 / (__exm1 + 2.0l); \
+ return __builtin_copysignl (__temp, __x))
-/* Optimized versions for some non-standardized functions. */
-# ifdef __USE_ISOC99
-
-# ifdef __FAST_MATH__
-__inline_mathcodeNP (expm1, __x, __expm1_code)
+# ifdef __USE_ISOC99
/* The argument range of the inline version of asinhl is slightly reduced. */
__inline_mathcodeNP (asinh, __x, \
- register long double __y = __fabsl (__x); \
- return (log1pl (__y * __y / (__libc_sqrtl (__y * __y + 1.0) + 1.0) + __y) \
- * __sgn1l (__x)))
+ long double __y = __builtin_fabsl (__x); \
+ long double __y2 = __y * __y;
+ long double __temp \
+ = __builtin_log1pl (__y2 / (__builtin_sqrtl (__y2 + 1.0l) + 1.0l) + __y); \
+ return __builtin_copysignl (__temp, __x))
__inline_mathcodeNP (acosh, __x, \
- return logl (__x + __libc_sqrtl (__x - 1.0) * __libc_sqrtl (__x + 1.0)))
+ long double __temp \
+ = __builtin_sqrtl (__x - 1.0l) * __builtin_sqrtl (__x + 1.0l); \
+ return __builtin_logl (__x + __temp))
__inline_mathcodeNP (atanh, __x, \
- register long double __y = __fabsl (__x); \
- return -0.5 * log1pl (-(__y + __y) / (1.0 + __y)) * __sgn1l (__x))
+ long double __y = __builtin_fabsl (__x); \
+ long double __temp = -0.5l * __builtin_log1pl (-(__y + __y) / (1.0l + __y)); \
+ return __builtin_copysignl (__temp, __x))
/* The argument range of the inline version of hypotl is slightly reduced. */
-__inline_mathcodeNP2 (hypot, __x, __y,
- return __libc_sqrtl (__x * __x + __y * __y))
+__inline_mathcodeNP2 (hypot, __x, __y, \
+ return __builtin_sqrtl (__x * __x + __y * __y))
-# endif
-# endif
+# endif /* __USE_ISOC99 */
-
-/* Undefine some of the large macros which are not used anymore. */
-# ifdef __FAST_MATH__
-# undef __expm1_code
-# undef __exp_code
# endif /* __FAST_MATH__ */
-# endif /* __NO_MATH_INLINES */
+# endif /* __NO_MATH_INLINES */
/* This code is used internally in the GNU libc. */