[3/5] i386: Use generic exp10f
Commit Message
The generic implementation is about twice as fast (the mean drops from
127.563 to 58.6894), as measured with the exp10f benchmark:
* master:
"exp10f": {
"": {
"duration": 4.25753e+09,
"iterations": 3.3376e+07,
"max": 1414.77,
"min": 103.649,
"mean": 127.563
}
}
* patched:
"exp10f": {
"": {
"duration": 4.05755e+09,
"iterations": 6.9136e+07,
"max": 1489.64,
"min": 50.95,
"mean": 58.6894
}
}
Checked on i686-linux-gnu.
---
sysdeps/i386/fpu/e_exp10f.S | 54 -------------------------------------
1 file changed, 54 deletions(-)
delete mode 100644 sysdeps/i386/fpu/e_exp10f.S
Comments
Ping (x2).
On 29/04/2020 14:11, Adhemerval Zanella wrote:
> Ping.
>
> On 09/04/2020 16:59, Adhemerval Zanella wrote:
>> The generic implementation is twice as fast. Using the exp10f
>> benchmark:
>>
>> * master:
>> "exp10f": {
>> "": {
>> "duration": 4.25753e+09,
>> "iterations": 3.3376e+07,
>> "max": 1414.77,
>> "min": 103.649,
>> "mean": 127.563
>> }
>> }
>>
>> * patched:
>> "exp10f": {
>> "": {
>> "duration": 4.05755e+09,
>> "iterations": 6.9136e+07,
>> "max": 1489.64,
>> "min": 50.95,
>> "mean": 58.6894
>> }
>> }
>>
>> Checked on i686-linux-gnu.
>> ---
>> sysdeps/i386/fpu/e_exp10f.S | 54 -------------------------------------
>> 1 file changed, 54 deletions(-)
>> delete mode 100644 sysdeps/i386/fpu/e_exp10f.S
>>
>> diff --git a/sysdeps/i386/fpu/e_exp10f.S b/sysdeps/i386/fpu/e_exp10f.S
>> deleted file mode 100644
>> index 196ce8744a..0000000000
>> --- a/sysdeps/i386/fpu/e_exp10f.S
>> +++ /dev/null
>> @@ -1,54 +0,0 @@
>> -/*
>> - * Written by Ulrich Drepper.
>> - */
>> -
>> -#include <machine/asm.h>
>> -#include <i386-math-asm.h>
>> -#include <libm-alias-finite.h>
>> -
>> -DEFINE_FLT_MIN
>> -
>> -#ifdef PIC
>> -# define MO(op) op##@GOTOFF(%ecx)
>> -#else
>> -# define MO(op) op
>> -#endif
>> -
>> - .text
>> -/* 10^x = 2^(x * log2(10)) */
>> -ENTRY(__ieee754_exp10f)
>> -#ifdef PIC
>> - LOAD_PIC_REG (cx)
>> -#endif
>> - flds 4(%esp)
>> -/* I added the following ugly construct because exp(+-Inf) resulted
>> - in NaN. The ugliness results from the bright minds at Intel.
>> - For the i686 the code can be written better.
>> - -- drepper@cygnus.com. */
>> - fxam /* Is NaN or +-Inf? */
>> - fstsw %ax
>> - movb $0x45, %dh
>> - andb %ah, %dh
>> - cmpb $0x05, %dh
>> - je 1f /* Is +-Inf, jump. */
>> - fldl2t
>> - fmulp /* x * log2(10) */
>> - fld %st
>> - frndint /* int(x * log2(10)) */
>> - fsubr %st,%st(1) /* fract(x * log2(10)) */
>> - fxch
>> - f2xm1 /* 2^(fract(x * log2(10))) - 1 */
>> - fld1
>> - faddp /* 2^(fract(x * log2(10))) */
>> - fscale /* e^x */
>> - fstp %st(1)
>> - FLT_NARROW_EVAL_UFLOW_NONNEG_NAN
>> - ret
>> -
>> -1: testl $0x200, %eax /* Test sign. */
>> - jz 2f /* If positive, jump. */
>> - fstp %st
>> - fldz /* Set result to 0. */
>> -2: ret
>> -END (__ieee754_exp10f)
>> -libm_alias_finite (__ieee754_exp10f, __exp10f)
>>
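diff --git a/sysdeps/i386/fpu/e_exp10f.S b/sysdeps/i386/fpu/e_exp10f.S
deleted file mode 100644
index 196ce8744a..0000000000
--- a/sysdeps/i386/fpu/e_exp10f.S
+++ /dev/null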
@@ -1,54 +0,0 @@
-/*
- * Written by Ulrich Drepper.
- */
-
-#include <machine/asm.h>
-#include <i386-math-asm.h>
-#include <libm-alias-finite.h>
-
-DEFINE_FLT_MIN
-
-#ifdef PIC
-# define MO(op) op##@GOTOFF(%ecx)
-#else
-# define MO(op) op
-#endif
-
- .text
-/* 10^x = 2^(x * log2(10)) */
-ENTRY(__ieee754_exp10f)
-#ifdef PIC
- LOAD_PIC_REG (cx)
-#endif
- flds 4(%esp)
-/* I added the following ugly construct because exp(+-Inf) resulted
- in NaN. The ugliness results from the bright minds at Intel.
- For the i686 the code can be written better.
- -- drepper@cygnus.com. */
- fxam /* Is NaN or +-Inf? */
- fstsw %ax
- movb $0x45, %dh
- andb %ah, %dh
- cmpb $0x05, %dh
- je 1f /* Is +-Inf, jump. */
- fldl2t
- fmulp /* x * log2(10) */
- fld %st
- frndint /* int(x * log2(10)) */
- fsubr %st,%st(1) /* fract(x * log2(10)) */
- fxch
- f2xm1 /* 2^(fract(x * log2(10))) - 1 */
- fld1
- faddp /* 2^(fract(x * log2(10))) */
- fscale /* e^x */
- fstp %st(1)
- FLT_NARROW_EVAL_UFLOW_NONNEG_NAN
- ret
-
-1: testl $0x200, %eax /* Test sign. */
- jz 2f /* If positive, jump. */
- fstp %st
- fldz /* Set result to 0. */
-2: ret
-END (__ieee754_exp10f)
-libm_alias_finite (__ieee754_exp10f, __exp10f)