[v2,06/10] i386: Use generic exp10
Checks
Context |
Check |
Description |
redhat-pt-bot/TryBot-apply_patch |
success
|
Patch applied to master at the time it was sent
|
linaro-tcwg-bot/tcwg_glibc_build--master-arm |
success
|
Testing passed
|
linaro-tcwg-bot/tcwg_glibc_build--master-aarch64 |
success
|
Testing passed
|
linaro-tcwg-bot/tcwg_glibc_check--master-arm |
success
|
Testing passed
|
linaro-tcwg-bot/tcwg_glibc_check--master-aarch64 |
success
|
Testing passed
|
Commit Message
The resulting performance is slightly better (Ryzen 5900, gcc 13.2.1):
* master
"exp10": {
"": {
"duration": 3.70091e+09,
"iterations": 5.8534e+07,
"max": 91.279,
"min": 62.6225,
"mean": 63.2267
}
}
* patch
"exp10": {
"": {
"duration": 3.70793e+09,
"iterations": 6.328e+07,
"max": 259.592,
"min": 52.1145,
"mean": 58.5957
}
}
Checked on i686-linux-gnu.
---
sysdeps/i386/fpu/Versions | 1 +
sysdeps/i386/fpu/e_exp10.S | 51 -----------------------
sysdeps/i386/fpu/e_exp10.c | 2 +
sysdeps/i386/fpu/e_exp_data.c | 1 -
sysdeps/i386/fpu/w_exp10_compat.c | 8 ----
sysdeps/ieee754/dbl-64/e_exp10.c | 7 +++-
sysdeps/mach/hurd/i386/libm.abilist | 1 +
sysdeps/unix/sysv/linux/i386/libm.abilist | 1 +
8 files changed, 10 insertions(+), 62 deletions(-)
delete mode 100644 sysdeps/i386/fpu/e_exp10.S
create mode 100644 sysdeps/i386/fpu/e_exp10.c
delete mode 100644 sysdeps/i386/fpu/e_exp_data.c
delete mode 100644 sysdeps/i386/fpu/w_exp10_compat.c
Comments
On Wed, Mar 27, 2024 at 12:40 PM Adhemerval Zanella
<adhemerval.zanella@linaro.org> wrote:
>
> The resulting performance is slightly better (Ryzen 5900, gcc 13.2.1):
>
> * master
> "exp10": {
> "": {
> "duration": 3.70091e+09,
> "iterations": 5.8534e+07,
> "max": 91.279,
> "min": 62.6225,
> "mean": 63.2267
> }
> }
>
> * patch
> "exp10": {
> "": {
> "duration": 3.70793e+09,
> "iterations": 6.328e+07,
> "max": 259.592,
> "min": 52.1145,
> "mean": 58.5957
> }
> }
>
> Checked on i686-linux-gnu.
> ---
> sysdeps/i386/fpu/Versions | 1 +
> sysdeps/i386/fpu/e_exp10.S | 51 -----------------------
> sysdeps/i386/fpu/e_exp10.c | 2 +
> sysdeps/i386/fpu/e_exp_data.c | 1 -
> sysdeps/i386/fpu/w_exp10_compat.c | 8 ----
> sysdeps/ieee754/dbl-64/e_exp10.c | 7 +++-
> sysdeps/mach/hurd/i386/libm.abilist | 1 +
> sysdeps/unix/sysv/linux/i386/libm.abilist | 1 +
> 8 files changed, 10 insertions(+), 62 deletions(-)
> delete mode 100644 sysdeps/i386/fpu/e_exp10.S
> create mode 100644 sysdeps/i386/fpu/e_exp10.c
> delete mode 100644 sysdeps/i386/fpu/e_exp_data.c
> delete mode 100644 sysdeps/i386/fpu/w_exp10_compat.c
>
> diff --git a/sysdeps/i386/fpu/Versions b/sysdeps/i386/fpu/Versions
> index 9509f9b7c7..7326f25583 100644
> --- a/sysdeps/i386/fpu/Versions
> +++ b/sysdeps/i386/fpu/Versions
> @@ -5,6 +5,7 @@ libm {
> }
> GLIBC_2.40 {
> # No SVID compatible error handling.
> + exp10;
> fmod; fmodf;
> }
> }
> diff --git a/sysdeps/i386/fpu/e_exp10.S b/sysdeps/i386/fpu/e_exp10.S
> deleted file mode 100644
> index 902f70b77f..0000000000
> --- a/sysdeps/i386/fpu/e_exp10.S
> +++ /dev/null
> @@ -1,51 +0,0 @@
> -
> -#include <machine/asm.h>
> -#include <i386-math-asm.h>
> -#include <libm-alias-finite.h>
> -
> -DEFINE_DBL_MIN
> -
> -#ifdef PIC
> -# define MO(op) op##@GOTOFF(%ecx)
> -#else
> -# define MO(op) op
> -#endif
> -
> - .text
> -/* 10^x = 2^(x * log2(10)) */
> -ENTRY(__ieee754_exp10)
> -#ifdef PIC
> - LOAD_PIC_REG (cx)
> -#endif
> - fldl 4(%esp)
> -/* I added the following ugly construct because exp(+-Inf) resulted
> - in NaN. The ugliness results from the bright minds at Intel.
> - For the i686 the code can be written better.
> - -- drepper@cygnus.com. */
> - fxam /* Is NaN or +-Inf? */
> - fstsw %ax
> - movb $0x45, %dh
> - andb %ah, %dh
> - cmpb $0x05, %dh
> - je 1f /* Is +-Inf, jump. */
> - fldl2t
> - fmulp /* x * log2(10) */
> - fld %st
> - frndint /* int(x * log2(10)) */
> - fsubr %st,%st(1) /* fract(x * log2(10)) */
> - fxch
> - f2xm1 /* 2^(fract(x * log2(10))) - 1 */
> - fld1
> - faddp /* 2^(fract(x * log2(10))) */
> - fscale /* e^x */
> - fstp %st(1)
> - DBL_NARROW_EVAL_UFLOW_NONNEG_NAN
> - ret
> -
> -1: testl $0x200, %eax /* Test sign. */
> - jz 2f /* If positive, jump. */
> - fstp %st
> - fldz /* Set result to 0. */
> -2: ret
> -END (__ieee754_exp10)
> -libm_alias_finite (__ieee754_exp10, __exp10)
> diff --git a/sysdeps/i386/fpu/e_exp10.c b/sysdeps/i386/fpu/e_exp10.c
> new file mode 100644
> index 0000000000..340254fc6e
> --- /dev/null
> +++ b/sysdeps/i386/fpu/e_exp10.c
> @@ -0,0 +1,2 @@
> +#define EXP10_VERSION GLIBC_2_40
> +#include <sysdeps/ieee754/dbl-64/e_exp10.c>
> diff --git a/sysdeps/i386/fpu/e_exp_data.c b/sysdeps/i386/fpu/e_exp_data.c
> deleted file mode 100644
> index 1cc8931700..0000000000
> --- a/sysdeps/i386/fpu/e_exp_data.c
> +++ /dev/null
> @@ -1 +0,0 @@
> -/* Not needed. */
> diff --git a/sysdeps/i386/fpu/w_exp10_compat.c b/sysdeps/i386/fpu/w_exp10_compat.c
> deleted file mode 100644
> index 49a0e03385..0000000000
> --- a/sysdeps/i386/fpu/w_exp10_compat.c
> +++ /dev/null
> @@ -1,8 +0,0 @@
> -/* i386 provides an optimized __ieee754_exp10. */
> -#ifdef SHARED
> -# define NO_COMPAT_NEEDED 1
> -# include <math/w_exp10_compat.c>
> -#else
> -# include <math-type-macros-double.h>
> -# include <w_exp10_template.c>
> -#endif
> diff --git a/sysdeps/ieee754/dbl-64/e_exp10.c b/sysdeps/ieee754/dbl-64/e_exp10.c
> index 225fc74c4c..c63b852f72 100644
> --- a/sysdeps/ieee754/dbl-64/e_exp10.c
> +++ b/sysdeps/ieee754/dbl-64/e_exp10.c
> @@ -99,7 +99,7 @@ __exp10 (double x)
>
> /* Reduce x: z = x * N / log10(2), k = round(z). */
> double_t z = __exp_data.invlog10_2N * x;
> - double_t kd;
> + double kd;
> int64_t ki;
> #if TOINT_INTRINSICS
> kd = roundtoint (z);
> @@ -147,7 +147,10 @@ __exp10 (double x)
> strong_alias (__exp10, __ieee754_exp10)
> libm_alias_finite (__ieee754_exp10, __exp10)
> #if LIBM_SVID_COMPAT
> -versioned_symbol (libm, __exp10, exp10, GLIBC_2_39);
> +# ifndef EXP10_VERSION
> +# define EXP10_VERSION GLIBC_2_39
> +# endif
> +versioned_symbol (libm, __exp10, exp10, EXP10_VERSION);
> libm_alias_double_other (__exp10, exp10)
> #else
> libm_alias_double (__exp10, exp10)
> diff --git a/sysdeps/mach/hurd/i386/libm.abilist b/sysdeps/mach/hurd/i386/libm.abilist
> index 88e7538e51..01c5633663 100644
> --- a/sysdeps/mach/hurd/i386/libm.abilist
> +++ b/sysdeps/mach/hurd/i386/libm.abilist
> @@ -1181,5 +1181,6 @@ GLIBC_2.35 fsqrt F
> GLIBC_2.35 fsqrtl F
> GLIBC_2.35 hypot F
> GLIBC_2.35 hypotf F
> +GLIBC_2.40 exp10 F
> GLIBC_2.40 fmod F
> GLIBC_2.40 fmodf F
> diff --git a/sysdeps/unix/sysv/linux/i386/libm.abilist b/sysdeps/unix/sysv/linux/i386/libm.abilist
> index c99c60161d..3413cfdbe7 100644
> --- a/sysdeps/unix/sysv/linux/i386/libm.abilist
> +++ b/sysdeps/unix/sysv/linux/i386/libm.abilist
> @@ -1188,5 +1188,6 @@ GLIBC_2.35 fsqrt F
> GLIBC_2.35 fsqrtl F
> GLIBC_2.35 hypot F
> GLIBC_2.35 hypotf F
> +GLIBC_2.40 exp10 F
> GLIBC_2.40 fmod F
> GLIBC_2.40 fmodf F
> --
> 2.34.1
>
Also need a bug report.
@@ -5,6 +5,7 @@ libm {
}
GLIBC_2.40 {
# No SVID compatible error handling.
+ exp10;
fmod; fmodf;
}
}
deleted file mode 100644
@@ -1,51 +0,0 @@
-
-#include <machine/asm.h>
-#include <i386-math-asm.h>
-#include <libm-alias-finite.h>
-
-DEFINE_DBL_MIN
-
-#ifdef PIC
-# define MO(op) op##@GOTOFF(%ecx)
-#else
-# define MO(op) op
-#endif
-
- .text
-/* 10^x = 2^(x * log2(10)) */
-ENTRY(__ieee754_exp10)
-#ifdef PIC
- LOAD_PIC_REG (cx)
-#endif
- fldl 4(%esp)
-/* I added the following ugly construct because exp(+-Inf) resulted
- in NaN. The ugliness results from the bright minds at Intel.
- For the i686 the code can be written better.
- -- drepper@cygnus.com. */
- fxam /* Is NaN or +-Inf? */
- fstsw %ax
- movb $0x45, %dh
- andb %ah, %dh
- cmpb $0x05, %dh
- je 1f /* Is +-Inf, jump. */
- fldl2t
- fmulp /* x * log2(10) */
- fld %st
- frndint /* int(x * log2(10)) */
- fsubr %st,%st(1) /* fract(x * log2(10)) */
- fxch
- f2xm1 /* 2^(fract(x * log2(10))) - 1 */
- fld1
- faddp /* 2^(fract(x * log2(10))) */
- fscale /* e^x */
- fstp %st(1)
- DBL_NARROW_EVAL_UFLOW_NONNEG_NAN
- ret
-
-1: testl $0x200, %eax /* Test sign. */
- jz 2f /* If positive, jump. */
- fstp %st
- fldz /* Set result to 0. */
-2: ret
-END (__ieee754_exp10)
-libm_alias_finite (__ieee754_exp10, __exp10)
new file mode 100644
@@ -0,0 +1,2 @@
+#define EXP10_VERSION GLIBC_2_40
+#include <sysdeps/ieee754/dbl-64/e_exp10.c>
deleted file mode 100644
@@ -1 +0,0 @@
-/* Not needed. */
deleted file mode 100644
@@ -1,8 +0,0 @@
-/* i386 provides an optimized __ieee754_exp10. */
-#ifdef SHARED
-# define NO_COMPAT_NEEDED 1
-# include <math/w_exp10_compat.c>
-#else
-# include <math-type-macros-double.h>
-# include <w_exp10_template.c>
-#endif
@@ -99,7 +99,7 @@ __exp10 (double x)
/* Reduce x: z = x * N / log10(2), k = round(z). */
double_t z = __exp_data.invlog10_2N * x;
- double_t kd;
+ double kd;
int64_t ki;
#if TOINT_INTRINSICS
kd = roundtoint (z);
@@ -147,7 +147,10 @@ __exp10 (double x)
strong_alias (__exp10, __ieee754_exp10)
libm_alias_finite (__ieee754_exp10, __exp10)
#if LIBM_SVID_COMPAT
-versioned_symbol (libm, __exp10, exp10, GLIBC_2_39);
+# ifndef EXP10_VERSION
+# define EXP10_VERSION GLIBC_2_39
+# endif
+versioned_symbol (libm, __exp10, exp10, EXP10_VERSION);
libm_alias_double_other (__exp10, exp10)
#else
libm_alias_double (__exp10, exp10)
@@ -1181,5 +1181,6 @@ GLIBC_2.35 fsqrt F
GLIBC_2.35 fsqrtl F
GLIBC_2.35 hypot F
GLIBC_2.35 hypotf F
+GLIBC_2.40 exp10 F
GLIBC_2.40 fmod F
GLIBC_2.40 fmodf F
@@ -1188,5 +1188,6 @@ GLIBC_2.35 fsqrt F
GLIBC_2.35 fsqrtl F
GLIBC_2.35 hypot F
GLIBC_2.35 hypotf F
+GLIBC_2.40 exp10 F
GLIBC_2.40 fmod F
GLIBC_2.40 fmodf F