[16/17] x86_64: Add exp10m1f with FMA
Commit Message
The CORE-MATH exp10m1f implementation showed slightly worse latency
when using the x86_64 baseline ABI. This patch adds an ifunc variant
with similar performance for x86_64-v3.
---
sysdeps/ieee754/flt-32/s_exp10m1f.c | 2 ++
sysdeps/x86_64/fpu/multiarch/Makefile | 2 ++
sysdeps/x86_64/fpu/multiarch/s_exp10m1f-fma.c | 4 +++
sysdeps/x86_64/fpu/multiarch/s_exp10m1f.c | 33 +++++++++++++++++++
4 files changed, 41 insertions(+)
create mode 100644 sysdeps/x86_64/fpu/multiarch/s_exp10m1f-fma.c
create mode 100644 sysdeps/x86_64/fpu/multiarch/s_exp10m1f.c
Comments
On Fri, Oct 25, 2024 at 1:31 PM Adhemerval Zanella
<adhemerval.zanella@linaro.org> wrote:
>
> The CORE-MATH exp10m1f implementation showed slightly worse latency
> when using the x86_64 baseline ABI. This patch adds an ifunc variant
> with similar performance for x86_64-v3.
> ---
> sysdeps/ieee754/flt-32/s_exp10m1f.c | 2 ++
> sysdeps/x86_64/fpu/multiarch/Makefile | 2 ++
> sysdeps/x86_64/fpu/multiarch/s_exp10m1f-fma.c | 4 +++
> sysdeps/x86_64/fpu/multiarch/s_exp10m1f.c | 33 +++++++++++++++++++
> 4 files changed, 41 insertions(+)
> create mode 100644 sysdeps/x86_64/fpu/multiarch/s_exp10m1f-fma.c
> create mode 100644 sysdeps/x86_64/fpu/multiarch/s_exp10m1f.c
>
> diff --git a/sysdeps/ieee754/flt-32/s_exp10m1f.c b/sysdeps/ieee754/flt-32/s_exp10m1f.c
> index 610f269b01..c918c905e8 100644
> --- a/sysdeps/ieee754/flt-32/s_exp10m1f.c
> +++ b/sysdeps/ieee754/flt-32/s_exp10m1f.c
> @@ -222,4 +222,6 @@ __exp10m1f (float x)
> return (s - 1.0) + w * c0;
> }
> }
> +#ifndef __exp10m1f
> libm_alias_float (__exp10m1, exp10m1)
> +#endif
> diff --git a/sysdeps/x86_64/fpu/multiarch/Makefile b/sysdeps/x86_64/fpu/multiarch/Makefile
> index cbe09d49f4..dcff4df2f1 100644
> --- a/sysdeps/x86_64/fpu/multiarch/Makefile
> +++ b/sysdeps/x86_64/fpu/multiarch/Makefile
> @@ -11,6 +11,7 @@ CFLAGS-s_log1p-fma.c = -mfma -mavx2
> CFLAGS-s_sin-fma.c = -mfma -mavx2
> CFLAGS-s_tan-fma.c = -mfma -mavx2
> CFLAGS-s_sincos-fma.c = -mfma -mavx2
> +CFLAGS-s_exp10m1f-fma.c = -mfma -mavx2
>
> CFLAGS-e_exp2f-fma.c = -mfma -mavx2
> CFLAGS-e_expf-fma.c = -mfma -mavx2
> @@ -72,6 +73,7 @@ libm-sysdep_routines += \
> s_ceilf-sse4_1 \
> s_cosf-fma \
> s_cosf-sse2 \
> + s_exp10m1f-fma \
> s_expm1-fma \
> s_floor-sse4_1 \
> s_floorf-sse4_1 \
> diff --git a/sysdeps/x86_64/fpu/multiarch/s_exp10m1f-fma.c b/sysdeps/x86_64/fpu/multiarch/s_exp10m1f-fma.c
> new file mode 100644
> index 0000000000..3dda04e2dd
> --- /dev/null
> +++ b/sysdeps/x86_64/fpu/multiarch/s_exp10m1f-fma.c
> @@ -0,0 +1,4 @@
> +#define __exp10m1f __exp10m1f_fma
> +#define SECTION __attribute__ ((section (".text.fma")))
> +
> +#include <sysdeps/ieee754/flt-32/s_exp10m1f.c>
> diff --git a/sysdeps/x86_64/fpu/multiarch/s_exp10m1f.c b/sysdeps/x86_64/fpu/multiarch/s_exp10m1f.c
> new file mode 100644
> index 0000000000..8040b7ed79
> --- /dev/null
> +++ b/sysdeps/x86_64/fpu/multiarch/s_exp10m1f.c
> @@ -0,0 +1,33 @@
> +/* Multiple versions of exp10m1f.
> + Copyright (C) 2024 Free Software Foundation, Inc.
> + This file is part of the GNU C Library.
> +
> + The GNU C Library is free software; you can redistribute it and/or
> + modify it under the terms of the GNU Lesser General Public
> + License as published by the Free Software Foundation; either
> + version 2.1 of the License, or (at your option) any later version.
> +
> + The GNU C Library is distributed in the hope that it will be useful,
> + but WITHOUT ANY WARRANTY; without even the implied warranty of
> + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> + Lesser General Public License for more details.
> +
> + You should have received a copy of the GNU Lesser General Public
> + License along with the GNU C Library; if not, see
> + <https://www.gnu.org/licenses/>. */
> +
> +#include <sysdeps/x86/isa-level.h>
> +#if MINIMUM_X86_ISA_LEVEL < AVX2_X86_ISA_LEVEL
> +# include <libm-alias-float.h>
> +
> +extern float __redirect_exp10m1f (float);
> +
> +# define SYMBOL_NAME exp10m1f
> +# include "ifunc-fma.h"
> +
> +libc_ifunc_redirected (__redirect_exp10m1f, __exp10m1f, IFUNC_SELECTOR ());
> +libm_alias_float (__exp10m1, exp10m1)
> +
> +# define __exp10m1f __exp10m1f_sse2
> +#endif
> +#include <sysdeps/ieee754/flt-32/s_exp10m1f.c>
> --
> 2.43.0
>
LGTM
Reviewed-by: Noah Goldstein <goldstein.w.n@gmail.com>
LGTM
Reviewed-by: DJ Delorie <dj@redhat.com>
Adhemerval Zanella <adhemerval.zanella@linaro.org> writes:
> diff --git a/sysdeps/ieee754/flt-32/s_exp10m1f.c b/sysdeps/ieee754/flt-32/s_exp10m1f.c
> +#ifndef __exp10m1f
> libm_alias_float (__exp10m1, exp10m1)
> +#endif
Ok.
> diff --git a/sysdeps/x86_64/fpu/multiarch/Makefile b/sysdeps/x86_64/fpu/multiarch/Makefile
> CFLAGS-s_sin-fma.c = -mfma -mavx2
> CFLAGS-s_tan-fma.c = -mfma -mavx2
> CFLAGS-s_sincos-fma.c = -mfma -mavx2
> +CFLAGS-s_exp10m1f-fma.c = -mfma -mavx2
Ok.
> s_ceilf-sse4_1 \
> s_cosf-fma \
> s_cosf-sse2 \
> + s_exp10m1f-fma \
Ok.
> diff --git a/sysdeps/x86_64/fpu/multiarch/s_exp10m1f-fma.c b/sysdeps/x86_64/fpu/multiarch/s_exp10m1f-fma.c
> new file mode 100644
> +#define __exp10m1f __exp10m1f_fma
> +#define SECTION __attribute__ ((section (".text.fma")))
> +
> +#include <sysdeps/ieee754/flt-32/s_exp10m1f.c>
Ok.
> diff --git a/sysdeps/x86_64/fpu/multiarch/s_exp10m1f.c b/sysdeps/x86_64/fpu/multiarch/s_exp10m1f.c
> new file mode 100644
> +/* Multiple versions of exp10m1f.
> + Copyright (C) 2024 Free Software Foundation, Inc.
> + This file is part of the GNU C Library.
> +
> + The GNU C Library is free software; you can redistribute it and/or
> + modify it under the terms of the GNU Lesser General Public
> + License as published by the Free Software Foundation; either
> + version 2.1 of the License, or (at your option) any later version.
> +
> + The GNU C Library is distributed in the hope that it will be useful,
> + but WITHOUT ANY WARRANTY; without even the implied warranty of
> + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> + Lesser General Public License for more details.
> +
> + You should have received a copy of the GNU Lesser General Public
> + License along with the GNU C Library; if not, see
> + <https://www.gnu.org/licenses/>. */
> +
> +#include <sysdeps/x86/isa-level.h>
> +#if MINIMUM_X86_ISA_LEVEL < AVX2_X86_ISA_LEVEL
> +# include <libm-alias-float.h>
> +
> +extern float __redirect_exp10m1f (float);
> +
> +# define SYMBOL_NAME exp10m1f
> +# include "ifunc-fma.h"
> +
> +libc_ifunc_redirected (__redirect_exp10m1f, __exp10m1f, IFUNC_SELECTOR ());
> +libm_alias_float (__exp10m1, exp10m1)
> +
> +# define __exp10m1f __exp10m1f_sse2
> +#endif
> +#include <sysdeps/ieee754/flt-32/s_exp10m1f.c>
Ok.
@@ -222,4 +222,6 @@ __exp10m1f (float x)
return (s - 1.0) + w * c0;
}
}
+#ifndef __exp10m1f
libm_alias_float (__exp10m1, exp10m1)
+#endif
@@ -11,6 +11,7 @@ CFLAGS-s_log1p-fma.c = -mfma -mavx2
CFLAGS-s_sin-fma.c = -mfma -mavx2
CFLAGS-s_tan-fma.c = -mfma -mavx2
CFLAGS-s_sincos-fma.c = -mfma -mavx2
+CFLAGS-s_exp10m1f-fma.c = -mfma -mavx2
CFLAGS-e_exp2f-fma.c = -mfma -mavx2
CFLAGS-e_expf-fma.c = -mfma -mavx2
@@ -72,6 +73,7 @@ libm-sysdep_routines += \
s_ceilf-sse4_1 \
s_cosf-fma \
s_cosf-sse2 \
+ s_exp10m1f-fma \
s_expm1-fma \
s_floor-sse4_1 \
s_floorf-sse4_1 \
new file mode 100644
@@ -0,0 +1,4 @@
+#define __exp10m1f __exp10m1f_fma
+#define SECTION __attribute__ ((section (".text.fma")))
+
+#include <sysdeps/ieee754/flt-32/s_exp10m1f.c>
new file mode 100644
@@ -0,0 +1,33 @@
+/* Multiple versions of exp10m1f.
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <sysdeps/x86/isa-level.h>
+#if MINIMUM_X86_ISA_LEVEL < AVX2_X86_ISA_LEVEL
+# include <libm-alias-float.h>
+
+extern float __redirect_exp10m1f (float);
+
+# define SYMBOL_NAME exp10m1f
+# include "ifunc-fma.h"
+
+libc_ifunc_redirected (__redirect_exp10m1f, __exp10m1f, IFUNC_SELECTOR ());
+libm_alias_float (__exp10m1, exp10m1)
+
+# define __exp10m1f __exp10m1f_sse2
+#endif
+#include <sysdeps/ieee754/flt-32/s_exp10m1f.c>