x86_64: Add tanh with FMA
Checks
Context |
Check |
Description |
redhat-pt-bot/TryBot-apply_patch |
success
|
Patch applied to master at the time it was sent
|
linaro-tcwg-bot/tcwg_glibc_build--master-aarch64 |
success
|
Build passed
|
linaro-tcwg-bot/tcwg_glibc_build--master-arm |
success
|
Build passed
|
redhat-pt-bot/TryBot-32bit |
success
|
Build for i686
|
linaro-tcwg-bot/tcwg_glibc_check--master-aarch64 |
success
|
Test passed
|
linaro-tcwg-bot/tcwg_glibc_check--master-arm |
success
|
Test passed
|
Commit Message
On Skylake, it improves tanh bench performance by:
Before After Improvement
max 110.89 95.826 14%
min 20.966 20.157 4%
mean 30.9601 29.8431 4%
---
sysdeps/ieee754/dbl-64/s_tanh.c | 5 ++++
sysdeps/x86_64/fpu/multiarch/Makefile | 2 ++
sysdeps/x86_64/fpu/multiarch/s_tanh-fma.c | 11 ++++++++
sysdeps/x86_64/fpu/multiarch/s_tanh.c | 31 +++++++++++++++++++++++
4 files changed, 49 insertions(+)
create mode 100644 sysdeps/x86_64/fpu/multiarch/s_tanh-fma.c
create mode 100644 sysdeps/x86_64/fpu/multiarch/s_tanh.c
Comments
On Wed, Mar 12, 2025 at 2:00 PM Sunil K Pandey <skpgkp2@gmail.com> wrote:
>
> On Skylake, it improves tanh bench performance by:
>
> Before After Improvement
> max 110.89 95.826 14%
> min 20.966 20.157 4%
> mean 30.9601 29.8431 4%
> ---
> sysdeps/ieee754/dbl-64/s_tanh.c | 5 ++++
> sysdeps/x86_64/fpu/multiarch/Makefile | 2 ++
> sysdeps/x86_64/fpu/multiarch/s_tanh-fma.c | 11 ++++++++
> sysdeps/x86_64/fpu/multiarch/s_tanh.c | 31 +++++++++++++++++++++++
> 4 files changed, 49 insertions(+)
> create mode 100644 sysdeps/x86_64/fpu/multiarch/s_tanh-fma.c
> create mode 100644 sysdeps/x86_64/fpu/multiarch/s_tanh.c
>
> diff --git a/sysdeps/ieee754/dbl-64/s_tanh.c b/sysdeps/ieee754/dbl-64/s_tanh.c
> index 673a97102d..13063db04e 100644
> --- a/sysdeps/ieee754/dbl-64/s_tanh.c
> +++ b/sysdeps/ieee754/dbl-64/s_tanh.c
> @@ -46,6 +46,11 @@ static char rcsid[] = "$NetBSD: s_tanh.c,v 1.7 1995/05/10 20:48:22 jtc Exp $";
>
> static const double one = 1.0, two = 2.0, tiny = 1.0e-300;
>
> +#ifndef SECTION
> +# define SECTION
> +#endif
> +
> +SECTION
> double
> __tanh (double x)
> {
> diff --git a/sysdeps/x86_64/fpu/multiarch/Makefile b/sysdeps/x86_64/fpu/multiarch/Makefile
> index e823d2fcc6..a198315f1c 100644
> --- a/sysdeps/x86_64/fpu/multiarch/Makefile
> +++ b/sysdeps/x86_64/fpu/multiarch/Makefile
> @@ -10,6 +10,7 @@ CFLAGS-s_expm1-fma.c = -mfma -mavx2
> CFLAGS-s_log1p-fma.c = -mfma -mavx2
> CFLAGS-s_sin-fma.c = -mfma -mavx2
> CFLAGS-s_tan-fma.c = -mfma -mavx2
> +CFLAGS-s_tanh-fma.c = -mfma -mavx2
> CFLAGS-s_sincos-fma.c = -mfma -mavx2
> CFLAGS-s_exp10m1f-fma.c = -mfma -mavx2
> CFLAGS-s_exp2m1f-fma.c = -mfma -mavx2
> @@ -96,6 +97,7 @@ libm-sysdep_routines += \
> s_sinf-sse2 \
> s_tan-avx \
> s_tan-fma \
> + s_tanh-fma \
> s_trunc-sse4_1 \
> s_truncf-sse4_1 \
> # libm-sysdep_routines
> diff --git a/sysdeps/x86_64/fpu/multiarch/s_tanh-fma.c b/sysdeps/x86_64/fpu/multiarch/s_tanh-fma.c
> new file mode 100644
> index 0000000000..1b808b1227
> --- /dev/null
> +++ b/sysdeps/x86_64/fpu/multiarch/s_tanh-fma.c
> @@ -0,0 +1,11 @@
> +#define __tanh __tanh_fma
> +#define __expm1 __expm1_fma
> +
> +/* NB: __expm1 may be expanded to __expm1_fma in the following
> + prototypes. */
> +extern long double __expm1l (long double);
> +extern long double __expm1f128 (long double);
> +
> +#define SECTION __attribute__ ((section (".text.fma")))
> +
> +#include <sysdeps/ieee754/dbl-64/s_tanh.c>
> diff --git a/sysdeps/x86_64/fpu/multiarch/s_tanh.c b/sysdeps/x86_64/fpu/multiarch/s_tanh.c
> new file mode 100644
> index 0000000000..5539b6c61c
> --- /dev/null
> +++ b/sysdeps/x86_64/fpu/multiarch/s_tanh.c
> @@ -0,0 +1,31 @@
> +/* Multiple versions of tanh.
> + Copyright (C) 2025 Free Software Foundation, Inc.
> + This file is part of the GNU C Library.
> +
> + The GNU C Library is free software; you can redistribute it and/or
> + modify it under the terms of the GNU Lesser General Public
> + License as published by the Free Software Foundation; either
> + version 2.1 of the License, or (at your option) any later version.
> +
> + The GNU C Library is distributed in the hope that it will be useful,
> + but WITHOUT ANY WARRANTY; without even the implied warranty of
> + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> + Lesser General Public License for more details.
> +
> + You should have received a copy of the GNU Lesser General Public
> + License along with the GNU C Library; if not, see
> + <https://www.gnu.org/licenses/>. */
> +
> +#include <sysdeps/x86/isa-level.h>
> +#if MINIMUM_X86_ISA_LEVEL < AVX2_X86_ISA_LEVEL
> +
> +extern double __redirect_tanh (double);
> +
> +# define SYMBOL_NAME tanh
> +# include "ifunc-fma.h"
> +
> +libc_ifunc_redirected (__redirect_tanh, __tanh, IFUNC_SELECTOR ());
> +
> +# define __tanh __tanh_sse2
> +#endif
> +#include <sysdeps/ieee754/dbl-64/s_tanh.c>
> --
> 2.48.1
>
LGTM.
Reviewed-by: H.J. Lu <hjl.tools@gmail.com>
Thanks.
On Wed, Mar 12, 2025 at 3:02 PM H.J. Lu <hjl.tools@gmail.com> wrote:
> On Wed, Mar 12, 2025 at 2:00 PM Sunil K Pandey <skpgkp2@gmail.com> wrote:
> >
> > On Skylake, it improves tanh bench performance by:
> >
> > Before After Improvement
> > max 110.89 95.826 14%
> > min 20.966 20.157 4%
> > mean 30.9601 29.8431 4%
> > ---
> > sysdeps/ieee754/dbl-64/s_tanh.c | 5 ++++
> > sysdeps/x86_64/fpu/multiarch/Makefile | 2 ++
> > sysdeps/x86_64/fpu/multiarch/s_tanh-fma.c | 11 ++++++++
> > sysdeps/x86_64/fpu/multiarch/s_tanh.c | 31 +++++++++++++++++++++++
> > 4 files changed, 49 insertions(+)
> > create mode 100644 sysdeps/x86_64/fpu/multiarch/s_tanh-fma.c
> > create mode 100644 sysdeps/x86_64/fpu/multiarch/s_tanh.c
> >
> > diff --git a/sysdeps/ieee754/dbl-64/s_tanh.c
> b/sysdeps/ieee754/dbl-64/s_tanh.c
> > index 673a97102d..13063db04e 100644
> > --- a/sysdeps/ieee754/dbl-64/s_tanh.c
> > +++ b/sysdeps/ieee754/dbl-64/s_tanh.c
> > @@ -46,6 +46,11 @@ static char rcsid[] = "$NetBSD: s_tanh.c,v 1.7
> 1995/05/10 20:48:22 jtc Exp $";
> >
> > static const double one = 1.0, two = 2.0, tiny = 1.0e-300;
> >
> > +#ifndef SECTION
> > +# define SECTION
> > +#endif
> > +
> > +SECTION
> > double
> > __tanh (double x)
> > {
> > diff --git a/sysdeps/x86_64/fpu/multiarch/Makefile
> b/sysdeps/x86_64/fpu/multiarch/Makefile
> > index e823d2fcc6..a198315f1c 100644
> > --- a/sysdeps/x86_64/fpu/multiarch/Makefile
> > +++ b/sysdeps/x86_64/fpu/multiarch/Makefile
> > @@ -10,6 +10,7 @@ CFLAGS-s_expm1-fma.c = -mfma -mavx2
> > CFLAGS-s_log1p-fma.c = -mfma -mavx2
> > CFLAGS-s_sin-fma.c = -mfma -mavx2
> > CFLAGS-s_tan-fma.c = -mfma -mavx2
> > +CFLAGS-s_tanh-fma.c = -mfma -mavx2
> > CFLAGS-s_sincos-fma.c = -mfma -mavx2
> > CFLAGS-s_exp10m1f-fma.c = -mfma -mavx2
> > CFLAGS-s_exp2m1f-fma.c = -mfma -mavx2
> > @@ -96,6 +97,7 @@ libm-sysdep_routines += \
> > s_sinf-sse2 \
> > s_tan-avx \
> > s_tan-fma \
> > + s_tanh-fma \
> > s_trunc-sse4_1 \
> > s_truncf-sse4_1 \
> > # libm-sysdep_routines
> > diff --git a/sysdeps/x86_64/fpu/multiarch/s_tanh-fma.c
> b/sysdeps/x86_64/fpu/multiarch/s_tanh-fma.c
> > new file mode 100644
> > index 0000000000..1b808b1227
> > --- /dev/null
> > +++ b/sysdeps/x86_64/fpu/multiarch/s_tanh-fma.c
> > @@ -0,0 +1,11 @@
> > +#define __tanh __tanh_fma
> > +#define __expm1 __expm1_fma
> > +
> > +/* NB: __expm1 may be expanded to __expm1_fma in the following
> > + prototypes. */
> > +extern long double __expm1l (long double);
> > +extern long double __expm1f128 (long double);
> > +
> > +#define SECTION __attribute__ ((section (".text.fma")))
> > +
> > +#include <sysdeps/ieee754/dbl-64/s_tanh.c>
> > diff --git a/sysdeps/x86_64/fpu/multiarch/s_tanh.c
> b/sysdeps/x86_64/fpu/multiarch/s_tanh.c
> > new file mode 100644
> > index 0000000000..5539b6c61c
> > --- /dev/null
> > +++ b/sysdeps/x86_64/fpu/multiarch/s_tanh.c
> > @@ -0,0 +1,31 @@
> > +/* Multiple versions of tanh.
> > + Copyright (C) 2025 Free Software Foundation, Inc.
> > + This file is part of the GNU C Library.
> > +
> > + The GNU C Library is free software; you can redistribute it and/or
> > + modify it under the terms of the GNU Lesser General Public
> > + License as published by the Free Software Foundation; either
> > + version 2.1 of the License, or (at your option) any later version.
> > +
> > + The GNU C Library is distributed in the hope that it will be useful,
> > + but WITHOUT ANY WARRANTY; without even the implied warranty of
> > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> > + Lesser General Public License for more details.
> > +
> > + You should have received a copy of the GNU Lesser General Public
> > + License along with the GNU C Library; if not, see
> > + <https://www.gnu.org/licenses/>. */
> > +
> > +#include <sysdeps/x86/isa-level.h>
> > +#if MINIMUM_X86_ISA_LEVEL < AVX2_X86_ISA_LEVEL
> > +
> > +extern double __redirect_tanh (double);
> > +
> > +# define SYMBOL_NAME tanh
> > +# include "ifunc-fma.h"
> > +
> > +libc_ifunc_redirected (__redirect_tanh, __tanh, IFUNC_SELECTOR ());
> > +
> > +# define __tanh __tanh_sse2
> > +#endif
> > +#include <sysdeps/ieee754/dbl-64/s_tanh.c>
> > --
> > 2.48.1
> >
>
> LGTM.
>
> Reviewed-by: H.J. Lu <hjl.tools@gmail.com>
>
> Thanks.
>
> --
> H.J.
>
I would like to backport this patch to release branches.
Any comments or objections?
--Sunil
On Mon, Mar 17, 2025 at 8:49 AM Sunil Pandey <skpgkp2@gmail.com> wrote:
>
>
> On Wed, Mar 12, 2025 at 3:02 PM H.J. Lu <hjl.tools@gmail.com> wrote:
>
>> On Wed, Mar 12, 2025 at 2:00 PM Sunil K Pandey <skpgkp2@gmail.com> wrote:
>> >
>> > On Skylake, it improves tanh bench performance by:
>> >
>> > Before After Improvement
>> > max 110.89 95.826 14%
>> > min 20.966 20.157 4%
>> > mean 30.9601 29.8431 4%
>> > ---
>> > sysdeps/ieee754/dbl-64/s_tanh.c | 5 ++++
>> > sysdeps/x86_64/fpu/multiarch/Makefile | 2 ++
>> > sysdeps/x86_64/fpu/multiarch/s_tanh-fma.c | 11 ++++++++
>> > sysdeps/x86_64/fpu/multiarch/s_tanh.c | 31 +++++++++++++++++++++++
>> > 4 files changed, 49 insertions(+)
>> > create mode 100644 sysdeps/x86_64/fpu/multiarch/s_tanh-fma.c
>> > create mode 100644 sysdeps/x86_64/fpu/multiarch/s_tanh.c
>> >
>> > diff --git a/sysdeps/ieee754/dbl-64/s_tanh.c
>> b/sysdeps/ieee754/dbl-64/s_tanh.c
>> > index 673a97102d..13063db04e 100644
>> > --- a/sysdeps/ieee754/dbl-64/s_tanh.c
>> > +++ b/sysdeps/ieee754/dbl-64/s_tanh.c
>> > @@ -46,6 +46,11 @@ static char rcsid[] = "$NetBSD: s_tanh.c,v 1.7
>> 1995/05/10 20:48:22 jtc Exp $";
>> >
>> > static const double one = 1.0, two = 2.0, tiny = 1.0e-300;
>> >
>> > +#ifndef SECTION
>> > +# define SECTION
>> > +#endif
>> > +
>> > +SECTION
>> > double
>> > __tanh (double x)
>> > {
>> > diff --git a/sysdeps/x86_64/fpu/multiarch/Makefile
>> b/sysdeps/x86_64/fpu/multiarch/Makefile
>> > index e823d2fcc6..a198315f1c 100644
>> > --- a/sysdeps/x86_64/fpu/multiarch/Makefile
>> > +++ b/sysdeps/x86_64/fpu/multiarch/Makefile
>> > @@ -10,6 +10,7 @@ CFLAGS-s_expm1-fma.c = -mfma -mavx2
>> > CFLAGS-s_log1p-fma.c = -mfma -mavx2
>> > CFLAGS-s_sin-fma.c = -mfma -mavx2
>> > CFLAGS-s_tan-fma.c = -mfma -mavx2
>> > +CFLAGS-s_tanh-fma.c = -mfma -mavx2
>> > CFLAGS-s_sincos-fma.c = -mfma -mavx2
>> > CFLAGS-s_exp10m1f-fma.c = -mfma -mavx2
>> > CFLAGS-s_exp2m1f-fma.c = -mfma -mavx2
>> > @@ -96,6 +97,7 @@ libm-sysdep_routines += \
>> > s_sinf-sse2 \
>> > s_tan-avx \
>> > s_tan-fma \
>> > + s_tanh-fma \
>> > s_trunc-sse4_1 \
>> > s_truncf-sse4_1 \
>> > # libm-sysdep_routines
>> > diff --git a/sysdeps/x86_64/fpu/multiarch/s_tanh-fma.c
>> b/sysdeps/x86_64/fpu/multiarch/s_tanh-fma.c
>> > new file mode 100644
>> > index 0000000000..1b808b1227
>> > --- /dev/null
>> > +++ b/sysdeps/x86_64/fpu/multiarch/s_tanh-fma.c
>> > @@ -0,0 +1,11 @@
>> > +#define __tanh __tanh_fma
>> > +#define __expm1 __expm1_fma
>> > +
>> > +/* NB: __expm1 may be expanded to __expm1_fma in the following
>> > + prototypes. */
>> > +extern long double __expm1l (long double);
>> > +extern long double __expm1f128 (long double);
>> > +
>> > +#define SECTION __attribute__ ((section (".text.fma")))
>> > +
>> > +#include <sysdeps/ieee754/dbl-64/s_tanh.c>
>> > diff --git a/sysdeps/x86_64/fpu/multiarch/s_tanh.c
>> b/sysdeps/x86_64/fpu/multiarch/s_tanh.c
>> > new file mode 100644
>> > index 0000000000..5539b6c61c
>> > --- /dev/null
>> > +++ b/sysdeps/x86_64/fpu/multiarch/s_tanh.c
>> > @@ -0,0 +1,31 @@
>> > +/* Multiple versions of tanh.
>> > + Copyright (C) 2025 Free Software Foundation, Inc.
>> > + This file is part of the GNU C Library.
>> > +
>> > + The GNU C Library is free software; you can redistribute it and/or
>> > + modify it under the terms of the GNU Lesser General Public
>> > + License as published by the Free Software Foundation; either
>> > + version 2.1 of the License, or (at your option) any later version.
>> > +
>> > + The GNU C Library is distributed in the hope that it will be useful,
>> > + but WITHOUT ANY WARRANTY; without even the implied warranty of
>> > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
>> > + Lesser General Public License for more details.
>> > +
>> > + You should have received a copy of the GNU Lesser General Public
>> > + License along with the GNU C Library; if not, see
>> > + <https://www.gnu.org/licenses/>. */
>> > +
>> > +#include <sysdeps/x86/isa-level.h>
>> > +#if MINIMUM_X86_ISA_LEVEL < AVX2_X86_ISA_LEVEL
>> > +
>> > +extern double __redirect_tanh (double);
>> > +
>> > +# define SYMBOL_NAME tanh
>> > +# include "ifunc-fma.h"
>> > +
>> > +libc_ifunc_redirected (__redirect_tanh, __tanh, IFUNC_SELECTOR ());
>> > +
>> > +# define __tanh __tanh_sse2
>> > +#endif
>> > +#include <sysdeps/ieee754/dbl-64/s_tanh.c>
>> > --
>> > 2.48.1
>> >
>>
>> LGTM.
>>
>> Reviewed-by: H.J. Lu <hjl.tools@gmail.com>
>>
>> Thanks.
>>
>> --
>> H.J.
>
>
I would like to backport this patch to release branches.
Any comments or objections?
--Sunil
@@ -46,6 +46,11 @@ static char rcsid[] = "$NetBSD: s_tanh.c,v 1.7 1995/05/10 20:48:22 jtc Exp $";
static const double one = 1.0, two = 2.0, tiny = 1.0e-300;
+#ifndef SECTION
+# define SECTION
+#endif
+
+SECTION
double
__tanh (double x)
{
@@ -10,6 +10,7 @@ CFLAGS-s_expm1-fma.c = -mfma -mavx2
CFLAGS-s_log1p-fma.c = -mfma -mavx2
CFLAGS-s_sin-fma.c = -mfma -mavx2
CFLAGS-s_tan-fma.c = -mfma -mavx2
+CFLAGS-s_tanh-fma.c = -mfma -mavx2
CFLAGS-s_sincos-fma.c = -mfma -mavx2
CFLAGS-s_exp10m1f-fma.c = -mfma -mavx2
CFLAGS-s_exp2m1f-fma.c = -mfma -mavx2
@@ -96,6 +97,7 @@ libm-sysdep_routines += \
s_sinf-sse2 \
s_tan-avx \
s_tan-fma \
+ s_tanh-fma \
s_trunc-sse4_1 \
s_truncf-sse4_1 \
# libm-sysdep_routines
new file mode 100644
@@ -0,0 +1,11 @@
+#define __tanh __tanh_fma
+#define __expm1 __expm1_fma
+
+/* NB: __expm1 may be expanded to __expm1_fma in the following
+ prototypes. */
+extern long double __expm1l (long double);
+extern long double __expm1f128 (long double);
+
+#define SECTION __attribute__ ((section (".text.fma")))
+
+#include <sysdeps/ieee754/dbl-64/s_tanh.c>
new file mode 100644
@@ -0,0 +1,31 @@
+/* Multiple versions of tanh.
+ Copyright (C) 2025 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <sysdeps/x86/isa-level.h>
+#if MINIMUM_X86_ISA_LEVEL < AVX2_X86_ISA_LEVEL
+
+extern double __redirect_tanh (double);
+
+# define SYMBOL_NAME tanh
+# include "ifunc-fma.h"
+
+libc_ifunc_redirected (__redirect_tanh, __tanh, IFUNC_SELECTOR ());
+
+# define __tanh __tanh_sse2
+#endif
+#include <sysdeps/ieee754/dbl-64/s_tanh.c>